Diffstat (limited to 'src')
133 files changed, 5118 insertions, 2427 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 312a49f42..5e3a74c0f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -113,6 +113,9 @@ else()

 $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init>
 $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field>
+$<$<CXX_COMPILER_ID:Clang>:-Werror=shadow-uncaptured-local>
+$<$<CXX_COMPILER_ID:Clang>:-Werror=implicit-fallthrough>
+$<$<CXX_COMPILER_ID:Clang>:-Werror=type-limits>
 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init>
 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field>
 )
diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp
index ad869facb..53b258c4f 100644
--- a/src/audio_core/renderer/system.cpp
+++ b/src/audio_core/renderer/system.cpp
@@ -436,10 +436,7 @@ void System::Stop() {
 }

 if (execution_mode == ExecutionMode::Auto) {
-// Should wait for the system to terminate here, but core timing (should have) already
-// stopped, so this isn't needed. Find a way to make this definite.
-
-// terminate_event.Wait();
+terminate_event.Wait();
 }
 }

diff --git a/src/audio_core/sink/sdl2_sink.cpp b/src/audio_core/sink/sdl2_sink.cpp
index ee1a0652f..c1529d1f9 100644
--- a/src/audio_core/sink/sdl2_sink.cpp
+++ b/src/audio_core/sink/sdl2_sink.cpp
@@ -3,6 +3,7 @@

 #include <span>
 #include <vector>
+#include <SDL.h>

 #include "audio_core/common/common.h"
 #include "audio_core/sink/sdl2_sink.h"
@@ -10,16 +11,6 @@
 #include "common/logging/log.h"
 #include "core/core.h"

-// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
-#endif
-#include <SDL.h>
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-
 namespace AudioCore::Sink {
 /**
 * SDL sink stream, responsible for sinking samples to hardware.
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 2195dabd5..1ee82df53 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -72,7 +72,7 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf
 }
 }()};

-if (block_end_predecessor->virt >= virt) {
+if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
 // If this block's start would be overlapped by the map then reuse it as a tail
 // block
 block_end_predecessor->virt = virt_end;
@@ -336,7 +336,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) {
 ASSERT_MSG(false, "Unexpected allocator state!");
 }

-auto search_predecessor{this->blocks.begin()};
+auto search_predecessor{std::next(this->blocks.begin())};
 auto search_successor{std::next(search_predecessor)};

 while (search_successor != this->blocks.end() &&
diff --git a/src/common/input.h b/src/common/input.h
index 51b277c1f..66fb15f0a 100644
--- a/src/common/input.h
+++ b/src/common/input.h
@@ -111,6 +111,8 @@ struct AnalogProperties {
 float offset{};
 // Invert direction of the sensor data
 bool inverted{};
+// Invert the state if it's converted to a button
+bool inverted_button{};
 // Press once to activate, press again to release
 bool toggle{};
 };
diff --git a/src/common/intrusive_list.h b/src/common/intrusive_list.h
new file mode 100644
index 000000000..d330dc1c2
--- /dev/null
+++ b/src/common/intrusive_list.h
@@ -0,0 +1,631 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/parent_of_member.h"
+
+namespace Common {
+
+// Forward declare implementation class for Node.
+namespace impl {
+
+class IntrusiveListImpl;
+
+}
+
+class IntrusiveListNode {
+YUZU_NON_COPYABLE(IntrusiveListNode);
+
+private:
+friend class impl::IntrusiveListImpl;
+
+IntrusiveListNode* m_prev;
+IntrusiveListNode* m_next;
+
+public:
+constexpr IntrusiveListNode() : m_prev(this), m_next(this) {}
+
+constexpr bool IsLinked() const {
+return m_next != this;
+}
+
+private:
+constexpr void LinkPrev(IntrusiveListNode* node) {
+// We can't link an already linked node.
+ASSERT(!node->IsLinked());
+this->SplicePrev(node, node);
+}
+
+constexpr void SplicePrev(IntrusiveListNode* first, IntrusiveListNode* last) {
+// Splice a range into the list.
+auto last_prev = last->m_prev;
+first->m_prev = m_prev;
+last_prev->m_next = this;
+m_prev->m_next = first;
+m_prev = last_prev;
+}
+
+constexpr void LinkNext(IntrusiveListNode* node) {
+// We can't link an already linked node.
+ASSERT(!node->IsLinked());
+return this->SpliceNext(node, node);
+}
+
+constexpr void SpliceNext(IntrusiveListNode* first, IntrusiveListNode* last) {
+// Splice a range into the list.
+auto last_prev = last->m_prev;
+first->m_prev = this;
+last_prev->m_next = m_next;
+m_next->m_prev = last_prev;
+m_next = first;
+}
+
+constexpr void Unlink() {
+this->Unlink(m_next);
+}
+
+constexpr void Unlink(IntrusiveListNode* last) {
+// Unlink a node from a next node.
+auto last_prev = last->m_prev;
+m_prev->m_next = last;
+last->m_prev = m_prev;
+last_prev->m_next = this;
+m_prev = last_prev;
+}
+
+constexpr IntrusiveListNode* GetPrev() {
+return m_prev;
+}
+
+constexpr const IntrusiveListNode* GetPrev() const {
+return m_prev;
+}
+
+constexpr IntrusiveListNode* GetNext() {
+return m_next;
+}
+
+constexpr const IntrusiveListNode* GetNext() const {
+return m_next;
+}
+};
+// DEPRECATED: static_assert(std::is_literal_type<IntrusiveListNode>::value);
+
+namespace impl {
+
+class IntrusiveListImpl {
+YUZU_NON_COPYABLE(IntrusiveListImpl);
+
+private:
+IntrusiveListNode m_root_node;
+
+public:
+template <bool Const>
+class Iterator;
+
+using value_type = IntrusiveListNode;
+using size_type = size_t;
+using difference_type = ptrdiff_t;
+using pointer = value_type*;
+using const_pointer = const value_type*;
+using reference = value_type&;
+using const_reference = const value_type&;
+using iterator = Iterator<false>;
+using const_iterator = Iterator<true>;
+using reverse_iterator = std::reverse_iterator<iterator>;
+using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+template <bool Const>
+class Iterator {
+public:
+using iterator_category = std::bidirectional_iterator_tag;
+using value_type = typename IntrusiveListImpl::value_type;
+using difference_type = typename IntrusiveListImpl::difference_type;
+using pointer =
+std::conditional_t<Const, IntrusiveListImpl::const_pointer, IntrusiveListImpl::pointer>;
+using reference = std::conditional_t<Const, IntrusiveListImpl::const_reference,
+IntrusiveListImpl::reference>;
+
+private:
+pointer m_node;
+
+public:
+constexpr explicit Iterator(pointer n) : m_node(n) {}
+
+constexpr bool operator==(const Iterator& rhs) const {
+return m_node == rhs.m_node;
+}
+
+constexpr pointer operator->() const {
+return m_node;
+}
+
+constexpr reference operator*() const {
+return *m_node;
+}
+
+constexpr Iterator& operator++() {
+m_node = m_node->m_next;
+return *this;
+}
+
+constexpr Iterator& operator--() {
+m_node = m_node->m_prev;
+return *this;
+}
+
+constexpr Iterator operator++(int) {
+const Iterator it{*this};
+++(*this);
+return it;
+}
+
+constexpr Iterator operator--(int) {
+const Iterator it{*this};
+--(*this);
+return it;
+}
+
+constexpr operator Iterator<true>() const {
+return Iterator<true>(m_node);
+}
+
+constexpr Iterator<false> GetNonConstIterator() const {
+return Iterator<false>(const_cast<IntrusiveListImpl::pointer>(m_node));
+}
+};
+
+public:
+constexpr IntrusiveListImpl() : m_root_node() {}
+
+// Iterator accessors.
+constexpr iterator begin() {
+return iterator(m_root_node.GetNext());
+}
+
+constexpr const_iterator begin() const {
+return const_iterator(m_root_node.GetNext());
+}
+
+constexpr iterator end() {
+return iterator(std::addressof(m_root_node));
+}
+
+constexpr const_iterator end() const {
+return const_iterator(std::addressof(m_root_node));
+}
+
+constexpr iterator iterator_to(reference v) {
+// Only allow iterator_to for values in lists.
+ASSERT(v.IsLinked());
+return iterator(std::addressof(v));
+}
+
+constexpr const_iterator iterator_to(const_reference v) const {
+// Only allow iterator_to for values in lists.
+ASSERT(v.IsLinked());
+return const_iterator(std::addressof(v));
+}
+
+// Content management.
+constexpr bool empty() const {
+return !m_root_node.IsLinked();
+}
+
+constexpr size_type size() const {
+return static_cast<size_type>(std::distance(this->begin(), this->end()));
+}
+
+constexpr reference back() {
+return *m_root_node.GetPrev();
+}
+
+constexpr const_reference back() const {
+return *m_root_node.GetPrev();
+}
+
+constexpr reference front() {
+return *m_root_node.GetNext();
+}
+
+constexpr const_reference front() const {
+return *m_root_node.GetNext();
+}
+
+constexpr void push_back(reference node) {
+m_root_node.LinkPrev(std::addressof(node));
+}
+
+constexpr void push_front(reference node) {
+m_root_node.LinkNext(std::addressof(node));
+}
+
+constexpr void pop_back() {
+m_root_node.GetPrev()->Unlink();
+}
+
+constexpr void pop_front() {
+m_root_node.GetNext()->Unlink();
+}
+
+constexpr iterator insert(const_iterator pos, reference node) {
+pos.GetNonConstIterator()->LinkPrev(std::addressof(node));
+return iterator(std::addressof(node));
+}
+
+constexpr void splice(const_iterator pos, IntrusiveListImpl& o) {
+splice_impl(pos, o.begin(), o.end());
+}
+
+constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first) {
+const_iterator last(first);
+std::advance(last, 1);
+splice_impl(pos, first, last);
+}
+
+constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first,
+const_iterator last) {
+splice_impl(pos, first, last);
+}
+
+constexpr iterator erase(const_iterator pos) {
+if (pos == this->end()) {
+return this->end();
+}
+iterator it(pos.GetNonConstIterator());
+(it++)->Unlink();
+return it;
+}
+
+constexpr void clear() {
+while (!this->empty()) {
+this->pop_front();
+}
+}
+
+private:
+constexpr void splice_impl(const_iterator _pos, const_iterator _first, const_iterator _last) {
+if (_first == _last) {
+return;
+}
+iterator pos(_pos.GetNonConstIterator());
+iterator first(_first.GetNonConstIterator());
+iterator last(_last.GetNonConstIterator());
+first->Unlink(std::addressof(*last));
+pos->SplicePrev(std::addressof(*first), std::addressof(*first));
+}
+};
+
+} // namespace impl
+
+template <class T, class Traits>
+class IntrusiveList {
+YUZU_NON_COPYABLE(IntrusiveList);
+
+private:
+impl::IntrusiveListImpl m_impl;
+
+public:
+template <bool Const>
+class Iterator;
+
+using value_type = T;
+using size_type = size_t;
+using difference_type = ptrdiff_t;
+using pointer = value_type*;
+using const_pointer = const value_type*;
+using reference = value_type&;
+using const_reference = const value_type&;
+using iterator = Iterator<false>;
+using const_iterator = Iterator<true>;
+using reverse_iterator = std::reverse_iterator<iterator>;
+using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+template <bool Const>
+class Iterator {
+public:
+friend class Common::IntrusiveList<T, Traits>;
+
+using ImplIterator =
+std::conditional_t<Const, Common::impl::IntrusiveListImpl::const_iterator,
+Common::impl::IntrusiveListImpl::iterator>;
+
+using iterator_category = std::bidirectional_iterator_tag;
+using value_type = typename IntrusiveList::value_type;
+using difference_type = typename IntrusiveList::difference_type;
+using pointer =
+std::conditional_t<Const, IntrusiveList::const_pointer, IntrusiveList::pointer>;
+using reference =
+std::conditional_t<Const, IntrusiveList::const_reference, IntrusiveList::reference>;
+
+private:
+ImplIterator m_iterator;
+
+private:
+constexpr explicit Iterator(ImplIterator it) : m_iterator(it) {}
+
+constexpr ImplIterator GetImplIterator() const {
+return m_iterator;
+}
+
+public:
+constexpr bool operator==(const Iterator& rhs) const {
+return m_iterator == rhs.m_iterator;
+}
+
+constexpr pointer operator->() const {
+return std::addressof(Traits::GetParent(*m_iterator));
+}
+
+constexpr reference operator*() const {
+return Traits::GetParent(*m_iterator);
+}
+
+constexpr Iterator& operator++() {
+++m_iterator;
+return *this;
+}
+
+constexpr Iterator& operator--() {
+--m_iterator;
+return *this;
+}
+
+constexpr Iterator operator++(int) {
+const Iterator it{*this};
+++m_iterator;
+return it;
+}
+
+constexpr Iterator operator--(int) {
+const Iterator it{*this};
+--m_iterator;
+return it;
+}
+
+constexpr operator Iterator<true>() const {
+return Iterator<true>(m_iterator);
+}
+};
+
+private:
+static constexpr IntrusiveListNode& GetNode(reference ref) {
+return Traits::GetNode(ref);
+}
+
+static constexpr IntrusiveListNode const& GetNode(const_reference ref) {
+return Traits::GetNode(ref);
+}
+
+static constexpr reference GetParent(IntrusiveListNode& node) {
+return Traits::GetParent(node);
+}
+
+static constexpr const_reference GetParent(IntrusiveListNode const& node) {
+return Traits::GetParent(node);
+}
+
+public:
+constexpr IntrusiveList() : m_impl() {}
+
+// Iterator accessors.
+constexpr iterator begin() {
+return iterator(m_impl.begin());
+}
+
+constexpr const_iterator begin() const {
+return const_iterator(m_impl.begin());
+}
+
+constexpr iterator end() {
+return iterator(m_impl.end());
+}
+
+constexpr const_iterator end() const {
+return const_iterator(m_impl.end());
+}
+
+constexpr const_iterator cbegin() const {
+return this->begin();
+}
+
+constexpr const_iterator cend() const {
+return this->end();
+}
+
+constexpr reverse_iterator rbegin() {
+return reverse_iterator(this->end());
+}
+
+constexpr const_reverse_iterator rbegin() const {
+return const_reverse_iterator(this->end());
+}
+
+constexpr reverse_iterator rend() {
+return reverse_iterator(this->begin());
+}
+
+constexpr const_reverse_iterator rend() const {
+return const_reverse_iterator(this->begin());
+}
+
+constexpr const_reverse_iterator crbegin() const {
+return this->rbegin();
+}
+
+constexpr const_reverse_iterator crend() const {
+return this->rend();
+}
+
+constexpr iterator iterator_to(reference v) {
+return iterator(m_impl.iterator_to(GetNode(v)));
+}
+
+constexpr const_iterator iterator_to(const_reference v) const {
+return const_iterator(m_impl.iterator_to(GetNode(v)));
+}
+
+// Content management.
+constexpr bool empty() const {
+return m_impl.empty();
+}
+
+constexpr size_type size() const {
+return m_impl.size();
+}
+
+constexpr reference back() {
+return GetParent(m_impl.back());
+}
+
+constexpr const_reference back() const {
+return GetParent(m_impl.back());
+}
+
+constexpr reference front() {
+return GetParent(m_impl.front());
+}
+
+constexpr const_reference front() const {
+return GetParent(m_impl.front());
+}
+
+constexpr void push_back(reference ref) {
+m_impl.push_back(GetNode(ref));
+}
+
+constexpr void push_front(reference ref) {
+m_impl.push_front(GetNode(ref));
+}
+
+constexpr void pop_back() {
+m_impl.pop_back();
+}
+
+constexpr void pop_front() {
+m_impl.pop_front();
+}
+
+constexpr iterator insert(const_iterator pos, reference ref) {
+return iterator(m_impl.insert(pos.GetImplIterator(), GetNode(ref)));
+}
+
+constexpr void splice(const_iterator pos, IntrusiveList& o) {
+m_impl.splice(pos.GetImplIterator(), o.m_impl);
+}
+
+constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first) {
+m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator());
+}
+
+constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first,
+const_iterator last) {
+m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator(),
+last.GetImplIterator());
+}
+
+constexpr iterator erase(const_iterator pos) {
+return iterator(m_impl.erase(pos.GetImplIterator()));
+}
+
+constexpr void clear() {
+m_impl.clear();
+}
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraits;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraits<Member, Derived> {
+public:
+using ListType = IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+private:
+friend class IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+return parent.*Member;
+}
+
+static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+return parent.*Member;
+}
+
+static Derived& GetParent(IntrusiveListNode& node) {
+return Common::GetParentReference<Member, Derived>(std::addressof(node));
+}
+
+static Derived const& GetParent(IntrusiveListNode const& node) {
+return Common::GetParentReference<Member, Derived>(std::addressof(node));
+}
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf<Member, Derived> {
+public:
+using ListType = IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+private:
+friend class IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+return parent.*Member;
+}
+
+static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+return parent.*Member;
+}
+
+static Derived& GetParent(IntrusiveListNode& node) {
+return *reinterpret_cast<Derived*>(reinterpret_cast<char*>(std::addressof(node)) -
+GetOffset());
+}
+
+static Derived const& GetParent(IntrusiveListNode const& node) {
+return *reinterpret_cast<const Derived*>(
+reinterpret_cast<const char*>(std::addressof(node)) - GetOffset());
+}
+
+static uintptr_t GetOffset() {
+return reinterpret_cast<uintptr_t>(std::addressof(reinterpret_cast<Derived*>(0)->*Member));
+}
+};
+
+template <class Derived>
+class IntrusiveListBaseNode : public IntrusiveListNode {};
+
+template <class Derived>
+class IntrusiveListBaseTraits {
+public:
+using ListType = IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+private:
+friend class IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+return static_cast<IntrusiveListNode&>(
+static_cast<IntrusiveListBaseNode<Derived>&>(parent));
+}
+
+static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+return static_cast<const IntrusiveListNode&>(
+static_cast<const IntrusiveListBaseNode<Derived>&>(parent));
+}
+
+static constexpr Derived& GetParent(IntrusiveListNode& node) {
+return static_cast<Derived&>(static_cast<IntrusiveListBaseNode<Derived>&>(node));
+}
+
+static constexpr Derived const& GetParent(IntrusiveListNode const& node) {
+return static_cast<const Derived&>(
+static_cast<const IntrusiveListBaseNode<Derived>&>(node));
+}
+};
+
+} // namespace Common
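A minimal usage sketch of the new container, following the base-node pattern that the kernel headers later in this diff switch to; the Item type and Example function here are illustrative only and not part of this change:

class Item : public Common::IntrusiveListBaseNode<Item> {
public:
    int value{};
};

using ItemList = Common::IntrusiveListBaseTraits<Item>::ListType;

void Example() {
    Item a{};
    Item b{};

    ItemList list;
    list.push_back(a); // links the node embedded in 'a'; the list never owns the object
    list.push_back(b);

    for (Item& item : list) {
        item.value += 1;
    }

    list.erase(list.iterator_to(a)); // unlink 'a' without destroying it
    list.clear();                    // unlink any remaining nodes
}

Because the nodes live inside the objects themselves, insertion and removal never allocate, which is what allows the boost::intrusive dependency to be dropped below.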
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 84955030b..f1ee42ab2 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -45,6 +45,7 @@ void LogSettings() {
 log_setting("System_LanguageIndex", values.language_index.GetValue());
 log_setting("System_RegionIndex", values.region_index.GetValue());
 log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
+log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue());
 log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
 log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
 log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
@@ -60,7 +61,7 @@ void LogSettings() {
 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
-log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
+log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
@@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) {

 // Core
 values.use_multi_core.SetGlobal(true);
-values.use_extended_memory_layout.SetGlobal(true);
+values.use_unsafe_extended_memory_layout.SetGlobal(true);

 // CPU
 values.cpu_accuracy.SetGlobal(true);
@@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) {
 // Renderer
 values.fsr_sharpening_slider.SetGlobal(true);
 values.renderer_backend.SetGlobal(true);
+values.async_presentation.SetGlobal(true);
 values.renderer_force_max_clock.SetGlobal(true);
 values.vulkan_device.SetGlobal(true);
 values.fullscreen_mode.SetGlobal(true);
@@ -221,11 +223,9 @@ void RestoreGlobalState(bool is_powered_on) {
 values.nvdec_emulation.SetGlobal(true);
 values.accelerate_astc.SetGlobal(true);
 values.async_astc.SetGlobal(true);
-values.use_vsync.SetGlobal(true);
 values.shader_backend.SetGlobal(true);
 values.use_asynchronous_shaders.SetGlobal(true);
 values.use_fast_gpu_time.SetGlobal(true);
-values.use_pessimistic_flushes.SetGlobal(true);
 values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
 values.bg_red.SetGlobal(true);
 values.bg_green.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index b77a1580a..2bf191cef 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -16,6 +16,13 @@

 namespace Settings {

+enum class VSyncMode : u32 {
+Immediate = 0,
+Mailbox = 1,
+FIFO = 2,
+FIFORelaxed = 3,
+};
+
 enum class RendererBackend : u32 {
 OpenGL = 0,
 Vulkan = 1,
@@ -388,7 +395,8 @@ struct Values {

 // Core
 SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
-SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"};
+SwitchableSetting<bool> use_unsafe_extended_memory_layout{false,
+"use_unsafe_extended_memory_layout"};

 // Cpu
 SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
@@ -422,6 +430,7 @@ struct Values {
 // Renderer
 SwitchableSetting<RendererBackend, true> renderer_backend{
 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
+SwitchableSetting<bool> async_presentation{false, "async_presentation"};
 SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
 Setting<bool> renderer_debug{false, "debug"};
 Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
@@ -454,12 +463,12 @@ struct Values {
 SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
 SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
 SwitchableSetting<bool> async_astc{false, "async_astc"};
-SwitchableSetting<bool> use_vsync{true, "use_vsync"};
+Setting<VSyncMode, true> vsync_mode{VSyncMode::FIFO, VSyncMode::Immediate,
+VSyncMode::FIFORelaxed, "use_vsync"};
 SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL,
 ShaderBackend::SPIRV, "shader_backend"};
 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
-SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
 "use_vulkan_driver_pipeline_cache"};

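The new VSyncMode values mirror Vulkan's presentation modes. A hypothetical translation helper is sketched below; the function name and placement are assumptions of this sketch, since the swapchain-side changes are not shown in this excerpt:

#include <vulkan/vulkan.h>
#include "common/settings.h"

VkPresentModeKHR PresentModeFromSetting(Settings::VSyncMode mode) {
    switch (mode) {
    case Settings::VSyncMode::Immediate:
        return VK_PRESENT_MODE_IMMEDIATE_KHR;    // no vsync, may tear
    case Settings::VSyncMode::Mailbox:
        return VK_PRESENT_MODE_MAILBOX_KHR;      // low latency, no tearing
    case Settings::VSyncMode::FIFO:
        return VK_PRESENT_MODE_FIFO_KHR;         // classic vsync, always available
    case Settings::VSyncMode::FIFORelaxed:
        return VK_PRESENT_MODE_FIFO_RELAXED_KHR; // vsync unless a frame is late
    }
    return VK_PRESENT_MODE_FIFO_KHR;
}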
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 0e2095c45..b4885835d 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -259,6 +259,20 @@ public:
 return *this;
 }

+void RotateFromOrigin(float roll, float pitch, float yaw) {
+float temp = y;
+y = std::cos(roll) * y - std::sin(roll) * z;
+z = std::sin(roll) * temp + std::cos(roll) * z;
+
+temp = x;
+x = std::cos(pitch) * x + std::sin(pitch) * z;
+z = -std::sin(pitch) * temp + std::cos(pitch) * z;
+
+temp = x;
+x = std::cos(yaw) * x - std::sin(yaw) * y;
+y = std::sin(yaw) * temp + std::cos(yaw) * y;
+}
+
 [[nodiscard]] constexpr T Length2() const {
 return x * x + y * y + z * z;
 }
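In matrix form, the new RotateFromOrigin applies a rotation about the x axis (roll), then the y axis (pitch), then the z axis (yaw), using the standard right-handed rotation matrices:

\[
v' = R_z(\mathrm{yaw})\, R_y(\mathrm{pitch})\, R_x(\mathrm{roll})\, v,
\qquad
R_x(\theta) = \begin{pmatrix} 1 & 0 & 0 \\ 0 & \cos\theta & -\sin\theta \\ 0 & \sin\theta & \cos\theta \end{pmatrix}
\]

with \(R_y\) and \(R_z\) defined analogously for the y and z axes.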
diff --git a/src/core/core.cpp b/src/core/core.cpp
index caa6a77be..06fba4ce5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -137,7 +137,7 @@ struct System::Impl {
 device_memory = std::make_unique<Core::DeviceMemory>();

 is_multicore = Settings::values.use_multi_core.GetValue();
-extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
+extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();

 core_timing.SetMulticore(is_multicore);
 core_timing.Initialize([&system]() { system.RegisterHostThread(); });
@@ -169,7 +169,7 @@ struct System::Impl {
 void ReinitializeIfNecessary(System& system) {
 const bool must_reinitialize =
 is_multicore != Settings::values.use_multi_core.GetValue() ||
-extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue();
+extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue();

 if (!must_reinitialize) {
 return;
@@ -178,7 +178,7 @@ struct System::Impl {
 LOG_DEBUG(Kernel, "Re-initializing");

 is_multicore = Settings::values.use_multi_core.GetValue();
-extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
+extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();

 Initialize(system);
 }
@@ -293,6 +293,7 @@ struct System::Impl {
 ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
 Kernel::KProcess::ProcessType::Userland, resource_limit)
 .IsSuccess());
+Kernel::KProcess::Register(system.Kernel(), main_process);
 kernel.MakeApplicationProcess(main_process);
 const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
 if (load_result != Loader::ResultStatus::Success) {
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index a29c9a6f8..ecab85893 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -280,6 +280,10 @@ void EmulatedController::LoadVirtualGamepadParams() {
 virtual_stick_params[Settings::NativeAnalog::LStick].Set("axis_y", 1);
 virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_x", 2);
 virtual_stick_params[Settings::NativeAnalog::RStick].Set("axis_y", 3);
+virtual_stick_params[Settings::NativeAnalog::LStick].Set("deadzone", 0.0f);
+virtual_stick_params[Settings::NativeAnalog::LStick].Set("range", 1.0f);
+virtual_stick_params[Settings::NativeAnalog::RStick].Set("deadzone", 0.0f);
+virtual_stick_params[Settings::NativeAnalog::RStick].Set("range", 1.0f);
 }

 void EmulatedController::ReloadInput() {
@@ -372,6 +376,7 @@ void EmulatedController::ReloadInput() {
 motion.accel = emulated_motion.GetAcceleration();
 motion.gyro = emulated_motion.GetGyroscope();
 motion.rotation = emulated_motion.GetRotations();
+motion.euler = emulated_motion.GetEulerAngles();
 motion.orientation = emulated_motion.GetOrientation();
 motion.is_at_rest = !emulated_motion.IsMoving(motion_sensitivity);
 }
@@ -547,6 +552,8 @@ void EmulatedController::EnableSystemButtons() {
 void EmulatedController::DisableSystemButtons() {
 std::scoped_lock lock{mutex};
 system_buttons_enabled = false;
+controller.home_button_state.raw = 0;
+controller.capture_button_state.raw = 0;
 }

 void EmulatedController::ResetSystemButtons() {
@@ -730,6 +737,8 @@ void EmulatedController::SetButton(const Common::Input::CallbackStatus& callback
 if (is_configuring) {
 controller.npad_button_state.raw = NpadButton::None;
 controller.debug_pad_button_state.raw = 0;
+controller.home_button_state.raw = 0;
+controller.capture_button_state.raw = 0;
 lock.unlock();
 TriggerOnChange(ControllerTriggerType::Button, false);
 return;
@@ -972,14 +981,11 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
 emulated.UpdateOrientation(raw_status.delta_timestamp);
 force_update_motion = raw_status.force_update;

-if (is_configuring) {
-return;
-}
-
 auto& motion = controller.motion_state[index];
 motion.accel = emulated.GetAcceleration();
 motion.gyro = emulated.GetGyroscope();
 motion.rotation = emulated.GetRotations();
+motion.euler = emulated.GetEulerAngles();
 motion.orientation = emulated.GetOrientation();
 motion.is_at_rest = !emulated.IsMoving(motion_sensitivity);
 }
diff --git a/src/core/hid/emulated_controller.h b/src/core/hid/emulated_controller.h
index 429655355..6e01f4e12 100644
--- a/src/core/hid/emulated_controller.h
+++ b/src/core/hid/emulated_controller.h
@@ -106,6 +106,7 @@ struct ControllerMotion {
 Common::Vec3f accel{};
 Common::Vec3f gyro{};
 Common::Vec3f rotation{};
+Common::Vec3f euler{};
 std::array<Common::Vec3f, 3> orientation{};
 bool is_at_rest{};
 };
diff --git a/src/core/hid/input_converter.cpp b/src/core/hid/input_converter.cpp
index 7cee39a53..53b00b1f9 100644
--- a/src/core/hid/input_converter.cpp
+++ b/src/core/hid/input_converter.cpp
@@ -54,6 +54,7 @@ Common::Input::ButtonStatus TransformToButton(const Common::Input::CallbackStatu
 case Common::Input::InputType::Analog:
 status.value = TransformToTrigger(callback).pressed.value;
 status.toggle = callback.analog_status.properties.toggle;
+status.inverted = callback.analog_status.properties.inverted_button;
 break;
 case Common::Input::InputType::Trigger:
 status.value = TransformToTrigger(callback).pressed.value;
@@ -61,6 +62,9 @@ Common::Input::ButtonStatus TransformToButton(const Common::Input::CallbackStatu
 case Common::Input::InputType::Button:
 status = callback.button_status;
 break;
+case Common::Input::InputType::Motion:
+status.value = std::abs(callback.motion_status.gyro.x.raw_value) > 1.0f;
+break;
 default:
 LOG_ERROR(Input, "Conversion from type {} to button not implemented", callback.type);
 break;
@@ -226,6 +230,10 @@ Common::Input::TriggerStatus TransformToTrigger(const Common::Input::CallbackSta
 status = callback.trigger_status;
 calculate_button_value = false;
 break;
+case Common::Input::InputType::Motion:
+status.analog.properties.range = 1.0f;
+raw_value = callback.motion_status.accel.x.raw_value;
+break;
 default:
 LOG_ERROR(Input, "Conversion from type {} to trigger not implemented", callback.type);
 break;
diff --git a/src/core/hid/motion_input.cpp b/src/core/hid/motion_input.cpp
index 0dd66c1cc..b60478dbb 100644
--- a/src/core/hid/motion_input.cpp
+++ b/src/core/hid/motion_input.cpp
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include <cmath>
+
 #include "common/math_util.h"
 #include "core/hid/motion_input.h"

@@ -51,6 +53,20 @@ void MotionInput::SetQuaternion(const Common::Quaternion<f32>& quaternion) {
 quat = quaternion;
 }

+void MotionInput::SetEulerAngles(const Common::Vec3f& euler_angles) {
+const float cr = std::cos(euler_angles.x * 0.5f);
+const float sr = std::sin(euler_angles.x * 0.5f);
+const float cp = std::cos(euler_angles.y * 0.5f);
+const float sp = std::sin(euler_angles.y * 0.5f);
+const float cy = std::cos(euler_angles.z * 0.5f);
+const float sy = std::sin(euler_angles.z * 0.5f);
+
+quat.w = cr * cp * cy + sr * sp * sy;
+quat.xyz.x = sr * cp * cy - cr * sp * sy;
+quat.xyz.y = cr * sp * cy + sr * cp * sy;
+quat.xyz.z = cr * cp * sy - sr * sp * cy;
+}
+
 void MotionInput::SetGyroBias(const Common::Vec3f& bias) {
 gyro_bias = bias;
 }
@@ -222,6 +238,26 @@ Common::Vec3f MotionInput::GetRotations() const {
 return rotations;
 }

+Common::Vec3f MotionInput::GetEulerAngles() const {
+// roll (x-axis rotation)
+const float sinr_cosp = 2 * (quat.w * quat.xyz.x + quat.xyz.y * quat.xyz.z);
+const float cosr_cosp = 1 - 2 * (quat.xyz.x * quat.xyz.x + quat.xyz.y * quat.xyz.y);
+
+// pitch (y-axis rotation)
+const float sinp = std::sqrt(1 + 2 * (quat.w * quat.xyz.y - quat.xyz.x * quat.xyz.z));
+const float cosp = std::sqrt(1 - 2 * (quat.w * quat.xyz.y - quat.xyz.x * quat.xyz.z));
+
+// yaw (z-axis rotation)
+const float siny_cosp = 2 * (quat.w * quat.xyz.z + quat.xyz.x * quat.xyz.y);
+const float cosy_cosp = 1 - 2 * (quat.xyz.y * quat.xyz.y + quat.xyz.z * quat.xyz.z);
+
+return {
+std::atan2(sinr_cosp, cosr_cosp),
+2 * std::atan2(sinp, cosp) - Common::PI / 2,
+std::atan2(siny_cosp, cosy_cosp),
+};
+}
+
 void MotionInput::ResetOrientation() {
 if (!reset_enabled || only_accelerometer) {
 return;
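For reference, GetEulerAngles above is the standard quaternion-to-Euler conversion; with the unit quaternion written as q = (w, x, y, z), the returned roll, pitch, and yaw are:

\[
\begin{aligned}
\mathrm{roll} &= \operatorname{atan2}\bigl(2(wx + yz),\; 1 - 2(x^2 + y^2)\bigr) \\
\mathrm{pitch} &= 2\operatorname{atan2}\bigl(\sqrt{1 + 2(wy - xz)},\; \sqrt{1 - 2(wy - xz)}\bigr) - \tfrac{\pi}{2} \\
\mathrm{yaw} &= \operatorname{atan2}\bigl(2(wz + xy),\; 1 - 2(y^2 + z^2)\bigr)
\end{aligned}
\]

SetEulerAngles is the inverse mapping, composing the three half-angle rotations back into a quaternion.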
diff --git a/src/core/hid/motion_input.h b/src/core/hid/motion_input.h
index 9f3fc1cf7..482719359 100644
--- a/src/core/hid/motion_input.h
+++ b/src/core/hid/motion_input.h
@@ -35,6 +35,7 @@ public:
 void SetAcceleration(const Common::Vec3f& acceleration);
 void SetGyroscope(const Common::Vec3f& gyroscope);
 void SetQuaternion(const Common::Quaternion<f32>& quaternion);
+void SetEulerAngles(const Common::Vec3f& euler_angles);
 void SetGyroBias(const Common::Vec3f& bias);
 void SetGyroThreshold(f32 threshold);

@@ -54,6 +55,7 @@ public:
 [[nodiscard]] Common::Vec3f GetGyroBias() const;
 [[nodiscard]] Common::Vec3f GetRotations() const;
 [[nodiscard]] Common::Quaternion<f32> GetQuaternion() const;
+[[nodiscard]] Common::Vec3f GetEulerAngles() const;

 [[nodiscard]] bool IsMoving(f32 sensitivity) const;
 [[nodiscard]] bool IsCalibrated(f32 sensitivity) const;
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 36d0d20d2..49bdc671e 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -35,12 +35,13 @@ namespace {
 using namespace Common::Literals;

 u32 GetMemorySizeForInit() {
-return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB;
+return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB
+: Smc::MemorySize_4GB;
 }

 Smc::MemoryArrangement GetMemoryArrangeForInit() {
-return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB
-: Smc::MemoryArrangement_4GB;
+return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB
+: Smc::MemoryArrangement_4GB;
 }
 } // namespace

diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h
index 9b71fe371..f384b1568 100644
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -182,8 +182,8 @@ public:
 explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}

 static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
-const u64 lid = lhs.GetId();
-const u64 rid = rhs.GetId();
+const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
+const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));

 if (lid < rid) {
 return -1;
diff --git a/src/core/hle/kernel/k_event_info.h b/src/core/hle/kernel/k_event_info.h
index 25b3ff594..eacfa5dc6 100644
--- a/src/core/hle/kernel/k_event_info.h
+++ b/src/core/hle/kernel/k_event_info.h
@@ -5,14 +5,15 @@

 #include <array>

-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"

 #include "core/hle/kernel/slab_helpers.h"
 #include "core/hle/kernel/svc_types.h"

 namespace Kernel {

-class KEventInfo : public KSlabAllocated<KEventInfo>, public boost::intrusive::list_base_hook<> {
+class KEventInfo : public KSlabAllocated<KEventInfo>,
+public Common::IntrusiveListBaseNode<KEventInfo> {
 public:
 struct InfoCreateThread {
 u32 thread_id{};
diff --git a/src/core/hle/kernel/k_object_name.h b/src/core/hle/kernel/k_object_name.h
index 2d97fc777..a8876fe37 100644
--- a/src/core/hle/kernel/k_object_name.h
+++ b/src/core/hle/kernel/k_object_name.h
@@ -5,7 +5,8 @@

 #include <array>
 #include <memory>
-#include <boost/intrusive/list.hpp>
+
+#include "common/intrusive_list.h"

 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/slab_helpers.h"
@@ -15,13 +16,14 @@ namespace Kernel {

 class KObjectNameGlobalData;

-class KObjectName : public KSlabAllocated<KObjectName>, public boost::intrusive::list_base_hook<> {
+class KObjectName : public KSlabAllocated<KObjectName>,
+public Common::IntrusiveListBaseNode<KObjectName> {
 public:
 explicit KObjectName(KernelCore&) {}
 virtual ~KObjectName() = default;

 static constexpr size_t NameLengthMax = 12;
-using List = boost::intrusive::list<KObjectName>;
+using List = Common::IntrusiveListBaseTraits<KObjectName>::ListType;

 static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name);
 static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name);
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h index 21c040e62..625280290 100644 --- a/src/core/hle/kernel/k_server_port.h +++ b/src/core/hle/kernel/k_server_port.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | 9 | ||
| 10 | #include <boost/intrusive/list.hpp> | 10 | #include "common/intrusive_list.h" |
| 11 | 11 | ||
| 12 | #include "core/hle/kernel/k_server_session.h" | 12 | #include "core/hle/kernel/k_server_session.h" |
| 13 | #include "core/hle/kernel/k_synchronization_object.h" | 13 | #include "core/hle/kernel/k_synchronization_object.h" |
| @@ -42,7 +42,7 @@ public: | |||
| 42 | bool IsSignaled() const override; | 42 | bool IsSignaled() const override; |
| 43 | 43 | ||
| 44 | private: | 44 | private: |
| 45 | using SessionList = boost::intrusive::list<KServerSession>; | 45 | using SessionList = Common::IntrusiveListBaseTraits<KServerSession>::ListType; |
| 46 | 46 | ||
| 47 | void CleanupSessions(); | 47 | void CleanupSessions(); |
| 48 | 48 | ||
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h index 5ee02f556..403891919 100644 --- a/src/core/hle/kernel/k_server_session.h +++ b/src/core/hle/kernel/k_server_session.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | 10 | ||
| 11 | #include <boost/intrusive/list.hpp> | 11 | #include "common/intrusive_list.h" |
| 12 | 12 | ||
| 13 | #include "core/hle/kernel/k_light_lock.h" | 13 | #include "core/hle/kernel/k_light_lock.h" |
| 14 | #include "core/hle/kernel/k_session_request.h" | 14 | #include "core/hle/kernel/k_session_request.h" |
| @@ -27,7 +27,7 @@ class KSession; | |||
| 27 | class KThread; | 27 | class KThread; |
| 28 | 28 | ||
| 29 | class KServerSession final : public KSynchronizationObject, | 29 | class KServerSession final : public KSynchronizationObject, |
| 30 | public boost::intrusive::list_base_hook<> { | 30 | public Common::IntrusiveListBaseNode<KServerSession> { |
| 31 | KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject); | 31 | KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject); |
| 32 | 32 | ||
| 33 | friend class ServiceThread; | 33 | friend class ServiceThread; |
| @@ -67,7 +67,8 @@ private: | |||
| 67 | KSession* m_parent{}; | 67 | KSession* m_parent{}; |
| 68 | 68 | ||
| 69 | /// List of threads which are pending a reply. | 69 | /// List of threads which are pending a reply. |
| 70 | boost::intrusive::list<KSessionRequest> m_request_list{}; | 70 | using RequestList = Common::IntrusiveListBaseTraits<KSessionRequest>::ListType; |
| 71 | RequestList m_request_list{}; | ||
| 71 | KSessionRequest* m_current_request{}; | 72 | KSessionRequest* m_current_request{}; |
| 72 | 73 | ||
| 73 | KLightLock m_lock; | 74 | KLightLock m_lock; |
diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h index b5f04907b..283669e0a 100644 --- a/src/core/hle/kernel/k_session_request.h +++ b/src/core/hle/kernel/k_session_request.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include "common/intrusive_list.h" | ||
| 9 | |||
| 8 | #include "core/hle/kernel/k_auto_object.h" | 10 | #include "core/hle/kernel/k_auto_object.h" |
| 9 | #include "core/hle/kernel/k_event.h" | 11 | #include "core/hle/kernel/k_event.h" |
| 10 | #include "core/hle/kernel/k_memory_block.h" | 12 | #include "core/hle/kernel/k_memory_block.h" |
| @@ -16,7 +18,7 @@ namespace Kernel { | |||
| 16 | 18 | ||
| 17 | class KSessionRequest final : public KSlabAllocated<KSessionRequest>, | 19 | class KSessionRequest final : public KSlabAllocated<KSessionRequest>, |
| 18 | public KAutoObject, | 20 | public KAutoObject, |
| 19 | public boost::intrusive::list_base_hook<> { | 21 | public Common::IntrusiveListBaseNode<KSessionRequest> { |
| 20 | KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject); | 22 | KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject); |
| 21 | 23 | ||
| 22 | public: | 24 | public: |
diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h index 75b73ba39..2d8ff20d6 100644 --- a/src/core/hle/kernel/k_shared_memory_info.h +++ b/src/core/hle/kernel/k_shared_memory_info.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <boost/intrusive/list.hpp> | 6 | #include "common/intrusive_list.h" |
| 7 | 7 | ||
| 8 | #include "core/hle/kernel/slab_helpers.h" | 8 | #include "core/hle/kernel/slab_helpers.h" |
| 9 | 9 | ||
| @@ -12,7 +12,7 @@ namespace Kernel { | |||
| 12 | class KSharedMemory; | 12 | class KSharedMemory; |
| 13 | 13 | ||
| 14 | class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>, | 14 | class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>, |
| 15 | public boost::intrusive::list_base_hook<> { | 15 | public Common::IntrusiveListBaseNode<KSharedMemoryInfo> { |
| 16 | 16 | ||
| 17 | public: | 17 | public: |
| 18 | explicit KSharedMemoryInfo(KernelCore&) {} | 18 | explicit KSharedMemoryInfo(KernelCore&) {} |
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index 9c1a41128..f9814ac8f 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include <vector> | 13 | #include <vector> |
| 14 | 14 | ||
| 15 | #include <boost/intrusive/list.hpp> | 15 | #include "common/intrusive_list.h" |
| 16 | 16 | ||
| 17 | #include "common/intrusive_red_black_tree.h" | 17 | #include "common/intrusive_red_black_tree.h" |
| 18 | #include "common/spin_lock.h" | 18 | #include "common/spin_lock.h" |
| @@ -119,7 +119,7 @@ s32 GetCurrentCoreId(KernelCore& kernel); | |||
| 119 | Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel); | 119 | Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel); |
| 120 | 120 | ||
| 121 | class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>, | 121 | class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>, |
| 122 | public boost::intrusive::list_base_hook<>, | 122 | public Common::IntrusiveListBaseNode<KThread>, |
| 123 | public KTimerTask { | 123 | public KTimerTask { |
| 124 | KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject); | 124 | KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject); |
| 125 | 125 | ||
| @@ -138,7 +138,7 @@ public: | |||
| 138 | public: | 138 | public: |
| 139 | using ThreadContext32 = Core::ARM_Interface::ThreadContext32; | 139 | using ThreadContext32 = Core::ARM_Interface::ThreadContext32; |
| 140 | using ThreadContext64 = Core::ARM_Interface::ThreadContext64; | 140 | using ThreadContext64 = Core::ARM_Interface::ThreadContext64; |
| 141 | using WaiterList = boost::intrusive::list<KThread>; | 141 | using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType; |
| 142 | 142 | ||
| 143 | /** | 143 | /** |
| 144 | * Gets the thread's current priority | 144 | * Gets the thread's current priority |
| @@ -750,8 +750,9 @@ private: | |||
| 750 | ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>; | 750 | ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>; |
| 751 | 751 | ||
| 752 | public: | 752 | public: |
| 753 | class LockWithPriorityInheritanceInfo : public KSlabAllocated<LockWithPriorityInheritanceInfo>, | 753 | class LockWithPriorityInheritanceInfo |
| 754 | public boost::intrusive::list_base_hook<> { | 754 | : public KSlabAllocated<LockWithPriorityInheritanceInfo>, |
| 755 | public Common::IntrusiveListBaseNode<LockWithPriorityInheritanceInfo> { | ||
| 755 | public: | 756 | public: |
| 756 | explicit LockWithPriorityInheritanceInfo(KernelCore&) {} | 757 | explicit LockWithPriorityInheritanceInfo(KernelCore&) {} |
| 757 | 758 | ||
| @@ -839,7 +840,7 @@ public: | |||
| 839 | 840 | ||
| 840 | private: | 841 | private: |
| 841 | using LockWithPriorityInheritanceInfoList = | 842 | using LockWithPriorityInheritanceInfoList = |
| 842 | boost::intrusive::list<LockWithPriorityInheritanceInfo>; | 843 | Common::IntrusiveListBaseTraits<LockWithPriorityInheritanceInfo>::ListType; |
| 843 | 844 | ||
| 844 | ConditionVariableThreadTree* m_condvar_tree{}; | 845 | ConditionVariableThreadTree* m_condvar_tree{}; |
| 845 | u64 m_condvar_key{}; | 846 | u64 m_condvar_key{}; |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4f3366c9d..f33600ca5 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -95,7 +95,7 @@ struct KernelCore::Impl { | |||
| 95 | pt_heap_region.GetSize()); | 95 | pt_heap_region.GetSize()); |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | InitializeHackSharedMemory(); | 98 | InitializeHackSharedMemory(kernel); |
| 99 | RegisterHostThread(nullptr); | 99 | RegisterHostThread(nullptr); |
| 100 | } | 100 | } |
| 101 | 101 | ||
| @@ -216,10 +216,12 @@ struct KernelCore::Impl { | |||
| 216 | auto* main_thread{Kernel::KThread::Create(system.Kernel())}; | 216 | auto* main_thread{Kernel::KThread::Create(system.Kernel())}; |
| 217 | main_thread->SetCurrentCore(core); | 217 | main_thread->SetCurrentCore(core); |
| 218 | ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess()); | 218 | ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess()); |
| 219 | KThread::Register(system.Kernel(), main_thread); | ||
| 219 | 220 | ||
| 220 | auto* idle_thread{Kernel::KThread::Create(system.Kernel())}; | 221 | auto* idle_thread{Kernel::KThread::Create(system.Kernel())}; |
| 221 | idle_thread->SetCurrentCore(core); | 222 | idle_thread->SetCurrentCore(core); |
| 222 | ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess()); | 223 | ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess()); |
| 224 | KThread::Register(system.Kernel(), idle_thread); | ||
| 223 | 225 | ||
| 224 | schedulers[i]->Initialize(main_thread, idle_thread, core); | 226 | schedulers[i]->Initialize(main_thread, idle_thread, core); |
| 225 | } | 227 | } |
| @@ -230,6 +232,7 @@ struct KernelCore::Impl { | |||
| 230 | const Core::Timing::CoreTiming& core_timing) { | 232 | const Core::Timing::CoreTiming& core_timing) { |
| 231 | system_resource_limit = KResourceLimit::Create(system.Kernel()); | 233 | system_resource_limit = KResourceLimit::Create(system.Kernel()); |
| 232 | system_resource_limit->Initialize(&core_timing); | 234 | system_resource_limit->Initialize(&core_timing); |
| 235 | KResourceLimit::Register(kernel, system_resource_limit); | ||
| 233 | 236 | ||
| 234 | const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()}; | 237 | const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()}; |
| 235 | const auto total_size{sizes.first}; | 238 | const auto total_size{sizes.first}; |
| @@ -355,6 +358,7 @@ struct KernelCore::Impl { | |||
| 355 | ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {}, | 358 | ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {}, |
| 356 | core_id) | 359 | core_id) |
| 357 | .IsSuccess()); | 360 | .IsSuccess()); |
| 361 | KThread::Register(system.Kernel(), shutdown_threads[core_id]); | ||
| 358 | } | 362 | } |
| 359 | } | 363 | } |
| 360 | 364 | ||
| @@ -729,7 +733,7 @@ struct KernelCore::Impl { | |||
| 729 | memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); | 733 | memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); |
| 730 | } | 734 | } |
| 731 | 735 | ||
| 732 | void InitializeHackSharedMemory() { | 736 | void InitializeHackSharedMemory(KernelCore& kernel) { |
| 733 | // Setup memory regions for emulated processes | 737 | // Setup memory regions for emulated processes |
| 734 | // TODO(bunnei): These should not be hardcoded regions initialized within the kernel | 738 | // TODO(bunnei): These should not be hardcoded regions initialized within the kernel |
| 735 | constexpr std::size_t hid_size{0x40000}; | 739 | constexpr std::size_t hid_size{0x40000}; |
| @@ -746,14 +750,23 @@ struct KernelCore::Impl { | |||
| 746 | 750 | ||
| 747 | hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, | 751 | hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, |
| 748 | Svc::MemoryPermission::Read, hid_size); | 752 | Svc::MemoryPermission::Read, hid_size); |
| 753 | KSharedMemory::Register(kernel, hid_shared_mem); | ||
| 754 | |||
| 749 | font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, | 755 | font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, |
| 750 | Svc::MemoryPermission::Read, font_size); | 756 | Svc::MemoryPermission::Read, font_size); |
| 757 | KSharedMemory::Register(kernel, font_shared_mem); | ||
| 758 | |||
| 751 | irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, | 759 | irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, |
| 752 | Svc::MemoryPermission::Read, irs_size); | 760 | Svc::MemoryPermission::Read, irs_size); |
| 761 | KSharedMemory::Register(kernel, irs_shared_mem); | ||
| 762 | |||
| 753 | time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, | 763 | time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, |
| 754 | Svc::MemoryPermission::Read, time_size); | 764 | Svc::MemoryPermission::Read, time_size); |
| 765 | KSharedMemory::Register(kernel, time_shared_mem); | ||
| 766 | |||
| 755 | hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, | 767 | hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, |
| 756 | Svc::MemoryPermission::Read, hidbus_size); | 768 | Svc::MemoryPermission::Read, hidbus_size); |
| 769 | KSharedMemory::Register(kernel, hidbus_shared_mem); | ||
| 757 | } | 770 | } |
| 758 | 771 | ||
| 759 | std::mutex registered_objects_lock; | 772 | std::mutex registered_objects_lock; |
| @@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process, | |||
| 1072 | // Commit the thread reservation. | 1085 | // Commit the thread reservation. |
| 1073 | thread_reservation.Commit(); | 1086 | thread_reservation.Commit(); |
| 1074 | 1087 | ||
| 1088 | // Register the thread. | ||
| 1089 | KThread::Register(kernel, thread); | ||
| 1090 | |||
| 1075 | return std::jthread( | 1091 | return std::jthread( |
| 1076 | [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] { | 1092 | [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] { |
| 1077 | // Set the thread name. | 1093 | // Set the thread name. |
| 1078 | Common::SetCurrentThreadName(thread_name.c_str()); | 1094 | Common::SetCurrentThreadName(thread_name.c_str()); |
| 1079 | 1095 | ||
| 1080 | // Register the thread. | 1096 | // Set the thread as current. |
| 1081 | kernel.RegisterHostThread(thread); | 1097 | kernel.RegisterHostThread(thread); |
| 1082 | 1098 | ||
| 1083 | // Run the callback. | 1099 | // Run the callback. |
| @@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name, | |||
| 1099 | // Ensure that we don't hold onto any extra references. | 1115 | // Ensure that we don't hold onto any extra references. |
| 1100 | SCOPE_EXIT({ process->Close(); }); | 1116 | SCOPE_EXIT({ process->Close(); }); |
| 1101 | 1117 | ||
| 1118 | // Register the new process. | ||
| 1119 | KProcess::Register(*this, process); | ||
| 1120 | |||
| 1102 | // Run the host thread. | 1121 | // Run the host thread. |
| 1103 | return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func)); | 1122 | return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func)); |
| 1104 | } | 1123 | } |
| @@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function | |||
| 1124 | // Ensure that we don't hold onto any extra references. | 1143 | // Ensure that we don't hold onto any extra references. |
| 1125 | SCOPE_EXIT({ process->Close(); }); | 1144 | SCOPE_EXIT({ process->Close(); }); |
| 1126 | 1145 | ||
| 1146 | // Register the new process. | ||
| 1147 | KProcess::Register(*this, process); | ||
| 1148 | |||
| 1127 | // Reserve a new thread from the process resource limit. | 1149 | // Reserve a new thread from the process resource limit. |
| 1128 | KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax); | 1150 | KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax); |
| 1129 | ASSERT(thread_reservation.Succeeded()); | 1151 | ASSERT(thread_reservation.Succeeded()); |
| @@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function | |||
| 1136 | // Commit the thread reservation. | 1158 | // Commit the thread reservation. |
| 1137 | thread_reservation.Commit(); | 1159 | thread_reservation.Commit(); |
| 1138 | 1160 | ||
| 1161 | // Register the new thread. | ||
| 1162 | KThread::Register(*this, thread); | ||
| 1163 | |||
| 1139 | // Begin running the thread. | 1164 | // Begin running the thread. |
| 1140 | ASSERT(R_SUCCEEDED(thread->Run())); | 1165 | ASSERT(R_SUCCEEDED(thread->Run())); |
| 1141 | } | 1166 | } |
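Together with the service-side hunks below (ipc_helpers.h, kernel_helpers.cpp, mutex.cpp, server_manager.cpp, sm.cpp, sm_controller.cpp), these kernel.cpp changes converge on one pattern: every kernel object created outside the SVC path is now explicitly registered after it is initialized. A condensed sketch of that sequence for a KEvent, using the calls visible in the mutex.cpp hunk further down (reference counting and error handling omitted; illustrative, not the commit's exact code):

    #include "core/core.h"
    #include "core/hle/kernel/k_event.h"

    Kernel::KEvent* MakeRegisteredEvent(Core::System& system) {
        // Create -> Initialize -> Register: the object is only tracked by the
        // kernel's object lists once Register() has been called.
        Kernel::KEvent* event = Kernel::KEvent::Create(system.Kernel());
        event->Initialize(nullptr);
        Kernel::KEvent::Register(system.Kernel(), event);
        return event;
    }

The same three-step shape applies to KThread, KProcess, KResourceLimit, KSharedMemory, KSession and KPort in the hunks above and below.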
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index a17c46121..e59de844c 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -1807,7 +1807,7 @@ void IApplicationFunctions::GetFriendInvitationStorageChannelEvent(HLERequestCon | |||
| 1807 | } | 1807 | } |
| 1808 | 1808 | ||
| 1809 | void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(HLERequestContext& ctx) { | 1809 | void IApplicationFunctions::TryPopFromFriendInvitationStorageChannel(HLERequestContext& ctx) { |
| 1810 | LOG_WARNING(Service_AM, "(STUBBED) called"); | 1810 | LOG_DEBUG(Service_AM, "(STUBBED) called"); |
| 1811 | 1811 | ||
| 1812 | IPC::ResponseBuilder rb{ctx, 2}; | 1812 | IPC::ResponseBuilder rb{ctx, 2}; |
| 1813 | rb.Push(AM::ResultNoDataInChannel); | 1813 | rb.Push(AM::ResultNoDataInChannel); |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 23b8be993..3e62fa4fc 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -49,12 +49,6 @@ public: | |||
| 49 | }; | 49 | }; |
| 50 | // clang-format on | 50 | // clang-format on |
| 51 | RegisterHandlers(functions); | 51 | RegisterHandlers(functions); |
| 52 | |||
| 53 | if (impl->GetSystem() | ||
| 54 | .Initialize(device_name, in_params, handle, applet_resource_user_id) | ||
| 55 | .IsError()) { | ||
| 56 | LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!"); | ||
| 57 | } | ||
| 58 | } | 52 | } |
| 59 | 53 | ||
| 60 | ~IAudioOut() override { | 54 | ~IAudioOut() override { |
| @@ -287,6 +281,14 @@ void AudOutU::OpenAudioOut(HLERequestContext& ctx) { | |||
| 287 | 281 | ||
| 288 | auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name, | 282 | auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name, |
| 289 | in_params, handle, applet_resource_user_id); | 283 | in_params, handle, applet_resource_user_id); |
| 284 | result = audio_out->GetImpl()->GetSystem().Initialize(device_name, in_params, handle, | ||
| 285 | applet_resource_user_id); | ||
| 286 | if (result.IsError()) { | ||
| 287 | LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!"); | ||
| 288 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 289 | rb.Push(result); | ||
| 290 | return; | ||
| 291 | } | ||
| 290 | 292 | ||
| 291 | impl->sessions[new_session_id] = audio_out->GetImpl(); | 293 | impl->sessions[new_session_id] = audio_out->GetImpl(); |
| 292 | impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id; | 294 | impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id; |
diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h index e4cb4e1f2..0e222362e 100644 --- a/src/core/hle/service/ipc_helpers.h +++ b/src/core/hle/service/ipc_helpers.h | |||
| @@ -156,6 +156,7 @@ public: | |||
| 156 | 156 | ||
| 157 | auto* session = Kernel::KSession::Create(kernel); | 157 | auto* session = Kernel::KSession::Create(kernel); |
| 158 | session->Initialize(nullptr, 0); | 158 | session->Initialize(nullptr, 0); |
| 159 | Kernel::KSession::Register(kernel, session); | ||
| 159 | 160 | ||
| 160 | auto next_manager = std::make_shared<Service::SessionRequestManager>( | 161 | auto next_manager = std::make_shared<Service::SessionRequestManager>( |
| 161 | kernel, manager->GetServerManager()); | 162 | kernel, manager->GetServerManager()); |
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp index a39ce5212..6a313a03b 100644 --- a/src/core/hle/service/kernel_helpers.cpp +++ b/src/core/hle/service/kernel_helpers.cpp | |||
| @@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_) | |||
| 25 | Kernel::KProcess::ProcessType::KernelInternal, | 25 | Kernel::KProcess::ProcessType::KernelInternal, |
| 26 | kernel.GetSystemResourceLimit()) | 26 | kernel.GetSystemResourceLimit()) |
| 27 | .IsSuccess()); | 27 | .IsSuccess()); |
| 28 | |||
| 29 | // Register the process. | ||
| 30 | Kernel::KProcess::Register(kernel, process); | ||
| 28 | process_created = true; | 31 | process_created = true; |
| 29 | } | 32 | } |
| 30 | 33 | ||
diff --git a/src/core/hle/service/mutex.cpp b/src/core/hle/service/mutex.cpp index 07589a0f0..b0ff71d1b 100644 --- a/src/core/hle/service/mutex.cpp +++ b/src/core/hle/service/mutex.cpp | |||
| @@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) { | |||
| 12 | m_event = Kernel::KEvent::Create(system.Kernel()); | 12 | m_event = Kernel::KEvent::Create(system.Kernel()); |
| 13 | m_event->Initialize(nullptr); | 13 | m_event->Initialize(nullptr); |
| 14 | 14 | ||
| 15 | // Register the event. | ||
| 16 | Kernel::KEvent::Register(system.Kernel(), m_event); | ||
| 17 | |||
| 15 | ASSERT(R_SUCCEEDED(m_event->Signal())); | 18 | ASSERT(R_SUCCEEDED(m_event->Signal())); |
| 16 | } | 19 | } |
| 17 | 20 | ||
diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp index 6b4a1291e..156bc27d8 100644 --- a/src/core/hle/service/server_manager.cpp +++ b/src/core/hle/service/server_manager.cpp | |||
| @@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m | |||
| 33 | // Initialize event. | 33 | // Initialize event. |
| 34 | m_event = Kernel::KEvent::Create(system.Kernel()); | 34 | m_event = Kernel::KEvent::Create(system.Kernel()); |
| 35 | m_event->Initialize(nullptr); | 35 | m_event->Initialize(nullptr); |
| 36 | |||
| 37 | // Register event. | ||
| 38 | Kernel::KEvent::Register(system.Kernel(), m_event); | ||
| 36 | } | 39 | } |
| 37 | 40 | ||
| 38 | ServerManager::~ServerManager() { | 41 | ServerManager::~ServerManager() { |
| @@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) { | |||
| 160 | // Initialize the event. | 163 | // Initialize the event. |
| 161 | m_deferral_event->Initialize(nullptr); | 164 | m_deferral_event->Initialize(nullptr); |
| 162 | 165 | ||
| 166 | // Register the event. | ||
| 167 | Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event); | ||
| 168 | |||
| 163 | // Set the output. | 169 | // Set the output. |
| 164 | *out_event = m_deferral_event; | 170 | *out_event = m_deferral_event; |
| 165 | 171 | ||
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index c45be5726..1608fa24c 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp | |||
| @@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions, | |||
| 64 | auto* port = Kernel::KPort::Create(kernel); | 64 | auto* port = Kernel::KPort::Create(kernel); |
| 65 | port->Initialize(ServerSessionCountMax, false, 0); | 65 | port->Initialize(ServerSessionCountMax, false, 0); |
| 66 | 66 | ||
| 67 | // Register the port. | ||
| 68 | Kernel::KPort::Register(kernel, port); | ||
| 69 | |||
| 67 | service_ports.emplace(name, port); | 70 | service_ports.emplace(name, port); |
| 68 | registered_services.emplace(name, handler); | 71 | registered_services.emplace(name, handler); |
| 69 | if (deferral_event) { | 72 | if (deferral_event) { |
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp index 419c1df2b..7dce28fe0 100644 --- a/src/core/hle/service/sm/sm_controller.cpp +++ b/src/core/hle/service/sm/sm_controller.cpp | |||
| @@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) { | |||
| 49 | // Commit the session reservation. | 49 | // Commit the session reservation. |
| 50 | session_reservation.Commit(); | 50 | session_reservation.Commit(); |
| 51 | 51 | ||
| 52 | // Register the session. | ||
| 53 | Kernel::KSession::Register(system.Kernel(), session); | ||
| 54 | |||
| 52 | // Register with server manager. | 55 | // Register with server manager. |
| 53 | session_manager->GetServerManager().RegisterSession(&session->GetServerSession(), | 56 | session_manager->GetServerManager().RegisterSession(&session->GetServerSession(), |
| 54 | session_manager); | 57 | session_manager); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 432310632..a9667463f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -462,7 +462,7 @@ struct Memory::Impl { | |||
| 462 | } | 462 | } |
| 463 | 463 | ||
| 464 | if (Settings::IsFastmemEnabled()) { | 464 | if (Settings::IsFastmemEnabled()) { |
| 465 | const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; | 465 | const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; |
| 466 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); | 466 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); |
| 467 | } | 467 | } |
| 468 | 468 | ||
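For reference, the updated predicate above only disables direct (fastmem) reads when the GPU accuracy level is Extreme and the region is being marked as cached; previously High accuracy was already enough to disable them. A minimal sketch of just that decision, with the Settings call taken from the hunk and everything else illustrative:

    #include "common/settings.h"

    // Sketch only: whether fastmem reads stay enabled for a region whose
    // cached state is changing.
    bool FastmemReadsEnabled(bool cached) {
        // Only "Extreme accuracy AND cached" returns false, i.e. reads fault
        // and are forced through the GPU cache path.
        return !Settings::IsGPULevelExtreme() || !cached;
    }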
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 9178b00ca..7a2f3c90a 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -85,6 +85,20 @@ static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) { | |||
| 85 | return "Unknown"; | 85 | return "Unknown"; |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | static constexpr const char* TranslateVSyncMode(Settings::VSyncMode mode) { | ||
| 89 | switch (mode) { | ||
| 90 | case Settings::VSyncMode::Immediate: | ||
| 91 | return "Immediate"; | ||
| 92 | case Settings::VSyncMode::Mailbox: | ||
| 93 | return "Mailbox"; | ||
| 94 | case Settings::VSyncMode::FIFO: | ||
| 95 | return "FIFO"; | ||
| 96 | case Settings::VSyncMode::FIFORelaxed: | ||
| 97 | return "FIFO Relaxed"; | ||
| 98 | } | ||
| 99 | return "Unknown"; | ||
| 100 | } | ||
| 101 | |||
| 88 | u64 GetTelemetryId() { | 102 | u64 GetTelemetryId() { |
| 89 | u64 telemetry_id{}; | 103 | u64 telemetry_id{}; |
| 90 | const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; | 104 | const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; |
| @@ -241,7 +255,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, | |||
| 241 | AddField(field_type, "Renderer_NvdecEmulation", | 255 | AddField(field_type, "Renderer_NvdecEmulation", |
| 242 | TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); | 256 | TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); |
| 243 | AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); | 257 | AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); |
| 244 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); | 258 | AddField(field_type, "Renderer_UseVsync", |
| 259 | TranslateVSyncMode(Settings::values.vsync_mode.GetValue())); | ||
| 245 | AddField(field_type, "Renderer_ShaderBackend", | 260 | AddField(field_type, "Renderer_ShaderBackend", |
| 246 | static_cast<u32>(Settings::values.shader_backend.GetValue())); | 261 | static_cast<u32>(Settings::values.shader_backend.GetValue())); |
| 247 | AddField(field_type, "Renderer_UseAsynchronousShaders", | 262 | AddField(field_type, "Renderer_UseAsynchronousShaders", |
diff --git a/src/dedicated_room/yuzu_room.cpp b/src/dedicated_room/yuzu_room.cpp index 359891883..d707dabe2 100644 --- a/src/dedicated_room/yuzu_room.cpp +++ b/src/dedicated_room/yuzu_room.cpp | |||
| @@ -49,6 +49,7 @@ static void PrintHelp(const char* argv0) { | |||
| 49 | " [options] <filename>\n" | 49 | " [options] <filename>\n" |
| 50 | "--room-name The name of the room\n" | 50 | "--room-name The name of the room\n" |
| 51 | "--room-description The room description\n" | 51 | "--room-description The room description\n" |
| 52 | "--bind-address The bind address for the room\n" | ||
| 52 | "--port The port used for the room\n" | 53 | "--port The port used for the room\n" |
| 53 | "--max_members The maximum number of players for this room\n" | 54 | "--max_members The maximum number of players for this room\n" |
| 54 | "--password The password for the room\n" | 55 | "--password The password for the room\n" |
| @@ -195,6 +196,7 @@ int main(int argc, char** argv) { | |||
| 195 | std::string web_api_url; | 196 | std::string web_api_url; |
| 196 | std::string ban_list_file; | 197 | std::string ban_list_file; |
| 197 | std::string log_file = "yuzu-room.log"; | 198 | std::string log_file = "yuzu-room.log"; |
| 199 | std::string bind_address; | ||
| 198 | u64 preferred_game_id = 0; | 200 | u64 preferred_game_id = 0; |
| 199 | u32 port = Network::DefaultRoomPort; | 201 | u32 port = Network::DefaultRoomPort; |
| 200 | u32 max_members = 16; | 202 | u32 max_members = 16; |
| @@ -203,6 +205,7 @@ int main(int argc, char** argv) { | |||
| 203 | static struct option long_options[] = { | 205 | static struct option long_options[] = { |
| 204 | {"room-name", required_argument, 0, 'n'}, | 206 | {"room-name", required_argument, 0, 'n'}, |
| 205 | {"room-description", required_argument, 0, 'd'}, | 207 | {"room-description", required_argument, 0, 'd'}, |
| 208 | {"bind-address", required_argument, 0, 's'}, | ||
| 206 | {"port", required_argument, 0, 'p'}, | 209 | {"port", required_argument, 0, 'p'}, |
| 207 | {"max_members", required_argument, 0, 'm'}, | 210 | {"max_members", required_argument, 0, 'm'}, |
| 208 | {"password", required_argument, 0, 'w'}, | 211 | {"password", required_argument, 0, 'w'}, |
| @@ -222,7 +225,8 @@ int main(int argc, char** argv) { | |||
| 222 | InitializeLogging(log_file); | 225 | InitializeLogging(log_file); |
| 223 | 226 | ||
| 224 | while (optind < argc) { | 227 | while (optind < argc) { |
| 225 | int arg = getopt_long(argc, argv, "n:d:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index); | 228 | int arg = |
| 229 | getopt_long(argc, argv, "n:d:s:p:m:w:g:u:t:a:i:l:hv", long_options, &option_index); | ||
| 226 | if (arg != -1) { | 230 | if (arg != -1) { |
| 227 | switch (static_cast<char>(arg)) { | 231 | switch (static_cast<char>(arg)) { |
| 228 | case 'n': | 232 | case 'n': |
| @@ -231,6 +235,9 @@ int main(int argc, char** argv) { | |||
| 231 | case 'd': | 235 | case 'd': |
| 232 | room_description.assign(optarg); | 236 | room_description.assign(optarg); |
| 233 | break; | 237 | break; |
| 238 | case 's': | ||
| 239 | bind_address.assign(optarg); | ||
| 240 | break; | ||
| 234 | case 'p': | 241 | case 'p': |
| 235 | port = strtoul(optarg, &endarg, 0); | 242 | port = strtoul(optarg, &endarg, 0); |
| 236 | break; | 243 | break; |
| @@ -295,6 +302,9 @@ int main(int argc, char** argv) { | |||
| 295 | PrintHelp(argv[0]); | 302 | PrintHelp(argv[0]); |
| 296 | return -1; | 303 | return -1; |
| 297 | } | 304 | } |
| 305 | if (bind_address.empty()) { | ||
| 306 | LOG_INFO(Network, "Bind address is empty: defaulting to 0.0.0.0"); | ||
| 307 | } | ||
| 298 | if (port > UINT16_MAX) { | 308 | if (port > UINT16_MAX) { |
| 299 | LOG_ERROR(Network, "Port needs to be in the range 0 - 65535!"); | 309 | LOG_ERROR(Network, "Port needs to be in the range 0 - 65535!"); |
| 300 | PrintHelp(argv[0]); | 310 | PrintHelp(argv[0]); |
| @@ -358,8 +368,8 @@ int main(int argc, char** argv) { | |||
| 358 | if (auto room = network.GetRoom().lock()) { | 368 | if (auto room = network.GetRoom().lock()) { |
| 359 | AnnounceMultiplayerRoom::GameInfo preferred_game_info{.name = preferred_game, | 369 | AnnounceMultiplayerRoom::GameInfo preferred_game_info{.name = preferred_game, |
| 360 | .id = preferred_game_id}; | 370 | .id = preferred_game_id}; |
| 361 | if (!room->Create(room_name, room_description, "", port, password, max_members, username, | 371 | if (!room->Create(room_name, room_description, bind_address, port, password, max_members, |
| 362 | preferred_game_info, std::move(verify_backend), ban_list, | 372 | username, preferred_game_info, std::move(verify_backend), ban_list, |
| 363 | enable_yuzu_mods)) { | 373 | enable_yuzu_mods)) { |
| 364 | LOG_INFO(Network, "Failed to create room: "); | 374 | LOG_INFO(Network, "Failed to create room: "); |
| 365 | return -1; | 375 | return -1; |
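With the new option, the dedicated room server can be bound to a specific local interface, for example (binary name, address, port and member count are illustrative values):

    yuzu-room --room-name "Example room" --bind-address 192.168.1.10 --port 24872 --max_members 8

When --bind-address is omitted, the room logs that it is defaulting to 0.0.0.0 and the empty address is passed through to the room's Create() call.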
diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp index 9361b00c5..8c2ee4eb3 100644 --- a/src/input_common/input_mapping.cpp +++ b/src/input_common/input_mapping.cpp | |||
| @@ -82,6 +82,9 @@ void MappingFactory::RegisterButton(const MappingData& data) { | |||
| 82 | new_input.Set("axis", data.index); | 82 | new_input.Set("axis", data.index); |
| 83 | new_input.Set("threshold", 0.5f); | 83 | new_input.Set("threshold", 0.5f); |
| 84 | break; | 84 | break; |
| 85 | case EngineInputType::Motion: | ||
| 86 | new_input.Set("motion", data.index); | ||
| 87 | break; | ||
| 85 | default: | 88 | default: |
| 86 | return; | 89 | return; |
| 87 | } | 90 | } |
diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp index 8c6a6521a..5c2c4a463 100644 --- a/src/input_common/input_poller.cpp +++ b/src/input_common/input_poller.cpp | |||
| @@ -939,6 +939,7 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateAnalogDevice( | |||
| 939 | .threshold = std::clamp(params.Get("threshold", 0.5f), 0.0f, 1.0f), | 939 | .threshold = std::clamp(params.Get("threshold", 0.5f), 0.0f, 1.0f), |
| 940 | .offset = std::clamp(params.Get("offset", 0.0f), -1.0f, 1.0f), | 940 | .offset = std::clamp(params.Get("offset", 0.0f), -1.0f, 1.0f), |
| 941 | .inverted = params.Get("invert", "+") == "-", | 941 | .inverted = params.Get("invert", "+") == "-", |
| 942 | .inverted_button = params.Get("inverted", false) != 0, | ||
| 942 | .toggle = params.Get("toggle", false) != 0, | 943 | .toggle = params.Get("toggle", false) != 0, |
| 943 | }; | 944 | }; |
| 944 | input_engine->PreSetController(identifier); | 945 | input_engine->PreSetController(identifier); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0cd87a48f..fee510f7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { | |||
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | void EmitSetSampleMask(EmitContext& ctx, Id value) { | 475 | void EmitSetSampleMask(EmitContext& ctx, Id value) { |
| 476 | ctx.OpStore(ctx.sample_mask, value); | 476 | const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)}; |
| 477 | ctx.OpStore(pointer, value); | ||
| 477 | } | 478 | } |
| 478 | 479 | ||
| 479 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | 480 | void EmitSetFragDepth(EmitContext& ctx, Id value) { |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index d48d4860e..47739794f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { | |||
| 1572 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); | 1572 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); |
| 1573 | } | 1573 | } |
| 1574 | if (info.stores_sample_mask) { | 1574 | if (info.stores_sample_mask) { |
| 1575 | sample_mask = DefineOutput(*this, U32[1], std::nullopt); | 1575 | const Id array_type{TypeArray(U32[1], Const(1U))}; |
| 1576 | sample_mask = DefineOutput(*this, array_type, std::nullopt); | ||
| 1576 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); | 1577 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); |
| 1577 | } | 1578 | } |
| 1578 | break; | 1579 | break; |
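SampleMask is an array builtin in SPIR-V (each 32-bit element covers 32 samples), so the fragment output is now declared as a one-element array and written through an access chain instead of being stored directly. The two hunks above fit together roughly like this (condensed; all names are taken from the hunks themselves):

    // Declaration side (spirv_emit_context.cpp): a uint[1] output decorated
    // as the SampleMask builtin.
    const Id array_type{TypeArray(U32[1], Const(1U))};
    sample_mask = DefineOutput(*this, array_type, std::nullopt);
    Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);

    // Store side (emit_spirv_context_get_set.cpp): index element 0, then write.
    const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
    ctx.OpStore(pointer, value);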
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 39b774c98..1e158f375 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -15,7 +15,7 @@ add_executable(tests | |||
| 15 | core/core_timing.cpp | 15 | core/core_timing.cpp |
| 16 | core/internal_network/network.cpp | 16 | core/internal_network/network.cpp |
| 17 | precompiled_headers.h | 17 | precompiled_headers.h |
| 18 | video_core/buffer_base.cpp | 18 | video_core/memory_tracker.cpp |
| 19 | input_common/calibration_configuration_job.cpp | 19 | input_common/calibration_configuration_job.cpp |
| 20 | ) | 20 | ) |
| 21 | 21 | ||
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp deleted file mode 100644 index 734dbf4b6..000000000 --- a/src/tests/video_core/buffer_base.cpp +++ /dev/null | |||
| @@ -1,549 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include <catch2/catch_test_macros.hpp> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | using VideoCommon::BufferBase; | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | |||
| 20 | constexpr VAddr c = 0x1328914000; | ||
| 21 | |||
| 22 | class RasterizerInterface { | ||
| 23 | public: | ||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 25 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 26 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 27 | Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 29 | int& value = page_table[page]; | ||
| 30 | value += delta; | ||
| 31 | if (value < 0) { | ||
| 32 | throw std::logic_error{"negative page"}; | ||
| 33 | } | ||
| 34 | if (value == 0) { | ||
| 35 | page_table.erase(page); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 41 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 42 | return it == page_table.end() ? 0 : it->second; | ||
| 43 | } | ||
| 44 | |||
| 45 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 46 | unsigned count = 0; | ||
| 47 | for (const auto& [index, value] : page_table) { | ||
| 48 | count += value; | ||
| 49 | } | ||
| 50 | return count; | ||
| 51 | } | ||
| 52 | |||
| 53 | private: | ||
| 54 | std::unordered_map<u64, int> page_table; | ||
| 55 | }; | ||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | TEST_CASE("BufferBase: Small buffer", "[video_core]") { | ||
| 59 | RasterizerInterface rasterizer; | ||
| 60 | BufferBase buffer(rasterizer, c, WORD); | ||
| 61 | REQUIRE(rasterizer.Count() == 0); | ||
| 62 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 63 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 64 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 65 | |||
| 66 | buffer.MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 67 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2}); | ||
| 68 | } | ||
| 69 | |||
| 70 | TEST_CASE("BufferBase: Large buffer", "[video_core]") { | ||
| 71 | RasterizerInterface rasterizer; | ||
| 72 | BufferBase buffer(rasterizer, c, WORD * 32); | ||
| 73 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 74 | buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 75 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2}); | ||
| 76 | REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8}); | ||
| 77 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE}); | ||
| 78 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE}); | ||
| 79 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 80 | Range{WORD * 3 + PAGE * 63, WORD * 4}); | ||
| 81 | |||
| 82 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 83 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 84 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 85 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9}); | ||
| 86 | |||
| 87 | buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 88 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 89 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7}); | ||
| 90 | |||
| 91 | buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 92 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32}); | ||
| 93 | |||
| 94 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 95 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 96 | |||
| 97 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 98 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 99 | } | ||
| 100 | |||
| 101 | TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") { | ||
| 102 | RasterizerInterface rasterizer; | ||
| 103 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 104 | REQUIRE(rasterizer.Count() == 0); | ||
| 105 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 106 | REQUIRE(rasterizer.Count() == 1); | ||
| 107 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 108 | REQUIRE(rasterizer.Count() == 0); | ||
| 109 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 110 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 111 | REQUIRE(rasterizer.Count() == 2); | ||
| 112 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 113 | REQUIRE(rasterizer.Count() == 0); | ||
| 114 | } | ||
| 115 | |||
| 116 | TEST_CASE("BufferBase: Basic range", "[video_core]") { | ||
| 117 | RasterizerInterface rasterizer; | ||
| 118 | BufferBase buffer(rasterizer, c, WORD); | ||
| 119 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 120 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 121 | int num = 0; | ||
| 122 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 123 | REQUIRE(offset == 0U); | ||
| 124 | REQUIRE(size == PAGE); | ||
| 125 | ++num; | ||
| 126 | }); | ||
| 127 | REQUIRE(num == 1U); | ||
| 128 | } | ||
| 129 | |||
| 130 | TEST_CASE("BufferBase: Border upload", "[video_core]") { | ||
| 131 | RasterizerInterface rasterizer; | ||
| 132 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 133 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 134 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 135 | buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 136 | REQUIRE(offset == WORD - PAGE); | ||
| 137 | REQUIRE(size == PAGE * 2); | ||
| 138 | }); | ||
| 139 | } | ||
| 140 | |||
| 141 | TEST_CASE("BufferBase: Border upload range", "[video_core]") { | ||
| 142 | RasterizerInterface rasterizer; | ||
| 143 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 144 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 145 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 146 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 147 | REQUIRE(offset == WORD - PAGE); | ||
| 148 | REQUIRE(size == PAGE * 2); | ||
| 149 | }); | ||
| 150 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 151 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 152 | REQUIRE(offset == WORD - PAGE); | ||
| 153 | REQUIRE(size == PAGE); | ||
| 154 | }); | ||
| 155 | buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 156 | REQUIRE(offset == WORD); | ||
| 157 | REQUIRE(size == PAGE); | ||
| 158 | }); | ||
| 159 | } | ||
| 160 | |||
| 161 | TEST_CASE("BufferBase: Border upload partial range", "[video_core]") { | ||
| 162 | RasterizerInterface rasterizer; | ||
| 163 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 164 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 165 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 166 | buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 167 | REQUIRE(offset == WORD - PAGE); | ||
| 168 | REQUIRE(size == PAGE * 2); | ||
| 169 | }); | ||
| 170 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 171 | buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 172 | REQUIRE(offset == WORD - PAGE); | ||
| 173 | REQUIRE(size == PAGE); | ||
| 174 | }); | ||
| 175 | buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 176 | REQUIRE(offset == WORD); | ||
| 177 | REQUIRE(size == PAGE); | ||
| 178 | }); | ||
| 179 | } | ||
| 180 | |||
| 181 | TEST_CASE("BufferBase: Partial word uploads", "[video_core]") { | ||
| 182 | RasterizerInterface rasterizer; | ||
| 183 | BufferBase buffer(rasterizer, c, 0x9d000); | ||
| 184 | int num = 0; | ||
| 185 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 186 | REQUIRE(offset == 0U); | ||
| 187 | REQUIRE(size == WORD); | ||
| 188 | ++num; | ||
| 189 | }); | ||
| 190 | REQUIRE(num == 1); | ||
| 191 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 192 | REQUIRE(offset == WORD); | ||
| 193 | REQUIRE(size == WORD); | ||
| 194 | ++num; | ||
| 195 | }); | ||
| 196 | REQUIRE(num == 2); | ||
| 197 | buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 198 | REQUIRE(offset == WORD * 2); | ||
| 199 | REQUIRE(size == PAGE * 0x1d); | ||
| 200 | ++num; | ||
| 201 | }); | ||
| 202 | REQUIRE(num == 3); | ||
| 203 | } | ||
| 204 | |||
| 205 | TEST_CASE("BufferBase: Partial page upload", "[video_core]") { | ||
| 206 | RasterizerInterface rasterizer; | ||
| 207 | BufferBase buffer(rasterizer, c, WORD); | ||
| 208 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 209 | int num = 0; | ||
| 210 | buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 211 | buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 212 | buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 213 | REQUIRE(offset == PAGE * 2); | ||
| 214 | REQUIRE(size == PAGE); | ||
| 215 | ++num; | ||
| 216 | }); | ||
| 217 | REQUIRE(num == 1); | ||
| 218 | buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 219 | REQUIRE(offset == PAGE * 9); | ||
| 220 | REQUIRE(size == PAGE); | ||
| 221 | ++num; | ||
| 222 | }); | ||
| 223 | REQUIRE(num == 2); | ||
| 224 | } | ||
| 225 | |||
| 226 | TEST_CASE("BufferBase: Partial page upload with multiple words on the right") { | ||
| 227 | RasterizerInterface rasterizer; | ||
| 228 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 229 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 230 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 231 | int num = 0; | ||
| 232 | buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 233 | REQUIRE(offset == PAGE * 13); | ||
| 234 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 235 | ++num; | ||
| 236 | }); | ||
| 237 | REQUIRE(num == 1); | ||
| 238 | buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 239 | REQUIRE(offset == WORD * 7 + PAGE * 10); | ||
| 240 | REQUIRE(size == PAGE * 3); | ||
| 241 | ++num; | ||
| 242 | }); | ||
| 243 | REQUIRE(num == 2); | ||
| 244 | } | ||
| 245 | |||
| 246 | TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 247 | RasterizerInterface rasterizer; | ||
| 248 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 249 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 250 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 251 | int num = 0; | ||
| 252 | buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 253 | REQUIRE(offset == PAGE * 16); | ||
| 254 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 255 | ++num; | ||
| 256 | }); | ||
| 257 | REQUIRE(num == 1); | ||
| 258 | buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 259 | REQUIRE(offset == PAGE * 13); | ||
| 260 | REQUIRE(size == PAGE * 3); | ||
| 261 | ++num; | ||
| 262 | }); | ||
| 263 | REQUIRE(num == 2); | ||
| 264 | } | ||
| 265 | |||
| 266 | TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 267 | RasterizerInterface rasterizer; | ||
| 268 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 269 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 270 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 271 | int num = 0; | ||
| 272 | buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 273 | REQUIRE(offset == PAGE * 16); | ||
| 274 | REQUIRE(size == WORD); | ||
| 275 | ++num; | ||
| 276 | }); | ||
| 277 | REQUIRE(num == 1); | ||
| 278 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 279 | REQUIRE(offset == PAGE * 13); | ||
| 280 | REQUIRE(size == PAGE * 3); | ||
| 281 | ++num; | ||
| 282 | }); | ||
| 283 | REQUIRE(num == 2); | ||
| 284 | buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 285 | REQUIRE(offset == WORD + PAGE * 16); | ||
| 286 | REQUIRE(size == PAGE * 73); | ||
| 287 | ++num; | ||
| 288 | }); | ||
| 289 | REQUIRE(num == 3); | ||
| 290 | } | ||
| 291 | |||
| 292 | TEST_CASE("BufferBase: Empty right bits", "[video_core]") { | ||
| 293 | RasterizerInterface rasterizer; | ||
| 294 | BufferBase buffer(rasterizer, c, WORD * 2048); | ||
| 295 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 296 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 297 | buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 298 | REQUIRE(offset == WORD - PAGE); | ||
| 299 | REQUIRE(size == PAGE * 2); | ||
| 300 | }); | ||
| 301 | } | ||
| 302 | |||
| 303 | TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") { | ||
| 304 | RasterizerInterface rasterizer; | ||
| 305 | BufferBase buffer(rasterizer, c, WORD); | ||
| 306 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 307 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 308 | int num = 0; | ||
| 309 | buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 310 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 311 | buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 312 | REQUIRE(num == 0); | ||
| 313 | buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 314 | REQUIRE(num == 1); | ||
| 315 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 316 | REQUIRE(rasterizer.Count() == 0); | ||
| 317 | } | ||
| 318 | |||
| 319 | TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") { | ||
| 320 | RasterizerInterface rasterizer; | ||
| 321 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 322 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 323 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 324 | REQUIRE(rasterizer.Count() == 0); | ||
| 325 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 326 | REQUIRE(rasterizer.Count() == 1); | ||
| 327 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2)); | ||
| 328 | buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2); | ||
| 329 | buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2); | ||
| 330 | REQUIRE(rasterizer.Count() == 2); | ||
| 331 | } | ||
| 332 | |||
| 333 | TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") { | ||
| 334 | RasterizerInterface rasterizer; | ||
| 335 | BufferBase buffer(rasterizer, c, 0x310720); | ||
| 336 | buffer.UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 337 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 338 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 339 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 340 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 341 | } | ||
| 342 | |||
| 343 | TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") { | ||
| 344 | RasterizerInterface rasterizer; | ||
| 345 | BufferBase buffer(rasterizer, c, WORD); | ||
| 346 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 347 | buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 348 | buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 349 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 350 | static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3}; | ||
| 351 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 352 | REQUIRE(offset == offsets.at(i)); | ||
| 353 | REQUIRE(size == sizes.at(i)); | ||
| 354 | ++i; | ||
| 355 | }); | ||
| 356 | } | ||
| 357 | |||
| 358 | TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") { | ||
| 359 | RasterizerInterface rasterizer; | ||
| 360 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 361 | buffer.UnmarkRegionAsCpuModified(c, 0x22000); | ||
| 362 | REQUIRE(rasterizer.Count() == 0x22); | ||
| 363 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 364 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 365 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 366 | static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21}; | ||
| 367 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE}; | ||
| 368 | REQUIRE(offset == offsets.at(i)); | ||
| 369 | REQUIRE(size == sizes.at(i)); | ||
| 370 | ++i; | ||
| 371 | }); | ||
| 372 | } | ||
| 373 | |||
| 374 | TEST_CASE("BufferBase: Single page modified range", "[video_core]") { | ||
| 375 | RasterizerInterface rasterizer; | ||
| 376 | BufferBase buffer(rasterizer, c, PAGE); | ||
| 377 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 378 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 379 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("BufferBase: Two page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 385 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 386 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 387 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2)); | ||
| 388 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 389 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 390 | } | ||
| 391 | |||
| 392 | TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") { | ||
| 393 | for (int offset = 0; offset < 4; ++offset) { | ||
| 394 | const VAddr address = c + WORD * offset; | ||
| 395 | RasterizerInterface rasterizer; | ||
| 396 | BufferBase buffer(rasterizer, address, WORD * 4); | ||
| 397 | REQUIRE(buffer.IsRegionCpuModified(address, PAGE)); | ||
| 398 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 399 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 400 | |||
| 401 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 402 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD)); | ||
| 403 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 404 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 405 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 406 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 407 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 408 | |||
| 409 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 410 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 411 | } | ||
| 412 | } | ||
| 413 | |||
| 414 | TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") { | ||
| 415 | RasterizerInterface rasterizer; | ||
| 416 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 417 | buffer.UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 418 | REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 419 | |||
| 420 | buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 421 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 422 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 423 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 424 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 425 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 426 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 427 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 428 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 429 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 430 | } | ||
| 431 | |||
| 432 | TEST_CASE("BufferBase: Out of bounds region query") { | ||
| 433 | RasterizerInterface rasterizer; | ||
| 434 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 435 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE)); | ||
| 436 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE)); | ||
| 437 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE)); | ||
| 438 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64)); | ||
| 439 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64)); | ||
| 440 | } | ||
| 441 | |||
| 442 | TEST_CASE("BufferBase: Wrap word regions") { | ||
| 443 | RasterizerInterface rasterizer; | ||
| 444 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 445 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 446 | buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 447 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2)); | ||
| 448 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 449 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 450 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 451 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 452 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 453 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 454 | |||
| 455 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 457 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 458 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 459 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 460 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 461 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 462 | } | ||
| 463 | |||
| 464 | TEST_CASE("BufferBase: Unaligned page region query") { | ||
| 465 | RasterizerInterface rasterizer; | ||
| 466 | BufferBase buffer(rasterizer, c, WORD); | ||
| 467 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 468 | buffer.MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 469 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 470 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 471 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); | ||
| 472 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); | ||
| 473 | } | ||
| 474 | |||
| 475 | TEST_CASE("BufferBase: Cached write") { | ||
| 476 | RasterizerInterface rasterizer; | ||
| 477 | BufferBase buffer(rasterizer, c, WORD); | ||
| 478 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 479 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 480 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | buffer.FlushCachedWrites(); | ||
| 482 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 483 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 484 | REQUIRE(rasterizer.Count() == 0); | ||
| 485 | } | ||
| 486 | |||
| 487 | TEST_CASE("BufferBase: Multiple cached write") { | ||
| 488 | RasterizerInterface rasterizer; | ||
| 489 | BufferBase buffer(rasterizer, c, WORD); | ||
| 490 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 491 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 492 | buffer.CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 493 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 494 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 495 | buffer.FlushCachedWrites(); | ||
| 496 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 497 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 498 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 499 | REQUIRE(rasterizer.Count() == 0); | ||
| 500 | } | ||
| 501 | |||
| 502 | TEST_CASE("BufferBase: Cached write unmarked") { | ||
| 503 | RasterizerInterface rasterizer; | ||
| 504 | BufferBase buffer(rasterizer, c, WORD); | ||
| 505 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 506 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 507 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 508 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | buffer.FlushCachedWrites(); | ||
| 510 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 511 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 512 | REQUIRE(rasterizer.Count() == 0); | ||
| 513 | } | ||
| 514 | |||
| 515 | TEST_CASE("BufferBase: Cached write iterated") { | ||
| 516 | RasterizerInterface rasterizer; | ||
| 517 | BufferBase buffer(rasterizer, c, WORD); | ||
| 518 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 519 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 520 | int num = 0; | ||
| 521 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 522 | REQUIRE(num == 0); | ||
| 523 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | buffer.FlushCachedWrites(); | ||
| 525 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 526 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 527 | REQUIRE(rasterizer.Count() == 0); | ||
| 528 | } | ||
| 529 | |||
| 530 | TEST_CASE("BufferBase: Cached write downloads") { | ||
| 531 | RasterizerInterface rasterizer; | ||
| 532 | BufferBase buffer(rasterizer, c, WORD); | ||
| 533 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 534 | REQUIRE(rasterizer.Count() == 64); | ||
| 535 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 536 | REQUIRE(rasterizer.Count() == 63); | ||
| 537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 538 | int num = 0; | ||
| 539 | buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | buffer.FlushCachedWrites(); | ||
| 545 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } | ||
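A quick note on the page counts asserted in the cached-write tests above: assuming the BufferBase tests use the same PAGE and WORD constants as the new memory_tracker tests below (a 4 KiB page and a 64-page tracking word), unmarking a full WORD puts 64 pages under rasterizer tracking, a cached CPU write over one page releases exactly one of them, and re-marking the whole WORD as CPU modified releases the rest. A minimal stand-alone sketch of that arithmetic (the names mirror the test constants, not a yuzu API):

#include <cstdint>

constexpr std::uint64_t PAGE = 4096;        // bytes per tracked page, as assumed above
constexpr std::uint64_t WORD = PAGE * 64;   // one 64-bit tracking word covers 64 pages
static_assert(WORD / PAGE == 64);           // UnmarkRegionAsCpuModified(c, WORD) -> Count() == 64
static_assert((WORD - PAGE) / PAGE == 63);  // one page cached-written -> Count() == 63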
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp new file mode 100644 index 000000000..3981907a2 --- /dev/null +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -0,0 +1,549 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <memory> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include <catch2/catch_test_macros.hpp> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | constexpr u64 HIGH_PAGE_BITS = 22; | ||
| 20 | constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS; | ||
| 21 | |||
| 22 | constexpr VAddr c = 16 * HIGH_PAGE_SIZE; | ||
| 23 | |||
| 24 | class RasterizerInterface { | ||
| 25 | public: | ||
| 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 29 | Core::Memory::YUZU_PAGEBITS}; | ||
| 30 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 31 | int& value = page_table[page]; | ||
| 32 | value += delta; | ||
| 33 | if (value < 0) { | ||
| 34 | throw std::logic_error{"negative page"}; | ||
| 35 | } | ||
| 36 | if (value == 0) { | ||
| 37 | page_table.erase(page); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 43 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 44 | return it == page_table.end() ? 0 : it->second; | ||
| 45 | } | ||
| 46 | |||
| 47 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 48 | unsigned count = 0; | ||
| 49 | for (const auto& [index, value] : page_table) { | ||
| 50 | count += value; | ||
| 51 | } | ||
| 52 | return count; | ||
| 53 | } | ||
| 54 | |||
| 55 | private: | ||
| 56 | std::unordered_map<u64, int> page_table; | ||
| 57 | }; | ||
| 58 | } // Anonymous namespace | ||
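// --- Editorial note, not part of memory_tracker.cpp: the mock above reference-counts host
// pages at Core::Memory::YUZU_PAGEBITS granularity (assumed to be the usual 4 KiB page,
// matching PAGE). A worked example of the bookkeeping the cases below rely on:
//   UpdatePagesCachedCount(c, 2 * PAGE, +1)  -> two page entries, each with count 1
//   UpdatePagesCachedCount(c, 2 * PAGE, -1)  -> both entries erased, so Count() == 0 again
// Driving any page's count below zero throws std::logic_error, which is what the
// REQUIRE_NOTHROW assertions in the "Out of bound ranges" cases would surface.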
| 59 | |||
| 60 | using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>; | ||
| 61 | |||
| 62 | TEST_CASE("MemoryTracker: Small region", "[video_core]") { | ||
| 63 | RasterizerInterface rasterizer; | ||
| 64 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 65 | REQUIRE(rasterizer.Count() == 0); | ||
| 66 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 67 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 68 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 69 | |||
| 70 | memory_track->MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 71 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2}); | ||
| 72 | } | ||
| 73 | |||
| 74 | TEST_CASE("MemoryTracker: Large region", "[video_core]") { | ||
| 75 | RasterizerInterface rasterizer; | ||
| 76 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 77 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 78 | memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 79 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) == | ||
| 80 | Range{c + PAGE, c + WORD + PAGE * 2}); | ||
| 81 | REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == | ||
| 82 | Range{c + PAGE * 2, c + PAGE * 8}); | ||
| 83 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE}); | ||
| 84 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) == | ||
| 85 | Range{c + WORD * 4, c + WORD * 4 + PAGE}); | ||
| 86 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 87 | Range{c + WORD * 3 + PAGE * 63, c + WORD * 4}); | ||
| 88 | |||
| 89 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 90 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 91 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 92 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9}); | ||
| 93 | |||
| 94 | memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 95 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 96 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7}); | ||
| 97 | |||
| 98 | memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 99 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32}); | ||
| 100 | |||
| 101 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 102 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 103 | |||
| 104 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 105 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 106 | } | ||
| 107 | |||
| 108 | TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") { | ||
| 109 | RasterizerInterface rasterizer; | ||
| 110 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 111 | REQUIRE(rasterizer.Count() == 0); | ||
| 112 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 113 | REQUIRE(rasterizer.Count() == 1); | ||
| 114 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 115 | REQUIRE(rasterizer.Count() == 0); | ||
| 116 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 117 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 118 | REQUIRE(rasterizer.Count() == 2); | ||
| 119 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 120 | REQUIRE(rasterizer.Count() == 0); | ||
| 121 | } | ||
| 122 | |||
| 123 | TEST_CASE("MemoryTracker: Basic range", "[video_core]") { | ||
| 124 | RasterizerInterface rasterizer; | ||
| 125 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 126 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 127 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 128 | int num = 0; | ||
| 129 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 130 | REQUIRE(offset == c); | ||
| 131 | REQUIRE(size == PAGE); | ||
| 132 | ++num; | ||
| 133 | }); | ||
| 134 | REQUIRE(num == 1U); | ||
| 135 | } | ||
| 136 | |||
| 137 | TEST_CASE("MemoryTracker: Border upload", "[video_core]") { | ||
| 138 | RasterizerInterface rasterizer; | ||
| 139 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 140 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 141 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 142 | memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 143 | REQUIRE(offset == c + WORD - PAGE); | ||
| 144 | REQUIRE(size == PAGE * 2); | ||
| 145 | }); | ||
| 146 | } | ||
| 147 | |||
| 148 | TEST_CASE("MemoryTracker: Border upload range", "[video_core]") { | ||
| 149 | RasterizerInterface rasterizer; | ||
| 150 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 151 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 152 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 153 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 154 | REQUIRE(offset == c + WORD - PAGE); | ||
| 155 | REQUIRE(size == PAGE * 2); | ||
| 156 | }); | ||
| 157 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 158 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 159 | REQUIRE(offset == c + WORD - PAGE); | ||
| 160 | REQUIRE(size == PAGE); | ||
| 161 | }); | ||
| 162 | memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 163 | REQUIRE(offset == c + WORD); | ||
| 164 | REQUIRE(size == PAGE); | ||
| 165 | }); | ||
| 166 | } | ||
| 167 | |||
| 168 | TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") { | ||
| 169 | RasterizerInterface rasterizer; | ||
| 170 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 171 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 172 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 173 | memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 174 | REQUIRE(offset == c + WORD - PAGE); | ||
| 175 | REQUIRE(size == PAGE * 2); | ||
| 176 | }); | ||
| 177 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 178 | memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 179 | REQUIRE(offset == c + WORD - PAGE); | ||
| 180 | REQUIRE(size == PAGE); | ||
| 181 | }); | ||
| 182 | memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 183 | REQUIRE(offset == c + WORD); | ||
| 184 | REQUIRE(size == PAGE); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") { | ||
| 189 | RasterizerInterface rasterizer; | ||
| 190 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 191 | int num = 0; | ||
| 192 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 193 | REQUIRE(offset == c); | ||
| 194 | REQUIRE(size == WORD); | ||
| 195 | ++num; | ||
| 196 | }); | ||
| 197 | REQUIRE(num == 1); | ||
| 198 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 199 | REQUIRE(offset == c + WORD); | ||
| 200 | REQUIRE(size == WORD); | ||
| 201 | ++num; | ||
| 202 | }); | ||
| 203 | REQUIRE(num == 2); | ||
| 204 | memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 205 | REQUIRE(offset == c + WORD * 2); | ||
| 206 | REQUIRE(size == PAGE * 0x1d); | ||
| 207 | ++num; | ||
| 208 | }); | ||
| 209 | REQUIRE(num == 3); | ||
| 210 | } | ||
| 211 | |||
| 212 | TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") { | ||
| 213 | RasterizerInterface rasterizer; | ||
| 214 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 215 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 216 | int num = 0; | ||
| 217 | memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 218 | memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 219 | memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 220 | REQUIRE(offset == c + PAGE * 2); | ||
| 221 | REQUIRE(size == PAGE); | ||
| 222 | ++num; | ||
| 223 | }); | ||
| 224 | REQUIRE(num == 1); | ||
| 225 | memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 226 | REQUIRE(offset == c + PAGE * 9); | ||
| 227 | REQUIRE(size == PAGE); | ||
| 228 | ++num; | ||
| 229 | }); | ||
| 230 | REQUIRE(num == 2); | ||
| 231 | } | ||
| 232 | |||
| 233 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right") { | ||
| 234 | RasterizerInterface rasterizer; | ||
| 235 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 236 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 9); | ||
| 237 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 238 | int num = 0; | ||
| 239 | memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 240 | REQUIRE(offset == c + PAGE * 13); | ||
| 241 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 242 | ++num; | ||
| 243 | }); | ||
| 244 | REQUIRE(num == 1); | ||
| 245 | memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 246 | REQUIRE(offset == c + WORD * 7 + PAGE * 10); | ||
| 247 | REQUIRE(size == PAGE * 3); | ||
| 248 | ++num; | ||
| 249 | }); | ||
| 250 | REQUIRE(num == 2); | ||
| 251 | } | ||
| 252 | |||
| 253 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 254 | RasterizerInterface rasterizer; | ||
| 255 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 256 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 257 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 258 | int num = 0; | ||
| 259 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 260 | REQUIRE(offset == c + PAGE * 16); | ||
| 261 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 262 | ++num; | ||
| 263 | }); | ||
| 264 | REQUIRE(num == 1); | ||
| 265 | memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 266 | REQUIRE(offset == c + PAGE * 13); | ||
| 267 | REQUIRE(size == PAGE * 3); | ||
| 268 | ++num; | ||
| 269 | }); | ||
| 270 | REQUIRE(num == 2); | ||
| 271 | } | ||
| 272 | |||
| 273 | TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 274 | RasterizerInterface rasterizer; | ||
| 275 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 276 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 277 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 278 | int num = 0; | ||
| 279 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 280 | REQUIRE(offset == c + PAGE * 16); | ||
| 281 | REQUIRE(size == WORD); | ||
| 282 | ++num; | ||
| 283 | }); | ||
| 284 | REQUIRE(num == 1); | ||
| 285 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 286 | REQUIRE(offset == c + PAGE * 13); | ||
| 287 | REQUIRE(size == PAGE * 3); | ||
| 288 | ++num; | ||
| 289 | }); | ||
| 290 | REQUIRE(num == 2); | ||
| 291 | memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 292 | REQUIRE(offset == c + WORD + PAGE * 16); | ||
| 293 | REQUIRE(size == PAGE * 73); | ||
| 294 | ++num; | ||
| 295 | }); | ||
| 296 | REQUIRE(num == 3); | ||
| 297 | } | ||
| 298 | |||
| 299 | TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") { | ||
| 300 | RasterizerInterface rasterizer; | ||
| 301 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 302 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 303 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 304 | memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 305 | REQUIRE(offset == c + WORD - PAGE); | ||
| 306 | REQUIRE(size == PAGE * 2); | ||
| 307 | }); | ||
| 308 | } | ||
| 309 | |||
| 310 | TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") { | ||
| 311 | RasterizerInterface rasterizer; | ||
| 312 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 313 | memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD); | ||
| 314 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 315 | REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE); | ||
| 316 | int num = 0; | ||
| 317 | memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 318 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 319 | memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 320 | REQUIRE(num == 0); | ||
| 321 | memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 322 | REQUIRE(num == 1); | ||
| 323 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 324 | REQUIRE(rasterizer.Count() == 2 * WORD / PAGE); | ||
| 325 | } | ||
| 326 | |||
| 327 | TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") { | ||
| 328 | RasterizerInterface rasterizer; | ||
| 329 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 330 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 331 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 332 | REQUIRE(rasterizer.Count() == 2); | ||
| 333 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 334 | REQUIRE(rasterizer.Count() == 3); | ||
| 335 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2)); | ||
| 336 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2); | ||
| 337 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2); | ||
| 338 | REQUIRE(rasterizer.Count() == 7); | ||
| 339 | } | ||
| 340 | |||
| 341 | TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") { | ||
| 342 | RasterizerInterface rasterizer; | ||
| 343 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 344 | memory_track->UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 345 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 346 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 347 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 348 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 349 | } | ||
| 350 | |||
| 351 | TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") { | ||
| 352 | RasterizerInterface rasterizer; | ||
| 353 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 354 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 355 | memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 356 | memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 357 | memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 358 | static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3}; | ||
| 359 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 360 | REQUIRE(offset == offsets.at(i)); | ||
| 361 | REQUIRE(size == sizes.at(i)); | ||
| 362 | ++i; | ||
| 363 | }); | ||
| 364 | } | ||
| 365 | |||
| 366 | TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") { | ||
| 367 | RasterizerInterface rasterizer; | ||
| 368 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 369 | memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23); | ||
| 370 | REQUIRE(rasterizer.Count() == 0x23); | ||
| 371 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 372 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 373 | memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable { | ||
| 374 | static constexpr std::array<u64, 3> offsets{c + PAGE * 0x1B, c + PAGE * 0x21}; | ||
| 375 | static constexpr std::array<u64, 3> sizes{PAGE, PAGE}; | ||
| 376 | REQUIRE(offset == offsets.at(i)); | ||
| 377 | REQUIRE(size == sizes.at(i)); | ||
| 378 | ++i; | ||
| 379 | }); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 385 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 386 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 387 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 388 | } | ||
| 389 | |||
| 390 | TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") { | ||
| 391 | RasterizerInterface rasterizer; | ||
| 392 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 393 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 394 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 395 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2)); | ||
| 396 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 397 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 398 | } | ||
| 399 | |||
| 400 | TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") { | ||
| 401 | for (int offset = 0; offset < 4; ++offset) { | ||
| 402 | const VAddr address = c + WORD * offset; | ||
| 403 | RasterizerInterface rasterizer; | ||
| 404 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 405 | REQUIRE(memory_track->IsRegionCpuModified(address, PAGE)); | ||
| 406 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 407 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 408 | |||
| 409 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 410 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD)); | ||
| 411 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 412 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 413 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 414 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 415 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 416 | |||
| 417 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 418 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") { | ||
| 423 | RasterizerInterface rasterizer; | ||
| 424 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 425 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 426 | REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 427 | |||
| 428 | memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 429 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 430 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 431 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 432 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 433 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 434 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 435 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 436 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 437 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 438 | } | ||
| 439 | |||
| 440 | TEST_CASE("MemoryTracker: Wrap word regions") { | ||
| 441 | RasterizerInterface rasterizer; | ||
| 442 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 443 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 444 | memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 445 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2)); | ||
| 446 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 447 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 448 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 449 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 450 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 451 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 452 | |||
| 453 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 454 | memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 455 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 457 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 458 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 459 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 460 | } | ||
| 461 | |||
| 462 | TEST_CASE("MemoryTracker: Unaligned page region query") { | ||
| 463 | RasterizerInterface rasterizer; | ||
| 464 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 465 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 466 | memory_track->MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 467 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 468 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 469 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000)); | ||
| 470 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1)); | ||
| 471 | } | ||
| 472 | |||
| 473 | TEST_CASE("MemoryTracker: Cached write") { | ||
| 474 | RasterizerInterface rasterizer; | ||
| 475 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 476 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 477 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 478 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 479 | memory_track->FlushCachedWrites(); | ||
| 480 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 482 | REQUIRE(rasterizer.Count() == 0); | ||
| 483 | } | ||
| 484 | |||
| 485 | TEST_CASE("MemoryTracker: Multiple cached write") { | ||
| 486 | RasterizerInterface rasterizer; | ||
| 487 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 488 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 489 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 490 | memory_track->CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 491 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 492 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 493 | memory_track->FlushCachedWrites(); | ||
| 494 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 495 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 496 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 497 | REQUIRE(rasterizer.Count() == 0); | ||
| 498 | } | ||
| 499 | |||
| 500 | TEST_CASE("MemoryTracker: Cached write unmarked") { | ||
| 501 | RasterizerInterface rasterizer; | ||
| 502 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 503 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 504 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 505 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 506 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 507 | memory_track->FlushCachedWrites(); | ||
| 508 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 510 | REQUIRE(rasterizer.Count() == 0); | ||
| 511 | } | ||
| 512 | |||
| 513 | TEST_CASE("MemoryTracker: Cached write iterated") { | ||
| 514 | RasterizerInterface rasterizer; | ||
| 515 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 516 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 517 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 518 | int num = 0; | ||
| 519 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 520 | REQUIRE(num == 0); | ||
| 521 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 522 | memory_track->FlushCachedWrites(); | ||
| 523 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 525 | REQUIRE(rasterizer.Count() == 0); | ||
| 526 | } | ||
| 527 | |||
| 528 | TEST_CASE("MemoryTracker: Cached write downloads") { | ||
| 529 | RasterizerInterface rasterizer; | ||
| 530 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 531 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 532 | REQUIRE(rasterizer.Count() == 64); | ||
| 533 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 534 | REQUIRE(rasterizer.Count() == 63); | ||
| 535 | memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 536 | int num = 0; | ||
| 537 | memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 538 | REQUIRE(num == 1); | ||
| 539 | num = 0; | ||
| 540 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | memory_track->FlushCachedWrites(); | ||
| 545 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } \ No newline at end of file | ||
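For orientation, this is the usage pattern the new test file exercises, written out as a stand-alone sketch. It only uses calls that appear in the tests above and assumes the same RasterizerInterface mock; the helper name Sketch is illustrative, not part of the codebase.

#include "common/common_types.h"
#include "video_core/buffer_cache/memory_tracker_base.h"

// Assumes the RasterizerInterface mock defined at the top of memory_tracker.cpp.
void Sketch(RasterizerInterface& rasterizer) {
    VideoCommon::MemoryTrackerBase<RasterizerInterface> tracker{rasterizer};
    constexpr VAddr base = 16ULL << 22;                  // same base address the tests use
    constexpr u64 page = 4096;
    constexpr u64 word = page * 64;
    tracker.UnmarkRegionAsCpuModified(base, word);       // start tracking one word of pages
    tracker.MarkRegionAsCpuModified(base + page, page);  // dirty a single page again
    tracker.ForEachUploadRange(base, word, [](u64 offset, u64 size) {
        // Visited once with offset == base + page and size == page,
        // analogous to the "Basic range" and "Sparse regions 1" cases.
    });
}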
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e904573d7..a0009a36f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -11,8 +11,11 @@ endif() | |||
| 11 | 11 | ||
| 12 | add_library(video_core STATIC | 12 | add_library(video_core STATIC |
| 13 | buffer_cache/buffer_base.h | 13 | buffer_cache/buffer_base.h |
| 14 | buffer_cache/buffer_cache_base.h | ||
| 14 | buffer_cache/buffer_cache.cpp | 15 | buffer_cache/buffer_cache.cpp |
| 15 | buffer_cache/buffer_cache.h | 16 | buffer_cache/buffer_cache.h |
| 17 | buffer_cache/memory_tracker_base.h | ||
| 18 | buffer_cache/word_manager.h | ||
| 16 | cache_types.h | 19 | cache_types.h |
| 17 | cdma_pusher.cpp | 20 | cdma_pusher.cpp |
| 18 | cdma_pusher.h | 21 | cdma_pusher.h |
| @@ -104,6 +107,7 @@ add_library(video_core STATIC | |||
| 104 | renderer_null/renderer_null.h | 107 | renderer_null/renderer_null.h |
| 105 | renderer_opengl/blit_image.cpp | 108 | renderer_opengl/blit_image.cpp |
| 106 | renderer_opengl/blit_image.h | 109 | renderer_opengl/blit_image.h |
| 110 | renderer_opengl/gl_buffer_cache_base.cpp | ||
| 107 | renderer_opengl/gl_buffer_cache.cpp | 111 | renderer_opengl/gl_buffer_cache.cpp |
| 108 | renderer_opengl/gl_buffer_cache.h | 112 | renderer_opengl/gl_buffer_cache.h |
| 109 | renderer_opengl/gl_compute_pipeline.cpp | 113 | renderer_opengl/gl_compute_pipeline.cpp |
| @@ -154,6 +158,7 @@ add_library(video_core STATIC | |||
| 154 | renderer_vulkan/renderer_vulkan.cpp | 158 | renderer_vulkan/renderer_vulkan.cpp |
| 155 | renderer_vulkan/vk_blit_screen.cpp | 159 | renderer_vulkan/vk_blit_screen.cpp |
| 156 | renderer_vulkan/vk_blit_screen.h | 160 | renderer_vulkan/vk_blit_screen.h |
| 161 | renderer_vulkan/vk_buffer_cache_base.cpp | ||
| 157 | renderer_vulkan/vk_buffer_cache.cpp | 162 | renderer_vulkan/vk_buffer_cache.cpp |
| 158 | renderer_vulkan/vk_buffer_cache.h | 163 | renderer_vulkan/vk_buffer_cache.h |
| 159 | renderer_vulkan/vk_command_pool.cpp | 164 | renderer_vulkan/vk_command_pool.cpp |
| @@ -174,6 +179,8 @@ add_library(video_core STATIC | |||
| 174 | renderer_vulkan/vk_master_semaphore.h | 179 | renderer_vulkan/vk_master_semaphore.h |
| 175 | renderer_vulkan/vk_pipeline_cache.cpp | 180 | renderer_vulkan/vk_pipeline_cache.cpp |
| 176 | renderer_vulkan/vk_pipeline_cache.h | 181 | renderer_vulkan/vk_pipeline_cache.h |
| 182 | renderer_vulkan/vk_present_manager.cpp | ||
| 183 | renderer_vulkan/vk_present_manager.h | ||
| 177 | renderer_vulkan/vk_query_cache.cpp | 184 | renderer_vulkan/vk_query_cache.cpp |
| 178 | renderer_vulkan/vk_query_cache.h | 185 | renderer_vulkan/vk_query_cache.h |
| 179 | renderer_vulkan/vk_rasterizer.cpp | 186 | renderer_vulkan/vk_rasterizer.cpp |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 1b4d63616..9cbd95c4b 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| @@ -11,9 +11,7 @@ | |||
| 11 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 12 | #include "common/common_funcs.h" | 12 | #include "common/common_funcs.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/div_ceil.h" | 14 | #include "video_core/buffer_cache/word_manager.h" |
| 15 | #include "common/settings.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | 15 | ||
| 18 | namespace VideoCommon { | 16 | namespace VideoCommon { |
| 19 | 17 | ||
| @@ -36,116 +34,12 @@ struct NullBufferParams {}; | |||
| 36 | */ | 34 | */ |
| 37 | template <class RasterizerInterface> | 35 | template <class RasterizerInterface> |
| 38 | class BufferBase { | 36 | class BufferBase { |
| 39 | static constexpr u64 PAGES_PER_WORD = 64; | ||
| 40 | static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | ||
| 41 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||
| 42 | |||
| 43 | /// Vector tracking modified pages tightly packed with small vector optimization | ||
| 44 | union WordsArray { | ||
| 45 | /// Returns the pointer to the words state | ||
| 46 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 47 | return is_short ? &stack : heap; | ||
| 48 | } | ||
| 49 | |||
| 50 | /// Returns the pointer to the words state | ||
| 51 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 52 | return is_short ? &stack : heap; | ||
| 53 | } | ||
| 54 | |||
| 55 | u64 stack = 0; ///< Small buffers storage | ||
| 56 | u64* heap; ///< Not-small buffers pointer to the storage | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Words { | ||
| 60 | explicit Words() = default; | ||
| 61 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||
| 62 | if (IsShort()) { | ||
| 63 | cpu.stack = ~u64{0}; | ||
| 64 | gpu.stack = 0; | ||
| 65 | cached_cpu.stack = 0; | ||
| 66 | untracked.stack = ~u64{0}; | ||
| 67 | } else { | ||
| 68 | // Share allocation between CPU and GPU pages and set their default values | ||
| 69 | const size_t num_words = NumWords(); | ||
| 70 | u64* const alloc = new u64[num_words * 4]; | ||
| 71 | cpu.heap = alloc; | ||
| 72 | gpu.heap = alloc + num_words; | ||
| 73 | cached_cpu.heap = alloc + num_words * 2; | ||
| 74 | untracked.heap = alloc + num_words * 3; | ||
| 75 | std::fill_n(cpu.heap, num_words, ~u64{0}); | ||
| 76 | std::fill_n(gpu.heap, num_words, 0); | ||
| 77 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 78 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 79 | } | ||
| 80 | // Clean up trailing bits | ||
| 81 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||
| 82 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||
| 83 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||
| 84 | const u64 last_word = (~u64{0} << shift) >> shift; | ||
| 85 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 86 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 87 | } | ||
| 88 | |||
| 89 | ~Words() { | ||
| 90 | Release(); | ||
| 91 | } | ||
| 92 | |||
| 93 | Words& operator=(Words&& rhs) noexcept { | ||
| 94 | Release(); | ||
| 95 | size_bytes = rhs.size_bytes; | ||
| 96 | cpu = rhs.cpu; | ||
| 97 | gpu = rhs.gpu; | ||
| 98 | cached_cpu = rhs.cached_cpu; | ||
| 99 | untracked = rhs.untracked; | ||
| 100 | rhs.cpu.heap = nullptr; | ||
| 101 | return *this; | ||
| 102 | } | ||
| 103 | |||
| 104 | Words(Words&& rhs) noexcept | ||
| 105 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||
| 106 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 107 | rhs.cpu.heap = nullptr; | ||
| 108 | } | ||
| 109 | |||
| 110 | Words& operator=(const Words&) = delete; | ||
| 111 | Words(const Words&) = delete; | ||
| 112 | |||
| 113 | /// Returns true when the buffer fits in the small vector optimization | ||
| 114 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 115 | return size_bytes <= BYTES_PER_WORD; | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Returns the number of words of the buffer | ||
| 119 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 120 | return Common::DivCeil(size_bytes, BYTES_PER_WORD); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Release buffer resources | ||
| 124 | void Release() { | ||
| 125 | if (!IsShort()) { | ||
| 126 | // The CPU written words array is the base for the heap allocation | ||
| 127 | delete[] cpu.heap; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | u64 size_bytes = 0; | ||
| 132 | WordsArray cpu; | ||
| 133 | WordsArray gpu; | ||
| 134 | WordsArray cached_cpu; | ||
| 135 | WordsArray untracked; | ||
| 136 | }; | ||
| 137 | |||
| 138 | enum class Type { | ||
| 139 | CPU, | ||
| 140 | GPU, | ||
| 141 | CachedCPU, | ||
| 142 | Untracked, | ||
| 143 | }; | ||
| 144 | |||
| 145 | public: | 37 | public: |
| 146 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) | 38 | static constexpr u64 BASE_PAGE_BITS = 16; |
| 147 | : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, | 39 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |
| 148 | words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} | 40 | |
| 41 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | ||
| 42 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | ||
| 149 | 43 | ||
| 150 | explicit BufferBase(NullBufferParams) {} | 44 | explicit BufferBase(NullBufferParams) {} |
| 151 | 45 | ||
| @@ -155,100 +49,6 @@ public: | |||
| 155 | BufferBase& operator=(BufferBase&&) = default; | 49 | BufferBase& operator=(BufferBase&&) = default; |
| 156 | BufferBase(BufferBase&&) = default; | 50 | BufferBase(BufferBase&&) = default; |
| 157 | 51 | ||
| 158 | /// Returns the inclusive CPU modified range in a begin end pair | ||
| 159 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||
| 160 | u64 query_size) const noexcept { | ||
| 161 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 162 | return ModifiedRegion<Type::CPU>(offset, query_size); | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the inclusive GPU modified range in a begin end pair | ||
| 166 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||
| 167 | u64 query_size) const noexcept { | ||
| 168 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 169 | return ModifiedRegion<Type::GPU>(offset, query_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | /// Returns true if a region has been modified from the CPU | ||
| 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 174 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 175 | return IsRegionModified<Type::CPU>(offset, query_size); | ||
| 176 | } | ||
| 177 | |||
| 178 | /// Returns true if a region has been modified from the GPU | ||
| 179 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 180 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 181 | return IsRegionModified<Type::GPU>(offset, query_size); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// Mark region as CPU modified, notifying the rasterizer about this change | ||
| 185 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 186 | ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Unmark region as CPU modified, notifying the rasterizer about this change | ||
| 190 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 191 | ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); | ||
| 192 | } | ||
| 193 | |||
| 194 | /// Mark region as modified from the host GPU | ||
| 195 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 196 | ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); | ||
| 197 | } | ||
| 198 | |||
| 199 | /// Unmark region as modified from the host GPU | ||
| 200 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 201 | ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Mark region as modified from the CPU | ||
| 205 | /// but don't mark it as modified until FlushCachedWrites is called. | ||
| 206 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||
| 207 | flags |= BufferFlagBits::CachedWrites; | ||
| 208 | ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Flushes cached CPU writes, and notifies the rasterizer about the deltas | ||
| 212 | void FlushCachedWrites() noexcept { | ||
| 213 | flags &= ~BufferFlagBits::CachedWrites; | ||
| 214 | const u64 num_words = NumWords(); | ||
| 215 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 216 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 217 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 218 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 219 | const u64 cached_bits = cached_words[word_index]; | ||
| 220 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 221 | untracked_words[word_index] |= cached_bits; | ||
| 222 | cpu_words[word_index] |= cached_bits; | ||
| 223 | if (!Settings::values.use_pessimistic_flushes) { | ||
| 224 | cached_words[word_index] = 0; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | } | ||
| 228 | |||
| 229 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | ||
| 230 | template <typename Func> | ||
| 231 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 232 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func); | ||
| 233 | } | ||
| 234 | |||
| 235 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 236 | template <typename Func> | ||
| 237 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) { | ||
| 238 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func); | ||
| 239 | } | ||
| 240 | |||
| 241 | template <typename Func> | ||
| 242 | void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 243 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func); | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 247 | template <typename Func> | ||
| 248 | void ForEachDownloadRange(Func&& func) { | ||
| 249 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func); | ||
| 250 | } | ||
| 251 | |||
| 252 | /// Mark buffer as picked | 52 | /// Mark buffer as picked |
| 253 | void Pick() noexcept { | 53 | void Pick() noexcept { |
| 254 | flags |= BufferFlagBits::Picked; | 54 | flags |= BufferFlagBits::Picked; |
| @@ -295,11 +95,6 @@ public: | |||
| 295 | return static_cast<u32>(other_cpu_addr - cpu_addr); | 95 | return static_cast<u32>(other_cpu_addr - cpu_addr); |
| 296 | } | 96 | } |
| 297 | 97 | ||
| 298 | /// Returns the size in bytes of the buffer | ||
| 299 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 300 | return words.size_bytes; | ||
| 301 | } | ||
| 302 | |||
| 303 | size_t getLRUID() const noexcept { | 98 | size_t getLRUID() const noexcept { |
| 304 | return lru_id; | 99 | return lru_id; |
| 305 | } | 100 | } |
| @@ -308,305 +103,16 @@ public: | |||
| 308 | lru_id = lru_id_; | 103 | lru_id = lru_id_; |
| 309 | } | 104 | } |
| 310 | 105 | ||
| 311 | private: | 106 | size_t SizeBytes() const { |
| 312 | template <Type type> | 107 | return size_bytes; |
| 313 | u64* Array() noexcept { | ||
| 314 | if constexpr (type == Type::CPU) { | ||
| 315 | return words.cpu.Pointer(IsShort()); | ||
| 316 | } else if constexpr (type == Type::GPU) { | ||
| 317 | return words.gpu.Pointer(IsShort()); | ||
| 318 | } else if constexpr (type == Type::CachedCPU) { | ||
| 319 | return words.cached_cpu.Pointer(IsShort()); | ||
| 320 | } else if constexpr (type == Type::Untracked) { | ||
| 321 | return words.untracked.Pointer(IsShort()); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | template <Type type> | ||
| 326 | const u64* Array() const noexcept { | ||
| 327 | if constexpr (type == Type::CPU) { | ||
| 328 | return words.cpu.Pointer(IsShort()); | ||
| 329 | } else if constexpr (type == Type::GPU) { | ||
| 330 | return words.gpu.Pointer(IsShort()); | ||
| 331 | } else if constexpr (type == Type::CachedCPU) { | ||
| 332 | return words.cached_cpu.Pointer(IsShort()); | ||
| 333 | } else if constexpr (type == Type::Untracked) { | ||
| 334 | return words.untracked.Pointer(IsShort()); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 338 | /** | ||
| 339 | * Change the state of a range of pages | ||
| 340 | * | ||
| 341 | * @param dirty_addr Base address to mark or unmark as modified | ||
| 342 | * @param size Size in bytes to mark or unmark as modified | ||
| 343 | */ | ||
| 344 | template <Type type, bool enable> | ||
| 345 | void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { | ||
| 346 | const s64 difference = dirty_addr - cpu_addr; | ||
| 347 | const u64 offset = std::max<s64>(difference, 0); | ||
| 348 | size += std::min<s64>(difference, 0); | ||
| 349 | if (offset >= SizeBytes() || size < 0) { | ||
| 350 | return; | ||
| 351 | } | ||
| 352 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 353 | u64* const state_words = Array<type>(); | ||
| 354 | const u64 offset_end = std::min(offset + size, SizeBytes()); | ||
| 355 | const u64 begin_page_index = offset / BYTES_PER_PAGE; | ||
| 356 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | ||
| 357 | const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE); | ||
| 358 | const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD); | ||
| 359 | u64 page_index = begin_page_index % PAGES_PER_WORD; | ||
| 360 | u64 word_index = begin_word_index; | ||
| 361 | while (word_index < end_word_index) { | ||
| 362 | const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD; | ||
| 363 | const u64 left_offset = | ||
| 364 | std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD; | ||
| 365 | const u64 right_offset = page_index; | ||
| 366 | u64 bits = ~u64{0}; | ||
| 367 | bits = (bits >> right_offset) << right_offset; | ||
| 368 | bits = (bits << left_offset) >> left_offset; | ||
| 369 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 370 | NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); | ||
| 371 | } | ||
| 372 | if constexpr (enable) { | ||
| 373 | state_words[word_index] |= bits; | ||
| 374 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 375 | untracked_words[word_index] |= bits; | ||
| 376 | } | ||
| 377 | } else { | ||
| 378 | state_words[word_index] &= ~bits; | ||
| 379 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 380 | untracked_words[word_index] &= ~bits; | ||
| 381 | } | ||
| 382 | } | ||
| 383 | page_index = 0; | ||
| 384 | ++word_index; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | /** | ||
| 389 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 390 | * | ||
| 391 | * @param word_index Index to the word to notify to the rasterizer | ||
| 392 | * @param current_bits Current state of the word | ||
| 393 | * @param new_bits New state of the word | ||
| 394 | * | ||
| 395 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 396 | */ | ||
| 397 | template <bool add_to_rasterizer> | ||
| 398 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 399 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 400 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 401 | while (changed_bits != 0) { | ||
| 402 | const int empty_bits = std::countr_zero(changed_bits); | ||
| 403 | addr += empty_bits * BYTES_PER_PAGE; | ||
| 404 | changed_bits >>= empty_bits; | ||
| 405 | |||
| 406 | const u32 continuous_bits = std::countr_one(changed_bits); | ||
| 407 | const u64 size = continuous_bits * BYTES_PER_PAGE; | ||
| 408 | const VAddr begin_addr = addr; | ||
| 409 | addr += size; | ||
| 410 | changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0; | ||
| 411 | rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1); | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | /** | ||
| 416 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||
| 417 | * needed. Call the given function on each turned off range. | ||
| 418 | * | ||
| 419 | * @param query_cpu_range Base CPU address to loop over | ||
| 420 | * @param size Size in bytes of the CPU range to loop over | ||
| 421 | * @param func Function to call for each turned off region | ||
| 422 | */ | ||
| 423 | template <Type type, typename Func> | ||
| 424 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) { | ||
| 425 | static_assert(type != Type::Untracked); | ||
| 426 | |||
| 427 | const s64 difference = query_cpu_range - cpu_addr; | ||
| 428 | const u64 query_begin = std::max<s64>(difference, 0); | ||
| 429 | size += std::min<s64>(difference, 0); | ||
| 430 | if (query_begin >= SizeBytes() || size < 0) { | ||
| 431 | return; | ||
| 432 | } | ||
| 433 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 434 | u64* const state_words = Array<type>(); | ||
| 435 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | ||
| 436 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | ||
| 437 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | ||
| 438 | |||
| 439 | const auto modified = [](u64 word) { return word != 0; }; | ||
| 440 | const auto first_modified_word = std::find_if(words_begin, words_end, modified); | ||
| 441 | if (first_modified_word == words_end) { | ||
| 442 | // Exit early when the buffer is not modified | ||
| 443 | return; | ||
| 444 | } | ||
| 445 | const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified); | ||
| 446 | |||
| 447 | const u64 word_index_begin = std::distance(state_words, first_modified_word); | ||
| 448 | const u64 word_index_end = std::distance(state_words, last_modified_word); | ||
| 449 | |||
| 450 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); | ||
| 451 | const unsigned local_page_end = | ||
| 452 | static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||
| 453 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | ||
| 454 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | ||
| 455 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | ||
| 456 | const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE); | ||
| 457 | const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin); | ||
| 458 | const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end); | ||
| 459 | const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD; | ||
| 460 | const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1; | ||
| 461 | |||
| 462 | u64 page_begin = first_word_page_begin; | ||
| 463 | u64 current_base = 0; | ||
| 464 | u64 current_size = 0; | ||
| 465 | bool on_going = false; | ||
| 466 | for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) { | ||
| 467 | const bool is_last_word = word_index + 1 == word_index_end; | ||
| 468 | const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD; | ||
| 469 | const u64 right_offset = page_begin; | ||
| 470 | const u64 left_offset = PAGES_PER_WORD - page_end; | ||
| 471 | u64 bits = ~u64{0}; | ||
| 472 | bits = (bits >> right_offset) << right_offset; | ||
| 473 | bits = (bits << left_offset) >> left_offset; | ||
| 474 | |||
| 475 | const u64 current_word = state_words[word_index] & bits; | ||
| 476 | if (clear) { | ||
| 477 | state_words[word_index] &= ~bits; | ||
| 478 | } | ||
| 479 | |||
| 480 | if constexpr (type == Type::CPU) { | ||
| 481 | const u64 current_bits = untracked_words[word_index] & bits; | ||
| 482 | untracked_words[word_index] &= ~bits; | ||
| 483 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | ||
| 484 | } | ||
| 485 | // Exclude CPU modified pages when visiting GPU pages | ||
| 486 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||
| 487 | u64 page = page_begin; | ||
| 488 | page_begin = 0; | ||
| 489 | |||
| 490 | while (page < page_end) { | ||
| 491 | const int empty_bits = std::countr_zero(word >> page); | ||
| 492 | if (on_going && empty_bits != 0) { | ||
| 493 | InvokeModifiedRange(func, current_size, current_base); | ||
| 494 | current_size = 0; | ||
| 495 | on_going = false; | ||
| 496 | } | ||
| 497 | if (empty_bits == PAGES_PER_WORD) { | ||
| 498 | break; | ||
| 499 | } | ||
| 500 | page += empty_bits; | ||
| 501 | |||
| 502 | const int continuous_bits = std::countr_one(word >> page); | ||
| 503 | if (!on_going && continuous_bits != 0) { | ||
| 504 | current_base = word_index * PAGES_PER_WORD + page; | ||
| 505 | on_going = true; | ||
| 506 | } | ||
| 507 | current_size += continuous_bits; | ||
| 508 | page += continuous_bits; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | if (on_going && current_size > 0) { | ||
| 512 | InvokeModifiedRange(func, current_size, current_base); | ||
| 513 | } | ||
| 514 | } | ||
| 515 | |||
| 516 | template <typename Func> | ||
| 517 | void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) { | ||
| 518 | const u64 current_size_bytes = current_size * BYTES_PER_PAGE; | ||
| 519 | const u64 offset_begin = current_base * BYTES_PER_PAGE; | ||
| 520 | const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes()); | ||
| 521 | func(offset_begin, offset_end - offset_begin); | ||
| 522 | } | 108 | } |
| 523 | 109 | ||
| 524 | /** | 110 | private: |
| 525 | * Returns true when a region has been modified | ||
| 526 | * | ||
| 527 | * @param offset Offset in bytes from the start of the buffer | ||
| 528 | * @param size Size in bytes of the region to query for modifications | ||
| 529 | */ | ||
| 530 | template <Type type> | ||
| 531 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 532 | static_assert(type != Type::Untracked); | ||
| 533 | |||
| 534 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 535 | const u64* const state_words = Array<type>(); | ||
| 536 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 537 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 538 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 539 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 540 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | ||
| 541 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | ||
| 542 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 543 | const u64 word = state_words[word_index] & ~off_word; | ||
| 544 | if (word == 0) { | ||
| 545 | continue; | ||
| 546 | } | ||
| 547 | const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit); | ||
| 548 | const u64 local_page_end = page_end % PAGES_PER_WORD; | ||
| 549 | const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD; | ||
| 550 | if (((word >> page_index) << page_index) << page_end_shift != 0) { | ||
| 551 | return true; | ||
| 552 | } | ||
| 553 | } | ||
| 554 | return false; | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * Returns a begin end pair with the inclusive modified region | ||
| 559 | * | ||
| 560 | * @param offset Offset in bytes from the start of the buffer | ||
| 561 | * @param size Size in bytes of the region to query for modifications | ||
| 562 | */ | ||
| 563 | template <Type type> | ||
| 564 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 565 | static_assert(type != Type::Untracked); | ||
| 566 | |||
| 567 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 568 | const u64* const state_words = Array<type>(); | ||
| 569 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 570 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 571 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 572 | const u64 page_base = offset / BYTES_PER_PAGE; | ||
| 573 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 574 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 575 | u64 end = 0; | ||
| 576 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | ||
| 577 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 578 | const u64 word = state_words[word_index] & ~off_word; | ||
| 579 | if (word == 0) { | ||
| 580 | continue; | ||
| 581 | } | ||
| 582 | const u64 local_page_begin = std::countr_zero(word); | ||
| 583 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 584 | const u64 page_index = word_index * PAGES_PER_WORD; | ||
| 585 | const u64 page_begin = std::max(page_index + local_page_begin, page_base); | ||
| 586 | const u64 page_end = std::min(page_index + local_page_end, page_limit); | ||
| 587 | begin = std::min(begin, page_begin); | ||
| 588 | end = std::max(end, page_end); | ||
| 589 | } | ||
| 590 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 591 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 592 | } | ||
| 593 | |||
| 594 | /// Returns the number of words of the buffer | ||
| 595 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 596 | return words.NumWords(); | ||
| 597 | } | ||
| 598 | |||
| 599 | /// Returns true when the buffer fits in the small vector optimization | ||
| 600 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 601 | return words.IsShort(); | ||
| 602 | } | ||
| 603 | |||
| 604 | RasterizerInterface* rasterizer = nullptr; | ||
| 605 | VAddr cpu_addr = 0; | 111 | VAddr cpu_addr = 0; |
| 606 | Words words; | ||
| 607 | BufferFlagBits flags{}; | 112 | BufferFlagBits flags{}; |
| 608 | int stream_score = 0; | 113 | int stream_score = 0; |
| 609 | size_t lru_id = SIZE_MAX; | 114 | size_t lru_id = SIZE_MAX; |
| 115 | size_t size_bytes = 0; | ||
| 610 | }; | 116 | }; |
| 611 | 117 | ||
| 612 | } // namespace VideoCommon | 118 | } // namespace VideoCommon |
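The buffer_base.h code removed above tracks buffer state at page granularity, packing one bit per page into 64-bit words: ChangeRegionState() builds a per-word mask with two shift pairs (clear bits below the first page, clear bits at and above the last page) and then ORs or clears it. A minimal standalone sketch of that masking scheme, with illustrative names and a fixed 4 KiB page instead of the header's BYTES_PER_PAGE:

    #include <algorithm>
    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // One bit per page, 64 pages per word, as in the removed ChangeRegionState().
    constexpr uint64_t PAGE_BYTES = 4096;      // illustrative page size
    constexpr uint64_t PAGES_PER_WORD = 64;

    struct DirtyBitmap {
        uint64_t base_addr = 0;
        std::vector<uint64_t> words;           // dirty bit per page

        // Mark [dirty_addr, dirty_addr + size) as modified, clamped to the buffer.
        void MarkDirty(uint64_t dirty_addr, int64_t size) {
            const int64_t diff = static_cast<int64_t>(dirty_addr - base_addr);
            const uint64_t offset = static_cast<uint64_t>(std::max<int64_t>(diff, 0));
            size += std::min<int64_t>(diff, 0);
            const uint64_t total = words.size() * PAGES_PER_WORD * PAGE_BYTES;
            if (offset >= total || size < 0) {
                return;
            }
            const uint64_t end = std::min(offset + static_cast<uint64_t>(size), total);
            uint64_t page = offset / PAGE_BYTES;
            const uint64_t page_end = (end + PAGE_BYTES - 1) / PAGE_BYTES;
            while (page < page_end) {
                const uint64_t word = page / PAGES_PER_WORD;
                const uint64_t first = page % PAGES_PER_WORD;
                const uint64_t last =
                    std::min(page_end - word * PAGES_PER_WORD, PAGES_PER_WORD);
                // Same trick as above: (bits >> r) << r clears bits below 'first',
                // (bits << l) >> l clears bits at and above 'last'.
                uint64_t bits = ~uint64_t{0};
                bits = (bits >> first) << first;
                const uint64_t left = PAGES_PER_WORD - last;
                bits = (bits << left) >> left;
                words[word] |= bits;
                page = (word + 1) * PAGES_PER_WORD;
            }
        }

        // Number of dirty pages, e.g. to size a download.
        size_t CountDirty() const {
            size_t count = 0;
            for (const uint64_t w : words) {
                count += static_cast<size_t>(std::popcount(w));
            }
            return count;
        }
    };

The removed header keeps four such bitmaps per buffer (CPU, GPU, CachedCPU, Untracked) and notifies the rasterizer when CPU tracking bits change; the memory_tracker introduced in the buffer_cache.h hunks below takes over that bookkeeping at the cache level.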
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index a16308b60..40db243d2 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/microprofile.h" | 4 | #include "common/microprofile.h" |
| 5 | 5 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index abdc593df..e534e1e9c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1,485 +1,29 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <array> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <mutex> | ||
| 10 | #include <numeric> | 8 | #include <numeric> |
| 11 | #include <span> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "common/div_ceil.h" | ||
| 19 | #include "common/literals.h" | ||
| 20 | #include "common/lru_cache.h" | ||
| 21 | #include "common/microprofile.h" | ||
| 22 | #include "common/polyfill_ranges.h" | ||
| 23 | #include "common/scratch_buffer.h" | ||
| 24 | #include "common/settings.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 27 | #include "video_core/control/channel_state_cache.h" | ||
| 28 | #include "video_core/delayed_destruction_ring.h" | ||
| 29 | #include "video_core/dirty_flags.h" | ||
| 30 | #include "video_core/engines/draw_manager.h" | ||
| 31 | #include "video_core/engines/kepler_compute.h" | ||
| 32 | #include "video_core/engines/maxwell_3d.h" | ||
| 33 | #include "video_core/memory_manager.h" | ||
| 34 | #include "video_core/rasterizer_interface.h" | ||
| 35 | #include "video_core/surface.h" | ||
| 36 | #include "video_core/texture_cache/slot_vector.h" | ||
| 37 | #include "video_core/texture_cache/types.h" | ||
| 38 | 9 | ||
| 39 | namespace VideoCommon { | 10 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 40 | |||
| 41 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 42 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 43 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 44 | |||
| 45 | using BufferId = SlotId; | ||
| 46 | |||
| 47 | using VideoCore::Surface::PixelFormat; | ||
| 48 | using namespace Common::Literals; | ||
| 49 | |||
| 50 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 51 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 52 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 53 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 54 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 55 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 56 | constexpr u32 NUM_STAGES = 5; | ||
| 57 | |||
| 58 | enum class ObtainBufferSynchronize : u32 { | ||
| 59 | NoSynchronize = 0, | ||
| 60 | FullSynchronize = 1, | ||
| 61 | SynchronizeNoDirty = 2, | ||
| 62 | }; | ||
| 63 | |||
| 64 | enum class ObtainBufferOperation : u32 { | ||
| 65 | DoNothing = 0, | ||
| 66 | MarkAsWritten = 1, | ||
| 67 | DiscardWrite = 2, | ||
| 68 | MarkQuery = 3, | ||
| 69 | }; | ||
| 70 | |||
| 71 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 72 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 73 | |||
| 74 | template <typename P> | ||
| 75 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 76 | |||
| 77 | // Page size for caching purposes. | ||
| 78 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. | ||
| 79 | static constexpr u32 YUZU_PAGEBITS = 16; | ||
| 80 | static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS; | ||
| 81 | |||
| 82 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 83 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 84 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 85 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 86 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 87 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 88 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 89 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 90 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 91 | |||
| 92 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 93 | |||
| 94 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 95 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 96 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 97 | |||
| 98 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 99 | |||
| 100 | using Runtime = typename P::Runtime; | ||
| 101 | using Buffer = typename P::Buffer; | ||
| 102 | |||
| 103 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 104 | using IntervalType = typename IntervalSet::interval_type; | ||
| 105 | |||
| 106 | struct Empty {}; | ||
| 107 | |||
| 108 | struct OverlapResult { | ||
| 109 | std::vector<BufferId> ids; | ||
| 110 | VAddr begin; | ||
| 111 | VAddr end; | ||
| 112 | bool has_stream_leap = false; | ||
| 113 | }; | ||
| 114 | |||
| 115 | struct Binding { | ||
| 116 | VAddr cpu_addr{}; | ||
| 117 | u32 size{}; | ||
| 118 | BufferId buffer_id; | ||
| 119 | }; | ||
| 120 | |||
| 121 | struct TextureBufferBinding : Binding { | ||
| 122 | PixelFormat format; | ||
| 123 | }; | ||
| 124 | |||
| 125 | static constexpr Binding NULL_BINDING{ | ||
| 126 | .cpu_addr = 0, | ||
| 127 | .size = 0, | ||
| 128 | .buffer_id = NULL_BUFFER_ID, | ||
| 129 | }; | ||
| 130 | |||
| 131 | public: | ||
| 132 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 133 | |||
| 134 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 135 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 136 | |||
| 137 | void TickFrame(); | ||
| 138 | |||
| 139 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 140 | |||
| 141 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 142 | |||
| 143 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 144 | |||
| 145 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 146 | |||
| 147 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 148 | |||
| 149 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 150 | |||
| 151 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 152 | |||
| 153 | void UpdateComputeBuffers(); | ||
| 154 | |||
| 155 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 156 | |||
| 157 | void BindHostStageBuffers(size_t stage); | ||
| 158 | |||
| 159 | void BindHostComputeBuffers(); | ||
| 160 | |||
| 161 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 162 | const UniformBufferSizes* sizes); | ||
| 163 | |||
| 164 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 165 | |||
| 166 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 167 | |||
| 168 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 169 | bool is_written); | ||
| 170 | |||
| 171 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 172 | |||
| 173 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 174 | PixelFormat format, bool is_written, bool is_image); | ||
| 175 | |||
| 176 | void UnbindComputeStorageBuffers(); | ||
| 177 | |||
| 178 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 179 | bool is_written); | ||
| 180 | |||
| 181 | void UnbindComputeTextureBuffers(); | ||
| 182 | |||
| 183 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 184 | bool is_written, bool is_image); | ||
| 185 | |||
| 186 | void FlushCachedWrites(); | ||
| 187 | |||
| 188 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 189 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 190 | |||
| 191 | void AccumulateFlushes(); | ||
| 192 | |||
| 193 | /// Return true when the caller should wait for async downloads | ||
| 194 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 195 | |||
| 196 | /// Commit asynchronous downloads | ||
| 197 | void CommitAsyncFlushes(); | ||
| 198 | void CommitAsyncFlushesHigh(); | ||
| 199 | |||
| 200 | /// Pop asynchronous downloads | ||
| 201 | void PopAsyncFlushes(); | ||
| 202 | |||
| 203 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 204 | |||
| 205 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 206 | |||
| 207 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 208 | ObtainBufferSynchronize sync_info, | ||
| 209 | ObtainBufferOperation post_op); | ||
| 210 | |||
| 211 | /// Return true when a CPU region is modified from the GPU | ||
| 212 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 213 | |||
| 214 | /// Return true when a region is registered on the cache | ||
| 215 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 216 | |||
| 217 | /// Return true when a CPU region is modified from the CPU | ||
| 218 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 219 | |||
| 220 | void SetDrawIndirect( | ||
| 221 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 222 | current_draw_indirect = current_draw_indirect_; | ||
| 223 | } | ||
| 224 | |||
| 225 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 226 | |||
| 227 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 228 | |||
| 229 | std::recursive_mutex mutex; | ||
| 230 | Runtime& runtime; | ||
| 231 | |||
| 232 | private: | ||
| 233 | template <typename Func> | ||
| 234 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 235 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 236 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 237 | index += disabled_bits; | ||
| 238 | enabled_mask >>= disabled_bits; | ||
| 239 | func(index); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | template <typename Func> | ||
| 244 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 245 | const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE); | ||
| 246 | for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 247 | const BufferId buffer_id = page_table[page]; | ||
| 248 | if (!buffer_id) { | ||
| 249 | ++page; | ||
| 250 | continue; | ||
| 251 | } | ||
| 252 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 253 | func(buffer_id, buffer); | ||
| 254 | |||
| 255 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 256 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | template <typename Func> | ||
| 261 | void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 262 | const VAddr start_address = cpu_addr; | ||
| 263 | const VAddr end_address = start_address + size; | ||
| 264 | const VAddr search_base = | ||
| 265 | static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size))); | ||
| 266 | const IntervalType search_interval{search_base, search_base + 1}; | ||
| 267 | auto it = common_ranges.lower_bound(search_interval); | ||
| 268 | if (it == common_ranges.end()) { | ||
| 269 | it = common_ranges.begin(); | ||
| 270 | } | ||
| 271 | for (; it != common_ranges.end(); it++) { | ||
| 272 | VAddr inter_addr_end = it->upper(); | ||
| 273 | VAddr inter_addr = it->lower(); | ||
| 274 | if (inter_addr >= end_address) { | ||
| 275 | break; | ||
| 276 | } | ||
| 277 | if (inter_addr_end <= start_address) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | if (inter_addr_end > end_address) { | ||
| 281 | inter_addr_end = end_address; | ||
| 282 | } | ||
| 283 | if (inter_addr < start_address) { | ||
| 284 | inter_addr = start_address; | ||
| 285 | } | ||
| 286 | func(inter_addr, inter_addr_end); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 291 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 292 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 293 | } | ||
| 294 | |||
| 295 | void RunGarbageCollector(); | ||
| 296 | |||
| 297 | void BindHostIndexBuffer(); | ||
| 298 | |||
| 299 | void BindHostVertexBuffers(); | ||
| 300 | |||
| 301 | void BindHostDrawIndirectBuffers(); | ||
| 302 | |||
| 303 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 304 | |||
| 305 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 306 | |||
| 307 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 308 | |||
| 309 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 310 | |||
| 311 | void BindHostTransformFeedbackBuffers(); | ||
| 312 | |||
| 313 | void BindHostComputeUniformBuffers(); | ||
| 314 | |||
| 315 | void BindHostComputeStorageBuffers(); | ||
| 316 | |||
| 317 | void BindHostComputeTextureBuffers(); | ||
| 318 | |||
| 319 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 320 | |||
| 321 | void DoUpdateComputeBuffers(); | ||
| 322 | |||
| 323 | void UpdateIndexBuffer(); | ||
| 324 | |||
| 325 | void UpdateVertexBuffers(); | ||
| 326 | |||
| 327 | void UpdateVertexBuffer(u32 index); | ||
| 328 | |||
| 329 | void UpdateDrawIndirect(); | ||
| 330 | |||
| 331 | void UpdateUniformBuffers(size_t stage); | ||
| 332 | |||
| 333 | void UpdateStorageBuffers(size_t stage); | ||
| 334 | |||
| 335 | void UpdateTextureBuffers(size_t stage); | ||
| 336 | |||
| 337 | void UpdateTransformFeedbackBuffers(); | ||
| 338 | |||
| 339 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 340 | |||
| 341 | void UpdateComputeUniformBuffers(); | ||
| 342 | |||
| 343 | void UpdateComputeStorageBuffers(); | ||
| 344 | |||
| 345 | void UpdateComputeTextureBuffers(); | ||
| 346 | |||
| 347 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 348 | |||
| 349 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 350 | |||
| 351 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 352 | |||
| 353 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 354 | |||
| 355 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 356 | |||
| 357 | void Register(BufferId buffer_id); | ||
| 358 | |||
| 359 | void Unregister(BufferId buffer_id); | ||
| 360 | |||
| 361 | template <bool insert> | ||
| 362 | void ChangeRegister(BufferId buffer_id); | ||
| 363 | |||
| 364 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 365 | |||
| 366 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 367 | |||
| 368 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 369 | |||
| 370 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 371 | std::span<BufferCopy> copies); | ||
| 372 | |||
| 373 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 374 | std::span<const BufferCopy> copies); | ||
| 375 | |||
| 376 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 377 | |||
| 378 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 379 | |||
| 380 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 381 | |||
| 382 | void DeleteBuffer(BufferId buffer_id); | ||
| 383 | |||
| 384 | void NotifyBufferDeletion(); | ||
| 385 | |||
| 386 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 387 | bool is_written = false) const; | ||
| 388 | |||
| 389 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 390 | PixelFormat format); | ||
| 391 | |||
| 392 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 393 | |||
| 394 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 395 | |||
| 396 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 397 | |||
| 398 | void ClearDownload(IntervalType subtract_interval); | ||
| 399 | |||
| 400 | VideoCore::RasterizerInterface& rasterizer; | ||
| 401 | Core::Memory::Memory& cpu_memory; | ||
| 402 | |||
| 403 | SlotVector<Buffer> slot_buffers; | ||
| 404 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 405 | |||
| 406 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 407 | |||
| 408 | u32 last_index_count = 0; | ||
| 409 | |||
| 410 | Binding index_buffer; | ||
| 411 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 412 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 413 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 414 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 415 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 416 | Binding count_buffer_binding; | ||
| 417 | Binding indirect_buffer_binding; | ||
| 418 | |||
| 419 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 420 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 421 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 422 | |||
| 423 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 424 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 425 | |||
| 426 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 427 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 428 | |||
| 429 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 430 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 431 | u32 enabled_compute_storage_buffers = 0; | ||
| 432 | u32 written_compute_storage_buffers = 0; | ||
| 433 | |||
| 434 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 435 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 436 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 437 | u32 enabled_compute_texture_buffers = 0; | ||
| 438 | u32 written_compute_texture_buffers = 0; | ||
| 439 | u32 image_compute_texture_buffers = 0; | ||
| 440 | |||
| 441 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 442 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 443 | |||
| 444 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 445 | |||
| 446 | bool has_deleted_buffers = false; | ||
| 447 | 11 | ||
| 448 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | 12 | namespace VideoCommon { |
| 449 | dirty_uniform_buffers{}; | ||
| 450 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 451 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 452 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 453 | uniform_buffer_binding_sizes{}; | ||
| 454 | |||
| 455 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 456 | |||
| 457 | IntervalSet uncommitted_ranges; | ||
| 458 | IntervalSet common_ranges; | ||
| 459 | std::deque<IntervalSet> committed_ranges; | ||
| 460 | |||
| 461 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 462 | |||
| 463 | struct LRUItemParams { | ||
| 464 | using ObjectType = BufferId; | ||
| 465 | using TickType = u64; | ||
| 466 | }; | ||
| 467 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 468 | u64 frame_tick = 0; | ||
| 469 | u64 total_used_memory = 0; | ||
| 470 | u64 minimum_memory = 0; | ||
| 471 | u64 critical_memory = 0; | ||
| 472 | 13 | ||
| 473 | std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; | 14 | using Core::Memory::YUZU_PAGESIZE; |
| 474 | }; | ||
| 475 | 15 | ||
| 476 | template <class P> | 16 | template <class P> |
| 477 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 17 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 478 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) | 18 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) |
| 479 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { | 19 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{ |
| 20 | rasterizer} { | ||
| 480 | // Ensure the first slot is used for the null buffer | 21 | // Ensure the first slot is used for the null buffer |
| 481 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 482 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| 24 | inline_buffer_id = NULL_BUFFER_ID; | ||
| 25 | |||
| 26 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 483 | 27 | ||
| 484 | if (!runtime.CanReportMemoryUsage()) { | 28 | if (!runtime.CanReportMemoryUsage()) { |
| 485 | minimum_memory = DEFAULT_EXPECTED_MEMORY; | 29 | minimum_memory = DEFAULT_EXPECTED_MEMORY; |
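The class body removed above resolves CPU addresses to buffers through a flat page table, std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)>, one slot per 64 KiB page of the 39-bit guest address space; the later hunks keep the same idea under CACHING_PAGEBITS/CACHING_PAGESIZE. A rough sketch of that lookup structure (BufferId and the helper names here are stand-ins, not the cache's SlotVector types):

    #include <array>
    #include <cstdint>

    using BufferId = uint32_t;                       // stand-in for SlotId
    constexpr uint64_t PAGE_BITS = 16;               // 64 KiB caching pages
    constexpr uint64_t ADDRESS_SPACE_BITS = 39;      // guest address space

    // One entry per caching page; id 0 doubles as "no buffer here".
    std::array<BufferId, (uint64_t{1} << ADDRESS_SPACE_BITS) >> PAGE_BITS> page_table{};

    // Point every page covered by [addr, addr + size) at the owning buffer.
    void Register(BufferId id, uint64_t addr, uint64_t size) {
        const uint64_t page_begin = addr >> PAGE_BITS;
        const uint64_t page_end =
            (addr + size + (uint64_t{1} << PAGE_BITS) - 1) >> PAGE_BITS;
        for (uint64_t page = page_begin; page < page_end; ++page) {
            page_table[page] = id;
        }
    }

    // O(1): any address inside a registered buffer maps straight to its id.
    BufferId Find(uint64_t addr) {
        return page_table[addr >> PAGE_BITS];
    }

Loops such as ForEachBufferInRange() above walk this table and, once a buffer is found, skip directly past its end instead of visiting every page.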
| @@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() { | |||
| 531 | uniform_cache_hits[0] = 0; | 75 | uniform_cache_hits[0] = 0; |
| 532 | uniform_cache_shots[0] = 0; | 76 | uniform_cache_shots[0] = 0; |
| 533 | 77 | ||
| 78 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 79 | |||
| 534 | const bool skip_preferred = hits * 256 < shots * 251; | 80 | const bool skip_preferred = hits * 256 < shots * 251; |
| 535 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 81 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 536 | 82 | ||
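The TickFrame() hunk above leaves the uniform-buffer skip-cache heuristic untouched: skip_preferred is true when hits * 256 < shots * 251, that is, when the tracked hit ratio drops below 251/256 (about 98.05%), and only a true result keeps uniform_buffer_skip_cache_size at the 4 KiB default. A small compile-time check of that arithmetic (what hits and shots count beyond the code shown here is not asserted):

    #include <cstdint>

    // Mirrors the comparison in TickFrame(): true below a ~98.05% hit ratio.
    constexpr bool SkipPreferred(uint64_t hits, uint64_t shots) {
        return hits * 256 < shots * 251;
    }

    // 980/1000 = 98.0% -> 250880 < 251000 -> keep the 4 KiB skip-cache size
    static_assert(SkipPreferred(980, 1000));
    // 981/1000 = 98.1% -> 251136 < 251000 is false -> the size is set to 0
    static_assert(!SkipPreferred(981, 1000));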
| @@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() { | |||
| 543 | } | 89 | } |
| 544 | ++frame_tick; | 90 | ++frame_tick; |
| 545 | delayed_destruction_ring.Tick(); | 91 | delayed_destruction_ring.Tick(); |
| 92 | |||
| 93 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 94 | for (auto& buffer : async_buffers_death_ring) { | ||
| 95 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 96 | } | ||
| 97 | async_buffers_death_ring.clear(); | ||
| 98 | } | ||
| 546 | } | 99 | } |
| 547 | 100 | ||
| 548 | template <class P> | 101 | template <class P> |
| 549 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | 102 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { |
| 550 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 103 | memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); |
| 551 | buffer.MarkRegionAsCpuModified(cpu_addr, size); | 104 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { |
| 552 | }); | 105 | const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; |
| 106 | ClearDownload(subtract_interval); | ||
| 107 | common_ranges.subtract(subtract_interval); | ||
| 108 | } | ||
| 553 | } | 109 | } |
| 554 | 110 | ||
| 555 | template <class P> | 111 | template <class P> |
| 556 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 112 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { |
| 557 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 113 | memory_tracker.CachedCpuWrite(cpu_addr, size); |
| 558 | if (!buffer.HasCachedWrites()) { | 114 | const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE), |
| 559 | cached_write_buffer_ids.push_back(buffer_id); | 115 | Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)}; |
| 560 | } | 116 | cached_ranges.add(add_interval); |
| 561 | buffer.CachedCpuWrite(cpu_addr, size); | ||
| 562 | }); | ||
| 563 | } | 117 | } |
| 564 | 118 | ||
| 565 | template <class P> | 119 | template <class P> |
| 566 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 120 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 121 | WaitOnAsyncFlushes(cpu_addr, size); | ||
| 567 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 122 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { |
| 568 | DownloadBufferMemory(buffer, cpu_addr, size); | 123 | DownloadBufferMemory(buffer, cpu_addr, size); |
| 569 | }); | 124 | }); |
| 570 | } | 125 | } |
| 571 | 126 | ||
| 572 | template <class P> | 127 | template <class P> |
| 128 | void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) { | ||
| 129 | bool must_wait = false; | ||
| 130 | ForEachInOverlapCounter(async_downloads, cpu_addr, size, | ||
| 131 | [&](VAddr, VAddr, int) { must_wait = true; }); | ||
| 132 | bool must_release = false; | ||
| 133 | ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; }); | ||
| 134 | if (must_release) { | ||
| 135 | std::function<void()> tmp([]() {}); | ||
| 136 | rasterizer.SignalFence(std::move(tmp)); | ||
| 137 | } | ||
| 138 | if (must_wait || must_release) { | ||
| 139 | rasterizer.ReleaseFences(); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | template <class P> | ||
| 573 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | 144 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { |
| 145 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024); | ||
| 574 | uncommitted_ranges.subtract(subtract_interval); | 146 | uncommitted_ranges.subtract(subtract_interval); |
| 147 | pending_ranges.subtract(subtract_interval); | ||
| 575 | for (auto& interval_set : committed_ranges) { | 148 | for (auto& interval_set : committed_ranges) { |
| 576 | interval_set.subtract(subtract_interval); | 149 | interval_set.subtract(subtract_interval); |
| 577 | } | 150 | } |
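In the hunk above, WriteMemory() and CachedWriteMemory() now delegate per-page state to memory_tracker, while the boost::icl interval sets keep only coarse range bookkeeping: CachedWriteMemory() adds a page-aligned interval to cached_ranges, and ClearDownload() subtracts an invalidated range from every pending set. A small standalone example of that add/subtract flow, assuming a 4 KiB page and illustrative names:

    #include <cstdint>
    #include <iostream>
    #include <boost/icl/interval_set.hpp>

    using VAddr = uint64_t;
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using Interval = IntervalSet::interval_type;

    constexpr VAddr PAGE = 4096; // stand-in for YUZU_PAGESIZE

    int main() {
        IntervalSet cached_ranges;

        // Two overlapping cached CPU writes coalesce into one interval.
        const auto add = [&](VAddr addr, VAddr size) {
            const VAddr begin = addr & ~(PAGE - 1);                    // AlignDown
            const VAddr end = (addr + size + PAGE - 1) & ~(PAGE - 1);  // AlignUp
            cached_ranges.add(Interval{begin, end});
        };
        add(0x10000, 0x100);
        add(0x10080, 0x2000);

        // Clearing a downloaded/invalidated range carves it back out.
        cached_ranges.subtract(Interval{0x10000, 0x11000});

        for (const auto& interval : cached_ranges) {
            std::cout << std::hex << interval.lower() << "-" << interval.upper() << '\n';
        }
        // Prints: 11000-13000
    }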
| @@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 591 | } | 164 | } |
| 592 | 165 | ||
| 593 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | 166 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |
| 167 | WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount)); | ||
| 594 | ClearDownload(subtract_interval); | 168 | ClearDownload(subtract_interval); |
| 595 | 169 | ||
| 596 | BufferId buffer_a; | 170 | BufferId buffer_a; |
| @@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 616 | const VAddr diff = base_address - *cpu_src_address; | 190 | const VAddr diff = base_address - *cpu_src_address; |
| 617 | const VAddr new_base_address = *cpu_dest_address + diff; | 191 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 618 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 192 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 619 | uncommitted_ranges.add(add_interval); | ||
| 620 | tmp_intervals.push_back(add_interval); | 193 | tmp_intervals.push_back(add_interval); |
| 194 | uncommitted_ranges.add(add_interval); | ||
| 195 | pending_ranges.add(add_interval); | ||
| 621 | }; | 196 | }; |
| 622 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 197 | ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); |
| 623 | // This subtraction in this order is important for overlapping copies. | 198 | // This subtraction in this order is important for overlapping copies. |
| 624 | common_ranges.subtract(subtract_interval); | 199 | common_ranges.subtract(subtract_interval); |
| 625 | const bool has_new_downloads = tmp_intervals.size() != 0; | 200 | const bool has_new_downloads = tmp_intervals.size() != 0; |
| @@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 628 | } | 203 | } |
| 629 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 204 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 630 | if (has_new_downloads) { | 205 | if (has_new_downloads) { |
| 631 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 206 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 632 | } | 207 | } |
| 633 | std::vector<u8> tmp_buffer(amount); | 208 | std::vector<u8> tmp_buffer(amount); |
| 634 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 209 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |
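Inside DMACopy() above, the mirror lambda takes each tracked written subrange of the copied source window and re-registers it at the matching offset inside the destination, so the copied bytes are scheduled for download as well (and now also added to pending_ranges). A hedged sketch of that remapping with illustrative types:

    #include <algorithm>
    #include <cstdint>
    #include <vector>
    #include <boost/icl/interval_set.hpp>

    using VAddr = uint64_t;
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using Interval = IntervalSet::interval_type;

    // For every written range overlapping [src, src + amount), produce the
    // equivalent range shifted into the destination window starting at dst.
    std::vector<Interval> MirrorWrittenRanges(const IntervalSet& written, VAddr src,
                                              VAddr dst, VAddr amount) {
        std::vector<Interval> mirrored;
        for (const auto& range : written) {
            const VAddr begin = std::max(range.lower(), src);
            const VAddr end = std::min(range.upper(), src + amount);
            if (begin >= end) {
                continue; // no overlap with the copied window
            }
            const VAddr diff = begin - src;
            mirrored.push_back(Interval{dst + diff, dst + diff + (end - begin)});
        }
        return mirrored;
    }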
| @@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 866 | 441 | ||
| 867 | template <class P> | 442 | template <class P> |
| 868 | void BufferCache<P>::FlushCachedWrites() { | 443 | void BufferCache<P>::FlushCachedWrites() { |
| 869 | for (const BufferId buffer_id : cached_write_buffer_ids) { | ||
| 870 | slot_buffers[buffer_id].FlushCachedWrites(); | ||
| 871 | } | ||
| 872 | cached_write_buffer_ids.clear(); | 444 | cached_write_buffer_ids.clear(); |
| 445 | memory_tracker.FlushCachedWrites(); | ||
| 446 | cached_ranges.clear(); | ||
| 873 | } | 447 | } |
| 874 | 448 | ||
| 875 | template <class P> | 449 | template <class P> |
| @@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 879 | 453 | ||
| 880 | template <class P> | 454 | template <class P> |
| 881 | void BufferCache<P>::AccumulateFlushes() { | 455 | void BufferCache<P>::AccumulateFlushes() { |
| 882 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 883 | uncommitted_ranges.clear(); | ||
| 884 | return; | ||
| 885 | } | ||
| 886 | if (uncommitted_ranges.empty()) { | 456 | if (uncommitted_ranges.empty()) { |
| 887 | return; | 457 | return; |
| 888 | } | 458 | } |
| @@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() { | |||
| 891 | 461 | ||
| 892 | template <class P> | 462 | template <class P> |
| 893 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | 463 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 894 | return false; | 464 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 465 | return (!async_buffers.empty() && async_buffers.front().has_value()); | ||
| 466 | } else { | ||
| 467 | return false; | ||
| 468 | } | ||
| 895 | } | 469 | } |
| 896 | 470 | ||
| 897 | template <class P> | 471 | template <class P> |
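ShouldWaitAsyncFlushes() above, together with the commit and pop paths in the hunks that follow, treats asynchronous downloads as a strict FIFO of optional staging buffers: a commit with nothing to download pushes an empty optional so that producers and consumers always stay in step. A minimal sketch of that queue discipline, with StagingBuffer standing in for the runtime's real staging allocation:

    #include <cstddef>
    #include <deque>
    #include <optional>
    #include <vector>

    struct StagingBuffer {
        std::vector<unsigned char> mapped_span;
        size_t offset = 0;
    };

    std::deque<std::optional<StagingBuffer>> async_buffers;

    // Only wait when the oldest queued entry is a real download.
    bool ShouldWaitAsyncFlushes() {
        return !async_buffers.empty() && async_buffers.front().has_value();
    }

    void PopAsyncBuffers() {
        if (async_buffers.empty()) {
            return;
        }
        if (!async_buffers.front().has_value()) {
            async_buffers.pop_front(); // empty marker: this commit downloaded nothing
            return;
        }
        // ...copy the mapped staging data back to guest memory here...
        async_buffers.pop_front();
    }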
| @@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 899 | AccumulateFlushes(); | 473 | AccumulateFlushes(); |
| 900 | 474 | ||
| 901 | if (committed_ranges.empty()) { | 475 | if (committed_ranges.empty()) { |
| 476 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 477 | if (active_async_buffers) { | ||
| 478 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 479 | } | ||
| 480 | } | ||
| 902 | return; | 481 | return; |
| 903 | } | 482 | } |
| 904 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 483 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 905 | const bool is_accuracy_normal = | ||
| 906 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 907 | 484 | ||
| 485 | pending_ranges.clear(); | ||
| 908 | auto it = committed_ranges.begin(); | 486 | auto it = committed_ranges.begin(); |
| 909 | while (it != committed_ranges.end()) { | 487 | while (it != committed_ranges.end()) { |
| 910 | auto& current_intervals = *it; | 488 | auto& current_intervals = *it; |
| @@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 926 | const std::size_t size = interval.upper() - interval.lower(); | 504 | const std::size_t size = interval.upper() - interval.lower(); |
| 927 | const VAddr cpu_addr = interval.lower(); | 505 | const VAddr cpu_addr = interval.lower(); |
| 928 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 506 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 929 | buffer.ForEachDownloadRangeAndClear( | 507 | const VAddr buffer_start = buffer.CpuAddr(); |
| 930 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 508 | const VAddr buffer_end = buffer_start + buffer.SizeBytes(); |
| 931 | if (is_accuracy_normal) { | 509 | const VAddr new_start = std::max(buffer_start, cpu_addr); |
| 932 | return; | 510 | const VAddr new_end = std::min(buffer_end, cpu_addr + size); |
| 933 | } | 511 | memory_tracker.ForEachDownloadRange( |
| 512 | new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 934 | const VAddr buffer_addr = buffer.CpuAddr(); | 513 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 935 | const auto add_download = [&](VAddr start, VAddr end) { | 514 | const auto add_download = [&](VAddr start, VAddr end) { |
| 936 | const u64 new_offset = start - buffer_addr; | 515 | const u64 new_offset = start - buffer_addr; |
| @@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 944 | buffer_id, | 523 | buffer_id, |
| 945 | }); | 524 | }); |
| 946 | // Align up to avoid cache conflicts | 525 | // Align up to avoid cache conflicts |
| 947 | constexpr u64 align = 8ULL; | 526 | constexpr u64 align = 64ULL; |
| 948 | constexpr u64 mask = ~(align - 1ULL); | 527 | constexpr u64 mask = ~(align - 1ULL); |
| 949 | total_size_bytes += (new_size + align - 1) & mask; | 528 | total_size_bytes += (new_size + align - 1) & mask; |
| 950 | largest_copy = std::max(largest_copy, new_size); | 529 | largest_copy = std::max(largest_copy, new_size); |
| 951 | }; | 530 | }; |
| 952 | 531 | ||
| 953 | const VAddr start_address = buffer_addr + range_offset; | 532 | ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); |
| 954 | const VAddr end_address = start_address + range_size; | ||
| 955 | ForEachWrittenRange(start_address, range_size, add_download); | ||
| 956 | const IntervalType subtract_interval{start_address, end_address}; | ||
| 957 | common_ranges.subtract(subtract_interval); | ||
| 958 | }); | 533 | }); |
| 959 | }); | 534 | }); |
| 960 | } | 535 | } |
| 961 | } | 536 | } |
| 962 | committed_ranges.clear(); | 537 | committed_ranges.clear(); |
| 963 | if (downloads.empty()) { | 538 | if (downloads.empty()) { |
| 539 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 540 | if (active_async_buffers) { | ||
| 541 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 542 | } | ||
| 543 | } | ||
| 964 | return; | 544 | return; |
| 965 | } | 545 | } |
| 966 | if constexpr (USE_MEMORY_MAPS) { | 546 | if (active_async_buffers) { |
| 967 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 547 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 968 | runtime.PreCopyBarrier(); | 548 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 969 | for (auto& [copy, buffer_id] : downloads) { | 549 | boost::container::small_vector<BufferCopy, 4> normalized_copies; |
| 970 | // Have in mind the staging buffer offset for the copy | 550 | IntervalSet new_async_range{}; |
| 971 | copy.dst_offset += download_staging.offset; | 551 | runtime.PreCopyBarrier(); |
| 972 | const std::array copies{copy}; | 552 | for (auto& [copy, buffer_id] : downloads) { |
| 973 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | 553 | copy.dst_offset += download_staging.offset; |
| 974 | } | 554 | const std::array copies{copy}; |
| 975 | runtime.PostCopyBarrier(); | 555 | BufferCopy second_copy{copy}; |
| 976 | runtime.Finish(); | 556 | Buffer& buffer = slot_buffers[buffer_id]; |
| 977 | for (const auto& [copy, buffer_id] : downloads) { | 557 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 978 | const Buffer& buffer = slot_buffers[buffer_id]; | 558 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); |
| 979 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 559 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; |
| 980 | // Undo the modified offset | 560 | async_downloads += std::make_pair(base_interval, 1); |
| 981 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 561 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| 982 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 562 | normalized_copies.push_back(second_copy); |
| 983 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 563 | } |
| 564 | runtime.PostCopyBarrier(); | ||
| 565 | pending_downloads.emplace_back(std::move(normalized_copies)); | ||
| 566 | async_buffers.emplace_back(download_staging); | ||
| 567 | } else { | ||
| 568 | committed_ranges.clear(); | ||
| 569 | uncommitted_ranges.clear(); | ||
| 984 | } | 570 | } |
| 985 | } else { | 571 | } else { |
| 986 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 572 | if constexpr (USE_MEMORY_MAPS) { |
| 987 | for (const auto& [copy, buffer_id] : downloads) { | 573 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 988 | Buffer& buffer = slot_buffers[buffer_id]; | 574 | runtime.PreCopyBarrier(); |
| 989 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 575 | for (auto& [copy, buffer_id] : downloads) { |
| 990 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 576 | // Have in mind the staging buffer offset for the copy |
| 991 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 577 | copy.dst_offset += download_staging.offset; |
| 578 | const std::array copies{copy}; | ||
| 579 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | ||
| 580 | } | ||
| 581 | runtime.PostCopyBarrier(); | ||
| 582 | runtime.Finish(); | ||
| 583 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 584 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 585 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 586 | // Undo the modified offset | ||
| 587 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 588 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 589 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 593 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 594 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 595 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 596 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 597 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 598 | } | ||
| 992 | } | 599 | } |
| 993 | } | 600 | } |
| 994 | } | 601 | } |
| 995 | 602 | ||
| 996 | template <class P> | 603 | template <class P> |
| 997 | void BufferCache<P>::CommitAsyncFlushes() { | 604 | void BufferCache<P>::CommitAsyncFlushes() { |
| 998 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | 605 | CommitAsyncFlushesHigh(); |
| 999 | CommitAsyncFlushesHigh(); | ||
| 1000 | } else { | ||
| 1001 | uncommitted_ranges.clear(); | ||
| 1002 | committed_ranges.clear(); | ||
| 1003 | } | ||
| 1004 | } | 606 | } |
| 1005 | 607 | ||
| 1006 | template <class P> | 608 | template <class P> |
| 1007 | void BufferCache<P>::PopAsyncFlushes() {} | 609 | void BufferCache<P>::PopAsyncFlushes() { |
| 610 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 611 | PopAsyncBuffers(); | ||
| 612 | } | ||
| 1008 | 613 | ||
| 1009 | template <class P> | 614 | template <class P> |
| 1010 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 615 | void BufferCache<P>::PopAsyncBuffers() { |
| 1011 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 616 | if (async_buffers.empty()) { |
| 1012 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 617 | return; |
| 1013 | const BufferId image_id = page_table[page]; | 618 | } |
| 1014 | if (!image_id) { | 619 | if (!async_buffers.front().has_value()) { |
| 1015 | ++page; | 620 | async_buffers.pop_front(); |
| 1016 | continue; | 621 | return; |
| 1017 | } | 622 | } |
| 1018 | Buffer& buffer = slot_buffers[image_id]; | 623 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 1019 | if (buffer.IsRegionGpuModified(addr, size)) { | 624 | auto& downloads = pending_downloads.front(); |
| 1020 | return true; | 625 | auto& async_buffer = async_buffers.front(); |
| 626 | u8* base = async_buffer->mapped_span.data(); | ||
| 627 | const size_t base_offset = async_buffer->offset; | ||
| 628 | for (const auto& copy : downloads) { | ||
| 629 | const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); | ||
| 630 | const u64 dst_offset = copy.dst_offset - base_offset; | ||
| 631 | const u8* read_mapped_memory = base + dst_offset; | ||
| 632 | ForEachInOverlapCounter( | ||
| 633 | async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { | ||
| 634 | cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], | ||
| 635 | end - start); | ||
| 636 | if (count == 1) { | ||
| 637 | const IntervalType base_interval{start, end}; | ||
| 638 | common_ranges.subtract(base_interval); | ||
| 639 | } | ||
| 640 | }); | ||
| 641 | const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | ||
| 642 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | ||
| 1021 | } | 643 | } |
| 1022 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 644 | async_buffers_death_ring.emplace_back(*async_buffer); |
| 1023 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 645 | async_buffers.pop_front(); |
| 646 | pending_downloads.pop_front(); | ||
| 1024 | } | 647 | } |
| 1025 | return false; | 648 | } |
| 649 | |||
| 650 | template <class P> | ||
| 651 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 652 | bool is_dirty = false; | ||
| 653 | ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); | ||
| 654 | return is_dirty; | ||
| 1026 | } | 655 | } |
| 1027 | 656 | ||
| 1028 | template <class P> | 657 | template <class P> |
| 1029 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | 658 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { |
| 1030 | const VAddr end_addr = addr + size; | 659 | const VAddr end_addr = addr + size; |
| 1031 | const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 660 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1032 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 661 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { |
| 1033 | const BufferId buffer_id = page_table[page]; | 662 | const BufferId buffer_id = page_table[page]; |
| 1034 | if (!buffer_id) { | 663 | if (!buffer_id) { |
| 1035 | ++page; | 664 | ++page; |
| @@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 1041 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | 670 | if (buf_start_addr < end_addr && addr < buf_end_addr) { |
| 1042 | return true; | 671 | return true; |
| 1043 | } | 672 | } |
| 1044 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 673 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1045 | } | 674 | } |
| 1046 | return false; | 675 | return false; |
| 1047 | } | 676 | } |
| 1048 | 677 | ||
| 1049 | template <class P> | 678 | template <class P> |
| 1050 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 679 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { |
| 1051 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 680 | return memory_tracker.IsRegionCpuModified(addr, size); |
| 1052 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 1053 | const BufferId image_id = page_table[page]; | ||
| 1054 | if (!image_id) { | ||
| 1055 | ++page; | ||
| 1056 | continue; | ||
| 1057 | } | ||
| 1058 | Buffer& buffer = slot_buffers[image_id]; | ||
| 1059 | if (buffer.IsRegionCpuModified(addr, size)) { | ||
| 1060 | return true; | ||
| 1061 | } | ||
| 1062 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 1063 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 1064 | } | ||
| 1065 | return false; | ||
| 1066 | } | 681 | } |
| 1067 | 682 | ||
| 1068 | template <class P> | 683 | template <class P> |
| @@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 1072 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 687 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 1073 | const u32 size = index_buffer.size; | 688 | const u32 size = index_buffer.size; |
| 1074 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 689 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1075 | if (!draw_state.inline_index_draw_indexes.empty()) { | 690 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1076 | if constexpr (USE_MEMORY_MAPS) { | 691 | if constexpr (USE_MEMORY_MAPS) { |
| 1077 | auto upload_staging = runtime.UploadStagingBuffer(size); | 692 | auto upload_staging = runtime.UploadStagingBuffer(size); |
| 1078 | std::array<BufferCopy, 1> copies{ | 693 | std::array<BufferCopy, 1> copies{ |
| @@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 1155 | TouchBuffer(buffer, binding.buffer_id); | 770 | TouchBuffer(buffer, binding.buffer_id); |
| 1156 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 771 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 1157 | size <= uniform_buffer_skip_cache_size && | 772 | size <= uniform_buffer_skip_cache_size && |
| 1158 | !buffer.IsRegionGpuModified(cpu_addr, size); | 773 | !memory_tracker.IsRegionGpuModified(cpu_addr, size); |
| 1159 | if (use_fast_buffer) { | 774 | if (use_fast_buffer) { |
| 1160 | if constexpr (IS_OPENGL) { | 775 | if constexpr (IS_OPENGL) { |
| 1161 | if (runtime.HasFastBufferSubData()) { | 776 | if (runtime.HasFastBufferSubData()) { |
| @@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1378 | // We have to check for the dirty flags and index count | 993 | // We have to check for the dirty flags and index count |
| 1379 | // The index count is currently changed without updating the dirty flags | 994 | // The index count is currently changed without updating the dirty flags |
| 1380 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 995 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1381 | const auto& index_array = draw_state.index_buffer; | 996 | const auto& index_buffer_ref = draw_state.index_buffer; |
| 1382 | auto& flags = maxwell3d->dirty.flags; | 997 | auto& flags = maxwell3d->dirty.flags; |
| 1383 | if (!flags[Dirty::IndexBuffer]) { | 998 | if (!flags[Dirty::IndexBuffer]) { |
| 1384 | return; | 999 | return; |
| 1385 | } | 1000 | } |
| 1386 | flags[Dirty::IndexBuffer] = false; | 1001 | flags[Dirty::IndexBuffer] = false; |
| 1387 | last_index_count = index_array.count; | 1002 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1388 | if (!draw_state.inline_index_draw_indexes.empty()) { | ||
| 1389 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); | 1003 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); |
| 1004 | u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE); | ||
| 1005 | if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] { | ||
| 1006 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1007 | } | ||
| 1008 | if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] { | ||
| 1009 | slot_buffers.erase(inline_buffer_id); | ||
| 1010 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1011 | } | ||
| 1390 | index_buffer = Binding{ | 1012 | index_buffer = Binding{ |
| 1391 | .cpu_addr = 0, | 1013 | .cpu_addr = 0, |
| 1392 | .size = inline_index_size, | 1014 | .size = inline_index_size, |
| 1393 | .buffer_id = CreateBuffer(0, inline_index_size), | 1015 | .buffer_id = inline_buffer_id, |
| 1394 | }; | 1016 | }; |
| 1395 | return; | 1017 | return; |
| 1396 | } | 1018 | } |
| 1397 | const GPUVAddr gpu_addr_begin = index_array.StartAddress(); | 1019 | |
| 1398 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | 1020 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); |
| 1021 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); | ||
| 1399 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1022 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1400 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1023 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1401 | const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); | 1024 | const u32 draw_size = |
| 1025 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); | ||
| 1402 | const u32 size = std::min(address_size, draw_size); | 1026 | const u32 size = std::min(address_size, draw_size); |
| 1403 | if (size == 0 || !cpu_addr) { | 1027 | if (size == 0 || !cpu_addr) { |
| 1404 | index_buffer = NULL_BINDING; | 1028 | index_buffer = NULL_BINDING; |
| @@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1434 | const GPUVAddr gpu_addr_begin = array.Address(); | 1058 | const GPUVAddr gpu_addr_begin = array.Address(); |
| 1435 | const GPUVAddr gpu_addr_end = limit.Address() + 1; | 1059 | const GPUVAddr gpu_addr_end = limit.Address() + 1; |
| 1436 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1060 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1437 | u32 address_size = static_cast<u32>( | 1061 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1438 | std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); | 1062 | u32 size = address_size; // TODO: Analyze stride and number of vertices |
| 1439 | if (array.enable == 0 || address_size == 0 || !cpu_addr) { | 1063 | if (array.enable == 0 || size == 0 || !cpu_addr) { |
| 1440 | vertex_buffers[index] = NULL_BINDING; | 1064 | vertex_buffers[index] = NULL_BINDING; |
| 1441 | return; | 1065 | return; |
| 1442 | } | 1066 | } |
| 1443 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1067 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1444 | address_size = | 1068 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1445 | static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size)); | ||
| 1446 | } | 1069 | } |
| 1447 | const u32 size = address_size; // TODO: Analyze stride and number of vertices | ||
| 1448 | vertex_buffers[index] = Binding{ | 1070 | vertex_buffers[index] = Binding{ |
| 1449 | .cpu_addr = *cpu_addr, | 1071 | .cpu_addr = *cpu_addr, |
| 1450 | .size = size, | 1072 | .size = size, |
| @@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { | |||
| 1591 | 1213 | ||
| 1592 | template <class P> | 1214 | template <class P> |
| 1593 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1215 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { |
| 1594 | Buffer& buffer = slot_buffers[buffer_id]; | 1216 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); |
| 1595 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1217 | |
| 1218 | if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) { | ||
| 1219 | SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size); | ||
| 1220 | } | ||
| 1596 | 1221 | ||
| 1597 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1222 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; |
| 1598 | common_ranges.add(base_interval); | 1223 | common_ranges.add(base_interval); |
| 1599 | |||
| 1600 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 1601 | if (!is_async) { | ||
| 1602 | return; | ||
| 1603 | } | ||
| 1604 | uncommitted_ranges.add(base_interval); | 1224 | uncommitted_ranges.add(base_interval); |
| 1225 | pending_ranges.add(base_interval); | ||
| 1605 | } | 1226 | } |
| 1606 | 1227 | ||
| 1607 | template <class P> | 1228 | template <class P> |
| @@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | |||
| 1609 | if (cpu_addr == 0) { | 1230 | if (cpu_addr == 0) { |
| 1610 | return NULL_BUFFER_ID; | 1231 | return NULL_BUFFER_ID; |
| 1611 | } | 1232 | } |
| 1612 | const u64 page = cpu_addr >> YUZU_PAGEBITS; | 1233 | const u64 page = cpu_addr >> CACHING_PAGEBITS; |
| 1613 | const BufferId buffer_id = page_table[page]; | 1234 | const BufferId buffer_id = page_table[page]; |
| 1614 | if (!buffer_id) { | 1235 | if (!buffer_id) { |
| 1615 | return CreateBuffer(cpu_addr, size); | 1236 | return CreateBuffer(cpu_addr, size); |
| @@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1638 | .has_stream_leap = has_stream_leap, | 1259 | .has_stream_leap = has_stream_leap, |
| 1639 | }; | 1260 | }; |
| 1640 | } | 1261 | } |
| 1641 | for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); | 1262 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1642 | cpu_addr += YUZU_PAGESIZE) { | 1263 | cpu_addr += CACHING_PAGESIZE) { |
| 1643 | const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; | 1264 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; |
| 1644 | if (!overlap_id) { | 1265 | if (!overlap_id) { |
| 1645 | continue; | 1266 | continue; |
| 1646 | } | 1267 | } |
| @@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1666 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1287 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1667 | has_stream_leap = true; | 1288 | has_stream_leap = true; |
| 1668 | if (expands_right) { | 1289 | if (expands_right) { |
| 1669 | begin -= YUZU_PAGESIZE * 256; | 1290 | begin -= CACHING_PAGESIZE * 256; |
| 1670 | cpu_addr = begin; | 1291 | cpu_addr = begin; |
| 1671 | } | 1292 | } |
| 1672 | if (expands_left) { | 1293 | if (expands_left) { |
| 1673 | end += YUZU_PAGESIZE * 256; | 1294 | end += CACHING_PAGESIZE * 256; |
| 1674 | } | 1295 | } |
| 1675 | } | 1296 | } |
| 1676 | } | 1297 | } |
| @@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1690 | if (accumulate_stream_score) { | 1311 | if (accumulate_stream_score) { |
| 1691 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); | 1312 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); |
| 1692 | } | 1313 | } |
| 1693 | std::vector<BufferCopy> copies; | 1314 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1694 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); | 1315 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); |
| 1695 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { | 1316 | copies.push_back(BufferCopy{ |
| 1696 | copies.push_back(BufferCopy{ | 1317 | .src_offset = 0, |
| 1697 | .src_offset = begin, | 1318 | .dst_offset = dst_base_offset, |
| 1698 | .dst_offset = dst_base_offset + begin, | 1319 | .size = overlap.SizeBytes(), |
| 1699 | .size = range_size, | ||
| 1700 | }); | ||
| 1701 | new_buffer.UnmarkRegionAsCpuModified(begin, range_size); | ||
| 1702 | new_buffer.MarkRegionAsGpuModified(begin, range_size); | ||
| 1703 | }); | 1320 | }); |
| 1704 | if (!copies.empty()) { | 1321 | runtime.CopyBuffer(new_buffer, overlap, copies); |
| 1705 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | 1322 | DeleteBuffer(overlap_id, true); |
| 1706 | } | ||
| 1707 | DeleteBuffer(overlap_id); | ||
| 1708 | } | 1323 | } |
| 1709 | 1324 | ||
| 1710 | template <class P> | 1325 | template <class P> |
| 1711 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1326 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { |
| 1327 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | ||
| 1328 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | ||
| 1329 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | ||
| 1712 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1330 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1713 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1331 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1714 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1332 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| @@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1718 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1336 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1719 | } | 1337 | } |
| 1720 | Register(new_buffer_id); | 1338 | Register(new_buffer_id); |
| 1721 | TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); | 1339 | TouchBuffer(new_buffer, new_buffer_id); |
| 1722 | return new_buffer_id; | 1340 | return new_buffer_id; |
| 1723 | } | 1341 | } |
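CreateBuffer now rounds the requested range out to whole 64 KiB caching pages before resolving overlaps. A small self-contained sketch of that arithmetic, re-implementing power-of-two alignment helpers that are assumed to match the behaviour of Common::AlignDown/Common::AlignUp:

    #include <cstdint>

    constexpr std::uint32_t CACHING_PAGEBITS = 16;
    constexpr std::uint64_t CACHING_PAGESIZE = std::uint64_t{1} << CACHING_PAGEBITS;

    // Assumption: same semantics as Common::AlignDown/Common::AlignUp for power-of-two sizes.
    constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t size) {
        return value & ~(size - 1);
    }
    constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t size) {
        return AlignDown(value + size - 1, size);
    }

    // A request at 0x12345 of 0x100 bytes grows to the full caching page [0x10000, 0x20000).
    static_assert(AlignDown(0x12345, CACHING_PAGESIZE) == 0x10000);
    static_assert(AlignUp(0x12345 + 0x100, CACHING_PAGESIZE) == 0x20000);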
| 1724 | 1342 | ||
| @@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1746 | } | 1364 | } |
| 1747 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1365 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1748 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1366 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| 1749 | const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; | 1367 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; |
| 1750 | const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); | 1368 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); |
| 1751 | for (u64 page = page_begin; page != page_end; ++page) { | 1369 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1752 | if constexpr (insert) { | 1370 | if constexpr (insert) { |
| 1753 | page_table[page] = buffer_id; | 1371 | page_table[page] = buffer_id; |
| @@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1766 | 1384 | ||
| 1767 | template <class P> | 1385 | template <class P> |
| 1768 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1386 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1769 | if (buffer.CpuAddr() == 0) { | ||
| 1770 | return true; | ||
| 1771 | } | ||
| 1772 | return SynchronizeBufferImpl(buffer, cpu_addr, size); | 1387 | return SynchronizeBufferImpl(buffer, cpu_addr, size); |
| 1773 | } | 1388 | } |
| 1774 | 1389 | ||
| @@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1777 | boost::container::small_vector<BufferCopy, 4> copies; | 1392 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1778 | u64 total_size_bytes = 0; | 1393 | u64 total_size_bytes = 0; |
| 1779 | u64 largest_copy = 0; | 1394 | u64 largest_copy = 0; |
| 1780 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1395 | VAddr buffer_start = buffer.CpuAddr(); |
| 1396 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1781 | copies.push_back(BufferCopy{ | 1397 | copies.push_back(BufferCopy{ |
| 1782 | .src_offset = total_size_bytes, | 1398 | .src_offset = total_size_bytes, |
| 1783 | .dst_offset = range_offset, | 1399 | .dst_offset = cpu_addr_out - buffer_start, |
| 1784 | .size = range_size, | 1400 | .size = range_size, |
| 1785 | }); | 1401 | }); |
| 1786 | total_size_bytes += range_size; | 1402 | total_size_bytes += range_size; |
| @@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1795 | } | 1411 | } |
| 1796 | 1412 | ||
| 1797 | template <class P> | 1413 | template <class P> |
| 1414 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1415 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1416 | u64 total_size_bytes = 0; | ||
| 1417 | u64 largest_copy = 0; | ||
| 1418 | IntervalSet found_sets{}; | ||
| 1419 | auto make_copies = [&] { | ||
| 1420 | for (auto& interval : found_sets) { | ||
| 1421 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1422 | const VAddr cpu_addr_ = interval.lower(); | ||
| 1423 | copies.push_back(BufferCopy{ | ||
| 1424 | .src_offset = total_size_bytes, | ||
| 1425 | .dst_offset = cpu_addr_ - buffer.CpuAddr(), | ||
| 1426 | .size = sub_size, | ||
| 1427 | }); | ||
| 1428 | total_size_bytes += sub_size; | ||
| 1429 | largest_copy = std::max<u64>(largest_copy, sub_size); | ||
| 1430 | } | ||
| 1431 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1432 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1433 | }; | ||
| 1434 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1435 | const VAddr base_adr = cpu_addr_out; | ||
| 1436 | const VAddr end_adr = base_adr + range_size; | ||
| 1437 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1438 | found_sets.add(add_interval); | ||
| 1439 | }); | ||
| 1440 | if (found_sets.empty()) { | ||
| 1441 | return true; | ||
| 1442 | } | ||
| 1443 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1444 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1445 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1446 | if (it == common_ranges.end()) { | ||
| 1447 | make_copies(); | ||
| 1448 | return false; | ||
| 1449 | } | ||
| 1450 | while (it != it_end) { | ||
| 1451 | found_sets.subtract(*it); | ||
| 1452 | it++; | ||
| 1453 | } | ||
| 1454 | make_copies(); | ||
| 1455 | return false; | ||
| 1456 | } | ||
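SynchronizeBufferNoModified collects the CPU-dirty ranges reported by the tracker and then subtracts everything already present in common_ranges, so regions written by the GPU are not clobbered by a re-upload. A minimal sketch of that subtraction using boost::icl, with illustrative names only:

    #include <cstdint>
    #include <boost/icl/interval_set.hpp>

    using VAddr = std::uint64_t;
    using IntervalSet = boost::icl::interval_set<VAddr>;
    using IntervalType = IntervalSet::interval_type;

    int main() {
        IntervalSet found;                         // CPU-dirty ranges that may need uploading
        found.add(IntervalType{0x0000, 0x4000});
        IntervalSet gpu_written;                   // stand-in for common_ranges
        gpu_written.add(IntervalType{0x1000, 0x2000});
        for (const auto& interval : gpu_written) {
            found.subtract(interval);              // keep only ranges safe to upload
        }
        // found is now [0x0000, 0x1000) and [0x2000, 0x4000): two copies get generated.
        return found.iterative_size() == 2 ? 0 : 1;
    }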
| 1457 | |||
| 1458 | template <class P> | ||
| 1798 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1459 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1799 | std::span<BufferCopy> copies) { | 1460 | std::span<BufferCopy> copies) { |
| 1800 | if constexpr (USE_MEMORY_MAPS) { | 1461 | if constexpr (USE_MEMORY_MAPS) { |
| @@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg | |||
| 1805 | } | 1466 | } |
| 1806 | 1467 | ||
| 1807 | template <class P> | 1468 | template <class P> |
| 1808 | void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | 1469 | void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1809 | std::span<const BufferCopy> copies) { | 1470 | [[maybe_unused]] u64 largest_copy, |
| 1810 | std::span<u8> immediate_buffer; | 1471 | [[maybe_unused]] std::span<const BufferCopy> copies) { |
| 1811 | for (const BufferCopy& copy : copies) { | 1472 | if constexpr (!USE_MEMORY_MAPS) { |
| 1812 | std::span<const u8> upload_span; | 1473 | std::span<u8> immediate_buffer; |
| 1813 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1474 | for (const BufferCopy& copy : copies) { |
| 1814 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1475 | std::span<const u8> upload_span; |
| 1815 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1476 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1816 | } else { | 1477 | if (IsRangeGranular(cpu_addr, copy.size)) { |
| 1817 | if (immediate_buffer.empty()) { | 1478 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); |
| 1818 | immediate_buffer = ImmediateBuffer(largest_copy); | 1479 | } else { |
| 1480 | if (immediate_buffer.empty()) { | ||
| 1481 | immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1482 | } | ||
| 1483 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1484 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1819 | } | 1485 | } |
| 1820 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1486 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| 1821 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1822 | } | 1487 | } |
| 1823 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | ||
| 1824 | } | 1488 | } |
| 1825 | } | 1489 | } |
| 1826 | 1490 | ||
| 1827 | template <class P> | 1491 | template <class P> |
| 1828 | void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | 1492 | void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1829 | std::span<BufferCopy> copies) { | 1493 | [[maybe_unused]] u64 total_size_bytes, |
| 1830 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); | 1494 | [[maybe_unused]] std::span<BufferCopy> copies) { |
| 1831 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1495 | if constexpr (USE_MEMORY_MAPS) { |
| 1832 | for (BufferCopy& copy : copies) { | 1496 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); |
| 1833 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1497 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1834 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1498 | for (BufferCopy& copy : copies) { |
| 1835 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1499 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1500 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1501 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | ||
| 1836 | 1502 | ||
| 1837 | // Apply the staging offset | 1503 | // Apply the staging offset |
| 1838 | copy.src_offset += upload_staging.offset; | 1504 | copy.src_offset += upload_staging.offset; |
| 1505 | } | ||
| 1506 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1839 | } | 1507 | } |
| 1840 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1841 | } | 1508 | } |
| 1842 | 1509 | ||
| 1843 | template <class P> | 1510 | template <class P> |
| @@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1847 | if (!is_dirty) { | 1514 | if (!is_dirty) { |
| 1848 | return false; | 1515 | return false; |
| 1849 | } | 1516 | } |
| 1850 | if (!IsRegionGpuModified(dest_address, copy_size)) { | 1517 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); |
| 1518 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | ||
| 1519 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | ||
| 1851 | return false; | 1520 | return false; |
| 1852 | } | 1521 | } |
| 1853 | 1522 | ||
| @@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1886 | boost::container::small_vector<BufferCopy, 1> copies; | 1555 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1887 | u64 total_size_bytes = 0; | 1556 | u64 total_size_bytes = 0; |
| 1888 | u64 largest_copy = 0; | 1557 | u64 largest_copy = 0; |
| 1889 | buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1558 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1890 | const VAddr buffer_addr = buffer.CpuAddr(); | 1559 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { |
| 1891 | const auto add_download = [&](VAddr start, VAddr end) { | 1560 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 1892 | const u64 new_offset = start - buffer_addr; | 1561 | const auto add_download = [&](VAddr start, VAddr end) { |
| 1893 | const u64 new_size = end - start; | 1562 | const u64 new_offset = start - buffer_addr; |
| 1894 | copies.push_back(BufferCopy{ | 1563 | const u64 new_size = end - start; |
| 1895 | .src_offset = new_offset, | 1564 | copies.push_back(BufferCopy{ |
| 1896 | .dst_offset = total_size_bytes, | 1565 | .src_offset = new_offset, |
| 1897 | .size = new_size, | 1566 | .dst_offset = total_size_bytes, |
| 1898 | }); | 1567 | .size = new_size, |
| 1899 | // Align up to avoid cache conflicts | 1568 | }); |
| 1900 | constexpr u64 align = 256ULL; | 1569 | // Align up to avoid cache conflicts |
| 1901 | constexpr u64 mask = ~(align - 1ULL); | 1570 | constexpr u64 align = 64ULL; |
| 1902 | total_size_bytes += (new_size + align - 1) & mask; | 1571 | constexpr u64 mask = ~(align - 1ULL); |
| 1903 | largest_copy = std::max(largest_copy, new_size); | 1572 | total_size_bytes += (new_size + align - 1) & mask; |
| 1904 | }; | 1573 | largest_copy = std::max(largest_copy, new_size); |
| 1905 | 1574 | }; | |
| 1906 | const VAddr start_address = buffer_addr + range_offset; | 1575 | |
| 1907 | const VAddr end_address = start_address + range_size; | 1576 | const VAddr start_address = cpu_addr_out; |
| 1908 | ForEachWrittenRange(start_address, range_size, add_download); | 1577 | const VAddr end_address = start_address + range_size; |
| 1909 | const IntervalType subtract_interval{start_address, end_address}; | 1578 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1910 | ClearDownload(subtract_interval); | 1579 | const IntervalType subtract_interval{start_address, end_address}; |
| 1911 | common_ranges.subtract(subtract_interval); | 1580 | ClearDownload(subtract_interval); |
| 1912 | }); | 1581 | common_ranges.subtract(subtract_interval); |
| 1582 | }); | ||
| 1913 | if (total_size_bytes == 0) { | 1583 | if (total_size_bytes == 0) { |
| 1914 | return; | 1584 | return; |
| 1915 | } | 1585 | } |
| @@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1943 | } | 1613 | } |
| 1944 | 1614 | ||
| 1945 | template <class P> | 1615 | template <class P> |
| 1946 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1616 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { |
| 1947 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1617 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1948 | if (binding.buffer_id == buffer_id) { | 1618 | if (binding.buffer_id == buffer_id) { |
| 1949 | binding.buffer_id = BufferId{}; | 1619 | binding.buffer_id = BufferId{}; |
| @@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1962 | std::erase(cached_write_buffer_ids, buffer_id); | 1632 | std::erase(cached_write_buffer_ids, buffer_id); |
| 1963 | 1633 | ||
| 1964 | // Mark the whole buffer as CPU written to stop tracking CPU writes | 1634 | // Mark the whole buffer as CPU written to stop tracking CPU writes |
| 1965 | Buffer& buffer = slot_buffers[buffer_id]; | 1635 | if (!do_not_mark) { |
| 1966 | buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | 1636 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1637 | memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1638 | } | ||
| 1967 | 1639 | ||
| 1968 | Unregister(buffer_id); | 1640 | Unregister(buffer_id); |
| 1969 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1641 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| @@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 2011 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1683 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 2012 | return NULL_BINDING; | 1684 | return NULL_BINDING; |
| 2013 | } | 1685 | } |
| 2014 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1686 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE); |
| 2015 | const Binding binding{ | 1687 | const Binding binding{ |
| 2016 | .cpu_addr = *cpu_addr, | 1688 | .cpu_addr = *cpu_addr, |
| 2017 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), | 1689 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h new file mode 100644 index 000000000..656baa550 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -0,0 +1,580 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 11 | #include <numeric> | ||
| 12 | #include <span> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/container/small_vector.hpp> | ||
| 17 | #define BOOST_NO_MT | ||
| 18 | #include <boost/pool/detail/mutex.hpp> | ||
| 19 | #undef BOOST_NO_MT | ||
| 20 | #include <boost/icl/interval.hpp> | ||
| 21 | #include <boost/icl/interval_base_set.hpp> | ||
| 22 | #include <boost/icl/interval_set.hpp> | ||
| 23 | #include <boost/icl/split_interval_map.hpp> | ||
| 24 | #include <boost/pool/pool.hpp> | ||
| 25 | #include <boost/pool/pool_alloc.hpp> | ||
| 26 | #include <boost/pool/poolfwd.hpp> | ||
| 27 | |||
| 28 | #include "common/common_types.h" | ||
| 29 | #include "common/div_ceil.h" | ||
| 30 | #include "common/literals.h" | ||
| 31 | #include "common/lru_cache.h" | ||
| 32 | #include "common/microprofile.h" | ||
| 33 | #include "common/scope_exit.h" | ||
| 34 | #include "common/settings.h" | ||
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 37 | #include "video_core/control/channel_state_cache.h" | ||
| 38 | #include "video_core/delayed_destruction_ring.h" | ||
| 39 | #include "video_core/dirty_flags.h" | ||
| 40 | #include "video_core/engines/draw_manager.h" | ||
| 41 | #include "video_core/engines/kepler_compute.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | ||
| 46 | #include "video_core/texture_cache/slot_vector.h" | ||
| 47 | #include "video_core/texture_cache/types.h" | ||
| 48 | |||
| 49 | namespace boost { | ||
| 50 | template <typename T> | ||
| 51 | class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; | ||
| 52 | } | ||
| 53 | |||
| 54 | namespace VideoCommon { | ||
| 55 | |||
| 56 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 57 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 58 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 59 | |||
| 60 | using BufferId = SlotId; | ||
| 61 | |||
| 62 | using VideoCore::Surface::PixelFormat; | ||
| 63 | using namespace Common::Literals; | ||
| 64 | |||
| 65 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 66 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 67 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 68 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 69 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 70 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 71 | constexpr u32 NUM_STAGES = 5; | ||
| 72 | |||
| 73 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 74 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 75 | |||
| 76 | enum class ObtainBufferSynchronize : u32 { | ||
| 77 | NoSynchronize = 0, | ||
| 78 | FullSynchronize = 1, | ||
| 79 | SynchronizeNoDirty = 2, | ||
| 80 | }; | ||
| 81 | |||
| 82 | enum class ObtainBufferOperation : u32 { | ||
| 83 | DoNothing = 0, | ||
| 84 | MarkAsWritten = 1, | ||
| 85 | DiscardWrite = 2, | ||
| 86 | MarkQuery = 3, | ||
| 87 | }; | ||
| 88 | |||
| 89 | template <typename P> | ||
| 90 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 91 | // Page size for caching purposes. | ||
| 92 | // This is unrelated to the CPU page size and can be changed to whatever size seems optimal. | ||
| 93 | static constexpr u32 CACHING_PAGEBITS = 16; | ||
| 94 | static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; | ||
| 95 | |||
| 96 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 97 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 98 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 99 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 100 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 101 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 102 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 103 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 104 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 105 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | ||
| 106 | |||
| 107 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 108 | |||
| 109 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 110 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 111 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 112 | |||
| 113 | // Debug Flags. | ||
| 114 | |||
| 115 | static constexpr bool DISABLE_DOWNLOADS = true; | ||
| 116 | |||
| 117 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 118 | |||
| 119 | using Runtime = typename P::Runtime; | ||
| 120 | using Buffer = typename P::Buffer; | ||
| 121 | using Async_Buffer = typename P::Async_Buffer; | ||
| 122 | using MemoryTracker = typename P::MemoryTracker; | ||
| 123 | |||
| 124 | using IntervalCompare = std::less<VAddr>; | ||
| 125 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | ||
| 126 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | ||
| 127 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 128 | using IntervalType = typename IntervalSet::interval_type; | ||
| 129 | |||
| 130 | template <typename Type> | ||
| 131 | struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { | ||
| 132 | // types | ||
| 133 | typedef counter_add_functor<Type> type; | ||
| 134 | typedef boost::icl::identity_based_inplace_combine<Type> base_type; | ||
| 135 | |||
| 136 | // public member functions | ||
| 137 | void operator()(Type& current, const Type& added) const { | ||
| 138 | current += added; | ||
| 139 | if (current < base_type::identity_element()) { | ||
| 140 | current = base_type::identity_element(); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | // public static functions | ||
| 145 | static void version(Type&){}; | ||
| 146 | }; | ||
| 147 | |||
| 148 | using OverlapCombine = counter_add_functor<int>; | ||
| 149 | using OverlapSection = boost::icl::inter_section<int>; | ||
| 150 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | ||
| 151 | |||
| 152 | struct Empty {}; | ||
| 153 | |||
| 154 | struct OverlapResult { | ||
| 155 | std::vector<BufferId> ids; | ||
| 156 | VAddr begin; | ||
| 157 | VAddr end; | ||
| 158 | bool has_stream_leap = false; | ||
| 159 | }; | ||
| 160 | |||
| 161 | struct Binding { | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | u32 size{}; | ||
| 164 | BufferId buffer_id; | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct TextureBufferBinding : Binding { | ||
| 168 | PixelFormat format; | ||
| 169 | }; | ||
| 170 | |||
| 171 | static constexpr Binding NULL_BINDING{ | ||
| 172 | .cpu_addr = 0, | ||
| 173 | .size = 0, | ||
| 174 | .buffer_id = NULL_BUFFER_ID, | ||
| 175 | }; | ||
| 176 | |||
| 177 | public: | ||
| 178 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 179 | |||
| 180 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 181 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 182 | |||
| 183 | void TickFrame(); | ||
| 184 | |||
| 185 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 186 | |||
| 187 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 188 | |||
| 189 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 190 | |||
| 191 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 192 | |||
| 193 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 194 | |||
| 195 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 196 | |||
| 197 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 198 | |||
| 199 | void UpdateComputeBuffers(); | ||
| 200 | |||
| 201 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 202 | |||
| 203 | void BindHostStageBuffers(size_t stage); | ||
| 204 | |||
| 205 | void BindHostComputeBuffers(); | ||
| 206 | |||
| 207 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 208 | const UniformBufferSizes* sizes); | ||
| 209 | |||
| 210 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 211 | |||
| 212 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 213 | |||
| 214 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 215 | bool is_written); | ||
| 216 | |||
| 217 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 218 | |||
| 219 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 220 | PixelFormat format, bool is_written, bool is_image); | ||
| 221 | |||
| 222 | void UnbindComputeStorageBuffers(); | ||
| 223 | |||
| 224 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 225 | bool is_written); | ||
| 226 | |||
| 227 | void UnbindComputeTextureBuffers(); | ||
| 228 | |||
| 229 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 230 | bool is_written, bool is_image); | ||
| 231 | |||
| 232 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 233 | ObtainBufferSynchronize sync_info, | ||
| 234 | ObtainBufferOperation post_op); | ||
| 235 | void FlushCachedWrites(); | ||
| 236 | |||
| 237 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 238 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 239 | |||
| 240 | void AccumulateFlushes(); | ||
| 241 | |||
| 242 | /// Return true when the caller should wait for async downloads | ||
| 243 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 244 | |||
| 245 | /// Commit asynchronous downloads | ||
| 246 | void CommitAsyncFlushes(); | ||
| 247 | void CommitAsyncFlushesHigh(); | ||
| 248 | |||
| 249 | /// Pop asynchronous downloads | ||
| 250 | void PopAsyncFlushes(); | ||
| 251 | void PopAsyncBuffers(); | ||
| 252 | |||
| 253 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 254 | |||
| 255 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 256 | |||
| 257 | /// Return true when a CPU region is modified from the GPU | ||
| 258 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 259 | |||
| 260 | /// Return true when a region is registered on the cache | ||
| 261 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 262 | |||
| 263 | /// Return true when a CPU region is modified from the CPU | ||
| 264 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 265 | |||
| 266 | void SetDrawIndirect( | ||
| 267 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 268 | current_draw_indirect = current_draw_indirect_; | ||
| 269 | } | ||
| 270 | |||
| 271 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 272 | |||
| 273 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 274 | |||
| 275 | std::recursive_mutex mutex; | ||
| 276 | Runtime& runtime; | ||
| 277 | |||
| 278 | private: | ||
| 279 | template <typename Func> | ||
| 280 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 281 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 282 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 283 | index += disabled_bits; | ||
| 284 | enabled_mask >>= disabled_bits; | ||
| 285 | func(index); | ||
| 286 | } | ||
| 287 | } | ||
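ForEachEnabledBit visits the index of every set bit, using std::countr_zero to hop over runs of zeros instead of testing one bit at a time. A standalone usage sketch (the helper is copied from above) showing the indices it reports for a sample mask:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    template <typename Func>
    void ForEachEnabledBit(std::uint32_t enabled_mask, Func&& func) {
        for (std::uint32_t index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
            const int disabled_bits = std::countr_zero(enabled_mask);
            index += disabled_bits;
            enabled_mask >>= disabled_bits;
            func(index);
        }
    }

    int main() {
        // Bits 2, 5 and 7 are set, so the callback receives 2, 5 and 7 in order.
        ForEachEnabledBit(0b1010'0100u, [](std::uint32_t index) {
            std::printf("%u ", static_cast<unsigned>(index));
        });
        std::puts("");
    }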
| 288 | |||
| 289 | template <typename Func> | ||
| 290 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 291 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | ||
| 292 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | ||
| 293 | const BufferId buffer_id = page_table[page]; | ||
| 294 | if (!buffer_id) { | ||
| 295 | ++page; | ||
| 296 | continue; | ||
| 297 | } | ||
| 298 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 299 | func(buffer_id, buffer); | ||
| 300 | |||
| 301 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 302 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | template <typename Func> | ||
| 307 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | ||
| 308 | const VAddr start_address = cpu_addr; | ||
| 309 | const VAddr end_address = start_address + size; | ||
| 310 | const IntervalType search_interval{start_address, end_address}; | ||
| 311 | auto it = current_range.lower_bound(search_interval); | ||
| 312 | if (it == current_range.end()) { | ||
| 313 | return; | ||
| 314 | } | ||
| 315 | auto end_it = current_range.upper_bound(search_interval); | ||
| 316 | for (; it != end_it; it++) { | ||
| 317 | VAddr inter_addr_end = it->upper(); | ||
| 318 | VAddr inter_addr = it->lower(); | ||
| 319 | if (inter_addr_end > end_address) { | ||
| 320 | inter_addr_end = end_address; | ||
| 321 | } | ||
| 322 | if (inter_addr < start_address) { | ||
| 323 | inter_addr = start_address; | ||
| 324 | } | ||
| 325 | func(inter_addr, inter_addr_end); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | template <typename Func> | ||
| 330 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||
| 331 | Func&& func) { | ||
| 332 | const VAddr start_address = cpu_addr; | ||
| 333 | const VAddr end_address = start_address + size; | ||
| 334 | const IntervalType search_interval{start_address, end_address}; | ||
| 335 | auto it = current_range.lower_bound(search_interval); | ||
| 336 | if (it == current_range.end()) { | ||
| 337 | return; | ||
| 338 | } | ||
| 339 | auto end_it = current_range.upper_bound(search_interval); | ||
| 340 | for (; it != end_it; it++) { | ||
| 341 | auto& inter = it->first; | ||
| 342 | VAddr inter_addr_end = inter.upper(); | ||
| 343 | VAddr inter_addr = inter.lower(); | ||
| 344 | if (inter_addr_end > end_address) { | ||
| 345 | inter_addr_end = end_address; | ||
| 346 | } | ||
| 347 | if (inter_addr < start_address) { | ||
| 348 | inter_addr = start_address; | ||
| 349 | } | ||
| 350 | func(inter_addr, inter_addr_end, it->second); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | void RemoveEachInOverlapCounter(OverlapCounter& current_range, | ||
| 355 | const IntervalType search_interval, int subtract_value) { | ||
| 356 | bool any_removals = false; | ||
| 357 | current_range.add(std::make_pair(search_interval, subtract_value)); | ||
| 358 | do { | ||
| 359 | any_removals = false; | ||
| 360 | auto it = current_range.lower_bound(search_interval); | ||
| 361 | if (it == current_range.end()) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | auto end_it = current_range.upper_bound(search_interval); | ||
| 365 | for (; it != end_it; it++) { | ||
| 366 | if (it->second <= 0) { | ||
| 367 | any_removals = true; | ||
| 368 | current_range.erase(it); | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | } while (any_removals); | ||
| 373 | } | ||
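async_downloads is a split_interval_map used as a per-range reference counter: every queued download adds +1 over its interval, and the helpers above walk and decrement it (counter_add_functor clamps at the identity element so counts never go negative). A small standalone sketch of how such a counter splits and aggregates, using plain boost::icl semantics rather than the cache's own types:

    #include <cstdint>
    #include <utility>
    #include <boost/icl/split_interval_map.hpp>

    using VAddr = std::uint64_t;
    using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
    using Interval = OverlapCounter::interval_type;

    int main() {
        OverlapCounter counter;
        counter += std::make_pair(Interval{0x0000, 0x3000}, 1);
        counter += std::make_pair(Interval{0x1000, 0x2000}, 1);
        // The map now holds [0x0000,0x1000)->1, [0x1000,0x2000)->2, [0x2000,0x3000)->1.
        return counter.iterative_size() == 3 ? 0 : 1;
    }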
| 374 | |||
| 375 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 376 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 377 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 378 | } | ||
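IsRangeGranular reports whether a range stays inside a single CPU page, so it can be serviced through one host pointer. A standalone sketch of the same check, assuming 4 KiB guest pages (i.e. YUZU_PAGEMASK = 0xFFF):

    #include <cstddef>
    #include <cstdint>

    // Assumption: the guest page size is 4 KiB, giving a 0xFFF page mask.
    constexpr std::uint64_t PAGE_MASK = 0xFFF;

    constexpr bool IsRangeGranular(std::uint64_t cpu_addr, std::size_t size) {
        return (cpu_addr & ~PAGE_MASK) == ((cpu_addr + size) & ~PAGE_MASK);
    }

    static_assert(IsRangeGranular(0x1000, 0x800));   // ends inside the same 4 KiB page
    static_assert(!IsRangeGranular(0x1F00, 0x200));  // crosses into the next page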
| 379 | |||
| 380 | void RunGarbageCollector(); | ||
| 381 | |||
| 382 | void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size); | ||
| 383 | |||
| 384 | void BindHostIndexBuffer(); | ||
| 385 | |||
| 386 | void BindHostVertexBuffers(); | ||
| 387 | |||
| 388 | void BindHostDrawIndirectBuffers(); | ||
| 389 | |||
| 390 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 391 | |||
| 392 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 393 | |||
| 394 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 395 | |||
| 396 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 397 | |||
| 398 | void BindHostTransformFeedbackBuffers(); | ||
| 399 | |||
| 400 | void BindHostComputeUniformBuffers(); | ||
| 401 | |||
| 402 | void BindHostComputeStorageBuffers(); | ||
| 403 | |||
| 404 | void BindHostComputeTextureBuffers(); | ||
| 405 | |||
| 406 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 407 | |||
| 408 | void DoUpdateComputeBuffers(); | ||
| 409 | |||
| 410 | void UpdateIndexBuffer(); | ||
| 411 | |||
| 412 | void UpdateVertexBuffers(); | ||
| 413 | |||
| 414 | void UpdateVertexBuffer(u32 index); | ||
| 415 | |||
| 416 | void UpdateDrawIndirect(); | ||
| 417 | |||
| 418 | void UpdateUniformBuffers(size_t stage); | ||
| 419 | |||
| 420 | void UpdateStorageBuffers(size_t stage); | ||
| 421 | |||
| 422 | void UpdateTextureBuffers(size_t stage); | ||
| 423 | |||
| 424 | void UpdateTransformFeedbackBuffers(); | ||
| 425 | |||
| 426 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 427 | |||
| 428 | void UpdateComputeUniformBuffers(); | ||
| 429 | |||
| 430 | void UpdateComputeStorageBuffers(); | ||
| 431 | |||
| 432 | void UpdateComputeTextureBuffers(); | ||
| 433 | |||
| 434 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 435 | |||
| 436 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 437 | |||
| 438 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 439 | |||
| 440 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 441 | |||
| 442 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 443 | |||
| 444 | void Register(BufferId buffer_id); | ||
| 445 | |||
| 446 | void Unregister(BufferId buffer_id); | ||
| 447 | |||
| 448 | template <bool insert> | ||
| 449 | void ChangeRegister(BufferId buffer_id); | ||
| 450 | |||
| 451 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 452 | |||
| 453 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 454 | |||
| 455 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 456 | |||
| 457 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 458 | |||
| 459 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 460 | std::span<BufferCopy> copies); | ||
| 461 | |||
| 462 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 463 | std::span<const BufferCopy> copies); | ||
| 464 | |||
| 465 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 466 | |||
| 467 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 468 | |||
| 469 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 470 | |||
| 471 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | ||
| 472 | |||
| 473 | void NotifyBufferDeletion(); | ||
| 474 | |||
| 475 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 476 | bool is_written) const; | ||
| 477 | |||
| 478 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 479 | PixelFormat format); | ||
| 480 | |||
| 481 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 482 | |||
| 483 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 484 | |||
| 485 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 486 | |||
| 487 | void ClearDownload(IntervalType subtract_interval); | ||
| 488 | |||
| 489 | VideoCore::RasterizerInterface& rasterizer; | ||
| 490 | Core::Memory::Memory& cpu_memory; | ||
| 491 | |||
| 492 | SlotVector<Buffer> slot_buffers; | ||
| 493 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 494 | |||
| 495 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 496 | |||
| 497 | u32 last_index_count = 0; | ||
| 498 | |||
| 499 | Binding index_buffer; | ||
| 500 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 501 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 502 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 503 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 504 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 505 | Binding count_buffer_binding; | ||
| 506 | Binding indirect_buffer_binding; | ||
| 507 | |||
| 508 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 509 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 510 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 511 | |||
| 512 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 513 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 514 | |||
| 515 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 516 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 517 | |||
| 518 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 519 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 520 | u32 enabled_compute_storage_buffers = 0; | ||
| 521 | u32 written_compute_storage_buffers = 0; | ||
| 522 | |||
| 523 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 524 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 525 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 526 | u32 enabled_compute_texture_buffers = 0; | ||
| 527 | u32 written_compute_texture_buffers = 0; | ||
| 528 | u32 image_compute_texture_buffers = 0; | ||
| 529 | |||
| 530 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 531 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 532 | |||
| 533 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 534 | |||
| 535 | bool has_deleted_buffers = false; | ||
| 536 | |||
| 537 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 538 | dirty_uniform_buffers{}; | ||
| 539 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 540 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 541 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 542 | uniform_buffer_binding_sizes{}; | ||
| 543 | |||
| 544 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 545 | |||
| 546 | MemoryTracker memory_tracker; | ||
| 547 | IntervalSet uncommitted_ranges; | ||
| 548 | IntervalSet common_ranges; | ||
| 549 | IntervalSet cached_ranges; | ||
| 550 | IntervalSet pending_ranges; | ||
| 551 | std::deque<IntervalSet> committed_ranges; | ||
| 552 | |||
| 553 | // Async Buffers | ||
| 554 | OverlapCounter async_downloads; | ||
| 555 | std::deque<std::optional<Async_Buffer>> async_buffers; | ||
| 556 | std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads; | ||
| 557 | std::optional<Async_Buffer> current_buffer; | ||
| 558 | |||
| 559 | std::deque<Async_Buffer> async_buffers_death_ring; | ||
| 560 | |||
| 561 | size_t immediate_buffer_capacity = 0; | ||
| 562 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 563 | |||
| 564 | struct LRUItemParams { | ||
| 565 | using ObjectType = BufferId; | ||
| 566 | using TickType = u64; | ||
| 567 | }; | ||
| 568 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 569 | u64 frame_tick = 0; | ||
| 570 | u64 total_used_memory = 0; | ||
| 571 | u64 minimum_memory = 0; | ||
| 572 | u64 critical_memory = 0; | ||
| 573 | BufferId inline_buffer_id; | ||
| 574 | |||
| 575 | bool active_async_buffers = false; | ||
| 576 | |||
| 577 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | ||
| 578 | }; | ||
| 579 | |||
| 580 | } // namespace VideoCommon | ||
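The flat page_table declared near the end of the class spans the 39-bit guest address space at caching-page granularity: 2^39 bytes divided into 2^16-byte pages gives 2^23 entries. Assuming BufferId (a SlotId) is a 32-bit handle, that is 32 MiB of table. The arithmetic as a standalone compile-time check:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t CACHING_PAGEBITS = 16;
    constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = (std::size_t{1} << 39) >> CACHING_PAGEBITS;

    static_assert(NUM_PAGE_TABLE_ENTRIES == (std::size_t{1} << 23));
    // Assumption: BufferId is a 32-bit handle, so the table occupies 32 MiB.
    static_assert(NUM_PAGE_TABLE_ENTRIES * sizeof(std::uint32_t) == 32u << 20);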
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h new file mode 100644 index 000000000..dc4ebfcaa --- /dev/null +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <deque> | ||
| 9 | #include <limits> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/buffer_cache/word_manager.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | template <class RasterizerInterface> | ||
| 21 | class MemoryTrackerBase { | ||
| 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 39; | ||
| 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; | ||
| 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | ||
| 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | ||
| 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | ||
| 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; | ||
| 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | ||
| 29 | using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | ||
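MemoryTrackerBase splits the 39-bit address space into 4 MiB "higher pages", each owned by one WordManager, so dirty-bit state stays compact per region while the top-level array stays small. The derived sizes, checked standalone:

    #include <cstddef>

    constexpr std::size_t MAX_CPU_PAGE_BITS = 39;
    constexpr std::size_t HIGHER_PAGE_BITS = 22;
    constexpr std::size_t HIGHER_PAGE_SIZE = std::size_t{1} << HIGHER_PAGE_BITS;
    constexpr std::size_t NUM_HIGH_PAGES = std::size_t{1} << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);

    static_assert(HIGHER_PAGE_SIZE == 4u << 20); // each WordManager covers 4 MiB
    static_assert(NUM_HIGH_PAGES == 131072);     // 2^17 top-level slots span 2^39 bytes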
| 30 | |||
| 31 | public: | ||
| 32 | MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | ||
| 33 | ~MemoryTrackerBase() = default; | ||
| 34 | |||
| 35 | /// Returns the inclusive CPU modified range in a begin end pair | ||
| 36 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||
| 37 | u64 query_size) noexcept { | ||
| 38 | return IteratePairs<true>( | ||
| 39 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 40 | return manager->template ModifiedRegion<Type::CPU>(offset, size); | ||
| 41 | }); | ||
| 42 | } | ||
| 43 | |||
| 44 | /// Returns the inclusive GPU modified range as a begin-end pair | ||
| 45 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||
| 46 | u64 query_size) noexcept { | ||
| 47 | return IteratePairs<false>( | ||
| 48 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 49 | return manager->template ModifiedRegion<Type::GPU>(offset, size); | ||
| 50 | }); | ||
| 51 | } | ||
| 52 | |||
| 53 | /// Returns true if a region has been modified from the CPU | ||
| 54 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 55 | return IteratePages<true>( | ||
| 56 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 57 | return manager->template IsRegionModified<Type::CPU>(offset, size); | ||
| 58 | }); | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Returns true if a region has been modified from the GPU | ||
| 62 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 63 | return IteratePages<false>( | ||
| 64 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 65 | return manager->template IsRegionModified<Type::GPU>(offset, size); | ||
| 66 | }); | ||
| 67 | } | ||
| 68 | |||
| 69 | /// Mark region as CPU modified, notifying the rasterizer about this change | ||
| 70 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 71 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 72 | [](Manager* manager, u64 offset, size_t size) { | ||
| 73 | manager->template ChangeRegionState<Type::CPU, true>( | ||
| 74 | manager->GetCpuAddr() + offset, size); | ||
| 75 | }); | ||
| 76 | } | ||
| 77 | |||
| 78 | /// Unmark region as CPU modified, notifying the rasterizer about this change | ||
| 79 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 80 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 81 | [](Manager* manager, u64 offset, size_t size) { | ||
| 82 | manager->template ChangeRegionState<Type::CPU, false>( | ||
| 83 | manager->GetCpuAddr() + offset, size); | ||
| 84 | }); | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Mark region as modified from the host GPU | ||
| 88 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { | ||
| 89 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 90 | [](Manager* manager, u64 offset, size_t size) { | ||
| 91 | manager->template ChangeRegionState<Type::GPU, true>( | ||
| 92 | manager->GetCpuAddr() + offset, size); | ||
| 93 | }); | ||
| 94 | } | ||
| 95 | |||
| 96 | /// Unmark region as modified from the host GPU | ||
| 97 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { | ||
| 98 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 99 | [](Manager* manager, u64 offset, size_t size) { | ||
| 100 | manager->template ChangeRegionState<Type::GPU, false>( | ||
| 101 | manager->GetCpuAddr() + offset, size); | ||
| 102 | }); | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Mark region as modified from the CPU, deferring the change | ||
| 106 | /// until FlushCachedWrites is called. | ||
| 107 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 108 | IteratePages<true>( | ||
| 109 | dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) { | ||
| 110 | const VAddr cpu_address = manager->GetCpuAddr() + offset; | ||
| 111 | manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size); | ||
| 112 | cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS)); | ||
| 113 | }); | ||
| 114 | } | ||
| 115 | |||
| 116 | /// Flushes cached CPU writes and notifies the rasterizer about the deltas | ||
| 117 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 118 | IteratePages<false>(query_cpu_addr, query_size, | ||
| 119 | [](Manager* manager, [[maybe_unused]] u64 offset, | ||
| 120 | [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); }); | ||
| 121 | } | ||
| 122 | |||
| 123 | void FlushCachedWrites() noexcept { | ||
| 124 | for (auto id : cached_pages) { | ||
| 125 | top_tier[id]->FlushCachedWrites(); | ||
| 126 | } | ||
| 127 | cached_pages.clear(); | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | ||
| 131 | template <typename Func> | ||
| 132 | void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { | ||
| 133 | IteratePages<true>(query_cpu_range, query_size, | ||
| 134 | [&func](Manager* manager, u64 offset, size_t size) { | ||
| 135 | manager->template ForEachModifiedRange<Type::CPU, true>( | ||
| 136 | manager->GetCpuAddr() + offset, size, func); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 141 | template <typename Func> | ||
| 142 | void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) { | ||
| 143 | IteratePages<false>(query_cpu_range, query_size, | ||
| 144 | [&func, clear](Manager* manager, u64 offset, size_t size) { | ||
| 145 | if (clear) { | ||
| 146 | manager->template ForEachModifiedRange<Type::GPU, true>( | ||
| 147 | manager->GetCpuAddr() + offset, size, func); | ||
| 148 | } else { | ||
| 149 | manager->template ForEachModifiedRange<Type::GPU, false>( | ||
| 150 | manager->GetCpuAddr() + offset, size, func); | ||
| 151 | } | ||
| 152 | }); | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename Func> | ||
| 156 | void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) { | ||
| 157 | IteratePages<false>(query_cpu_range, query_size, | ||
| 158 | [&func](Manager* manager, u64 offset, size_t size) { | ||
| 159 | manager->template ForEachModifiedRange<Type::GPU, true>( | ||
| 160 | manager->GetCpuAddr() + offset, size, func); | ||
| 161 | }); | ||
| 162 | } | ||
| 163 | |||
| 164 | private: | ||
| 165 | template <bool create_region_on_fail, typename Func> | ||
| 166 | bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { | ||
| 167 | using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type; | ||
| 168 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 169 | std::size_t remaining_size{size}; | ||
| 170 | std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; | ||
| 171 | u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; | ||
| 172 | while (remaining_size > 0) { | ||
| 173 | const std::size_t copy_amount{ | ||
| 174 | std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; | ||
| 175 | auto* manager{top_tier[page_index]}; | ||
| 176 | if (manager) { | ||
| 177 | if constexpr (BOOL_BREAK) { | ||
| 178 | if (func(manager, page_offset, copy_amount)) { | ||
| 179 | return true; | ||
| 180 | } | ||
| 181 | } else { | ||
| 182 | func(manager, page_offset, copy_amount); | ||
| 183 | } | ||
| 184 | } else if constexpr (create_region_on_fail) { | ||
| 185 | CreateRegion(page_index); | ||
| 186 | manager = top_tier[page_index]; | ||
| 187 | if constexpr (BOOL_BREAK) { | ||
| 188 | if (func(manager, page_offset, copy_amount)) { | ||
| 189 | return true; | ||
| 190 | } | ||
| 191 | } else { | ||
| 192 | func(manager, page_offset, copy_amount); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | page_index++; | ||
| 196 | page_offset = 0; | ||
| 197 | remaining_size -= copy_amount; | ||
| 198 | } | ||
| 199 | return false; | ||
| 200 | } | ||
| 201 | |||
| 202 | template <bool create_region_on_fail, typename Func> | ||
| 203 | std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) { | ||
| 204 | std::size_t remaining_size{size}; | ||
| 205 | std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; | ||
| 206 | u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; | ||
| 207 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 208 | u64 end = 0; | ||
| 209 | while (remaining_size > 0) { | ||
| 210 | const std::size_t copy_amount{ | ||
| 211 | std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; | ||
| 212 | auto* manager{top_tier[page_index]}; | ||
| 213 | const auto execute = [&] { | ||
| 214 | auto [new_begin, new_end] = func(manager, page_offset, copy_amount); | ||
| 215 | if (new_begin != 0 || new_end != 0) { | ||
| 216 | const u64 base_address = page_index << HIGHER_PAGE_BITS; | ||
| 217 | begin = std::min(new_begin + base_address, begin); | ||
| 218 | end = std::max(new_end + base_address, end); | ||
| 219 | } | ||
| 220 | }; | ||
| 221 | if (manager) { | ||
| 222 | execute(); | ||
| 223 | } else if constexpr (create_region_on_fail) { | ||
| 224 | CreateRegion(page_index); | ||
| 225 | manager = top_tier[page_index]; | ||
| 226 | execute(); | ||
| 227 | } | ||
| 228 | page_index++; | ||
| 229 | page_offset = 0; | ||
| 230 | remaining_size -= copy_amount; | ||
| 231 | } | ||
| 232 | if (begin < end) { | ||
| 233 | return std::make_pair(begin, end); | ||
| 234 | } else { | ||
| 235 | return std::make_pair(0ULL, 0ULL); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | void CreateRegion(std::size_t page_index) { | ||
| 240 | const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; | ||
| 241 | top_tier[page_index] = GetNewManager(base_cpu_addr); | ||
| 242 | } | ||
| 243 | |||
| 244 | Manager* GetNewManager(VAddr base_cpu_address) { | ||
| 245 | const auto on_return = [&] { | ||
| 246 | auto* new_manager = free_managers.front(); | ||
| 247 | new_manager->SetCpuAddress(base_cpu_address); | ||
| 248 | free_managers.pop_front(); | ||
| 249 | return new_manager; | ||
| 250 | }; | ||
| 251 | if (!free_managers.empty()) { | ||
| 252 | return on_return(); | ||
| 253 | } | ||
| 254 | manager_pool.emplace_back(); | ||
| 255 | auto& last_pool = manager_pool.back(); | ||
| 256 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | ||
| 257 | new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | ||
| 258 | free_managers.push_back(&last_pool[i]); | ||
| 259 | } | ||
| 260 | return on_return(); | ||
| 261 | } | ||
| 262 | |||
| 263 | std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool; | ||
| 264 | std::deque<Manager*> free_managers; | ||
| 265 | |||
| 266 | std::array<Manager*, NUM_HIGH_PAGES> top_tier{}; | ||
| 267 | |||
| 268 | std::unordered_set<u32> cached_pages; | ||
| 269 | |||
| 270 | RasterizerInterface* rasterizer = nullptr; | ||
| 271 | }; | ||
| 272 | |||
| 273 | } // namespace VideoCommon | ||
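The tracker above splits the 39-bit CPU address space into 4 MiB regions (HIGHER_PAGE_BITS = 22) and lazily creates one WordManager per region that is actually touched. The following is a minimal standalone sketch of that address split, reusing the same constants but none of the yuzu types; the ForEachHighPage name is made up for illustration:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Assumed constants mirroring the tracker: 4 MiB higher-level pages.
    constexpr std::size_t HIGHER_PAGE_BITS = 22;
    constexpr std::size_t HIGHER_PAGE_SIZE = std::size_t{1} << HIGHER_PAGE_BITS;
    constexpr std::size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1;

    // Visit each (page_index, offset, length) chunk covered by [addr, addr + size).
    template <typename Func>
    void ForEachHighPage(std::uint64_t addr, std::size_t size, Func&& func) {
        std::size_t page_index = static_cast<std::size_t>(addr >> HIGHER_PAGE_BITS);
        std::size_t offset = static_cast<std::size_t>(addr & HIGHER_PAGE_MASK);
        while (size > 0) {
            const std::size_t chunk = std::min(HIGHER_PAGE_SIZE - offset, size);
            func(page_index, offset, chunk);
            ++page_index;
            offset = 0;
            size -= chunk;
        }
    }

    int main() {
        // A 10 MiB range starting 1 KiB into a region spans three higher-level pages.
        ForEachHighPage(0x400, 10u << 20, [](std::size_t page, std::size_t off, std::size_t len) {
            std::printf("page %zu offset 0x%zx length 0x%zx\n", page, off, len);
        });
    }

This mirrors IteratePages: the first and last chunks may be partial, every chunk in between covers a full region, and each per-region manager only tracks offsets inside its own 4 MiB window.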
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h new file mode 100644 index 000000000..a42455045 --- /dev/null +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -0,0 +1,462 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <limits> | ||
| 9 | #include <span> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include "common/alignment.h" | ||
| 13 | #include "common/common_funcs.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/div_ceil.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | constexpr u64 PAGES_PER_WORD = 64; | ||
| 21 | constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | ||
| 22 | constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||
| 23 | |||
| 24 | enum class Type { | ||
| 25 | CPU, | ||
| 26 | GPU, | ||
| 27 | CachedCPU, | ||
| 28 | Untracked, | ||
| 29 | }; | ||
| 30 | |||
| 31 | /// Tightly packed words tracking modified pages, with a small-vector optimization | ||
| 32 | template <size_t stack_words = 1> | ||
| 33 | struct WordsArray { | ||
| 34 | /// Returns the pointer to the words state | ||
| 35 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 36 | return is_short ? stack.data() : heap; | ||
| 37 | } | ||
| 38 | |||
| 39 | /// Returns the pointer to the words state | ||
| 40 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 41 | return is_short ? stack.data() : heap; | ||
| 42 | } | ||
| 43 | |||
| 44 | std::array<u64, stack_words> stack{}; ///< Storage for small buffers | ||
| 45 | u64* heap; ///< Pointer to heap storage for larger buffers | ||
| 46 | }; | ||
| 47 | |||
| 48 | template <size_t stack_words = 1> | ||
| 49 | struct Words { | ||
| 50 | explicit Words() = default; | ||
| 51 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||
| 52 | num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); | ||
| 53 | if (IsShort()) { | ||
| 54 | cpu.stack.fill(~u64{0}); | ||
| 55 | gpu.stack.fill(0); | ||
| 56 | cached_cpu.stack.fill(0); | ||
| 57 | untracked.stack.fill(~u64{0}); | ||
| 58 | } else { | ||
| 59 | // Share one allocation between all tracking word arrays and set their default values | ||
| 60 | u64* const alloc = new u64[num_words * 4]; | ||
| 61 | cpu.heap = alloc; | ||
| 62 | gpu.heap = alloc + num_words; | ||
| 63 | cached_cpu.heap = alloc + num_words * 2; | ||
| 64 | untracked.heap = alloc + num_words * 3; | ||
| 65 | std::fill_n(cpu.heap, num_words, ~u64{0}); | ||
| 66 | std::fill_n(gpu.heap, num_words, 0); | ||
| 67 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 68 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 69 | } | ||
| 70 | // Clean up trailing bits | ||
| 71 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||
| 72 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||
| 73 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||
| 74 | const u64 last_word = (~u64{0} << shift) >> shift; | ||
| 75 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 76 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 77 | } | ||
| 78 | |||
| 79 | ~Words() { | ||
| 80 | Release(); | ||
| 81 | } | ||
| 82 | |||
| 83 | Words& operator=(Words&& rhs) noexcept { | ||
| 84 | Release(); | ||
| 85 | size_bytes = rhs.size_bytes; | ||
| 86 | num_words = rhs.num_words; | ||
| 87 | cpu = rhs.cpu; | ||
| 88 | gpu = rhs.gpu; | ||
| 89 | cached_cpu = rhs.cached_cpu; | ||
| 90 | untracked = rhs.untracked; | ||
| 91 | rhs.cpu.heap = nullptr; | ||
| 92 | return *this; | ||
| 93 | } | ||
| 94 | |||
| 95 | Words(Words&& rhs) noexcept | ||
| 96 | : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||
| 97 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 98 | rhs.cpu.heap = nullptr; | ||
| 99 | } | ||
| 100 | |||
| 101 | Words& operator=(const Words&) = delete; | ||
| 102 | Words(const Words&) = delete; | ||
| 103 | |||
| 104 | /// Returns true when the buffer fits in the small vector optimization | ||
| 105 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 106 | return num_words <= stack_words; | ||
| 107 | } | ||
| 108 | |||
| 109 | /// Returns the number of words of the buffer | ||
| 110 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 111 | return num_words; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Release buffer resources | ||
| 115 | void Release() { | ||
| 116 | if (!IsShort()) { | ||
| 117 | // The CPU word array is the base pointer of the shared heap allocation | ||
| 118 | delete[] cpu.heap; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | template <Type type> | ||
| 123 | std::span<u64> Span() noexcept { | ||
| 124 | if constexpr (type == Type::CPU) { | ||
| 125 | return std::span<u64>(cpu.Pointer(IsShort()), num_words); | ||
| 126 | } else if constexpr (type == Type::GPU) { | ||
| 127 | return std::span<u64>(gpu.Pointer(IsShort()), num_words); | ||
| 128 | } else if constexpr (type == Type::CachedCPU) { | ||
| 129 | return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words); | ||
| 130 | } else if constexpr (type == Type::Untracked) { | ||
| 131 | return std::span<u64>(untracked.Pointer(IsShort()), num_words); | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | template <Type type> | ||
| 136 | std::span<const u64> Span() const noexcept { | ||
| 137 | if constexpr (type == Type::CPU) { | ||
| 138 | return std::span<const u64>(cpu.Pointer(IsShort()), num_words); | ||
| 139 | } else if constexpr (type == Type::GPU) { | ||
| 140 | return std::span<const u64>(gpu.Pointer(IsShort()), num_words); | ||
| 141 | } else if constexpr (type == Type::CachedCPU) { | ||
| 142 | return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words); | ||
| 143 | } else if constexpr (type == Type::Untracked) { | ||
| 144 | return std::span<const u64>(untracked.Pointer(IsShort()), num_words); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | u64 size_bytes = 0; | ||
| 149 | size_t num_words = 0; | ||
| 150 | WordsArray<stack_words> cpu; | ||
| 151 | WordsArray<stack_words> gpu; | ||
| 152 | WordsArray<stack_words> cached_cpu; | ||
| 153 | WordsArray<stack_words> untracked; | ||
| 154 | }; | ||
| 155 | |||
| 156 | template <class RasterizerInterface, size_t stack_words = 1> | ||
| 157 | class WordManager { | ||
| 158 | public: | ||
| 159 | explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | ||
| 160 | : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | ||
| 161 | |||
| 162 | explicit WordManager() = default; | ||
| 163 | |||
| 164 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 165 | cpu_addr = new_cpu_addr; | ||
| 166 | } | ||
| 167 | |||
| 168 | VAddr GetCpuAddr() const { | ||
| 169 | return cpu_addr; | ||
| 170 | } | ||
| 171 | |||
| 172 | static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { | ||
| 173 | constexpr size_t number_bits = sizeof(u64) * 8; | ||
| 174 | const size_t limit_page_end = number_bits - std::min(page_end, number_bits); | ||
| 175 | u64 bits = (word >> page_start) << page_start; | ||
| 176 | bits = (bits << limit_page_end) >> limit_page_end; | ||
| 177 | return bits; | ||
| 178 | } | ||
| 179 | |||
| 180 | static std::pair<size_t, size_t> GetWordPage(VAddr address) { | ||
| 181 | const size_t converted_address = static_cast<size_t>(address); | ||
| 182 | const size_t word_number = converted_address / BYTES_PER_WORD; | ||
| 183 | const size_t amount_pages = converted_address % BYTES_PER_WORD; | ||
| 184 | return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); | ||
| 185 | } | ||
| 186 | |||
| 187 | template <typename Func> | ||
| 188 | void IterateWords(size_t offset, size_t size, Func&& func) const { | ||
| 189 | using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>; | ||
| 190 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 191 | const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL)); | ||
| 192 | const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL)); | ||
| 193 | if (start >= SizeBytes() || end <= start) { | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | auto [start_word, start_page] = GetWordPage(start); | ||
| 197 | auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); | ||
| 198 | const size_t num_words = NumWords(); | ||
| 199 | start_word = std::min(start_word, num_words); | ||
| 200 | end_word = std::min(end_word, num_words); | ||
| 201 | const size_t diff = end_word - start_word; | ||
| 202 | end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; | ||
| 203 | end_word = std::min(end_word, num_words); | ||
| 204 | end_page += diff * PAGES_PER_WORD; | ||
| 205 | constexpr u64 base_mask{~0ULL}; | ||
| 206 | for (size_t word_index = start_word; word_index < end_word; word_index++) { | ||
| 207 | const u64 mask = ExtractBits(base_mask, start_page, end_page); | ||
| 208 | start_page = 0; | ||
| 209 | end_page -= PAGES_PER_WORD; | ||
| 210 | if constexpr (BOOL_BREAK) { | ||
| 211 | if (func(word_index, mask)) { | ||
| 212 | return; | ||
| 213 | } | ||
| 214 | } else { | ||
| 215 | func(word_index, mask); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | template <typename Func> | ||
| 221 | void IteratePages(u64 mask, Func&& func) const { | ||
| 222 | size_t offset = 0; | ||
| 223 | while (mask != 0) { | ||
| 224 | const size_t empty_bits = std::countr_zero(mask); | ||
| 225 | offset += empty_bits; | ||
| 226 | mask = mask >> empty_bits; | ||
| 227 | |||
| 228 | const size_t continuous_bits = std::countr_one(mask); | ||
| 229 | func(offset, continuous_bits); | ||
| 230 | mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; | ||
| 231 | offset += continuous_bits; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * Change the state of a range of pages | ||
| 237 | * | ||
| 238 | * @param dirty_addr Base address to mark or unmark as modified | ||
| 239 | * @param size Size in bytes to mark or unmark as modified | ||
| 240 | */ | ||
| 241 | template <Type type, bool enable> | ||
| 242 | void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { | ||
| 243 | std::span<u64> state_words = words.template Span<type>(); | ||
| 244 | [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); | ||
| 245 | [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>(); | ||
| 246 | IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { | ||
| 247 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 248 | NotifyRasterizer<!enable>(index, untracked_words[index], mask); | ||
| 249 | } | ||
| 250 | if constexpr (enable) { | ||
| 251 | state_words[index] |= mask; | ||
| 252 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 253 | untracked_words[index] |= mask; | ||
| 254 | } | ||
| 255 | if constexpr (type == Type::CPU) { | ||
| 256 | cached_words[index] &= ~mask; | ||
| 257 | } | ||
| 258 | } else { | ||
| 259 | if constexpr (type == Type::CPU) { | ||
| 260 | const u64 word = state_words[index] & mask; | ||
| 261 | cached_words[index] &= ~word; | ||
| 262 | } | ||
| 263 | state_words[index] &= ~mask; | ||
| 264 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 265 | untracked_words[index] &= ~mask; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | }); | ||
| 269 | } | ||
| 270 | |||
| 271 | /** | ||
| 272 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||
| 273 | * needed. Call the given function on each turned off range. | ||
| 274 | * | ||
| 275 | * @param query_cpu_range Base CPU address to loop over | ||
| 276 | * @param size Size in bytes of the CPU range to loop over | ||
| 277 | * @param func Function to call for each turned off region | ||
| 278 | */ | ||
| 279 | template <Type type, bool clear, typename Func> | ||
| 280 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | ||
| 281 | static_assert(type != Type::Untracked); | ||
| 282 | |||
| 283 | std::span<u64> state_words = words.template Span<type>(); | ||
| 284 | [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); | ||
| 285 | [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>(); | ||
| 286 | const size_t offset = query_cpu_range - cpu_addr; | ||
| 287 | bool pending = false; | ||
| 288 | size_t pending_offset{}; | ||
| 289 | size_t pending_pointer{}; | ||
| 290 | const auto release = [&]() { | ||
| 291 | func(cpu_addr + pending_offset * BYTES_PER_PAGE, | ||
| 292 | (pending_pointer - pending_offset) * BYTES_PER_PAGE); | ||
| 293 | }; | ||
| 294 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 295 | const u64 word = state_words[index] & mask; | ||
| 296 | if constexpr (clear) { | ||
| 297 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 298 | NotifyRasterizer<true>(index, untracked_words[index], mask); | ||
| 299 | } | ||
| 300 | state_words[index] &= ~mask; | ||
| 301 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 302 | untracked_words[index] &= ~mask; | ||
| 303 | } | ||
| 304 | if constexpr (type == Type::CPU) { | ||
| 305 | cached_words[index] &= ~word; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | const size_t base_offset = index * PAGES_PER_WORD; | ||
| 309 | IteratePages(word, [&](size_t pages_offset, size_t pages_size) { | ||
| 310 | const auto reset = [&]() { | ||
| 311 | pending_offset = base_offset + pages_offset; | ||
| 312 | pending_pointer = base_offset + pages_offset + pages_size; | ||
| 313 | }; | ||
| 314 | if (!pending) { | ||
| 315 | reset(); | ||
| 316 | pending = true; | ||
| 317 | return; | ||
| 318 | } | ||
| 319 | if (pending_pointer == base_offset + pages_offset) { | ||
| 320 | pending_pointer += pages_size; | ||
| 321 | return; | ||
| 322 | } | ||
| 323 | release(); | ||
| 324 | reset(); | ||
| 325 | }); | ||
| 326 | }); | ||
| 327 | if (pending) { | ||
| 328 | release(); | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | /** | ||
| 333 | * Returns true when a region has been modified | ||
| 334 | * | ||
| 335 | * @param offset Offset in bytes from the start of the buffer | ||
| 336 | * @param size Size in bytes of the region to query for modifications | ||
| 337 | */ | ||
| 338 | template <Type type> | ||
| 339 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 340 | static_assert(type != Type::Untracked); | ||
| 341 | |||
| 342 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 343 | bool result = false; | ||
| 344 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 345 | const u64 word = state_words[index] & mask; | ||
| 346 | if (word != 0) { | ||
| 347 | result = true; | ||
| 348 | return true; | ||
| 349 | } | ||
| 350 | return false; | ||
| 351 | }); | ||
| 352 | return result; | ||
| 353 | } | ||
| 354 | |||
| 355 | /** | ||
| 356 | * Returns a begin-end pair covering the inclusive modified region | ||
| 357 | * | ||
| 358 | * @param offset Offset in bytes from the start of the buffer | ||
| 359 | * @param size Size in bytes of the region to query for modifications | ||
| 360 | */ | ||
| 361 | template <Type type> | ||
| 362 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 363 | static_assert(type != Type::Untracked); | ||
| 364 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 365 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 366 | u64 end = 0; | ||
| 367 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 368 | const u64 word = state_words[index] & mask; | ||
| 369 | if (word == 0) { | ||
| 370 | return; | ||
| 371 | } | ||
| 372 | const u64 local_page_begin = std::countr_zero(word); | ||
| 373 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 374 | const u64 page_index = index * PAGES_PER_WORD; | ||
| 375 | begin = std::min(begin, page_index + local_page_begin); | ||
| 376 | end = page_index + local_page_end; | ||
| 377 | }); | ||
| 378 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 379 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 380 | } | ||
| 381 | |||
| 382 | /// Returns the number of words of the manager | ||
| 383 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 384 | return words.NumWords(); | ||
| 385 | } | ||
| 386 | |||
| 387 | /// Returns the size in bytes of the manager | ||
| 388 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 389 | return words.size_bytes; | ||
| 390 | } | ||
| 391 | |||
| 392 | /// Returns true when the buffer fits in the small vector optimization | ||
| 393 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 394 | return words.IsShort(); | ||
| 395 | } | ||
| 396 | |||
| 397 | void FlushCachedWrites() noexcept { | ||
| 398 | const u64 num_words = NumWords(); | ||
| 399 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 400 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 401 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 402 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 403 | const u64 cached_bits = cached_words[word_index]; | ||
| 404 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 405 | untracked_words[word_index] |= cached_bits; | ||
| 406 | cpu_words[word_index] |= cached_bits; | ||
| 407 | cached_words[word_index] = 0; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | private: | ||
| 412 | template <Type type> | ||
| 413 | u64* Array() noexcept { | ||
| 414 | if constexpr (type == Type::CPU) { | ||
| 415 | return words.cpu.Pointer(IsShort()); | ||
| 416 | } else if constexpr (type == Type::GPU) { | ||
| 417 | return words.gpu.Pointer(IsShort()); | ||
| 418 | } else if constexpr (type == Type::CachedCPU) { | ||
| 419 | return words.cached_cpu.Pointer(IsShort()); | ||
| 420 | } else if constexpr (type == Type::Untracked) { | ||
| 421 | return words.untracked.Pointer(IsShort()); | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | template <Type type> | ||
| 426 | const u64* Array() const noexcept { | ||
| 427 | if constexpr (type == Type::CPU) { | ||
| 428 | return words.cpu.Pointer(IsShort()); | ||
| 429 | } else if constexpr (type == Type::GPU) { | ||
| 430 | return words.gpu.Pointer(IsShort()); | ||
| 431 | } else if constexpr (type == Type::CachedCPU) { | ||
| 432 | return words.cached_cpu.Pointer(IsShort()); | ||
| 433 | } else if constexpr (type == Type::Untracked) { | ||
| 434 | return words.untracked.Pointer(IsShort()); | ||
| 435 | } | ||
| 436 | } | ||
| 437 | |||
| 438 | /** | ||
| 439 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 440 | * | ||
| 441 | * @param word_index Index to the word to notify to the rasterizer | ||
| 442 | * @param current_bits Current state of the word | ||
| 443 | * @param new_bits New state of the word | ||
| 444 | * | ||
| 445 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 446 | */ | ||
| 447 | template <bool add_to_rasterizer> | ||
| 448 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 449 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 450 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 451 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | ||
| 452 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||
| 453 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | ||
| 454 | }); | ||
| 455 | } | ||
| 456 | |||
| 457 | VAddr cpu_addr = 0; | ||
| 458 | RasterizerInterface* rasterizer = nullptr; | ||
| 459 | Words<stack_words> words; | ||
| 460 | }; | ||
| 461 | |||
| 462 | } // namespace VideoCommon | ||
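WordManager above keeps one bit per 4 KiB guest page in 64-bit words and walks runs of set bits with std::countr_zero / std::countr_one, which is how IteratePages turns a dirty mask into (offset, length) callbacks. A small self-contained C++20 sketch of that run-walking idea, independent of the class and with a made-up ForEachSetRun name:

    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Call func(first_bit, run_length) for every run of consecutive set bits in mask.
    template <typename Func>
    void ForEachSetRun(std::uint64_t mask, Func&& func) {
        std::size_t offset = 0;
        while (mask != 0) {
            const int zeros = std::countr_zero(mask); // skip the clear gap
            offset += static_cast<std::size_t>(zeros);
            mask >>= zeros;
            const int ones = std::countr_one(mask);   // length of the set run
            func(offset, static_cast<std::size_t>(ones));
            mask = ones < 64 ? (mask >> ones) : 0;    // shifting by 64 would be undefined
            offset += static_cast<std::size_t>(ones);
        }
    }

    int main() {
        // Bits 1-3 and 8-9 are set, so the runs are (1, 3) and (8, 2).
        ForEachSetRun(0b11'0000'1110ULL, [](std::size_t first, std::size_t count) {
            std::printf("pages %zu..%zu dirty\n", first, first + count - 1);
        });
    }

The guard against shifting by 64 matches the PAGES_PER_WORD check in the header: a word whose remaining bits are all ones would otherwise trigger undefined behaviour.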
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 4e75f33ca..ab4f4d407 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp | |||
| @@ -126,15 +126,14 @@ constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{ | |||
| 126 | PixelFormat::ASTC_2D_8X8_SRGB, | 126 | PixelFormat::ASTC_2D_8X8_SRGB, |
| 127 | }; | 127 | }; |
| 128 | 128 | ||
| 129 | // Missing formats: | 129 | constexpr std::array VIEW_CLASS_ASTC_10x5_RGBA{ |
| 130 | // PixelFormat::ASTC_2D_10X5_UNORM | 130 | PixelFormat::ASTC_2D_10X5_UNORM, |
| 131 | // PixelFormat::ASTC_2D_10X5_SRGB | 131 | PixelFormat::ASTC_2D_10X5_SRGB, |
| 132 | 132 | }; | |
| 133 | // Missing formats: | ||
| 134 | // PixelFormat::ASTC_2D_10X6_SRGB | ||
| 135 | 133 | ||
| 136 | constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{ | 134 | constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{ |
| 137 | PixelFormat::ASTC_2D_10X6_UNORM, | 135 | PixelFormat::ASTC_2D_10X6_UNORM, |
| 136 | PixelFormat::ASTC_2D_10X6_SRGB, | ||
| 138 | }; | 137 | }; |
| 139 | 138 | ||
| 140 | constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ | 139 | constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ |
| @@ -147,9 +146,10 @@ constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{ | |||
| 147 | PixelFormat::ASTC_2D_10X10_SRGB, | 146 | PixelFormat::ASTC_2D_10X10_SRGB, |
| 148 | }; | 147 | }; |
| 149 | 148 | ||
| 150 | // Missing formats | 149 | constexpr std::array VIEW_CLASS_ASTC_12x10_RGBA{ |
| 151 | // ASTC_2D_12X10_UNORM, | 150 | PixelFormat::ASTC_2D_12X10_UNORM, |
| 152 | // ASTC_2D_12X10_SRGB, | 151 | PixelFormat::ASTC_2D_12X10_SRGB, |
| 152 | }; | ||
| 153 | 153 | ||
| 154 | constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ | 154 | constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ |
| 155 | PixelFormat::ASTC_2D_12X12_UNORM, | 155 | PixelFormat::ASTC_2D_12X12_UNORM, |
| @@ -229,9 +229,11 @@ constexpr Table MakeViewTable() { | |||
| 229 | EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); | 229 | EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); |
| 230 | EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); | 230 | EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); |
| 231 | EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); | 231 | EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); |
| 232 | EnableRange(view, VIEW_CLASS_ASTC_10x5_RGBA); | ||
| 232 | EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA); | 233 | EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA); |
| 233 | EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); | 234 | EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); |
| 234 | EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); | 235 | EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); |
| 236 | EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA); | ||
| 235 | EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); | 237 | EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); |
| 236 | return view; | 238 | return view; |
| 237 | } | 239 | } |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index a126c359c..02e161270 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -77,6 +77,14 @@ void Fermi2D::Blit() { | |||
| 77 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | 77 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); |
| 78 | const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && | 78 | const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && |
| 79 | src.format != regs.dst.format; | 79 | src.format != regs.dst.format; |
| 80 | |||
| 81 | auto srcX = args.src_x0; | ||
| 82 | auto srcY = args.src_y0; | ||
| 83 | if (args.sample_mode.origin == Origin::Corner) { | ||
| 84 | srcX -= (args.du_dx >> 33) << 32; | ||
| 85 | srcY -= (args.dv_dy >> 33) << 32; | ||
| 86 | } | ||
| 87 | |||
| 80 | Config config{ | 88 | Config config{ |
| 81 | .operation = regs.operation, | 89 | .operation = regs.operation, |
| 82 | .filter = args.sample_mode.filter, | 90 | .filter = args.sample_mode.filter, |
| @@ -86,10 +94,10 @@ void Fermi2D::Blit() { | |||
| 86 | .dst_y0 = args.dst_y0, | 94 | .dst_y0 = args.dst_y0, |
| 87 | .dst_x1 = args.dst_x0 + args.dst_width, | 95 | .dst_x1 = args.dst_x0 + args.dst_width, |
| 88 | .dst_y1 = args.dst_y0 + args.dst_height, | 96 | .dst_y1 = args.dst_y0 + args.dst_height, |
| 89 | .src_x0 = static_cast<s32>(args.src_x0 >> 32), | 97 | .src_x0 = static_cast<s32>(srcX >> 32), |
| 90 | .src_y0 = static_cast<s32>(args.src_y0 >> 32), | 98 | .src_y0 = static_cast<s32>(srcY >> 32), |
| 91 | .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), | 99 | .src_x1 = static_cast<s32>((srcX + args.du_dx * args.dst_width) >> 32), |
| 92 | .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), | 100 | .src_y1 = static_cast<s32>((srcY + args.dv_dy * args.dst_height) >> 32), |
| 93 | }; | 101 | }; |
| 94 | 102 | ||
| 95 | const auto need_align_to_pitch = | 103 | const auto need_align_to_pitch = |
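The source coordinates consumed above are 32.32 fixed-point values: the integer texel is the upper 32 bits, and the Corner-origin path subtracts (delta >> 33) << 32, which appears to be roughly half the per-pixel step quantized back to whole texels. A tiny standalone sketch of that arithmetic with made-up example values (not taken from the commit):

    #include <cstdint>
    #include <cstdio>

    // Extract the integer texel coordinate from a 32.32 fixed-point value.
    constexpr std::int32_t ToInt(std::int64_t fixed) {
        return static_cast<std::int32_t>(fixed >> 32);
    }

    int main() {
        const std::int64_t src_x0 = std::int64_t{10} << 32; // start at texel 10.0
        const std::int64_t du_dx = std::int64_t{2} << 32;   // 2 source texels per dest pixel
        const std::int64_t dst_width = 8;

        // Corner origin: step back by half a sample step, truncated to whole texels.
        const std::int64_t adjusted_x0 = src_x0 - ((du_dx >> 33) << 32);

        std::printf("src_x0 = %d, src_x1 = %d\n", ToInt(adjusted_x0),
                    ToInt(adjusted_x0 + du_dx * dst_width));
        // Prints: src_x0 = 9, src_x1 = 25
    }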
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 614d61db4..2f986097f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <cstring> | 4 | #include <cstring> |
| 5 | #include <optional> | 5 | #include <optional> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/bit_util.h" | ||
| 7 | #include "common/scope_exit.h" | 8 | #include "common/scope_exit.h" |
| 8 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 9 | #include "core/core.h" | 10 | #include "core/core.h" |
| @@ -222,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 222 | } | 223 | } |
| 223 | 224 | ||
| 224 | void Maxwell3D::RefreshParametersImpl() { | 225 | void Maxwell3D::RefreshParametersImpl() { |
| 226 | if (!Settings::IsGPULevelHigh()) { | ||
| 227 | return; | ||
| 228 | } | ||
| 225 | size_t current_index = 0; | 229 | size_t current_index = 0; |
| 226 | for (auto& segment : macro_segments) { | 230 | for (auto& segment : macro_segments) { |
| 227 | if (segment.first == 0) { | 231 | if (segment.first == 0) { |
| @@ -259,12 +263,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() { | |||
| 259 | size_t Maxwell3D::EstimateIndexBufferSize() { | 263 | size_t Maxwell3D::EstimateIndexBufferSize() { |
| 260 | GPUVAddr start_address = regs.index_buffer.StartAddress(); | 264 | GPUVAddr start_address = regs.index_buffer.StartAddress(); |
| 261 | GPUVAddr end_address = regs.index_buffer.EndAddress(); | 265 | GPUVAddr end_address = regs.index_buffer.EndAddress(); |
| 262 | static constexpr std::array<size_t, 4> max_sizes = { | 266 | static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(), |
| 263 | std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), | 267 | std::numeric_limits<u16>::max(), |
| 264 | std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; | 268 | std::numeric_limits<u32>::max()}; |
| 265 | const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); | 269 | const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); |
| 270 | const size_t log2_byte_size = Common::Log2Ceil64(byte_size); | ||
| 266 | return std::min<size_t>( | 271 | return std::min<size_t>( |
| 267 | memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / | 272 | memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / |
| 268 | byte_size, | 273 | byte_size, |
| 269 | static_cast<size_t>(end_address - start_address)); | 274 | static_cast<size_t>(end_address - start_address)); |
| 270 | } | 275 | } |
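The EstimateIndexBufferSize change above indexes max_sizes by the log2 of the index element size, so the 1, 2 and 4-byte formats map to entries 0, 1 and 2 instead of using the byte size itself as the array index. A standalone illustration of that mapping, using std::bit_width from the standard library in place of the project's Common::Log2Ceil64 helper:

    #include <array>
    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main() {
        constexpr std::array<std::size_t, 3> max_sizes{
            std::numeric_limits<std::uint8_t>::max(),
            std::numeric_limits<std::uint16_t>::max(),
            std::numeric_limits<std::uint32_t>::max(),
        };
        for (std::size_t byte_size : {std::size_t{1}, std::size_t{2}, std::size_t{4}}) {
            // For powers of two, bit_width(x) - 1 == log2(x): 1 -> 0, 2 -> 1, 4 -> 2.
            const std::size_t index = std::bit_width(byte_size) - 1;
            std::printf("byte_size %zu -> max_sizes[%zu] = %zu\n", byte_size, index,
                        max_sizes[index]);
        }
    }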
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c390ac91b..3b2f6aab6 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -4,13 +4,20 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <condition_variable> | ||
| 7 | #include <cstring> | 8 | #include <cstring> |
| 8 | #include <deque> | 9 | #include <deque> |
| 9 | #include <functional> | 10 | #include <functional> |
| 10 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 13 | #include <thread> | ||
| 11 | #include <queue> | 14 | #include <queue> |
| 12 | 15 | ||
| 13 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/microprofile.h" | ||
| 18 | #include "common/scope_exit.h" | ||
| 19 | #include "common/settings.h" | ||
| 20 | #include "common/thread.h" | ||
| 14 | #include "video_core/delayed_destruction_ring.h" | 21 | #include "video_core/delayed_destruction_ring.h" |
| 15 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 16 | #include "video_core/host1x/host1x.h" | 23 | #include "video_core/host1x/host1x.h" |
| @@ -23,15 +30,26 @@ class FenceBase { | |||
| 23 | public: | 30 | public: |
| 24 | explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} | 31 | explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} |
| 25 | 32 | ||
| 33 | bool IsStubbed() const { | ||
| 34 | return is_stubbed; | ||
| 35 | } | ||
| 36 | |||
| 26 | protected: | 37 | protected: |
| 27 | bool is_stubbed; | 38 | bool is_stubbed; |
| 28 | }; | 39 | }; |
| 29 | 40 | ||
| 30 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | 41 | template <typename Traits> |
| 31 | class FenceManager { | 42 | class FenceManager { |
| 43 | using TFence = typename Traits::FenceType; | ||
| 44 | using TTextureCache = typename Traits::TextureCacheType; | ||
| 45 | using TBufferCache = typename Traits::BufferCacheType; | ||
| 46 | using TQueryCache = typename Traits::QueryCacheType; | ||
| 47 | static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK; | ||
| 48 | |||
| 32 | public: | 49 | public: |
| 33 | /// Notify the fence manager about a new frame | 50 | /// Notify the fence manager about a new frame |
| 34 | void TickFrame() { | 51 | void TickFrame() { |
| 52 | std::unique_lock lock(ring_guard); | ||
| 35 | delayed_destruction_ring.Tick(); | 53 | delayed_destruction_ring.Tick(); |
| 36 | } | 54 | } |
| 37 | 55 | ||
| @@ -46,17 +64,33 @@ public: | |||
| 46 | } | 64 | } |
| 47 | 65 | ||
| 48 | void SignalFence(std::function<void()>&& func) { | 66 | void SignalFence(std::function<void()>&& func) { |
| 49 | TryReleasePendingFences(); | 67 | rasterizer.InvalidateGPUCache(); |
| 68 | bool delay_fence = Settings::IsGPULevelHigh(); | ||
| 69 | if constexpr (!can_async_check) { | ||
| 70 | TryReleasePendingFences<false>(); | ||
| 71 | } | ||
| 50 | const bool should_flush = ShouldFlush(); | 72 | const bool should_flush = ShouldFlush(); |
| 51 | CommitAsyncFlushes(); | 73 | CommitAsyncFlushes(); |
| 52 | uncommitted_operations.emplace_back(std::move(func)); | ||
| 53 | CommitOperations(); | ||
| 54 | TFence new_fence = CreateFence(!should_flush); | 74 | TFence new_fence = CreateFence(!should_flush); |
| 55 | fences.push(new_fence); | 75 | if constexpr (can_async_check) { |
| 76 | guard.lock(); | ||
| 77 | } | ||
| 78 | if (delay_fence) { | ||
| 79 | uncommitted_operations.emplace_back(std::move(func)); | ||
| 80 | } | ||
| 81 | pending_operations.emplace_back(std::move(uncommitted_operations)); | ||
| 56 | QueueFence(new_fence); | 82 | QueueFence(new_fence); |
| 83 | if (!delay_fence) { | ||
| 84 | func(); | ||
| 85 | } | ||
| 86 | fences.push(std::move(new_fence)); | ||
| 57 | if (should_flush) { | 87 | if (should_flush) { |
| 58 | rasterizer.FlushCommands(); | 88 | rasterizer.FlushCommands(); |
| 59 | } | 89 | } |
| 90 | if constexpr (can_async_check) { | ||
| 91 | guard.unlock(); | ||
| 92 | cv.notify_all(); | ||
| 93 | } | ||
| 60 | } | 94 | } |
| 61 | 95 | ||
| 62 | void SignalSyncPoint(u32 value) { | 96 | void SignalSyncPoint(u32 value) { |
| @@ -66,29 +100,30 @@ public: | |||
| 66 | } | 100 | } |
| 67 | 101 | ||
| 68 | void WaitPendingFences() { | 102 | void WaitPendingFences() { |
| 69 | while (!fences.empty()) { | 103 | if constexpr (!can_async_check) { |
| 70 | TFence& current_fence = fences.front(); | 104 | TryReleasePendingFences<true>(); |
| 71 | if (ShouldWait()) { | ||
| 72 | WaitFence(current_fence); | ||
| 73 | } | ||
| 74 | PopAsyncFlushes(); | ||
| 75 | auto operations = std::move(pending_operations.front()); | ||
| 76 | pending_operations.pop_front(); | ||
| 77 | for (auto& operation : operations) { | ||
| 78 | operation(); | ||
| 79 | } | ||
| 80 | PopFence(); | ||
| 81 | } | 105 | } |
| 82 | } | 106 | } |
| 83 | 107 | ||
| 84 | protected: | 108 | protected: |
| 85 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 109 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 86 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, | 110 | TTextureCache& texture_cache_, TBufferCache& buffer_cache_, |
| 87 | TQueryCache& query_cache_) | 111 | TQueryCache& query_cache_) |
| 88 | : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, | 112 | : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, |
| 89 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} | 113 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} { |
| 114 | if constexpr (can_async_check) { | ||
| 115 | fence_thread = | ||
| 116 | std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); }); | ||
| 117 | } | ||
| 118 | } | ||
| 90 | 119 | ||
| 91 | virtual ~FenceManager() = default; | 120 | virtual ~FenceManager() { |
| 121 | if constexpr (can_async_check) { | ||
| 122 | fence_thread.request_stop(); | ||
| 123 | cv.notify_all(); | ||
| 124 | fence_thread.join(); | ||
| 125 | } | ||
| 126 | } | ||
| 92 | 127 | ||
| 93 | /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is | 128 | /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is |
| 94 | /// true | 129 | /// true |
| @@ -104,15 +139,20 @@ protected: | |||
| 104 | Tegra::GPU& gpu; | 139 | Tegra::GPU& gpu; |
| 105 | Tegra::Host1x::SyncpointManager& syncpoint_manager; | 140 | Tegra::Host1x::SyncpointManager& syncpoint_manager; |
| 106 | TTextureCache& texture_cache; | 141 | TTextureCache& texture_cache; |
| 107 | TTBufferCache& buffer_cache; | 142 | TBufferCache& buffer_cache; |
| 108 | TQueryCache& query_cache; | 143 | TQueryCache& query_cache; |
| 109 | 144 | ||
| 110 | private: | 145 | private: |
| 146 | template <bool force_wait> | ||
| 111 | void TryReleasePendingFences() { | 147 | void TryReleasePendingFences() { |
| 112 | while (!fences.empty()) { | 148 | while (!fences.empty()) { |
| 113 | TFence& current_fence = fences.front(); | 149 | TFence& current_fence = fences.front(); |
| 114 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | 150 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { |
| 115 | return; | 151 | if constexpr (force_wait) { |
| 152 | WaitFence(current_fence); | ||
| 153 | } else { | ||
| 154 | return; | ||
| 155 | } | ||
| 116 | } | 156 | } |
| 117 | PopAsyncFlushes(); | 157 | PopAsyncFlushes(); |
| 118 | auto operations = std::move(pending_operations.front()); | 158 | auto operations = std::move(pending_operations.front()); |
| @@ -120,7 +160,49 @@ private: | |||
| 120 | for (auto& operation : operations) { | 160 | for (auto& operation : operations) { |
| 121 | operation(); | 161 | operation(); |
| 122 | } | 162 | } |
| 123 | PopFence(); | 163 | { |
| 164 | std::unique_lock lock(ring_guard); | ||
| 165 | delayed_destruction_ring.Push(std::move(current_fence)); | ||
| 166 | } | ||
| 167 | fences.pop(); | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | void ReleaseThreadFunc(std::stop_token stop_token) { | ||
| 172 | std::string name = "GPUFencingThread"; | ||
| 173 | MicroProfileOnThreadCreate(name.c_str()); | ||
| 174 | |||
| 175 | // Cleanup | ||
| 176 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | ||
| 177 | |||
| 178 | Common::SetCurrentThreadName(name.c_str()); | ||
| 179 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | ||
| 180 | |||
| 181 | TFence current_fence; | ||
| 182 | std::deque<std::function<void()>> current_operations; | ||
| 183 | while (!stop_token.stop_requested()) { | ||
| 184 | { | ||
| 185 | std::unique_lock lock(guard); | ||
| 186 | cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); }); | ||
| 187 | if (stop_token.stop_requested()) [[unlikely]] { | ||
| 188 | return; | ||
| 189 | } | ||
| 190 | current_fence = std::move(fences.front()); | ||
| 191 | current_operations = std::move(pending_operations.front()); | ||
| 192 | fences.pop(); | ||
| 193 | pending_operations.pop_front(); | ||
| 194 | } | ||
| 195 | if (!current_fence->IsStubbed()) { | ||
| 196 | WaitFence(current_fence); | ||
| 197 | } | ||
| 198 | PopAsyncFlushes(); | ||
| 199 | for (auto& operation : current_operations) { | ||
| 200 | operation(); | ||
| 201 | } | ||
| 202 | { | ||
| 203 | std::unique_lock lock(ring_guard); | ||
| 204 | delayed_destruction_ring.Push(std::move(current_fence)); | ||
| 205 | } | ||
| 124 | } | 206 | } |
| 125 | } | 207 | } |
| 126 | 208 | ||
| @@ -154,19 +236,16 @@ private: | |||
| 154 | query_cache.CommitAsyncFlushes(); | 236 | query_cache.CommitAsyncFlushes(); |
| 155 | } | 237 | } |
| 156 | 238 | ||
| 157 | void PopFence() { | ||
| 158 | delayed_destruction_ring.Push(std::move(fences.front())); | ||
| 159 | fences.pop(); | ||
| 160 | } | ||
| 161 | |||
| 162 | void CommitOperations() { | ||
| 163 | pending_operations.emplace_back(std::move(uncommitted_operations)); | ||
| 164 | } | ||
| 165 | |||
| 166 | std::queue<TFence> fences; | 239 | std::queue<TFence> fences; |
| 167 | std::deque<std::function<void()>> uncommitted_operations; | 240 | std::deque<std::function<void()>> uncommitted_operations; |
| 168 | std::deque<std::deque<std::function<void()>>> pending_operations; | 241 | std::deque<std::deque<std::function<void()>>> pending_operations; |
| 169 | 242 | ||
| 243 | std::mutex guard; | ||
| 244 | std::mutex ring_guard; | ||
| 245 | std::condition_variable cv; | ||
| 246 | |||
| 247 | std::jthread fence_thread; | ||
| 248 | |||
| 170 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | 249 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; |
| 171 | }; | 250 | }; |
| 172 | 251 | ||
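With HAS_ASYNC_CHECK, fence completion is handled by a dedicated std::jthread that sleeps on a condition variable until a fence is queued or a stop is requested. The following is a minimal generic sketch of that worker pattern (C++20), independent of the fence and cache types; the AsyncReleaser name is invented for the example:

    #include <condition_variable>
    #include <cstdio>
    #include <deque>
    #include <functional>
    #include <mutex>
    #include <stop_token>
    #include <thread>

    class AsyncReleaser {
    public:
        AsyncReleaser() : worker{[this](std::stop_token token) { Run(token); }} {}

        ~AsyncReleaser() {
            {
                std::scoped_lock lock{guard};
                worker.request_stop();
            }
            cv.notify_all(); // wake the worker so it can observe the stop request
        }

        void Push(std::function<void()> job) {
            {
                std::scoped_lock lock{guard};
                jobs.push_back(std::move(job));
            }
            cv.notify_all();
        }

    private:
        void Run(std::stop_token token) {
            for (;;) {
                std::function<void()> job;
                {
                    std::unique_lock lock{guard};
                    cv.wait(lock, [&] { return token.stop_requested() || !jobs.empty(); });
                    if (jobs.empty()) {
                        return; // woken only to stop, nothing left to release
                    }
                    job = std::move(jobs.front());
                    jobs.pop_front();
                }
                job(); // run the deferred operation outside the lock
            }
        }

        std::mutex guard;
        std::condition_variable cv;
        std::deque<std::function<void()>> jobs;
        std::jthread worker; // declared last so the queue outlives it; joins on destruction
    };

    int main() {
        AsyncReleaser releaser;
        releaser.Push([] { std::puts("fence released"); });
    }

Unlike the FenceManager above, this sketch simply drains queued work before exiting; the real implementation additionally waits on the backend fence (WaitFence) before running the deferred operations.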
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 01fb5b546..7b2cde7a7 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -82,6 +82,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { | 84 | PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { |
| 85 | std::unique_lock<std::mutex> lock(guard); | ||
| 85 | return kind_map.GetValueAt(gpu_addr); | 86 | return kind_map.GetValueAt(gpu_addr); |
| 86 | } | 87 | } |
| 87 | 88 | ||
| @@ -160,7 +161,10 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 160 | } | 161 | } |
| 161 | remaining_size -= big_page_size; | 162 | remaining_size -= big_page_size; |
| 162 | } | 163 | } |
| 163 | kind_map.Map(gpu_addr, gpu_addr + size, kind); | 164 | { |
| 165 | std::unique_lock<std::mutex> lock(guard); | ||
| 166 | kind_map.Map(gpu_addr, gpu_addr + size, kind); | ||
| 167 | } | ||
| 164 | return gpu_addr; | 168 | return gpu_addr; |
| 165 | } | 169 | } |
| 166 | 170 | ||
| @@ -553,6 +557,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | |||
| 553 | } | 557 | } |
| 554 | 558 | ||
| 555 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { | 559 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { |
| 560 | std::unique_lock<std::mutex> lock(guard); | ||
| 556 | return kind_map.GetContinuousSizeFrom(gpu_addr); | 561 | return kind_map.GetContinuousSizeFrom(gpu_addr); |
| 557 | } | 562 | } |
| 558 | 563 | ||
| @@ -745,10 +750,10 @@ void MemoryManager::FlushCaching() { | |||
| 745 | return; | 750 | return; |
| 746 | } | 751 | } |
| 747 | accumulator->Callback([this](GPUVAddr addr, size_t size) { | 752 | accumulator->Callback([this](GPUVAddr addr, size_t size) { |
| 748 | GetSubmappedRangeImpl<false>(addr, size, page_stash); | 753 | GetSubmappedRangeImpl<false>(addr, size, page_stash2); |
| 749 | }); | 754 | }); |
| 750 | rasterizer->InnerInvalidation(page_stash); | 755 | rasterizer->InnerInvalidation(page_stash2); |
| 751 | page_stash.clear(); | 756 | page_stash2.clear(); |
| 752 | accumulator->Clear(); | 757 | accumulator->Clear(); |
| 753 | } | 758 | } |
| 754 | 759 | ||
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index fbbe856c4..794535122 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <atomic> | 6 | #include <atomic> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <mutex> | ||
| 8 | #include <optional> | 9 | #include <optional> |
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | 11 | ||
| @@ -215,6 +216,9 @@ private: | |||
| 215 | 216 | ||
| 216 | std::vector<u64> big_page_continuous; | 217 | std::vector<u64> big_page_continuous; |
| 217 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | 218 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; |
| 219 | std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; | ||
| 220 | |||
| 221 | mutable std::mutex guard; | ||
| 218 | 222 | ||
| 219 | static constexpr size_t continuous_bits = 64; | 223 | static constexpr size_t continuous_bits = 64; |
| 220 | 224 | ||
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 8906ba6d8..941de95c1 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <functional> | ||
| 9 | #include <iterator> | 10 | #include <iterator> |
| 10 | #include <list> | 11 | #include <list> |
| 11 | #include <memory> | 12 | #include <memory> |
| @@ -17,13 +18,19 @@ | |||
| 17 | 18 | ||
| 18 | #include "common/assert.h" | 19 | #include "common/assert.h" |
| 19 | #include "common/settings.h" | 20 | #include "common/settings.h" |
| 21 | #include "core/memory.h" | ||
| 20 | #include "video_core/control/channel_state_cache.h" | 22 | #include "video_core/control/channel_state_cache.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 23 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/memory_manager.h" | 24 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/rasterizer_interface.h" | 25 | #include "video_core/rasterizer_interface.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | ||
| 24 | 27 | ||
| 25 | namespace VideoCommon { | 28 | namespace VideoCommon { |
| 26 | 29 | ||
| 30 | using AsyncJobId = SlotId; | ||
| 31 | |||
| 32 | static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0}; | ||
| 33 | |||
| 27 | template <class QueryCache, class HostCounter> | 34 | template <class QueryCache, class HostCounter> |
| 28 | class CounterStreamBase { | 35 | class CounterStreamBase { |
| 29 | public: | 36 | public: |
| @@ -93,9 +100,13 @@ private: | |||
| 93 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | 100 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> |
| 94 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 101 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 95 | public: | 102 | public: |
| 96 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_) | 103 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, |
| 97 | : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | 104 | Core::Memory::Memory& cpu_memory_) |
| 98 | VideoCore::QueryType::SamplesPassed}}} {} | 105 | : rasterizer{rasterizer_}, |
| 106 | cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | ||
| 107 | VideoCore::QueryType::SamplesPassed}}} { | ||
| 108 | (void)slot_async_jobs.insert(); // Null value | ||
| 109 | } | ||
| 99 | 110 | ||
| 100 | void InvalidateRegion(VAddr addr, std::size_t size) { | 111 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 101 | std::unique_lock lock{mutex}; | 112 | std::unique_lock lock{mutex}; |
| @@ -126,10 +137,15 @@ public: | |||
| 126 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 137 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); |
| 127 | } | 138 | } |
| 128 | 139 | ||
| 129 | query->BindCounter(Stream(type).Current(), timestamp); | 140 | auto result = query->BindCounter(Stream(type).Current(), timestamp); |
| 130 | if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 141 | if (result) { |
| 131 | AsyncFlushQuery(*cpu_addr); | 142 | auto async_job_id = query->GetAsyncJob(); |
| 143 | auto& async_job = slot_async_jobs[async_job_id]; | ||
| 144 | async_job.collected = true; | ||
| 145 | async_job.value = *result; | ||
| 146 | query->SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 132 | } | 147 | } |
| 148 | AsyncFlushQuery(query, timestamp, lock); | ||
| 133 | } | 149 | } |
| 134 | 150 | ||
| 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 151 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| @@ -173,15 +189,18 @@ public: | |||
| 173 | } | 189 | } |
| 174 | 190 | ||
| 175 | void CommitAsyncFlushes() { | 191 | void CommitAsyncFlushes() { |
| 192 | std::unique_lock lock{mutex}; | ||
| 176 | committed_flushes.push_back(uncommitted_flushes); | 193 | committed_flushes.push_back(uncommitted_flushes); |
| 177 | uncommitted_flushes.reset(); | 194 | uncommitted_flushes.reset(); |
| 178 | } | 195 | } |
| 179 | 196 | ||
| 180 | bool HasUncommittedFlushes() const { | 197 | bool HasUncommittedFlushes() const { |
| 198 | std::unique_lock lock{mutex}; | ||
| 181 | return uncommitted_flushes != nullptr; | 199 | return uncommitted_flushes != nullptr; |
| 182 | } | 200 | } |
| 183 | 201 | ||
| 184 | bool ShouldWaitAsyncFlushes() const { | 202 | bool ShouldWaitAsyncFlushes() const { |
| 203 | std::unique_lock lock{mutex}; | ||
| 185 | if (committed_flushes.empty()) { | 204 | if (committed_flushes.empty()) { |
| 186 | return false; | 205 | return false; |
| 187 | } | 206 | } |
| @@ -189,6 +208,7 @@ public: | |||
| 189 | } | 208 | } |
| 190 | 209 | ||
| 191 | void PopAsyncFlushes() { | 210 | void PopAsyncFlushes() { |
| 211 | std::unique_lock lock{mutex}; | ||
| 192 | if (committed_flushes.empty()) { | 212 | if (committed_flushes.empty()) { |
| 193 | return; | 213 | return; |
| 194 | } | 214 | } |
| @@ -197,15 +217,25 @@ public: | |||
| 197 | committed_flushes.pop_front(); | 217 | committed_flushes.pop_front(); |
| 198 | return; | 218 | return; |
| 199 | } | 219 | } |
| 200 | for (VAddr query_address : *flush_list) { | 220 | for (AsyncJobId async_job_id : *flush_list) { |
| 201 | FlushAndRemoveRegion(query_address, 4); | 221 | AsyncJob& async_job = slot_async_jobs[async_job_id]; |
| 222 | if (!async_job.collected) { | ||
| 223 | FlushAndRemoveRegion(async_job.query_location, 2, true); | ||
| 224 | } | ||
| 202 | } | 225 | } |
| 203 | committed_flushes.pop_front(); | 226 | committed_flushes.pop_front(); |
| 204 | } | 227 | } |
| 205 | 228 | ||
| 206 | private: | 229 | private: |
| 230 | struct AsyncJob { | ||
| 231 | bool collected = false; | ||
| 232 | u64 value = 0; | ||
| 233 | VAddr query_location = 0; | ||
| 234 | std::optional<u64> timestamp{}; | ||
| 235 | }; | ||
| 236 | |||
| 207 | /// Flushes a memory range to guest memory and removes it from the cache. | 237 | /// Flushes a memory range to guest memory and removes it from the cache. |
| 208 | void FlushAndRemoveRegion(VAddr addr, std::size_t size) { | 238 | void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) { |
| 209 | const u64 addr_begin = addr; | 239 | const u64 addr_begin = addr; |
| 210 | const u64 addr_end = addr_begin + size; | 240 | const u64 addr_end = addr_begin + size; |
| 211 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { | 241 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { |
| @@ -226,7 +256,16 @@ private: | |||
| 226 | continue; | 256 | continue; |
| 227 | } | 257 | } |
| 228 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); | 258 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); |
| 229 | query.Flush(); | 259 | AsyncJobId async_job_id = query.GetAsyncJob(); |
| 260 | auto flush_result = query.Flush(async); | ||
| 261 | if (async_job_id == NULL_ASYNC_JOB_ID) { | ||
| 262 | ASSERT_MSG(false, "This should not be reachable at all"); | ||
| 263 | continue; | ||
| 264 | } | ||
| 265 | AsyncJob& async_job = slot_async_jobs[async_job_id]; | ||
| 266 | async_job.collected = true; | ||
| 267 | async_job.value = flush_result; | ||
| 268 | query.SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 230 | } | 269 | } |
| 231 | std::erase_if(contents, in_range); | 270 | std::erase_if(contents, in_range); |
| 232 | } | 271 | } |
| @@ -253,26 +292,60 @@ private: | |||
| 253 | return found != std::end(contents) ? &*found : nullptr; | 292 | return found != std::end(contents) ? &*found : nullptr; |
| 254 | } | 293 | } |
| 255 | 294 | ||
| 256 | void AsyncFlushQuery(VAddr addr) { | 295 | void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp, |
| 257 | if (!uncommitted_flushes) { | 296 | std::unique_lock<std::recursive_mutex>& lock) { |
| 258 | uncommitted_flushes = std::make_shared<std::vector<VAddr>>(); | 297 | const AsyncJobId new_async_job_id = slot_async_jobs.insert(); |
| 298 | { | ||
| 299 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 300 | query->SetAsyncJob(new_async_job_id); | ||
| 301 | async_job.query_location = query->GetCpuAddr(); | ||
| 302 | async_job.collected = false; | ||
| 303 | |||
| 304 | if (!uncommitted_flushes) { | ||
| 305 | uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>(); | ||
| 306 | } | ||
| 307 | uncommitted_flushes->push_back(new_async_job_id); | ||
| 259 | } | 308 | } |
| 260 | uncommitted_flushes->push_back(addr); | 309 | lock.unlock(); |
| 310 | std::function<void()> operation([this, new_async_job_id, timestamp] { | ||
| 311 | std::unique_lock local_lock{mutex}; | ||
| 312 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 313 | u64 value = async_job.value; | ||
| 314 | VAddr address = async_job.query_location; | ||
| 315 | slot_async_jobs.erase(new_async_job_id); | ||
| 316 | local_lock.unlock(); | ||
| 317 | if (timestamp) { | ||
| 318 | u64 timestamp_value = *timestamp; | ||
| 319 | cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); | ||
| 320 | cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||
| 321 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | ||
| 322 | VideoCommon::CacheType::NoQueryCache); | ||
| 323 | } else { | ||
| 324 | u32 small_value = static_cast<u32>(value); | ||
| 325 | cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | ||
| 326 | rasterizer.InvalidateRegion(address, sizeof(u32), | ||
| 327 | VideoCommon::CacheType::NoQueryCache); | ||
| 328 | } | ||
| 329 | }); | ||
| 330 | rasterizer.SyncOperation(std::move(operation)); | ||
| 261 | } | 331 | } |
| 262 | 332 | ||
| 263 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; | 333 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; |
| 264 | static constexpr unsigned YUZU_PAGEBITS = 12; | 334 | static constexpr unsigned YUZU_PAGEBITS = 12; |
| 265 | 335 | ||
| 336 | SlotVector<AsyncJob> slot_async_jobs; | ||
| 337 | |||
| 266 | VideoCore::RasterizerInterface& rasterizer; | 338 | VideoCore::RasterizerInterface& rasterizer; |
| 339 | Core::Memory::Memory& cpu_memory; | ||
| 267 | 340 | ||
| 268 | std::recursive_mutex mutex; | 341 | mutable std::recursive_mutex mutex; |
| 269 | 342 | ||
| 270 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | 343 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; |
| 271 | 344 | ||
| 272 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 345 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 273 | 346 | ||
| 274 | std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{}; | 347 | std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; |
| 275 | std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes; | 348 | std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; |
| 276 | }; | 349 | }; |
| 277 | 350 | ||
| 278 | template <class QueryCache, class HostCounter> | 351 | template <class QueryCache, class HostCounter> |
| @@ -291,12 +364,12 @@ public: | |||
| 291 | virtual ~HostCounterBase() = default; | 364 | virtual ~HostCounterBase() = default; |
| 292 | 365 | ||
| 293 | /// Returns the current value of the query. | 366 | /// Returns the current value of the query. |
| 294 | u64 Query() { | 367 | u64 Query(bool async = false) { |
| 295 | if (result) { | 368 | if (result) { |
| 296 | return *result; | 369 | return *result; |
| 297 | } | 370 | } |
| 298 | 371 | ||
| 299 | u64 value = BlockingQuery() + base_result; | 372 | u64 value = BlockingQuery(async) + base_result; |
| 300 | if (dependency) { | 373 | if (dependency) { |
| 301 | value += dependency->Query(); | 374 | value += dependency->Query(); |
| 302 | dependency = nullptr; | 375 | dependency = nullptr; |
| @@ -317,7 +390,7 @@ public: | |||
| 317 | 390 | ||
| 318 | protected: | 391 | protected: |
| 319 | /// Returns the value of query from the backend API blocking as needed. | 392 | /// Returns the value of query from the backend API blocking as needed. |
| 320 | virtual u64 BlockingQuery() const = 0; | 393 | virtual u64 BlockingQuery(bool async = false) const = 0; |
| 321 | 394 | ||
| 322 | private: | 395 | private: |
| 323 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | 396 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. |
| @@ -340,26 +413,33 @@ public: | |||
| 340 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; | 413 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; |
| 341 | 414 | ||
| 342 | /// Flushes the query to guest memory. | 415 | /// Flushes the query to guest memory. |
| 343 | virtual void Flush() { | 416 | virtual u64 Flush(bool async = false) { |
| 344 | // When counter is nullptr it means that it's just been reset. We are supposed to write a | 417 | // When counter is nullptr it means that it's just been reset. We are supposed to write a |
| 345 | // zero in these cases. | 418 | // zero in these cases. |
| 346 | const u64 value = counter ? counter->Query() : 0; | 419 | const u64 value = counter ? counter->Query(async) : 0; |
| 420 | if (async) { | ||
| 421 | return value; | ||
| 422 | } | ||
| 347 | std::memcpy(host_ptr, &value, sizeof(u64)); | 423 | std::memcpy(host_ptr, &value, sizeof(u64)); |
| 348 | 424 | ||
| 349 | if (timestamp) { | 425 | if (timestamp) { |
| 350 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | 426 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); |
| 351 | } | 427 | } |
| 428 | return value; | ||
| 352 | } | 429 | } |
| 353 | 430 | ||
| 354 | /// Binds a counter to this query. | 431 | /// Binds a counter to this query. |
| 355 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | 432 | std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_, |
| 433 | std::optional<u64> timestamp_) { | ||
| 434 | std::optional<u64> result{}; | ||
| 356 | if (counter) { | 435 | if (counter) { |
| 357 | // If there's an old counter set it means the query is being rewritten by the game. | 436 | // If there's an old counter set it means the query is being rewritten by the game. |
| 358 | // To avoid losing the data forever, flush here. | 437 | // To avoid losing the data forever, flush here. |
| 359 | Flush(); | 438 | result = std::make_optional(Flush()); |
| 360 | } | 439 | } |
| 361 | counter = std::move(counter_); | 440 | counter = std::move(counter_); |
| 362 | timestamp = timestamp_; | 441 | timestamp = timestamp_; |
| 442 | return result; | ||
| 363 | } | 443 | } |
| 364 | 444 | ||
| 365 | VAddr GetCpuAddr() const noexcept { | 445 | VAddr GetCpuAddr() const noexcept { |
| @@ -374,6 +454,14 @@ public: | |||
| 374 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | 454 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; |
| 375 | } | 455 | } |
| 376 | 456 | ||
| 457 | void SetAsyncJob(AsyncJobId assigned_async_job_) { | ||
| 458 | assigned_async_job = assigned_async_job_; | ||
| 459 | } | ||
| 460 | |||
| 461 | AsyncJobId GetAsyncJob() const { | ||
| 462 | return assigned_async_job; | ||
| 463 | } | ||
| 464 | |||
| 377 | protected: | 465 | protected: |
| 378 | /// Returns true when querying the counter may potentially block. | 466 | /// Returns true when querying the counter may potentially block. |
| 379 | bool WaitPending() const noexcept { | 467 | bool WaitPending() const noexcept { |
| @@ -389,6 +477,7 @@ private: | |||
| 389 | u8* host_ptr; ///< Writable host pointer. | 477 | u8* host_ptr; ///< Writable host pointer. |
| 390 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | 478 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. |
| 391 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | 479 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. |
| 480 | AsyncJobId assigned_async_job; | ||
| 392 | }; | 481 | }; |
| 393 | 482 | ||
| 394 | } // namespace VideoCommon | 483 | } // namespace VideoCommon |
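
Note on the query_cache.h hunk above: the change boils down to a deferred write-back. Every asynchronous query is given an AsyncJob slot, the flush path (or an early rebind through BindCounter) fills in the value and marks it collected, and a queued operation later writes the result to guest memory after dropping the cache lock. A minimal sketch of that pattern, using plain standard containers as stand-ins for SlotVector, Core::Memory and the rasterizer's sync-operation queue (hypothetical names, not yuzu's actual API):

    #include <cstdint>
    #include <cstring>
    #include <functional>
    #include <mutex>
    #include <optional>
    #include <vector>

    // One pending result and where it has to land in guest memory.
    struct PendingQuery {
        bool collected = false;
        std::uint64_t value = 0;
        std::size_t location = 0;
    };

    class DeferredWriteback {
    public:
        explicit DeferredWriteback(std::vector<std::uint8_t>& guest) : guest_memory{guest} {}

        // Registers a job and returns the operation that later flushes it; the
        // real code hands an equivalent lambda to rasterizer.SyncOperation().
        std::function<void()> Schedule(std::size_t location, std::optional<std::uint64_t> timestamp) {
            std::unique_lock lock{mutex};
            const std::size_t id = jobs.size();
            PendingQuery job;
            job.location = location;
            jobs.push_back(job);
            lock.unlock();
            return [this, id, timestamp] {
                std::unique_lock local{mutex};
                const PendingQuery finished = jobs[id];
                local.unlock();  // the guest write happens outside the lock
                if (timestamp) {
                    // Large query: 64-bit value followed by the 64-bit timestamp.
                    std::memcpy(guest_memory.data() + finished.location, &finished.value, sizeof(std::uint64_t));
                    const std::uint64_t ts = *timestamp;
                    std::memcpy(guest_memory.data() + finished.location + sizeof(std::uint64_t), &ts, sizeof(std::uint64_t));
                } else {
                    // Small query: truncate to 32 bits before writing.
                    const std::uint32_t small = static_cast<std::uint32_t>(finished.value);
                    std::memcpy(guest_memory.data() + finished.location, &small, sizeof(std::uint32_t));
                }
            };
        }

        // Called once the backend query value is available (the "collected" path).
        void Collect(std::size_t id, std::uint64_t value) {
            std::unique_lock lock{mutex};
            jobs[id].collected = true;
            jobs[id].value = value;
        }

    private:
        std::vector<std::uint8_t>& guest_memory;
        std::vector<PendingQuery> jobs;
        std::mutex mutex;
    };
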
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a8c3f8b67..18d3c3ac0 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -200,6 +201,8 @@ private: | |||
| 200 | struct BufferCacheParams { | 201 | struct BufferCacheParams { |
| 201 | using Runtime = OpenGL::BufferCacheRuntime; | 202 | using Runtime = OpenGL::BufferCacheRuntime; |
| 202 | using Buffer = OpenGL::Buffer; | 203 | using Buffer = OpenGL::Buffer; |
| 204 | using Async_Buffer = u32; | ||
| 205 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 203 | 206 | ||
| 204 | static constexpr bool IS_OPENGL = true; | 207 | static constexpr bool IS_OPENGL = true; |
| 205 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | 208 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |
| @@ -208,6 +211,7 @@ struct BufferCacheParams { | |||
| 208 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 211 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 209 | static constexpr bool USE_MEMORY_MAPS = false; | 212 | static constexpr bool USE_MEMORY_MAPS = false; |
| 210 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | 213 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; |
| 214 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||
| 211 | }; | 215 | }; |
| 212 | 216 | ||
| 213 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 217 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
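
BufferCacheParams above now also names an Async_Buffer type, a MemoryTracker, and an IMPLEMENTS_ASYNC_DOWNLOADS flag; the buffer cache template pulls these out as aliases and constexpr switches. A small sketch of that compile-time parameterisation with made-up names (not the real BufferCache internals):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct ExampleParams {
        using Buffer = std::vector<std::uint8_t>; // backend buffer object
        using AsyncBuffer = std::uint32_t;        // handle used for async downloads
        static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
    };

    template <class P>
    class ExampleCache {
    public:
        using Buffer = typename P::Buffer;
        using AsyncBuffer = typename P::AsyncBuffer;

        void Download(std::size_t size) {
            if constexpr (P::IMPLEMENTS_ASYNC_DOWNLOADS) {
                pending.push_back(AsyncBuffer{}); // collected later, off the hot path
            } else {
                staging.resize(size);             // synchronous readback fallback
            }
        }

    private:
        Buffer staging;
        std::vector<AsyncBuffer> pending;
    };

    // Usage: ExampleCache<ExampleParams> cache; cache.Download(64);
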
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp new file mode 100644 index 000000000..f15ae8e25 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>; | ||
| 9 | } | ||
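
The new gl_buffer_cache_base.cpp contains only an explicit instantiation of BufferCache<OpenGL::BufferCacheParams>, pinning the heavy template expansion to a single translation unit. A generic illustration of the idiom with a toy template (the matching extern declaration is what a header would typically add; the diff does not show yuzu's headers, so treat that part as an assumption):

    // widget.h: template defined in a header, as usual.
    template <class T>
    struct Widget {
        T value{};
        T Twice() const { return value + value; }
    };

    // Still in widget.h: promise that one .cpp provides Widget<int>, so other
    // translation units skip instantiating it themselves.
    extern template struct Widget<int>;

    // widget.cpp: the single definition point.
    // #include "widget.h"
    template struct Widget<int>;
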
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 22ed16ebf..400c21981 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -108,7 +108,8 @@ bool IsASTCSupported() { | |||
| 108 | 108 | ||
| 109 | [[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { | 109 | [[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { |
| 110 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 110 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 111 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | 111 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool") || |
| 112 | Settings::values.renderer_debug.GetValue(); | ||
| 112 | } | 113 | } |
| 113 | } // Anonymous namespace | 114 | } // Anonymous namespace |
| 114 | 115 | ||
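
gl_device.cpp now treats the renderer_debug setting the same as an attached debug tool. Restating the check as a standalone function, with the setting passed in as a plain bool instead of reading yuzu's Settings (same environment variables and extension name as in the hunk above):

    #include <cstdlib>
    #include <span>
    #include <string_view>

    bool HasExtension(std::span<const std::string_view> extensions, std::string_view name) {
        for (const std::string_view extension : extensions) {
            if (extension == name) {
                return true;
            }
        }
        return false;
    }

    bool IsDebugToolAttached(std::span<const std::string_view> extensions, bool renderer_debug) {
        const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
        return nsight || HasExtension(extensions, "GL_EXT_debug_tool") || renderer_debug;
    }
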
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index f1446e732..e21b19dcc 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -30,7 +30,17 @@ private: | |||
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | using Fence = std::shared_ptr<GLInnerFence>; | 32 | using Fence = std::shared_ptr<GLInnerFence>; |
| 33 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; | 33 | |
| 34 | struct FenceManagerParams { | ||
| 35 | using FenceType = Fence; | ||
| 36 | using BufferCacheType = BufferCache; | ||
| 37 | using TextureCacheType = TextureCache; | ||
| 38 | using QueryCacheType = QueryCache; | ||
| 39 | |||
| 40 | static constexpr bool HAS_ASYNC_CHECK = false; | ||
| 41 | }; | ||
| 42 | |||
| 43 | using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>; | ||
| 34 | 44 | ||
| 35 | class FenceManagerOpenGL final : public GenericFenceManager { | 45 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 36 | public: | 46 | public: |
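
FenceManager switches from four separate template arguments to a single FenceManagerParams struct, so new knobs such as HAS_ASYNC_CHECK can be added without touching every backend's using declaration. A sketch of how such a traits struct is typically consumed (hypothetical manager with trivial types, not the real FenceManager):

    #include <utility>

    template <class Traits>
    class ExampleFenceManager {
    public:
        using Fence = typename Traits::FenceType;

        void SignalFence(Fence&& fence) {
            if constexpr (Traits::HAS_ASYNC_CHECK) {
                pending = std::move(fence); // a worker thread polls it later
            } else {
                pending = std::move(fence);
                Wait(pending);              // resolved on the calling thread
            }
        }

    private:
        void Wait(Fence&) {}
        Fence pending{};
    };

    struct ExampleTraits {
        using FenceType = int;
        static constexpr bool HAS_ASYNC_CHECK = false;
    };

    // Usage: ExampleFenceManager<ExampleTraits> manager; manager.SignalFence(1);
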
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 5070db441..99d7347f5 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 26 | 26 | ||
| 27 | } // Anonymous namespace | 27 | } // Anonymous namespace |
| 28 | 28 | ||
| 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_) | 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) |
| 30 | : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {} | 30 | : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} |
| 31 | 31 | ||
| 32 | QueryCache::~QueryCache() = default; | 32 | QueryCache::~QueryCache() = default; |
| 33 | 33 | ||
| @@ -74,7 +74,7 @@ void HostCounter::EndQuery() { | |||
| 74 | glEndQuery(GetTarget(type)); | 74 | glEndQuery(GetTarget(type)); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | u64 HostCounter::BlockingQuery() const { | 77 | u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const { |
| 78 | GLint64 value; | 78 | GLint64 value; |
| 79 | glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); | 79 | glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); |
| 80 | return static_cast<u64>(value); | 80 | return static_cast<u64>(value); |
| @@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { | |||
| 96 | return *this; | 96 | return *this; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | void CachedQuery::Flush() { | 99 | u64 CachedQuery::Flush([[maybe_unused]] bool async) { |
| 100 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. | 100 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. |
| 101 | // To avoid this disable and re-enable keeping the dependency stream. | 101 | // To avoid this disable and re-enable keeping the dependency stream. |
| 102 | // But we only have to do this if we have pending waits to be done. | 102 | // But we only have to do this if we have pending waits to be done. |
| @@ -106,11 +106,13 @@ void CachedQuery::Flush() { | |||
| 106 | stream.Update(false); | 106 | stream.Update(false); |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | VideoCommon::CachedQueryBase<HostCounter>::Flush(); | 109 | auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); |
| 110 | 110 | ||
| 111 | if (slice_counter) { | 111 | if (slice_counter) { |
| 112 | stream.Update(true); | 112 | stream.Update(true); |
| 113 | } | 113 | } |
| 114 | |||
| 115 | return result; | ||
| 114 | } | 116 | } |
| 115 | 117 | ||
| 116 | } // namespace OpenGL | 118 | } // namespace OpenGL |
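
The gl_query_cache.cpp hunk follows the new Flush(async) contract from the common base: Flush now returns the counter value, and when async is set the caller stores that value in an AsyncJob instead of relying on an immediate host write. A toy restatement of the contract, without the GL query and dependency-stream handling (hypothetical struct, not the real CachedQuery):

    #include <cstdint>
    #include <cstring>
    #include <optional>

    struct ToyQuery {
        std::uint64_t counter_value = 0;
        std::optional<std::uint64_t> timestamp;
        unsigned char host_memory[16]{};

        std::uint64_t Flush(bool async = false) {
            const std::uint64_t value = counter_value;
            if (async) {
                return value; // caller writes it back later via the async job
            }
            std::memcpy(host_memory, &value, sizeof(value));
            if (timestamp) {
                std::memcpy(host_memory + sizeof(value), &*timestamp, sizeof(value));
            }
            return value;
        }
    };
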
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 14ce59990..872513f22 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | |||
| 28 | class QueryCache final | 28 | class QueryCache final |
| 29 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 29 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 30 | public: | 30 | public: |
| 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_); | 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); |
| 32 | ~QueryCache(); | 32 | ~QueryCache(); |
| 33 | 33 | ||
| 34 | OGLQuery AllocateQuery(VideoCore::QueryType type); | 34 | OGLQuery AllocateQuery(VideoCore::QueryType type); |
| @@ -51,7 +51,7 @@ public: | |||
| 51 | void EndQuery(); | 51 | void EndQuery(); |
| 52 | 52 | ||
| 53 | private: | 53 | private: |
| 54 | u64 BlockingQuery() const override; | 54 | u64 BlockingQuery(bool async = false) const override; |
| 55 | 55 | ||
| 56 | QueryCache& cache; | 56 | QueryCache& cache; |
| 57 | const VideoCore::QueryType type; | 57 | const VideoCore::QueryType type; |
| @@ -70,7 +70,7 @@ public: | |||
| 70 | CachedQuery(const CachedQuery&) = delete; | 70 | CachedQuery(const CachedQuery&) = delete; |
| 71 | CachedQuery& operator=(const CachedQuery&) = delete; | 71 | CachedQuery& operator=(const CachedQuery&) = delete; |
| 72 | 72 | ||
| 73 | void Flush() override; | 73 | u64 Flush(bool async = false) override; |
| 74 | 74 | ||
| 75 | private: | 75 | private: |
| 76 | QueryCache* cache; | 76 | QueryCache* cache; |
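
Side note on the repeated "bool async = false" defaults in these overrides: default arguments are bound to the static type of the call, not the dynamic type, so keeping the base and derived defaults identical (as done here) avoids surprises. A toy example of the pitfall, with deliberately mismatched defaults unlike the code above:

    #include <iostream>

    struct Base {
        virtual ~Base() = default;
        virtual void Query(bool async = false) const { std::cout << "base " << async << '\n'; }
    };

    struct Derived : Base {
        void Query(bool async = true) const override { std::cout << "derived " << async << '\n'; }
    };

    int main() {
        Derived d;
        const Base& b = d;
        b.Query(); // prints "derived 0": Derived's body, but Base's default argument
        d.Query(); // prints "derived 1": Derived's default argument
    }
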
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 90e35e307..0089b4b27 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 63 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 63 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 64 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, | 64 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, |
| 65 | state_tracker, gpu.ShaderNotify()), | 65 | state_tracker, gpu.ShaderNotify()), |
| 66 | query_cache(*this), accelerate_dma(buffer_cache, texture_cache), | 66 | query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), |
| 67 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 67 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 68 | blit_image(program_manager_) {} | 68 | blit_image(program_manager_) {} |
| 69 | 69 | ||
| @@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 1287 | } | 1287 | } |
| 1288 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); | 1288 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); |
| 1289 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | 1289 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; |
| 1290 | const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing | 1290 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 1291 | : VideoCommon::ObtainBufferOperation::MarkAsWritten; | ||
| 1292 | const auto [buffer, offset] = | 1291 | const auto [buffer, offset] = |
| 1293 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); | 1292 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); |
| 1294 | 1293 | ||
| @@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 1299 | if constexpr (IS_IMAGE_UPLOAD) { | 1298 | if constexpr (IS_IMAGE_UPLOAD) { |
| 1300 | image->UploadMemory(buffer->Handle(), offset, copy_span); | 1299 | image->UploadMemory(buffer->Handle(), offset, copy_span); |
| 1301 | } else { | 1300 | } else { |
| 1302 | image->DownloadMemory(buffer->Handle(), offset, copy_span); | 1301 | texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, |
| 1302 | buffer_operand.address, buffer_size); | ||
| 1303 | } | 1303 | } |
| 1304 | return true; | 1304 | return true; |
| 1305 | } | 1305 | } |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0b9c4a904..052456f61 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 803 | 803 | ||
| 804 | void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, | 804 | void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, |
| 805 | std::span<const VideoCommon::BufferImageCopy> copies) { | 805 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 806 | std::array buffer_handles{buffer_handle}; | ||
| 807 | std::array buffer_offsets{buffer_offset}; | ||
| 808 | DownloadMemory(buffer_handles, buffer_offsets, copies); | ||
| 809 | } | ||
| 810 | |||
| 811 | void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets, | ||
| 812 | std::span<const VideoCommon::BufferImageCopy> copies) { | ||
| 806 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | 813 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); |
| 807 | if (is_rescaled) { | 814 | if (is_rescaled) { |
| 808 | ScaleDown(); | 815 | ScaleDown(); |
| 809 | } | 816 | } |
| 810 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 817 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 811 | glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); | 818 | for (size_t i = 0; i < buffer_handles.size(); i++) { |
| 812 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 819 | auto& buffer_handle = buffer_handles[i]; |
| 820 | glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); | ||
| 821 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||
| 813 | 822 | ||
| 814 | u32 current_row_length = std::numeric_limits<u32>::max(); | 823 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 815 | u32 current_image_height = std::numeric_limits<u32>::max(); | 824 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 816 | 825 | ||
| 817 | for (const VideoCommon::BufferImageCopy& copy : copies) { | 826 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 818 | if (copy.image_subresource.base_level >= gl_num_levels) { | 827 | if (copy.image_subresource.base_level >= gl_num_levels) { |
| 819 | continue; | 828 | continue; |
| 820 | } | 829 | } |
| 821 | if (current_row_length != copy.buffer_row_length) { | 830 | if (current_row_length != copy.buffer_row_length) { |
| 822 | current_row_length = copy.buffer_row_length; | 831 | current_row_length = copy.buffer_row_length; |
| 823 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); | 832 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); |
| 824 | } | 833 | } |
| 825 | if (current_image_height != copy.buffer_image_height) { | 834 | if (current_image_height != copy.buffer_image_height) { |
| 826 | current_image_height = copy.buffer_image_height; | 835 | current_image_height = copy.buffer_image_height; |
| 827 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | 836 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); |
| 837 | } | ||
| 838 | CopyImageToBuffer(copy, buffer_offsets[i]); | ||
| 828 | } | 839 | } |
| 829 | CopyImageToBuffer(copy, buffer_offset); | ||
| 830 | } | 840 | } |
| 831 | if (is_rescaled) { | 841 | if (is_rescaled) { |
| 832 | ScaleUp(true); | 842 | ScaleUp(true); |
| @@ -851,9 +861,12 @@ GLuint Image::StorageHandle() noexcept { | |||
| 851 | case PixelFormat::ASTC_2D_8X5_SRGB: | 861 | case PixelFormat::ASTC_2D_8X5_SRGB: |
| 852 | case PixelFormat::ASTC_2D_5X4_SRGB: | 862 | case PixelFormat::ASTC_2D_5X4_SRGB: |
| 853 | case PixelFormat::ASTC_2D_5X5_SRGB: | 863 | case PixelFormat::ASTC_2D_5X5_SRGB: |
| 864 | case PixelFormat::ASTC_2D_10X5_SRGB: | ||
| 865 | case PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 854 | case PixelFormat::ASTC_2D_10X8_SRGB: | 866 | case PixelFormat::ASTC_2D_10X8_SRGB: |
| 855 | case PixelFormat::ASTC_2D_6X6_SRGB: | 867 | case PixelFormat::ASTC_2D_6X6_SRGB: |
| 856 | case PixelFormat::ASTC_2D_10X10_SRGB: | 868 | case PixelFormat::ASTC_2D_10X10_SRGB: |
| 869 | case PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 857 | case PixelFormat::ASTC_2D_12X12_SRGB: | 870 | case PixelFormat::ASTC_2D_12X12_SRGB: |
| 858 | case PixelFormat::ASTC_2D_8X6_SRGB: | 871 | case PixelFormat::ASTC_2D_8X6_SRGB: |
| 859 | case PixelFormat::ASTC_2D_6X5_SRGB: | 872 | case PixelFormat::ASTC_2D_6X5_SRGB: |
| @@ -1113,7 +1126,8 @@ bool Image::ScaleDown(bool ignore) { | |||
| 1113 | 1126 | ||
| 1114 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 1127 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 1115 | ImageId image_id_, Image& image, const SlotVector<Image>&) | 1128 | ImageId image_id_, Image& image, const SlotVector<Image>&) |
| 1116 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { | 1129 | : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr}, |
| 1130 | views{runtime.null_image_views} { | ||
| 1117 | const Device& device = runtime.device; | 1131 | const Device& device = runtime.device; |
| 1118 | if (True(image.flags & ImageFlagBits::Converted)) { | 1132 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 1119 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 1133 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| @@ -1204,12 +1218,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 1204 | 1218 | ||
| 1205 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 1219 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 1206 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | 1220 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) |
| 1207 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | 1221 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, |
| 1208 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | 1222 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} |
| 1209 | 1223 | ||
| 1210 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 1224 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 1211 | const VideoCommon::ImageViewInfo& view_info) | 1225 | const VideoCommon::ImageViewInfo& view_info) |
| 1212 | : VideoCommon::ImageViewBase{info, view_info} {} | 1226 | : VideoCommon::ImageViewBase{info, view_info, 0} {} |
| 1213 | 1227 | ||
| 1214 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) | 1228 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) |
| 1215 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} | 1229 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| @@ -1269,7 +1283,7 @@ GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { | |||
| 1269 | ApplySwizzle(view.handle, format, casted_swizzle); | 1283 | ApplySwizzle(view.handle, format, casted_swizzle); |
| 1270 | } | 1284 | } |
| 1271 | if (set_object_label) { | 1285 | if (set_object_label) { |
| 1272 | const std::string name = VideoCommon::Name(*this); | 1286 | const std::string name = VideoCommon::Name(*this, gpu_addr); |
| 1273 | glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); | 1287 | glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); |
| 1274 | } | 1288 | } |
| 1275 | return view.handle; | 1289 | return view.handle; |
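
In the gl_texture_cache.cpp hunk, Image::DownloadMemory keeps its single-buffer signature by wrapping the arguments in std::array and forwarding to the new span overload. The same forwarding shape in isolation, with generic names and the GL binding/pack-state handling omitted:

    #include <array>
    #include <cstddef>
    #include <span>

    void DownloadInto(std::span<const unsigned> handles, std::span<const std::size_t> offsets) {
        for (std::size_t i = 0; i < handles.size(); ++i) {
            // bind handles[i], pack the copies at offsets[i], ...
        }
    }

    void DownloadInto(unsigned handle, std::size_t offset) {
        std::array handles{handle};
        std::array offsets{offset};
        DownloadInto(handles, offsets); // one code path for 1..N destination buffers
    }
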
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 911e4607a..1190999a8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -215,6 +215,9 @@ public: | |||
| 215 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, | 215 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, |
| 216 | std::span<const VideoCommon::BufferImageCopy> copies); | 216 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 217 | 217 | ||
| 218 | void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, | ||
| 219 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 220 | |||
| 218 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | 221 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); |
| 219 | 222 | ||
| 220 | GLuint StorageHandle() noexcept; | 223 | GLuint StorageHandle() noexcept; |
| @@ -311,7 +314,6 @@ private: | |||
| 311 | std::unique_ptr<StorageViews> storage_views; | 314 | std::unique_ptr<StorageViews> storage_views; |
| 312 | GLenum internal_format = GL_NONE; | 315 | GLenum internal_format = GL_NONE; |
| 313 | GLuint default_handle = 0; | 316 | GLuint default_handle = 0; |
| 314 | GPUVAddr gpu_addr = 0; | ||
| 315 | u32 buffer_size = 0; | 317 | u32 buffer_size = 0; |
| 316 | GLuint original_texture = 0; | 318 | GLuint original_texture = 0; |
| 317 | int num_samples = 0; | 319 | int num_samples = 0; |
| @@ -376,6 +378,7 @@ struct TextureCacheParams { | |||
| 376 | using Sampler = OpenGL::Sampler; | 378 | using Sampler = OpenGL::Sampler; |
| 377 | using Framebuffer = OpenGL::Framebuffer; | 379 | using Framebuffer = OpenGL::Framebuffer; |
| 378 | using AsyncBuffer = u32; | 380 | using AsyncBuffer = u32; |
| 381 | using BufferType = GLuint; | ||
| 379 | }; | 382 | }; |
| 380 | 383 | ||
| 381 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | 384 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ef1190e1f..c7dc7e0a1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -100,10 +100,13 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB | |||
| 100 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | 100 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM |
| 101 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | 101 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB |
| 102 | {GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM | 102 | {GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM |
| 103 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR}, // ASTC_2D_10X6_SRGB | ||
| 103 | {GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM | 104 | {GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM |
| 104 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB | 105 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB |
| 105 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | 106 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM |
| 106 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | 107 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB |
| 108 | {GL_COMPRESSED_RGBA_ASTC_12x10_KHR}, // ASTC_2D_12X10_UNORM | ||
| 109 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR}, // ASTC_2D_12X10_SRGB | ||
| 107 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | 110 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM |
| 108 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | 111 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB |
| 109 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | 112 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM |
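
FORMAT_TABLE is indexed by position in the pixel-format enum, which is why the ASTC_2D_10X6_SRGB and ASTC_2D_12X10 entries must be inserted at exactly the matching rows here and in the Vulkan table below. A toy version of the enum-indexed table plus the size check that turns a missing row into a compile error (placeholder values, not real GL or Vulkan enums):

    #include <array>
    #include <cstddef>

    enum class ToyFormat : std::size_t {
        A8B8G8R8_UNORM,
        ASTC_2D_10X6_SRGB,
        ASTC_2D_12X10_UNORM,
        MaxToyFormat,
    };

    // Placeholder values stand in for the backend format enums.
    constexpr std::array<int, static_cast<std::size_t>(ToyFormat::MaxToyFormat)> FORMAT_TABLE{{
        1, // A8B8G8R8_UNORM
        2, // ASTC_2D_10X6_SRGB
        3, // ASTC_2D_12X10_UNORM
    }};

    static_assert(FORMAT_TABLE.size() == static_cast<std::size_t>(ToyFormat::MaxToyFormat),
                  "every pixel format needs a table entry");

    constexpr int Translate(ToyFormat format) {
        return FORMAT_TABLE[static_cast<std::size_t>(format)];
    }
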
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 5dce51be8..8853cf0f7 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -197,10 +197,13 @@ struct FormatTuple { | |||
| 197 | {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM | 197 | {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM |
| 198 | {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB | 198 | {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB |
| 199 | {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM | 199 | {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM |
| 200 | {VK_FORMAT_ASTC_10x6_SRGB_BLOCK}, // ASTC_2D_10X6_SRGB | ||
| 200 | {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM | 201 | {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM |
| 201 | {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB | 202 | {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB |
| 202 | {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM | 203 | {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM |
| 203 | {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB | 204 | {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB |
| 205 | {VK_FORMAT_ASTC_12x10_UNORM_BLOCK}, // ASTC_2D_12X10_UNORM | ||
| 206 | {VK_FORMAT_ASTC_12x10_SRGB_BLOCK}, // ASTC_2D_12X10_SRGB | ||
| 204 | {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM | 207 | {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM |
| 205 | {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB | 208 | {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB |
| 206 | {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM | 209 | {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2a8d9e377..8e31eba34 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -88,13 +88,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 88 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 88 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 89 | Settings::values.renderer_debug.GetValue())), | 89 | Settings::values.renderer_debug.GetValue())), |
| 90 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 90 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), |
| 91 | surface(CreateSurface(instance, render_window)), | 91 | surface(CreateSurface(instance, render_window.GetWindowInfo())), |
| 92 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), | 92 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), |
| 93 | state_tracker(), scheduler(device, state_tracker), | 93 | state_tracker(), scheduler(device, state_tracker), |
| 94 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | 94 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, |
| 95 | render_window.GetFramebufferLayout().height, false), | 95 | render_window.GetFramebufferLayout().height, false), |
| 96 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | 96 | present_manager(render_window, device, memory_allocator, scheduler, swapchain), |
| 97 | screen_info), | 97 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, |
| 98 | scheduler, screen_info), | ||
| 98 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | 99 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, |
| 99 | state_tracker, scheduler) { | 100 | state_tracker, scheduler) { |
| 100 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | 101 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { |
| @@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 121 | return; | 122 | return; |
| 122 | } | 123 | } |
| 123 | // Update screen info if the framebuffer size has changed. | 124 | // Update screen info if the framebuffer size has changed. |
| 124 | if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) { | 125 | screen_info.width = framebuffer->width; |
| 125 | screen_info.width = framebuffer->width; | 126 | screen_info.height = framebuffer->height; |
| 126 | screen_info.height = framebuffer->height; | 127 | |
| 127 | } | ||
| 128 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 128 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 129 | const bool use_accelerated = | 129 | const bool use_accelerated = |
| 130 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 130 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 131 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 131 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 132 | RenderScreenshot(*framebuffer, use_accelerated); | 132 | RenderScreenshot(*framebuffer, use_accelerated); |
| 133 | 133 | ||
| 134 | bool has_been_recreated = false; | 134 | Frame* frame = present_manager.GetRenderFrame(); |
| 135 | const auto recreate_swapchain = [&](u32 width, u32 height) { | 135 | blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); |
| 136 | if (!has_been_recreated) { | 136 | scheduler.Flush(*frame->render_ready); |
| 137 | has_been_recreated = true; | 137 | present_manager.Present(frame); |
| 138 | scheduler.Finish(); | ||
| 139 | } | ||
| 140 | swapchain.Create(width, height, is_srgb); | ||
| 141 | }; | ||
| 142 | |||
| 143 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); | ||
| 144 | if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width || | ||
| 145 | swapchain.GetHeight() != layout.height) { | ||
| 146 | recreate_swapchain(layout.width, layout.height); | ||
| 147 | } | ||
| 148 | bool is_outdated; | ||
| 149 | do { | ||
| 150 | swapchain.AcquireNextImage(); | ||
| 151 | is_outdated = swapchain.IsOutDated(); | ||
| 152 | if (is_outdated) { | ||
| 153 | recreate_swapchain(layout.width, layout.height); | ||
| 154 | } | ||
| 155 | } while (is_outdated); | ||
| 156 | if (has_been_recreated) { | ||
| 157 | blit_screen.Recreate(); | ||
| 158 | } | ||
| 159 | const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); | ||
| 160 | const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); | ||
| 161 | scheduler.Flush(render_semaphore, present_semaphore); | ||
| 162 | scheduler.WaitWorker(); | ||
| 163 | swapchain.Present(render_semaphore); | ||
| 164 | 138 | ||
| 165 | gpu.RendererFrameEndNotify(); | 139 | gpu.RendererFrameEndNotify(); |
| 166 | rasterizer.TickFrame(); | 140 | rasterizer.TickFrame(); |
| @@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 246 | }); | 220 | }); |
| 247 | const VkExtent2D render_area{.width = layout.width, .height = layout.height}; | 221 | const VkExtent2D render_area{.width = layout.width, .height = layout.height}; |
| 248 | const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); | 222 | const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); |
| 249 | // Since we're not rendering to the screen, ignore the render semaphore. | 223 | blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated); |
| 250 | void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated)); | ||
| 251 | 224 | ||
| 252 | const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); | 225 | const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); |
| 253 | const VkBufferCreateInfo dst_buffer_info{ | 226 | const VkBufferCreateInfo dst_buffer_info{ |
| @@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 270 | .pNext = nullptr, | 243 | .pNext = nullptr, |
| 271 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | 244 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, |
| 272 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | 245 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 273 | .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, | 246 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 274 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | 247 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, |
| 275 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 248 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 276 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 249 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
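
With the PresentManager in place, SwapBuffers reduces to: take a free Frame, record the blit into it, flush the scheduler signalling the frame's render-ready semaphore, and hand the frame back for presentation. One plausible shape for that frame hand-off, sketched with stand-in types rather than the real Vulkan handles or yuzu's PresentManager internals:

    #include <condition_variable>
    #include <cstddef>
    #include <mutex>
    #include <queue>
    #include <vector>

    struct Frame {
        int render_ready = 0; // stand-in for the frame's render semaphore
    };

    class ExamplePresentManager {
    public:
        explicit ExamplePresentManager(std::size_t frame_count) {
            frames.resize(frame_count);
            for (Frame& frame : frames) {
                free_queue.push(&frame);
            }
        }

        // Render thread: block until a frame is free, then hand it out.
        Frame* GetRenderFrame() {
            std::unique_lock lock{mutex};
            free_cv.wait(lock, [this] { return !free_queue.empty(); });
            Frame* frame = free_queue.front();
            free_queue.pop();
            return frame;
        }

        // Render thread: queue a recorded frame; a present thread (not shown)
        // would pop it, present it, then call ReturnFrame.
        void Present(Frame* frame) {
            std::scoped_lock lock{mutex};
            present_queue.push(frame);
        }

        // Present thread: recycle the frame once it has been shown.
        void ReturnFrame(Frame* frame) {
            {
                std::scoped_lock lock{mutex};
                free_queue.push(frame);
            }
            free_cv.notify_one();
        }

    private:
        std::vector<Frame> frames;
        std::queue<Frame*> free_queue;
        std::queue<Frame*> present_queue;
        std::mutex mutex;
        std::condition_variable free_cv;
    };
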
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 009e75e0d..f44367cb2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/dynamic_library.h" | 9 | #include "common/dynamic_library.h" |
| 10 | #include "video_core/renderer_base.h" | 10 | #include "video_core/renderer_base.h" |
| 11 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 11 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 12 | #include "video_core/renderer_vulkan/vk_present_manager.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 13 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| @@ -76,6 +77,7 @@ private: | |||
| 76 | StateTracker state_tracker; | 77 | StateTracker state_tracker; |
| 77 | Scheduler scheduler; | 78 | Scheduler scheduler; |
| 78 | Swapchain swapchain; | 79 | Swapchain swapchain; |
| 80 | PresentManager present_manager; | ||
| 79 | BlitScreen blit_screen; | 81 | BlitScreen blit_screen; |
| 80 | RasterizerVulkan rasterizer; | 82 | RasterizerVulkan rasterizer; |
| 81 | std::optional<TurboMode> turbo_mode; | 83 | std::optional<TurboMode> turbo_mode; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2f0cc27e8..1e0fdd3d9 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -122,10 +122,12 @@ struct BlitScreen::BufferData { | |||
| 122 | 122 | ||
| 123 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | 123 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, |
| 124 | const Device& device_, MemoryAllocator& memory_allocator_, | 124 | const Device& device_, MemoryAllocator& memory_allocator_, |
| 125 | Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_) | 125 | Swapchain& swapchain_, PresentManager& present_manager_, |
| 126 | Scheduler& scheduler_, const ScreenInfo& screen_info_) | ||
| 126 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | 127 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, |
| 127 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, | 128 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, |
| 128 | image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 129 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_}, |
| 130 | current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} { | ||
| 129 | resource_ticks.resize(image_count); | 131 | resource_ticks.resize(image_count); |
| 130 | 132 | ||
| 131 | CreateStaticResources(); | 133 | CreateStaticResources(); |
| @@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin | |||
| 135 | BlitScreen::~BlitScreen() = default; | 137 | BlitScreen::~BlitScreen() = default; |
| 136 | 138 | ||
| 137 | void BlitScreen::Recreate() { | 139 | void BlitScreen::Recreate() { |
| 140 | present_manager.WaitPresent(); | ||
| 141 | scheduler.Finish(); | ||
| 142 | device.GetLogical().WaitIdle(); | ||
| 138 | CreateDynamicResources(); | 143 | CreateDynamicResources(); |
| 139 | } | 144 | } |
| 140 | 145 | ||
| 141 | VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | 146 | void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, |
| 142 | const VkFramebuffer& host_framebuffer, | 147 | const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, |
| 143 | const Layout::FramebufferLayout layout, VkExtent2D render_area, | 148 | VkExtent2D render_area, bool use_accelerated) { |
| 144 | bool use_accelerated) { | ||
| 145 | RefreshResources(framebuffer); | 149 | RefreshResources(framebuffer); |
| 146 | 150 | ||
| 147 | // Finish any pending renderpass | 151 | // Finish any pending renderpass |
| 148 | scheduler.RequestOutsideRenderPassOperationContext(); | 152 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 149 | 153 | ||
| 150 | if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) { | ||
| 151 | image_count = swapchain_images; | ||
| 152 | Recreate(); | ||
| 153 | } | ||
| 154 | |||
| 155 | const std::size_t image_index = swapchain.GetImageIndex(); | ||
| 156 | |||
| 157 | scheduler.Wait(resource_ticks[image_index]); | 154 | scheduler.Wait(resource_ticks[image_index]); |
| 158 | resource_ticks[image_index] = scheduler.CurrentTick(); | 155 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 159 | 156 | ||
| @@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 169 | std::memcpy(mapped_span.data(), &data, sizeof(data)); | 166 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 170 | 167 | ||
| 171 | if (!use_accelerated) { | 168 | if (!use_accelerated) { |
| 172 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 169 | const u64 image_offset = GetRawImageOffset(framebuffer); |
| 173 | 170 | ||
| 174 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 171 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 175 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 172 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| @@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 204 | .depth = 1, | 201 | .depth = 1, |
| 205 | }, | 202 | }, |
| 206 | }; | 203 | }; |
| 207 | scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { | 204 | scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { |
| 208 | const VkImage image = *raw_images[image_index]; | 205 | const VkImage image = *raw_images[index]; |
| 209 | const VkImageMemoryBarrier base_barrier{ | 206 | const VkImageMemoryBarrier base_barrier{ |
| 210 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 207 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 211 | .pNext = nullptr, | 208 | .pNext = nullptr, |
| @@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 245 | 242 | ||
| 246 | const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); | 243 | const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); |
| 247 | if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { | 244 | if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { |
| 248 | UpdateAADescriptorSet(image_index, source_image_view, false); | 245 | UpdateAADescriptorSet(source_image_view, false); |
| 249 | const u32 up_scale = Settings::values.resolution_info.up_scale; | 246 | const u32 up_scale = Settings::values.resolution_info.up_scale; |
| 250 | const u32 down_shift = Settings::values.resolution_info.down_shift; | 247 | const u32 down_shift = Settings::values.resolution_info.down_shift; |
| 251 | VkExtent2D size{ | 248 | VkExtent2D size{ |
| 252 | .width = (up_scale * framebuffer.width) >> down_shift, | 249 | .width = (up_scale * framebuffer.width) >> down_shift, |
| 253 | .height = (up_scale * framebuffer.height) >> down_shift, | 250 | .height = (up_scale * framebuffer.height) >> down_shift, |
| 254 | }; | 251 | }; |
| 255 | scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { | 252 | scheduler.Record([this, index = image_index, size, |
| 253 | anti_alias_pass](vk::CommandBuffer cmdbuf) { | ||
| 256 | const VkImageMemoryBarrier base_barrier{ | 254 | const VkImageMemoryBarrier base_barrier{ |
| 257 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 255 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 258 | .pNext = nullptr, | 256 | .pNext = nullptr, |
| @@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 326 | 324 | ||
| 327 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); | 325 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 328 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, | 326 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, |
| 329 | aa_descriptor_sets[image_index], {}); | 327 | aa_descriptor_sets[index], {}); |
| 330 | cmdbuf.Draw(4, 1, 0, 0); | 328 | cmdbuf.Draw(4, 1, 0, 0); |
| 331 | cmdbuf.EndRenderPass(); | 329 | cmdbuf.EndRenderPass(); |
| 332 | 330 | ||
| @@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 369 | }; | 367 | }; |
| 370 | VkImageView fsr_image_view = | 368 | VkImageView fsr_image_view = |
| 371 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); | 369 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); |
| 372 | UpdateDescriptorSet(image_index, fsr_image_view, true); | 370 | UpdateDescriptorSet(fsr_image_view, true); |
| 373 | } else { | 371 | } else { |
| 374 | const bool is_nn = | 372 | const bool is_nn = |
| 375 | Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; | 373 | Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; |
| 376 | UpdateDescriptorSet(image_index, source_image_view, is_nn); | 374 | UpdateDescriptorSet(source_image_view, is_nn); |
| 377 | } | 375 | } |
| 378 | 376 | ||
| 379 | scheduler.Record( | 377 | scheduler.Record([this, host_framebuffer, index = image_index, |
| 380 | [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { | 378 | size = render_area](vk::CommandBuffer cmdbuf) { |
| 381 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | 379 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; |
| 382 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; | 380 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; |
| 383 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; | 381 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; |
| 384 | const VkClearValue clear_color{ | 382 | const VkClearValue clear_color{ |
| 385 | .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, | 383 | .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, |
| 386 | }; | 384 | }; |
| 387 | const VkRenderPassBeginInfo renderpass_bi{ | 385 | const VkRenderPassBeginInfo renderpass_bi{ |
| 388 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 386 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 389 | .pNext = nullptr, | 387 | .pNext = nullptr, |
| 390 | .renderPass = *renderpass, | 388 | .renderPass = *renderpass, |
| 391 | .framebuffer = host_framebuffer, | 389 | .framebuffer = host_framebuffer, |
| 392 | .renderArea = | 390 | .renderArea = |
| 393 | { | 391 | { |
| 394 | .offset = {0, 0}, | 392 | .offset = {0, 0}, |
| 395 | .extent = size, | 393 | .extent = size, |
| 396 | }, | 394 | }, |
| 397 | .clearValueCount = 1, | 395 | .clearValueCount = 1, |
| 398 | .pClearValues = &clear_color, | 396 | .pClearValues = &clear_color, |
| 399 | }; | 397 | }; |
| 400 | const VkViewport viewport{ | 398 | const VkViewport viewport{ |
| 401 | .x = 0.0f, | 399 | .x = 0.0f, |
| 402 | .y = 0.0f, | 400 | .y = 0.0f, |
| 403 | .width = static_cast<float>(size.width), | 401 | .width = static_cast<float>(size.width), |
| 404 | .height = static_cast<float>(size.height), | 402 | .height = static_cast<float>(size.height), |
| 405 | .minDepth = 0.0f, | 403 | .minDepth = 0.0f, |
| 406 | .maxDepth = 1.0f, | 404 | .maxDepth = 1.0f, |
| 407 | }; | 405 | }; |
| 408 | const VkRect2D scissor{ | 406 | const VkRect2D scissor{ |
| 409 | .offset = {0, 0}, | 407 | .offset = {0, 0}, |
| 410 | .extent = size, | 408 | .extent = size, |
| 411 | }; | 409 | }; |
| 412 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 410 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 413 | auto graphics_pipeline = [this]() { | 411 | auto graphics_pipeline = [this]() { |
| 414 | switch (Settings::values.scaling_filter.GetValue()) { | 412 | switch (Settings::values.scaling_filter.GetValue()) { |
| 415 | case Settings::ScalingFilter::NearestNeighbor: | 413 | case Settings::ScalingFilter::NearestNeighbor: |
| 416 | case Settings::ScalingFilter::Bilinear: | 414 | case Settings::ScalingFilter::Bilinear: |
| 417 | return *bilinear_pipeline; | 415 | return *bilinear_pipeline; |
| 418 | case Settings::ScalingFilter::Bicubic: | 416 | case Settings::ScalingFilter::Bicubic: |
| 419 | return *bicubic_pipeline; | 417 | return *bicubic_pipeline; |
| 420 | case Settings::ScalingFilter::Gaussian: | 418 | case Settings::ScalingFilter::Gaussian: |
| 421 | return *gaussian_pipeline; | 419 | return *gaussian_pipeline; |
| 422 | case Settings::ScalingFilter::ScaleForce: | 420 | case Settings::ScalingFilter::ScaleForce: |
| 423 | return *scaleforce_pipeline; | 421 | return *scaleforce_pipeline; |
| 424 | default: | 422 | default: |
| 425 | return *bilinear_pipeline; | 423 | return *bilinear_pipeline; |
| 426 | } | 424 | } |
| 427 | }(); | 425 | }(); |
| 428 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); | 426 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); |
| 429 | cmdbuf.SetViewport(0, viewport); | 427 | cmdbuf.SetViewport(0, viewport); |
| 430 | cmdbuf.SetScissor(0, scissor); | 428 | cmdbuf.SetScissor(0, scissor); |
| 431 | 429 | ||
| 432 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); | 430 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 433 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | 431 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, |
| 434 | descriptor_sets[image_index], {}); | 432 | descriptor_sets[index], {}); |
| 435 | cmdbuf.Draw(4, 1, 0, 0); | 433 | cmdbuf.Draw(4, 1, 0, 0); |
| 436 | cmdbuf.EndRenderPass(); | 434 | cmdbuf.EndRenderPass(); |
| 437 | }); | 435 | }); |
| 438 | return *semaphores[image_index]; | ||
| 439 | } | 436 | } |
| 440 | 437 | ||
| 441 | VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, | 438 | void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, |
| 442 | bool use_accelerated) { | 439 | bool use_accelerated, bool is_srgb) { |
| 443 | const std::size_t image_index = swapchain.GetImageIndex(); | 440 | // Recreate dynamic resources if the image count or colorspace changed | ||
| 444 | const VkExtent2D render_area = swapchain.GetSize(); | 441 | if (const std::size_t swapchain_images = swapchain.GetImageCount(); |
| 442 | swapchain_images != image_count || current_srgb != is_srgb) { | ||
| 443 | current_srgb = is_srgb; | ||
| 444 | image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; | ||
| 445 | image_count = swapchain_images; | ||
| 446 | Recreate(); | ||
| 447 | } | ||
| 448 | |||
| 449 | // Recreate the presentation frame if the dimensions of the window changed | ||
| 445 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); | 450 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); |
| 446 | return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated); | 451 | if (layout.width != frame->width || layout.height != frame->height || |
| 452 | is_srgb != frame->is_srgb) { | ||
| 453 | Recreate(); | ||
| 454 | present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb, | ||
| 455 | image_view_format, *renderpass); | ||
| 456 | } | ||
| 457 | |||
| 458 | const VkExtent2D render_area{frame->width, frame->height}; | ||
| 459 | Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated); | ||
| 460 | if (++image_index >= image_count) { | ||
| 461 | image_index = 0; | ||
| 462 | } | ||
| 447 | } | 463 | } |
| 448 | 464 | ||
| 449 | vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { | 465 | vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { |
| @@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() { | |||
| 471 | } | 487 | } |
| 472 | 488 | ||
| 473 | void BlitScreen::CreateDynamicResources() { | 489 | void BlitScreen::CreateDynamicResources() { |
| 474 | CreateSemaphores(); | ||
| 475 | CreateDescriptorPool(); | 490 | CreateDescriptorPool(); |
| 476 | CreateDescriptorSetLayout(); | 491 | CreateDescriptorSetLayout(); |
| 477 | CreateDescriptorSets(); | 492 | CreateDescriptorSets(); |
| 478 | CreatePipelineLayout(); | 493 | CreatePipelineLayout(); |
| 479 | CreateRenderPass(); | 494 | CreateRenderPass(); |
| 480 | CreateFramebuffers(); | ||
| 481 | CreateGraphicsPipeline(); | 495 | CreateGraphicsPipeline(); |
| 482 | fsr.reset(); | 496 | fsr.reset(); |
| 483 | smaa.reset(); | 497 | smaa.reset(); |
| @@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() { | |||
| 525 | } | 539 | } |
| 526 | } | 540 | } |
| 527 | 541 | ||
| 528 | void BlitScreen::CreateSemaphores() { | ||
| 529 | semaphores.resize(image_count); | ||
| 530 | std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); | ||
| 531 | } | ||
| 532 | |||
| 533 | void BlitScreen::CreateDescriptorPool() { | 542 | void BlitScreen::CreateDescriptorPool() { |
| 534 | const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ | 543 | const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ |
| 535 | { | 544 | { |
| @@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() { | |||
| 571 | } | 580 | } |
| 572 | 581 | ||
| 573 | void BlitScreen::CreateRenderPass() { | 582 | void BlitScreen::CreateRenderPass() { |
| 574 | renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); | 583 | renderpass = CreateRenderPassImpl(image_view_format); |
| 575 | } | 584 | } |
| 576 | 585 | ||
| 577 | vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { | 586 | vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) { |
| 578 | const VkAttachmentDescription color_attachment{ | 587 | const VkAttachmentDescription color_attachment{ |
| 579 | .flags = 0, | 588 | .flags = 0, |
| 580 | .format = format, | 589 | .format = format, |
| @@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present | |||
| 584 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, | 593 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, |
| 585 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, | 594 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, |
| 586 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 595 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 587 | .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, | 596 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 588 | }; | 597 | }; |
| 589 | 598 | ||
| 590 | const VkAttachmentReference color_attachment_ref{ | 599 | const VkAttachmentReference color_attachment_ref{ |
| @@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() { | |||
| 1052 | nn_sampler = device.GetLogical().CreateSampler(ci_nn); | 1061 | nn_sampler = device.GetLogical().CreateSampler(ci_nn); |
| 1053 | } | 1062 | } |
| 1054 | 1063 | ||
| 1055 | void BlitScreen::CreateFramebuffers() { | ||
| 1056 | const VkExtent2D size{swapchain.GetSize()}; | ||
| 1057 | framebuffers.resize(image_count); | ||
| 1058 | |||
| 1059 | for (std::size_t i = 0; i < image_count; ++i) { | ||
| 1060 | const VkImageView image_view{swapchain.GetImageViewIndex(i)}; | ||
| 1061 | framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | void BlitScreen::ReleaseRawImages() { | 1064 | void BlitScreen::ReleaseRawImages() { |
| 1066 | for (const u64 tick : resource_ticks) { | 1065 | for (const u64 tick : resource_ticks) { |
| 1067 | scheduler.Wait(tick); | 1066 | scheduler.Wait(tick); |
| @@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1175 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | 1174 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); |
| 1176 | return; | 1175 | return; |
| 1177 | } | 1176 | } |
| 1178 | aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); | 1177 | aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer)); |
| 1179 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | 1178 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); |
| 1180 | 1179 | ||
| 1181 | const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ | 1180 | const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ |
| @@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1319 | aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); | 1318 | aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); |
| 1320 | } | 1319 | } |
| 1321 | 1320 | ||
| 1322 | void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, | 1321 | void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const { |
| 1323 | bool nn) const { | ||
| 1324 | const VkDescriptorImageInfo image_info{ | 1322 | const VkDescriptorImageInfo image_info{ |
| 1325 | .sampler = nn ? *nn_sampler : *sampler, | 1323 | .sampler = nn ? *nn_sampler : *sampler, |
| 1326 | .imageView = image_view, | 1324 | .imageView = image_view, |
| @@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag | |||
| 1356 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); | 1354 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); |
| 1357 | } | 1355 | } |
| 1358 | 1356 | ||
| 1359 | void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, | 1357 | void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { |
| 1360 | bool nn) const { | ||
| 1361 | const VkDescriptorBufferInfo buffer_info{ | 1358 | const VkDescriptorBufferInfo buffer_info{ |
| 1362 | .buffer = *buffer, | 1359 | .buffer = *buffer, |
| 1363 | .offset = offsetof(BufferData, uniform), | 1360 | .offset = offsetof(BufferData, uniform), |
| @@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) | |||
| 1480 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; | 1477 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; |
| 1481 | } | 1478 | } |
| 1482 | 1479 | ||
| 1483 | u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, | 1480 | u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { |
| 1484 | std::size_t image_index) const { | ||
| 1485 | constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); | 1481 | constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); |
| 1486 | return first_image_offset + GetSizeInBytes(framebuffer) * image_index; | 1482 | return first_image_offset + GetSizeInBytes(framebuffer) * image_index; |
| 1487 | } | 1483 | } |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ebe10b08b..68ec20253 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | 7 | ||
| 8 | #include "core/frontend/framebuffer_layout.h" | ||
| 8 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 9 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 10 | 11 | ||
| @@ -42,6 +43,9 @@ class RasterizerVulkan; | |||
| 42 | class Scheduler; | 43 | class Scheduler; |
| 43 | class SMAA; | 44 | class SMAA; |
| 44 | class Swapchain; | 45 | class Swapchain; |
| 46 | class PresentManager; | ||
| 47 | |||
| 48 | struct Frame; | ||
| 45 | 49 | ||
| 46 | struct ScreenInfo { | 50 | struct ScreenInfo { |
| 47 | VkImage image{}; | 51 | VkImage image{}; |
| @@ -55,18 +59,17 @@ class BlitScreen { | |||
| 55 | public: | 59 | public: |
| 56 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, | 60 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, |
| 57 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | 61 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, |
| 58 | Scheduler& scheduler, const ScreenInfo& screen_info); | 62 | PresentManager& present_manager, Scheduler& scheduler, |
| 63 | const ScreenInfo& screen_info); | ||
| 59 | ~BlitScreen(); | 64 | ~BlitScreen(); |
| 60 | 65 | ||
| 61 | void Recreate(); | 66 | void Recreate(); |
| 62 | 67 | ||
| 63 | [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, | 68 | void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, |
| 64 | const VkFramebuffer& host_framebuffer, | 69 | const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); |
| 65 | const Layout::FramebufferLayout layout, VkExtent2D render_area, | ||
| 66 | bool use_accelerated); | ||
| 67 | 70 | ||
| 68 | [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, | 71 | void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, |
| 69 | bool use_accelerated); | 72 | bool use_accelerated, bool is_srgb); |
| 70 | 73 | ||
| 71 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, | 74 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, |
| 72 | VkExtent2D extent); | 75 | VkExtent2D extent); |
| @@ -79,10 +82,9 @@ private: | |||
| 79 | 82 | ||
| 80 | void CreateStaticResources(); | 83 | void CreateStaticResources(); |
| 81 | void CreateShaders(); | 84 | void CreateShaders(); |
| 82 | void CreateSemaphores(); | ||
| 83 | void CreateDescriptorPool(); | 85 | void CreateDescriptorPool(); |
| 84 | void CreateRenderPass(); | 86 | void CreateRenderPass(); |
| 85 | vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); | 87 | vk::RenderPass CreateRenderPassImpl(VkFormat format); |
| 86 | void CreateDescriptorSetLayout(); | 88 | void CreateDescriptorSetLayout(); |
| 87 | void CreateDescriptorSets(); | 89 | void CreateDescriptorSets(); |
| 88 | void CreatePipelineLayout(); | 90 | void CreatePipelineLayout(); |
| @@ -90,15 +92,14 @@ private: | |||
| 90 | void CreateSampler(); | 92 | void CreateSampler(); |
| 91 | 93 | ||
| 92 | void CreateDynamicResources(); | 94 | void CreateDynamicResources(); |
| 93 | void CreateFramebuffers(); | ||
| 94 | 95 | ||
| 95 | void RefreshResources(const Tegra::FramebufferConfig& framebuffer); | 96 | void RefreshResources(const Tegra::FramebufferConfig& framebuffer); |
| 96 | void ReleaseRawImages(); | 97 | void ReleaseRawImages(); |
| 97 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); | 98 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); |
| 98 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); | 99 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); |
| 99 | 100 | ||
| 100 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; | 101 | void UpdateDescriptorSet(VkImageView image_view, bool nn) const; |
| 101 | void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; | 102 | void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; |
| 102 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; | 103 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; |
| 103 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, | 104 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, |
| 104 | const Layout::FramebufferLayout layout) const; | 105 | const Layout::FramebufferLayout layout) const; |
| @@ -107,16 +108,17 @@ private: | |||
| 107 | void CreateFSR(); | 108 | void CreateFSR(); |
| 108 | 109 | ||
| 109 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | 110 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |
| 110 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, | 111 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; |
| 111 | std::size_t image_index) const; | ||
| 112 | 112 | ||
| 113 | Core::Memory::Memory& cpu_memory; | 113 | Core::Memory::Memory& cpu_memory; |
| 114 | Core::Frontend::EmuWindow& render_window; | 114 | Core::Frontend::EmuWindow& render_window; |
| 115 | const Device& device; | 115 | const Device& device; |
| 116 | MemoryAllocator& memory_allocator; | 116 | MemoryAllocator& memory_allocator; |
| 117 | Swapchain& swapchain; | 117 | Swapchain& swapchain; |
| 118 | PresentManager& present_manager; | ||
| 118 | Scheduler& scheduler; | 119 | Scheduler& scheduler; |
| 119 | std::size_t image_count; | 120 | std::size_t image_count; |
| 121 | std::size_t image_index{}; | ||
| 120 | const ScreenInfo& screen_info; | 122 | const ScreenInfo& screen_info; |
| 121 | 123 | ||
| 122 | vk::ShaderModule vertex_shader; | 124 | vk::ShaderModule vertex_shader; |
| @@ -135,7 +137,6 @@ private: | |||
| 135 | vk::Pipeline gaussian_pipeline; | 137 | vk::Pipeline gaussian_pipeline; |
| 136 | vk::Pipeline scaleforce_pipeline; | 138 | vk::Pipeline scaleforce_pipeline; |
| 137 | vk::RenderPass renderpass; | 139 | vk::RenderPass renderpass; |
| 138 | std::vector<vk::Framebuffer> framebuffers; | ||
| 139 | vk::DescriptorSets descriptor_sets; | 140 | vk::DescriptorSets descriptor_sets; |
| 140 | vk::Sampler nn_sampler; | 141 | vk::Sampler nn_sampler; |
| 141 | vk::Sampler sampler; | 142 | vk::Sampler sampler; |
| @@ -145,7 +146,6 @@ private: | |||
| 145 | 146 | ||
| 146 | std::vector<u64> resource_ticks; | 147 | std::vector<u64> resource_ticks; |
| 147 | 148 | ||
| 148 | std::vector<vk::Semaphore> semaphores; | ||
| 149 | std::vector<vk::Image> raw_images; | 149 | std::vector<vk::Image> raw_images; |
| 150 | std::vector<vk::ImageView> raw_image_views; | 150 | std::vector<vk::ImageView> raw_image_views; |
| 151 | std::vector<MemoryCommit> raw_buffer_commits; | 151 | std::vector<MemoryCommit> raw_buffer_commits; |
| @@ -164,6 +164,8 @@ private: | |||
| 164 | u32 raw_width = 0; | 164 | u32 raw_width = 0; |
| 165 | u32 raw_height = 0; | 165 | u32 raw_height = 0; |
| 166 | Service::android::PixelFormat pixel_format{}; | 166 | Service::android::PixelFormat pixel_format{}; |
| 167 | bool current_srgb; | ||
| 168 | VkFormat image_view_format; | ||
| 167 | 169 | ||
| 168 | std::unique_ptr<FSR> fsr; | 170 | std::unique_ptr<FSR> fsr; |
| 169 | std::unique_ptr<SMAA> smaa; | 171 | std::unique_ptr<SMAA> smaa; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 9cbcb3c8f..510602e8e 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | |||
| 314 | return staging_pool.Request(size, MemoryUsage::Upload); | 314 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 315 | } | 315 | } |
| 316 | 316 | ||
| 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { |
| 318 | return staging_pool.Request(size, MemoryUsage::Download); | 318 | return staging_pool.Request(size, MemoryUsage::Download, deferred); |
| 319 | } | ||
| 320 | |||
| 321 | void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { | ||
| 322 | staging_pool.FreeDeferred(ref); | ||
| 319 | } | 323 | } |
| 320 | 324 | ||
| 321 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { | 325 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 183b33632..879f1ed94 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -3,7 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "video_core/buffer_cache/buffer_cache.h" | 6 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 7 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 9 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -75,7 +76,9 @@ public: | |||
| 75 | 76 | ||
| 76 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 77 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 77 | 78 | ||
| 78 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 79 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); |
| 80 | |||
| 81 | void FreeDeferredStagingBuffer(StagingBufferRef& ref); | ||
| 79 | 82 | ||
| 80 | void PreCopyBarrier(); | 83 | void PreCopyBarrier(); |
| 81 | 84 | ||
| @@ -142,6 +145,8 @@ private: | |||
| 142 | struct BufferCacheParams { | 145 | struct BufferCacheParams { |
| 143 | using Runtime = Vulkan::BufferCacheRuntime; | 146 | using Runtime = Vulkan::BufferCacheRuntime; |
| 144 | using Buffer = Vulkan::Buffer; | 147 | using Buffer = Vulkan::Buffer; |
| 148 | using Async_Buffer = Vulkan::StagingBufferRef; | ||
| 149 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 145 | 150 | ||
| 146 | static constexpr bool IS_OPENGL = false; | 151 | static constexpr bool IS_OPENGL = false; |
| 147 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | 152 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |
| @@ -150,6 +155,7 @@ struct BufferCacheParams { | |||
| 150 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 155 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 151 | static constexpr bool USE_MEMORY_MAPS = true; | 156 | static constexpr bool USE_MEMORY_MAPS = true; |
| 152 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | 157 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; |
| 158 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||
| 153 | }; | 159 | }; |
| 154 | 160 | ||
| 155 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 161 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
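The deferred download path added above lets the buffer cache keep a download staging allocation alive past the current submission and release it explicitly once the asynchronous readback has been consumed, via DownloadStagingBuffer(size, true) followed by FreeDeferredStagingBuffer. A minimal self-contained sketch of that ownership pattern, using a plain byte buffer instead of the real StagingBufferRef/StagingBufferPool types, might look like this:

#include <cstddef>
#include <vector>

// Simplified stand-in for StagingBufferRef: the real type carries Vulkan
// handles and a mapped span rather than an owned byte vector.
struct StagingRef {
    std::vector<std::byte> data;
    bool deferred = false;
};

class StagingPool {
public:
    // With deferred = false the allocation is recycled automatically once the
    // frame's fence signals; with deferred = true the caller keeps it until
    // FreeDeferred is called.
    StagingRef Request(std::size_t size, bool deferred = false) {
        return StagingRef{std::vector<std::byte>(size), deferred};
    }

    void FreeDeferred(StagingRef& ref) {
        // The real pool re-registers the allocation for reuse; here we just drop it.
        ref.data.clear();
        ref.deferred = false;
    }
};

int main() {
    StagingPool pool;
    StagingRef readback = pool.Request(64 * 1024, /*deferred=*/true);
    // ... issue the GPU copy into 'readback', then consume the data later ...
    pool.FreeDeferred(readback);
}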
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp new file mode 100644 index 000000000..f9e271507 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>; | ||
| 9 | } | ||
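The new vk_buffer_cache_base.cpp exists solely to explicitly instantiate BufferCache&lt;Vulkan::BufferCacheParams&gt; in a single translation unit, so vk_buffer_cache.h can include the lighter buffer_cache_base.h and other files avoid recompiling the full template. A minimal illustration of the explicit-instantiation idiom follows; the Widget type and file names are hypothetical, and whether the yuzu headers also carry an extern template declaration is not shown in this diff.

// widget.h - interface only; member definitions live elsewhere.
template <class T>
struct Widget {
    T value;
    T Doubled() const;
};
extern template struct Widget<int>; // suppress implicit instantiation in client TUs

// widget_definitions.h - the heavy template definitions.
template <class T>
T Widget<T>::Doubled() const {
    return value + value;
}

// widget.cpp - the single explicit instantiation, analogous to vk_buffer_cache_base.cpp.
// (includes widget.h and widget_definitions.h)
template struct Widget<int>;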
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0214b103a..fad9e3832 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 6 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 7 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 7 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 8 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 9 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 9 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 10 | #include "video_core/vulkan_common/vulkan_device.h" | 11 | #include "video_core/vulkan_common/vulkan_device.h" |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 7fe2afcd9..145359d4e 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -40,7 +40,16 @@ private: | |||
| 40 | }; | 40 | }; |
| 41 | using Fence = std::shared_ptr<InnerFence>; | 41 | using Fence = std::shared_ptr<InnerFence>; |
| 42 | 42 | ||
| 43 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; | 43 | struct FenceManagerParams { |
| 44 | using FenceType = Fence; | ||
| 45 | using BufferCacheType = BufferCache; | ||
| 46 | using TextureCacheType = TextureCache; | ||
| 47 | using QueryCacheType = QueryCache; | ||
| 48 | |||
| 49 | static constexpr bool HAS_ASYNC_CHECK = true; | ||
| 50 | }; | ||
| 51 | |||
| 52 | using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>; | ||
| 44 | 53 | ||
| 45 | class FenceManager final : public GenericFenceManager { | 54 | class FenceManager final : public GenericFenceManager { |
| 46 | public: | 55 | public: |
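FenceManager is now parameterized by a single FenceManagerParams struct rather than a list of template arguments, which keeps the cache types and feature flags such as HAS_ASYNC_CHECK in one place. A self-contained sketch of this traits-struct pattern, with placeholder types standing in for the real caches:

#include <iostream>

struct ExampleFence {};
struct ExampleBufferCache {};

// Mirrors the shape of FenceManagerParams; the types here are placeholders.
struct ExampleParams {
    using FenceType = ExampleFence;
    using BufferCacheType = ExampleBufferCache;
    static constexpr bool HAS_ASYNC_CHECK = true;
};

template <typename Traits>
class GenericManager {
public:
    using Fence = typename Traits::FenceType;
    using BufferCache = typename Traits::BufferCacheType;

    void TickFrame() {
        if constexpr (Traits::HAS_ASYNC_CHECK) {
            std::cout << "fences are checked from a worker thread\n";
        } else {
            std::cout << "fences are checked inline\n";
        }
    }
};

int main() {
    GenericManager<ExampleParams>{}.TickFrame();
}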
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 985cc3203..a318d643e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -696,6 +696,13 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 696 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | 696 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 697 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, | 697 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 698 | PipelineStatistics* statistics, bool build_in_parallel) try { | 698 | PipelineStatistics* statistics, bool build_in_parallel) try { |
| 699 | // TODO: Remove this when Intel fixes their shader compiler. | ||
| 700 | // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159 | ||
| 701 | if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | ||
| 702 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); | ||
| 703 | return nullptr; | ||
| 704 | } | ||
| 705 | |||
| 699 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 706 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 700 | 707 | ||
| 701 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 708 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp new file mode 100644 index 000000000..c49583013 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp | |||
| @@ -0,0 +1,457 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/microprofile.h" | ||
| 5 | #include "common/settings.h" | ||
| 6 | #include "common/thread.h" | ||
| 7 | #include "video_core/renderer_vulkan/vk_present_manager.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_swapchain.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); | ||
| 15 | MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192)); | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) { | ||
| 20 | const VkFormatProperties props{physical_device.GetFormatProperties(format)}; | ||
| 21 | return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT); | ||
| 22 | } | ||
| 23 | |||
| 24 | [[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() { | ||
| 25 | return VkImageSubresourceLayers{ | ||
| 26 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 27 | .mipLevel = 0, | ||
| 28 | .baseArrayLayer = 0, | ||
| 29 | .layerCount = 1, | ||
| 30 | }; | ||
| 31 | } | ||
| 32 | |||
| 33 | [[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width, | ||
| 34 | s32 swapchain_height) { | ||
| 35 | return VkImageBlit{ | ||
| 36 | .srcSubresource = MakeImageSubresourceLayers(), | ||
| 37 | .srcOffsets = | ||
| 38 | { | ||
| 39 | { | ||
| 40 | .x = 0, | ||
| 41 | .y = 0, | ||
| 42 | .z = 0, | ||
| 43 | }, | ||
| 44 | { | ||
| 45 | .x = frame_width, | ||
| 46 | .y = frame_height, | ||
| 47 | .z = 1, | ||
| 48 | }, | ||
| 49 | }, | ||
| 50 | .dstSubresource = MakeImageSubresourceLayers(), | ||
| 51 | .dstOffsets = | ||
| 52 | { | ||
| 53 | { | ||
| 54 | .x = 0, | ||
| 55 | .y = 0, | ||
| 56 | .z = 0, | ||
| 57 | }, | ||
| 58 | { | ||
| 59 | .x = swapchain_width, | ||
| 60 | .y = swapchain_height, | ||
| 61 | .z = 1, | ||
| 62 | }, | ||
| 63 | }, | ||
| 64 | }; | ||
| 65 | } | ||
| 66 | |||
| 67 | [[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width, | ||
| 68 | u32 swapchain_height) { | ||
| 69 | return VkImageCopy{ | ||
| 70 | .srcSubresource = MakeImageSubresourceLayers(), | ||
| 71 | .srcOffset = | ||
| 72 | { | ||
| 73 | .x = 0, | ||
| 74 | .y = 0, | ||
| 75 | .z = 0, | ||
| 76 | }, | ||
| 77 | .dstSubresource = MakeImageSubresourceLayers(), | ||
| 78 | .dstOffset = | ||
| 79 | { | ||
| 80 | .x = 0, | ||
| 81 | .y = 0, | ||
| 82 | .z = 0, | ||
| 83 | }, | ||
| 84 | .extent = | ||
| 85 | { | ||
| 86 | .width = std::min(frame_width, swapchain_width), | ||
| 87 | .height = std::min(frame_height, swapchain_height), | ||
| 88 | .depth = 1, | ||
| 89 | }, | ||
| 90 | }; | ||
| 91 | } | ||
| 92 | |||
| 93 | } // Anonymous namespace | ||
| 94 | |||
| 95 | PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_, | ||
| 96 | MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||
| 97 | Swapchain& swapchain_) | ||
| 98 | : render_window{render_window_}, device{device_}, | ||
| 99 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_}, | ||
| 100 | blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, | ||
| 101 | use_present_thread{Settings::values.async_presentation.GetValue()}, | ||
| 102 | image_count{swapchain.GetImageCount()} { | ||
| 103 | |||
| 104 | auto& dld = device.GetLogical(); | ||
| 105 | cmdpool = dld.CreateCommandPool({ | ||
| 106 | .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, | ||
| 107 | .pNext = nullptr, | ||
| 108 | .flags = | ||
| 109 | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, | ||
| 110 | .queueFamilyIndex = device.GetGraphicsFamily(), | ||
| 111 | }); | ||
| 112 | auto cmdbuffers = cmdpool.Allocate(image_count); | ||
| 113 | |||
| 114 | frames.resize(image_count); | ||
| 115 | for (u32 i = 0; i < frames.size(); i++) { | ||
| 116 | Frame& frame = frames[i]; | ||
| 117 | frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()}; | ||
| 118 | frame.render_ready = dld.CreateSemaphore({ | ||
| 119 | .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, | ||
| 120 | .pNext = nullptr, | ||
| 121 | .flags = 0, | ||
| 122 | }); | ||
| 123 | frame.present_done = dld.CreateFence({ | ||
| 124 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, | ||
| 125 | .pNext = nullptr, | ||
| 126 | .flags = VK_FENCE_CREATE_SIGNALED_BIT, | ||
| 127 | }); | ||
| 128 | free_queue.push(&frame); | ||
| 129 | } | ||
| 130 | |||
| 131 | if (use_present_thread) { | ||
| 132 | present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); }); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | PresentManager::~PresentManager() = default; | ||
| 137 | |||
| 138 | Frame* PresentManager::GetRenderFrame() { | ||
| 139 | MICROPROFILE_SCOPE(Vulkan_WaitPresent); | ||
| 140 | |||
| 141 | // Wait for free presentation frames | ||
| 142 | std::unique_lock lock{free_mutex}; | ||
| 143 | free_cv.wait(lock, [this] { return !free_queue.empty(); }); | ||
| 144 | |||
| 145 | // Take the frame from the queue | ||
| 146 | Frame* frame = free_queue.front(); | ||
| 147 | free_queue.pop(); | ||
| 148 | |||
| 149 | // Wait for the presentation to be finished so all frame resources are free | ||
| 150 | frame->present_done.Wait(); | ||
| 151 | frame->present_done.Reset(); | ||
| 152 | |||
| 153 | return frame; | ||
| 154 | } | ||
| 155 | |||
| 156 | void PresentManager::Present(Frame* frame) { | ||
| 157 | if (!use_present_thread) { | ||
| 158 | scheduler.WaitWorker(); | ||
| 159 | CopyToSwapchain(frame); | ||
| 160 | free_queue.push(frame); | ||
| 161 | return; | ||
| 162 | } | ||
| 163 | |||
| 164 | scheduler.Record([this, frame](vk::CommandBuffer) { | ||
| 165 | std::unique_lock lock{queue_mutex}; | ||
| 166 | present_queue.push(frame); | ||
| 167 | frame_cv.notify_one(); | ||
| 168 | }); | ||
| 169 | } | ||
| 170 | |||
| 171 | void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, | ||
| 172 | VkFormat image_view_format, VkRenderPass rd) { | ||
| 173 | auto& dld = device.GetLogical(); | ||
| 174 | |||
| 175 | frame->width = width; | ||
| 176 | frame->height = height; | ||
| 177 | frame->is_srgb = is_srgb; | ||
| 178 | |||
| 179 | frame->image = dld.CreateImage({ | ||
| 180 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | ||
| 181 | .pNext = nullptr, | ||
| 182 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, | ||
| 183 | .imageType = VK_IMAGE_TYPE_2D, | ||
| 184 | .format = swapchain.GetImageFormat(), | ||
| 185 | .extent = | ||
| 186 | { | ||
| 187 | .width = width, | ||
| 188 | .height = height, | ||
| 189 | .depth = 1, | ||
| 190 | }, | ||
| 191 | .mipLevels = 1, | ||
| 192 | .arrayLayers = 1, | ||
| 193 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 194 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 195 | .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, | ||
| 196 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 197 | .queueFamilyIndexCount = 0, | ||
| 198 | .pQueueFamilyIndices = nullptr, | ||
| 199 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 200 | }); | ||
| 201 | |||
| 202 | frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal); | ||
| 203 | |||
| 204 | frame->image_view = dld.CreateImageView({ | ||
| 205 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 206 | .pNext = nullptr, | ||
| 207 | .flags = 0, | ||
| 208 | .image = *frame->image, | ||
| 209 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 210 | .format = image_view_format, | ||
| 211 | .components = | ||
| 212 | { | ||
| 213 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 214 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 215 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 216 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 217 | }, | ||
| 218 | .subresourceRange = | ||
| 219 | { | ||
| 220 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 221 | .baseMipLevel = 0, | ||
| 222 | .levelCount = 1, | ||
| 223 | .baseArrayLayer = 0, | ||
| 224 | .layerCount = 1, | ||
| 225 | }, | ||
| 226 | }); | ||
| 227 | |||
| 228 | const VkImageView image_view{*frame->image_view}; | ||
| 229 | frame->framebuffer = dld.CreateFramebuffer({ | ||
| 230 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 231 | .pNext = nullptr, | ||
| 232 | .flags = 0, | ||
| 233 | .renderPass = rd, | ||
| 234 | .attachmentCount = 1, | ||
| 235 | .pAttachments = &image_view, | ||
| 236 | .width = width, | ||
| 237 | .height = height, | ||
| 238 | .layers = 1, | ||
| 239 | }); | ||
| 240 | } | ||
| 241 | |||
| 242 | void PresentManager::WaitPresent() { | ||
| 243 | if (!use_present_thread) { | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | |||
| 247 | // Wait for the present queue to be empty | ||
| 248 | { | ||
| 249 | std::unique_lock queue_lock{queue_mutex}; | ||
| 250 | frame_cv.wait(queue_lock, [this] { return present_queue.empty(); }); | ||
| 251 | } | ||
| 252 | |||
| 253 | // The above condition will be satisfied when the last frame is taken from the queue. | ||
| 254 | // To ensure that frame has been presented as well, take hold of the swapchain | ||
| 255 | // mutex. | ||
| 256 | std::scoped_lock swapchain_lock{swapchain_mutex}; | ||
| 257 | } | ||
| 258 | |||
| 259 | void PresentManager::PresentThread(std::stop_token token) { | ||
| 260 | Common::SetCurrentThreadName("VulkanPresent"); | ||
| 261 | while (!token.stop_requested()) { | ||
| 262 | std::unique_lock lock{queue_mutex}; | ||
| 263 | |||
| 264 | // Wait for presentation frames | ||
| 265 | Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); }); | ||
| 266 | if (token.stop_requested()) { | ||
| 267 | return; | ||
| 268 | } | ||
| 269 | |||
| 270 | // Take the frame and notify anyone waiting | ||
| 271 | Frame* frame = present_queue.front(); | ||
| 272 | present_queue.pop(); | ||
| 273 | frame_cv.notify_one(); | ||
| 274 | |||
| 275 | // By exchanging the lock ownership we take the swapchain lock | ||
| 276 | // before the queue lock goes out of scope. This way the swapchain | ||
| 277 | // lock in WaitPresent is guaranteed to occur after here. | ||
| 278 | std::exchange(lock, std::unique_lock{swapchain_mutex}); | ||
| 279 | |||
| 280 | CopyToSwapchain(frame); | ||
| 281 | |||
| 282 | // Free the frame for reuse | ||
| 283 | std::scoped_lock fl{free_mutex}; | ||
| 284 | free_queue.push(frame); | ||
| 285 | free_cv.notify_one(); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | void PresentManager::CopyToSwapchain(Frame* frame) { | ||
| 290 | MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); | ||
| 291 | |||
| 292 | const auto recreate_swapchain = [&] { | ||
| 293 | swapchain.Create(frame->width, frame->height, frame->is_srgb); | ||
| 294 | image_count = swapchain.GetImageCount(); | ||
| 295 | }; | ||
| 296 | |||
| 297 | // If the size or colorspace of the incoming frames has changed, recreate the swapchain | ||
| 298 | // to account for that. | ||
| 299 | const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb); | ||
| 300 | const bool size_changed = | ||
| 301 | swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; | ||
| 302 | if (srgb_changed || size_changed) { | ||
| 303 | recreate_swapchain(); | ||
| 304 | } | ||
| 305 | |||
| 306 | while (swapchain.AcquireNextImage()) { | ||
| 307 | recreate_swapchain(); | ||
| 308 | } | ||
| 309 | |||
| 310 | const vk::CommandBuffer cmdbuf{frame->cmdbuf}; | ||
| 311 | cmdbuf.Begin({ | ||
| 312 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 313 | .pNext = nullptr, | ||
| 314 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 315 | .pInheritanceInfo = nullptr, | ||
| 316 | }); | ||
| 317 | |||
| 318 | const VkImage image{swapchain.CurrentImage()}; | ||
| 319 | const VkExtent2D extent = swapchain.GetExtent(); | ||
| 320 | const std::array pre_barriers{ | ||
| 321 | VkImageMemoryBarrier{ | ||
| 322 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 323 | .pNext = nullptr, | ||
| 324 | .srcAccessMask = 0, | ||
| 325 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 326 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 327 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 328 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 329 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 330 | .image = image, | ||
| 331 | .subresourceRange{ | ||
| 332 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 333 | .baseMipLevel = 0, | ||
| 334 | .levelCount = 1, | ||
| 335 | .baseArrayLayer = 0, | ||
| 336 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 337 | }, | ||
| 338 | }, | ||
| 339 | VkImageMemoryBarrier{ | ||
| 340 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 341 | .pNext = nullptr, | ||
| 342 | .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, | ||
| 343 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 344 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 345 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 346 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 347 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 348 | .image = *frame->image, | ||
| 349 | .subresourceRange{ | ||
| 350 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 351 | .baseMipLevel = 0, | ||
| 352 | .levelCount = 1, | ||
| 353 | .baseArrayLayer = 0, | ||
| 354 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 355 | }, | ||
| 356 | }, | ||
| 357 | }; | ||
| 358 | const std::array post_barriers{ | ||
| 359 | VkImageMemoryBarrier{ | ||
| 360 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 361 | .pNext = nullptr, | ||
| 362 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 363 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, | ||
| 364 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 365 | .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, | ||
| 366 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 367 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 368 | .image = image, | ||
| 369 | .subresourceRange{ | ||
| 370 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 371 | .baseMipLevel = 0, | ||
| 372 | .levelCount = 1, | ||
| 373 | .baseArrayLayer = 0, | ||
| 374 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 375 | }, | ||
| 376 | }, | ||
| 377 | VkImageMemoryBarrier{ | ||
| 378 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 379 | .pNext = nullptr, | ||
| 380 | .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 381 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 382 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 383 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 384 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 385 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 386 | .image = *frame->image, | ||
| 387 | .subresourceRange{ | ||
| 388 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 389 | .baseMipLevel = 0, | ||
| 390 | .levelCount = 1, | ||
| 391 | .baseArrayLayer = 0, | ||
| 392 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 393 | }, | ||
| 394 | }, | ||
| 395 | }; | ||
| 396 | |||
| 397 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {}, | ||
| 398 | {}, {}, pre_barriers); | ||
| 399 | |||
| 400 | if (blit_supported) { | ||
| 401 | cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, | ||
| 402 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 403 | MakeImageBlit(frame->width, frame->height, extent.width, extent.height), | ||
| 404 | VK_FILTER_LINEAR); | ||
| 405 | } else { | ||
| 406 | cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, | ||
| 407 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 408 | MakeImageCopy(frame->width, frame->height, extent.width, extent.height)); | ||
| 409 | } | ||
| 410 | |||
| 411 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {}, | ||
| 412 | {}, {}, post_barriers); | ||
| 413 | |||
| 414 | cmdbuf.End(); | ||
| 415 | |||
| 416 | const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); | ||
| 417 | const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore(); | ||
| 418 | const std::array wait_semaphores = {present_semaphore, *frame->render_ready}; | ||
| 419 | |||
| 420 | static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ | ||
| 421 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||
| 422 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 423 | }; | ||
| 424 | |||
| 425 | const VkSubmitInfo submit_info{ | ||
| 426 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | ||
| 427 | .pNext = nullptr, | ||
| 428 | .waitSemaphoreCount = 2U, | ||
| 429 | .pWaitSemaphores = wait_semaphores.data(), | ||
| 430 | .pWaitDstStageMask = wait_stage_masks.data(), | ||
| 431 | .commandBufferCount = 1, | ||
| 432 | .pCommandBuffers = cmdbuf.address(), | ||
| 433 | .signalSemaphoreCount = 1U, | ||
| 434 | .pSignalSemaphores = &render_semaphore, | ||
| 435 | }; | ||
| 436 | |||
| 437 | // Submit the image copy/blit to the swapchain | ||
| 438 | { | ||
| 439 | std::scoped_lock lock{scheduler.submit_mutex}; | ||
| 440 | switch (const VkResult result = | ||
| 441 | device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) { | ||
| 442 | case VK_SUCCESS: | ||
| 443 | break; | ||
| 444 | case VK_ERROR_DEVICE_LOST: | ||
| 445 | device.ReportLoss(); | ||
| 446 | [[fallthrough]]; | ||
| 447 | default: | ||
| 448 | vk::Check(result); | ||
| 449 | break; | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | // Present | ||
| 454 | swapchain.Present(render_semaphore); | ||
| 455 | } | ||
| 456 | |||
| 457 | } // namespace Vulkan | ||
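Present, PresentThread and WaitPresent above form a bounded producer-consumer: Present enqueues a finished frame under queue_mutex, the worker pops it, blits it to the swapchain while holding swapchain_mutex, and recycles it through free_queue. The standard-library-only model below shows just the queue hand-off and stop-token handling; integers stand in for Frame*, and the swapchain copy, free queue, and scheduler recording are elided.

#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>

std::mutex queue_mutex;
std::condition_variable_any frame_cv;
std::queue<int> present_queue; // stands in for std::queue<Frame*>

void Present(int frame) {
    std::unique_lock lock{queue_mutex};
    present_queue.push(frame);
    frame_cv.notify_one();
}

void PresentThread(std::stop_token token) {
    while (!token.stop_requested()) {
        std::unique_lock lock{queue_mutex};
        // Equivalent of Common::CondvarWait: wake on new work or on a stop request.
        if (!frame_cv.wait(lock, token, [] { return !present_queue.empty(); })) {
            return;
        }
        present_queue.pop();
        frame_cv.notify_one(); // lets WaitPresent observe the queue draining
        // ... CopyToSwapchain(frame) and pushing onto the free queue go here ...
    }
}

void WaitPresent() {
    std::unique_lock lock{queue_mutex};
    frame_cv.wait(lock, [] { return present_queue.empty(); });
}

int main() {
    std::jthread presenter{PresentThread};
    Present(1);
    Present(2);
    WaitPresent();
}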
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h new file mode 100644 index 000000000..420a775e2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <condition_variable> | ||
| 7 | #include <mutex> | ||
| 8 | #include <queue> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/polyfill_thread.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | |||
| 15 | namespace Core::Frontend { | ||
| 16 | class EmuWindow; | ||
| 17 | } // namespace Core::Frontend | ||
| 18 | |||
| 19 | namespace Vulkan { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | class Scheduler; | ||
| 23 | class Swapchain; | ||
| 24 | |||
| 25 | struct Frame { | ||
| 26 | u32 width; | ||
| 27 | u32 height; | ||
| 28 | bool is_srgb; | ||
| 29 | vk::Image image; | ||
| 30 | vk::ImageView image_view; | ||
| 31 | vk::Framebuffer framebuffer; | ||
| 32 | MemoryCommit image_commit; | ||
| 33 | vk::CommandBuffer cmdbuf; | ||
| 34 | vk::Semaphore render_ready; | ||
| 35 | vk::Fence present_done; | ||
| 36 | }; | ||
| 37 | |||
| 38 | class PresentManager { | ||
| 39 | public: | ||
| 40 | PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device, | ||
| 41 | MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain); | ||
| 42 | ~PresentManager(); | ||
| 43 | |||
| 44 | /// Returns a free presentation frame, blocking until one is available | ||
| 45 | Frame* GetRenderFrame(); | ||
| 46 | |||
| 47 | /// Pushes a frame for presentation | ||
| 48 | void Present(Frame* frame); | ||
| 49 | |||
| 50 | /// Recreates the present frame to match the provided parameters | ||
| 51 | void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, | ||
| 52 | VkFormat image_view_format, VkRenderPass rd); | ||
| 53 | |||
| 54 | /// Waits for the present thread to finish presenting all queued frames. | ||
| 55 | void WaitPresent(); | ||
| 56 | |||
| 57 | private: | ||
| 58 | void PresentThread(std::stop_token token); | ||
| 59 | |||
| 60 | void CopyToSwapchain(Frame* frame); | ||
| 61 | |||
| 62 | private: | ||
| 63 | Core::Frontend::EmuWindow& render_window; | ||
| 64 | const Device& device; | ||
| 65 | MemoryAllocator& memory_allocator; | ||
| 66 | Scheduler& scheduler; | ||
| 67 | Swapchain& swapchain; | ||
| 68 | vk::CommandPool cmdpool; | ||
| 69 | std::vector<Frame> frames; | ||
| 70 | std::queue<Frame*> present_queue; | ||
| 71 | std::queue<Frame*> free_queue; | ||
| 72 | std::condition_variable_any frame_cv; | ||
| 73 | std::condition_variable free_cv; | ||
| 74 | std::mutex swapchain_mutex; | ||
| 75 | std::mutex queue_mutex; | ||
| 76 | std::mutex free_mutex; | ||
| 77 | std::jthread present_thread; | ||
| 78 | bool blit_supported; | ||
| 79 | bool use_present_thread; | ||
| 80 | std::size_t image_count; | ||
| 81 | }; | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
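The header above boils down to a simple frame lifecycle: acquire a free Frame, let BlitScreen render into frame->framebuffer, then hand the frame back for presentation. The stub below only mimics the method names from this header to show that call order; the real constructor arguments, the Vulkan members of Frame, and the rendering step itself are omitted.

#include <cstdint>
#include <iostream>

struct Frame {
    std::uint32_t width = 0;
    std::uint32_t height = 0;
    bool is_srgb = false;
};

// Stub with the same call shape as Vulkan::PresentManager; no Vulkan objects involved.
class PresentManagerStub {
public:
    Frame* GetRenderFrame() { return &frame; } // the real call blocks until a frame is free
    void Present(Frame*) { std::cout << "frame queued for presentation\n"; }
    void WaitPresent() { std::cout << "all queued frames presented\n"; }

private:
    Frame frame;
};

int main() {
    PresentManagerStub present_manager;
    Frame* frame = present_manager.GetRenderFrame();
    // ... BlitScreen::DrawToSwapchain(frame, ...) would record into frame->framebuffer here ...
    present_manager.Present(frame);
    present_manager.WaitPresent();
}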
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 929c8ece6..d67490449 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { | |||
| 66 | } | 66 | } |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, | 69 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, |
| 70 | Core::Memory::Memory& cpu_memory_, const Device& device_, | ||
| 70 | Scheduler& scheduler_) | 71 | Scheduler& scheduler_) |
| 71 | : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_}, | 72 | : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_}, |
| 72 | query_pools{ | 73 | query_pools{ |
| 73 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, | 74 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, |
| 74 | } {} | 75 | } {} |
| @@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend | |||
| 98 | query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { | 99 | query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { |
| 99 | const vk::Device* logical = &cache.GetDevice().GetLogical(); | 100 | const vk::Device* logical = &cache.GetDevice().GetLogical(); |
| 100 | cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { | 101 | cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { |
| 102 | const bool use_precise = Settings::IsGPULevelHigh(); | ||
| 101 | logical->ResetQueryPool(query.first, query.second, 1); | 103 | logical->ResetQueryPool(query.first, query.second, 1); |
| 102 | cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); | 104 | cmdbuf.BeginQuery(query.first, query.second, |
| 105 | use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); | ||
| 103 | }); | 106 | }); |
| 104 | } | 107 | } |
| 105 | 108 | ||
| @@ -112,8 +115,10 @@ void HostCounter::EndQuery() { | |||
| 112 | [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); | 115 | [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); |
| 113 | } | 116 | } |
| 114 | 117 | ||
| 115 | u64 HostCounter::BlockingQuery() const { | 118 | u64 HostCounter::BlockingQuery(bool async) const { |
| 116 | cache.GetScheduler().Wait(tick); | 119 | if (!async) { |
| 120 | cache.GetScheduler().Wait(tick); | ||
| 121 | } | ||
| 117 | u64 data; | 122 | u64 data; |
| 118 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( | 123 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( |
| 119 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 124 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 26762ee09..c1b9552eb 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -52,7 +52,8 @@ private: | |||
| 52 | class QueryCache final | 52 | class QueryCache final |
| 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 54 | public: | 54 | public: |
| 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, | 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, |
| 56 | Core::Memory::Memory& cpu_memory_, const Device& device_, | ||
| 56 | Scheduler& scheduler_); | 57 | Scheduler& scheduler_); |
| 57 | ~QueryCache(); | 58 | ~QueryCache(); |
| 58 | 59 | ||
| @@ -83,7 +84,7 @@ public: | |||
| 83 | void EndQuery(); | 84 | void EndQuery(); |
| 84 | 85 | ||
| 85 | private: | 86 | private: |
| 86 | u64 BlockingQuery() const override; | 87 | u64 BlockingQuery(bool async = false) const override; |
| 87 | 88 | ||
| 88 | QueryCache& cache; | 89 | QueryCache& cache; |
| 89 | const VideoCore::QueryType type; | 90 | const VideoCore::QueryType type; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 673ab478e..d1489fc95 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 172 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 172 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 173 | pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, | 173 | pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 174 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | 174 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 175 | query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler), | 175 | query_cache{*this, cpu_memory_, device, scheduler}, |
| 176 | accelerate_dma(buffer_cache, texture_cache, scheduler), | ||
| 176 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 177 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 177 | wfi_event(device.GetLogical().CreateEvent()) { | 178 | wfi_event(device.GetLogical().CreateEvent()) { |
| 178 | scheduler.SetQueryCache(query_cache); | 179 | scheduler.SetQueryCache(query_cache); |
| @@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() { | |||
| 675 | const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; | 676 | const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; |
| 676 | Maxwell::ReportSemaphore::Compare cmp; | 677 | Maxwell::ReportSemaphore::Compare cmp; |
| 677 | if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), | 678 | if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), |
| 678 | VideoCommon::CacheType::BufferCache)) { | 679 | VideoCommon::CacheType::BufferCache | |
| 680 | VideoCommon::CacheType::QueryCache)) { | ||
| 679 | return true; | 681 | return true; |
| 680 | } | 682 | } |
| 681 | return false; | 683 | return false; |
| @@ -781,8 +783,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 781 | } | 783 | } |
| 782 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); | 784 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); |
| 783 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | 785 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; |
| 784 | const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing | 786 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 785 | : VideoCommon::ObtainBufferOperation::MarkAsWritten; | ||
| 786 | const auto [buffer, offset] = | 787 | const auto [buffer, offset] = |
| 787 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); | 788 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); |
| 788 | 789 | ||
| @@ -793,7 +794,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 793 | if constexpr (IS_IMAGE_UPLOAD) { | 794 | if constexpr (IS_IMAGE_UPLOAD) { |
| 794 | image->UploadMemory(buffer->Handle(), offset, copy_span); | 795 | image->UploadMemory(buffer->Handle(), offset, copy_span); |
| 795 | } else { | 796 | } else { |
| 796 | image->DownloadMemory(buffer->Handle(), offset, copy_span); | 797 | texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, |
| 798 | buffer_operand.address, buffer_size); | ||
| 797 | } | 799 | } |
| 798 | return true; | 800 | return true; |
| 799 | } | 801 | } |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 057e16967..80455ec08 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_) | |||
| 46 | 46 | ||
| 47 | Scheduler::~Scheduler() = default; | 47 | Scheduler::~Scheduler() = default; |
| 48 | 48 | ||
| 49 | void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 49 | u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| 50 | // When flushing, we only send data to the worker thread; no waiting is necessary. | 50 | // When flushing, we only send data to the worker thread; no waiting is necessary. |
| 51 | SubmitExecution(signal_semaphore, wait_semaphore); | 51 | const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore); |
| 52 | AllocateNewContext(); | 52 | AllocateNewContext(); |
| 53 | return signal_value; | ||
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 56 | void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| @@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() { | |||
| 205 | }); | 206 | }); |
| 206 | } | 207 | } |
| 207 | 208 | ||
| 208 | void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 209 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| 209 | EndPendingOperations(); | 210 | EndPendingOperations(); |
| 210 | InvalidateState(); | 211 | InvalidateState(); |
| 211 | 212 | ||
| @@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s | |||
| 217 | on_submit(); | 218 | on_submit(); |
| 218 | } | 219 | } |
| 219 | 220 | ||
| 221 | std::scoped_lock lock{submit_mutex}; | ||
| 220 | switch (const VkResult result = master_semaphore->SubmitQueue( | 222 | switch (const VkResult result = master_semaphore->SubmitQueue( |
| 221 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { | 223 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { |
| 222 | case VK_SUCCESS: | 224 | case VK_SUCCESS: |
| @@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s | |||
| 231 | }); | 233 | }); |
| 232 | chunk->MarkSubmit(); | 234 | chunk->MarkSubmit(); |
| 233 | DispatchWork(); | 235 | DispatchWork(); |
| 236 | return signal_value; | ||
| 234 | } | 237 | } |
| 235 | 238 | ||
| 236 | void Scheduler::AllocateNewContext() { | 239 | void Scheduler::AllocateNewContext() { |
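Scheduler::Flush now returns the timeline value it signals, so a caller can wait for that specific submission later instead of a full Finish. A sketch of waiting on a timeline semaphore value with the core Vulkan 1.2 API (a hypothetical helper, not the scheduler's actual code):

```cpp
#include <cstdint>
#include <limits>
#include <vulkan/vulkan.h>

// Block the host until `timeline` reaches `value`, e.g. the value returned by a
// Flush()-style submit. Requires Vulkan 1.2 / VK_KHR_timeline_semaphore.
void WaitForSubmission(VkDevice device, VkSemaphore timeline, uint64_t value) {
    const VkSemaphoreWaitInfo wait_info{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
        .pNext = nullptr,
        .flags = 0,
        .semaphoreCount = 1,
        .pSemaphores = &timeline,
        .pValues = &value,
    };
    vkWaitSemaphores(device, &wait_info, std::numeric_limits<uint64_t>::max());
}
```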
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 8d75ce987..475c682eb 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -34,7 +34,7 @@ public: | |||
| 34 | ~Scheduler(); | 34 | ~Scheduler(); |
| 35 | 35 | ||
| 36 | /// Sends the current execution context to the GPU. | 36 | /// Sends the current execution context to the GPU. |
| 37 | void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); | 37 | u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); |
| 38 | 38 | ||
| 39 | /// Sends the current execution context to the GPU and waits for it to complete. | 39 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 40 | void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); | 40 | void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); |
| @@ -106,6 +106,8 @@ public: | |||
| 106 | return *master_semaphore; | 106 | return *master_semaphore; |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | std::mutex submit_mutex; | ||
| 110 | |||
| 109 | private: | 111 | private: |
| 110 | class Command { | 112 | class Command { |
| 111 | public: | 113 | public: |
| @@ -201,7 +203,7 @@ private: | |||
| 201 | 203 | ||
| 202 | void AllocateWorkerCommandBuffer(); | 204 | void AllocateWorkerCommandBuffer(); |
| 203 | 205 | ||
| 204 | void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); | 206 | u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); |
| 205 | 207 | ||
| 206 | void AllocateNewContext(); | 208 | void AllocateNewContext(); |
| 207 | 209 | ||
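submit_mutex is made public so queue submission in the scheduler and presentation in the swapchain can serialize access to the same VkQueue, which Vulkan requires to be externally synchronized. A minimal sketch of that pattern with illustrative names:

```cpp
#include <mutex>
#include <vulkan/vulkan.h>

std::mutex submit_mutex; // shared between the submit and present paths

VkResult SubmitLocked(VkQueue queue, const VkSubmitInfo& info, VkFence fence) {
    std::scoped_lock lock{submit_mutex};
    return vkQueueSubmit(queue, 1, &info, fence);
}

VkResult PresentLocked(VkQueue queue, const VkPresentInfoKHR& info) {
    std::scoped_lock lock{submit_mutex}; // same mutex: both calls touch the queue
    return vkQueuePresentKHR(queue, &info);
}
```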
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 85fdce6e5..1e80ce463 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 14 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 15 | #include "video_core/vulkan_common/vulkan_device.h" | 15 | #include "video_core/vulkan_common/vulkan_device.h" |
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 17 | #include "vulkan/vulkan_core.h" | ||
| 17 | 18 | ||
| 18 | namespace Vulkan { | 19 | namespace Vulkan { |
| 19 | 20 | ||
| @@ -33,23 +34,47 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) | |||
| 33 | return found != formats.end() ? *found : formats[0]; | 34 | return found != formats.end() ? *found : formats[0]; |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { | 37 | static constexpr VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, |
| 37 | // Mailbox (triple buffering) doesn't lock the application like fifo (vsync), | 38 | bool has_fifo_relaxed) { |
| 38 | // prefer it if vsync option is not selected | 39 | // Mailbox doesn't lock the application like FIFO (vsync) |
| 39 | const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); | 40 | // FIFO present mode locks the framerate to the monitor's refresh rate |
| 40 | if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless && | 41 | Settings::VSyncMode setting = [has_imm, has_mailbox]() { |
| 41 | found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) { | 42 | // Choose Mailbox or Immediate if unlocked and those modes are supported |
| 42 | return VK_PRESENT_MODE_MAILBOX_KHR; | 43 | const auto mode = Settings::values.vsync_mode.GetValue(); |
| 43 | } | 44 | if (Settings::values.use_speed_limit.GetValue()) { |
| 44 | if (!Settings::values.use_speed_limit.GetValue()) { | 45 | return mode; |
| 45 | // FIFO present mode locks the framerate to the monitor's refresh rate, | 46 | } |
| 46 | // Find an alternative to surpass this limitation if FPS is unlocked. | 47 | switch (mode) { |
| 47 | const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR); | 48 | case Settings::VSyncMode::FIFO: |
| 48 | if (found_imm != modes.end()) { | 49 | case Settings::VSyncMode::FIFORelaxed: |
| 49 | return VK_PRESENT_MODE_IMMEDIATE_KHR; | 50 | if (has_mailbox) { |
| 51 | return Settings::VSyncMode::Mailbox; | ||
| 52 | } else if (has_imm) { | ||
| 53 | return Settings::VSyncMode::Immediate; | ||
| 54 | } | ||
| 55 | [[fallthrough]]; | ||
| 56 | default: | ||
| 57 | return mode; | ||
| 50 | } | 58 | } |
| 59 | }(); | ||
| 60 | if ((setting == Settings::VSyncMode::Mailbox && !has_mailbox) || | ||
| 61 | (setting == Settings::VSyncMode::Immediate && !has_imm) || | ||
| 62 | (setting == Settings::VSyncMode::FIFORelaxed && !has_fifo_relaxed)) { | ||
| 63 | setting = Settings::VSyncMode::FIFO; | ||
| 64 | } | ||
| 65 | |||
| 66 | switch (setting) { | ||
| 67 | case Settings::VSyncMode::Immediate: | ||
| 68 | return VK_PRESENT_MODE_IMMEDIATE_KHR; | ||
| 69 | case Settings::VSyncMode::Mailbox: | ||
| 70 | return VK_PRESENT_MODE_MAILBOX_KHR; | ||
| 71 | case Settings::VSyncMode::FIFO: | ||
| 72 | return VK_PRESENT_MODE_FIFO_KHR; | ||
| 73 | case Settings::VSyncMode::FIFORelaxed: | ||
| 74 | return VK_PRESENT_MODE_FIFO_RELAXED_KHR; | ||
| 75 | default: | ||
| 76 | return VK_PRESENT_MODE_FIFO_KHR; | ||
| 51 | } | 77 | } |
| 52 | return VK_PRESENT_MODE_FIFO_KHR; | ||
| 53 | } | 78 | } |
| 54 | 79 | ||
| 55 | VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { | 80 | VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { |
| @@ -65,6 +90,18 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi | |||
| 65 | return extent; | 90 | return extent; |
| 66 | } | 91 | } |
| 67 | 92 | ||
| 93 | VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) { | ||
| 94 | if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) { | ||
| 95 | return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; | ||
| 96 | } else if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { | ||
| 97 | return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR; | ||
| 98 | } else { | ||
| 99 | LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}", | ||
| 100 | capabilities.supportedCompositeAlpha); | ||
| 101 | return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 68 | } // Anonymous namespace | 105 | } // Anonymous namespace |
| 69 | 106 | ||
| 70 | Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, | 107 | Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, |
| @@ -87,18 +124,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) { | |||
| 87 | return; | 124 | return; |
| 88 | } | 125 | } |
| 89 | 126 | ||
| 90 | device.GetLogical().WaitIdle(); | ||
| 91 | Destroy(); | 127 | Destroy(); |
| 92 | 128 | ||
| 93 | CreateSwapchain(capabilities, srgb); | 129 | CreateSwapchain(capabilities, srgb); |
| 94 | CreateSemaphores(); | 130 | CreateSemaphores(); |
| 95 | CreateImageViews(); | ||
| 96 | 131 | ||
| 97 | resource_ticks.clear(); | 132 | resource_ticks.clear(); |
| 98 | resource_ticks.resize(image_count); | 133 | resource_ticks.resize(image_count); |
| 99 | } | 134 | } |
| 100 | 135 | ||
| 101 | void Swapchain::AcquireNextImage() { | 136 | bool Swapchain::AcquireNextImage() { |
| 102 | const VkResult result = device.GetLogical().AcquireNextImageKHR( | 137 | const VkResult result = device.GetLogical().AcquireNextImageKHR( |
| 103 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], | 138 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], |
| 104 | VK_NULL_HANDLE, &image_index); | 139 | VK_NULL_HANDLE, &image_index); |
| @@ -115,8 +150,11 @@ void Swapchain::AcquireNextImage() { | |||
| 115 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); | 150 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); |
| 116 | break; | 151 | break; |
| 117 | } | 152 | } |
| 153 | |||
| 118 | scheduler.Wait(resource_ticks[image_index]); | 154 | scheduler.Wait(resource_ticks[image_index]); |
| 119 | resource_ticks[image_index] = scheduler.CurrentTick(); | 155 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 156 | |||
| 157 | return is_suboptimal || is_outdated; | ||
| 120 | } | 158 | } |
| 121 | 159 | ||
| 122 | void Swapchain::Present(VkSemaphore render_semaphore) { | 160 | void Swapchain::Present(VkSemaphore render_semaphore) { |
| @@ -131,6 +169,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) { | |||
| 131 | .pImageIndices = &image_index, | 169 | .pImageIndices = &image_index, |
| 132 | .pResults = nullptr, | 170 | .pResults = nullptr, |
| 133 | }; | 171 | }; |
| 172 | std::scoped_lock lock{scheduler.submit_mutex}; | ||
| 134 | switch (const VkResult result = present_queue.Present(present_info)) { | 173 | switch (const VkResult result = present_queue.Present(present_info)) { |
| 135 | case VK_SUCCESS: | 174 | case VK_SUCCESS: |
| 136 | break; | 175 | break; |
| @@ -153,10 +192,17 @@ void Swapchain::Present(VkSemaphore render_semaphore) { | |||
| 153 | void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) { | 192 | void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) { |
| 154 | const auto physical_device{device.GetPhysical()}; | 193 | const auto physical_device{device.GetPhysical()}; |
| 155 | const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; | 194 | const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; |
| 156 | const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; | 195 | const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface); |
| 196 | has_mailbox = std::find(present_modes.begin(), present_modes.end(), | ||
| 197 | VK_PRESENT_MODE_MAILBOX_KHR) != present_modes.end(); | ||
| 198 | has_imm = std::find(present_modes.begin(), present_modes.end(), | ||
| 199 | VK_PRESENT_MODE_IMMEDIATE_KHR) != present_modes.end(); | ||
| 200 | has_fifo_relaxed = std::find(present_modes.begin(), present_modes.end(), | ||
| 201 | VK_PRESENT_MODE_FIFO_RELAXED_KHR) != present_modes.end(); | ||
| 157 | 202 | ||
| 158 | const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; | 203 | const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)}; |
| 159 | present_mode = ChooseSwapPresentMode(present_modes); | 204 | surface_format = ChooseSwapSurfaceFormat(formats); |
| 205 | present_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed); | ||
| 160 | 206 | ||
| 161 | u32 requested_image_count{capabilities.minImageCount + 1}; | 207 | u32 requested_image_count{capabilities.minImageCount + 1}; |
| 162 | // Ensure Triple buffering if possible. | 208 | // Ensure Triple buffering if possible. |
| @@ -180,12 +226,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo | |||
| 180 | .imageColorSpace = surface_format.colorSpace, | 226 | .imageColorSpace = surface_format.colorSpace, |
| 181 | .imageExtent = {}, | 227 | .imageExtent = {}, |
| 182 | .imageArrayLayers = 1, | 228 | .imageArrayLayers = 1, |
| 183 | .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, | 229 | .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, |
| 184 | .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, | 230 | .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 185 | .queueFamilyIndexCount = 0, | 231 | .queueFamilyIndexCount = 0, |
| 186 | .pQueueFamilyIndices = nullptr, | 232 | .pQueueFamilyIndices = nullptr, |
| 187 | .preTransform = capabilities.currentTransform, | 233 | .preTransform = capabilities.currentTransform, |
| 188 | .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, | 234 | .compositeAlpha = alpha_flags, |
| 189 | .presentMode = present_mode, | 235 | .presentMode = present_mode, |
| 190 | .clipped = VK_FALSE, | 236 | .clipped = VK_FALSE, |
| 191 | .oldSwapchain = nullptr, | 237 | .oldSwapchain = nullptr, |
| @@ -217,7 +263,6 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo | |||
| 217 | 263 | ||
| 218 | extent = swapchain_ci.imageExtent; | 264 | extent = swapchain_ci.imageExtent; |
| 219 | current_srgb = srgb; | 265 | current_srgb = srgb; |
| 220 | current_fps_unlocked = !Settings::values.use_speed_limit.GetValue(); | ||
| 221 | 266 | ||
| 222 | images = swapchain.GetImages(); | 267 | images = swapchain.GetImages(); |
| 223 | image_count = static_cast<u32>(images.size()); | 268 | image_count = static_cast<u32>(images.size()); |
| @@ -228,56 +273,20 @@ void Swapchain::CreateSemaphores() { | |||
| 228 | present_semaphores.resize(image_count); | 273 | present_semaphores.resize(image_count); |
| 229 | std::ranges::generate(present_semaphores, | 274 | std::ranges::generate(present_semaphores, |
| 230 | [this] { return device.GetLogical().CreateSemaphore(); }); | 275 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 231 | } | 276 | render_semaphores.resize(image_count); |
| 232 | 277 | std::ranges::generate(render_semaphores, | |
| 233 | void Swapchain::CreateImageViews() { | 278 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 234 | VkImageViewCreateInfo ci{ | ||
| 235 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 236 | .pNext = nullptr, | ||
| 237 | .flags = 0, | ||
| 238 | .image = {}, | ||
| 239 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 240 | .format = image_view_format, | ||
| 241 | .components = | ||
| 242 | { | ||
| 243 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 244 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 245 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 246 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 247 | }, | ||
| 248 | .subresourceRange = | ||
| 249 | { | ||
| 250 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 251 | .baseMipLevel = 0, | ||
| 252 | .levelCount = 1, | ||
| 253 | .baseArrayLayer = 0, | ||
| 254 | .layerCount = 1, | ||
| 255 | }, | ||
| 256 | }; | ||
| 257 | |||
| 258 | image_views.resize(image_count); | ||
| 259 | for (std::size_t i = 0; i < image_count; i++) { | ||
| 260 | ci.image = images[i]; | ||
| 261 | image_views[i] = device.GetLogical().CreateImageView(ci); | ||
| 262 | } | ||
| 263 | } | 279 | } |
| 264 | 280 | ||
| 265 | void Swapchain::Destroy() { | 281 | void Swapchain::Destroy() { |
| 266 | frame_index = 0; | 282 | frame_index = 0; |
| 267 | present_semaphores.clear(); | 283 | present_semaphores.clear(); |
| 268 | framebuffers.clear(); | ||
| 269 | image_views.clear(); | ||
| 270 | swapchain.reset(); | 284 | swapchain.reset(); |
| 271 | } | 285 | } |
| 272 | 286 | ||
| 273 | bool Swapchain::HasFpsUnlockChanged() const { | ||
| 274 | return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue(); | ||
| 275 | } | ||
| 276 | |||
| 277 | bool Swapchain::NeedsPresentModeUpdate() const { | 287 | bool Swapchain::NeedsPresentModeUpdate() const { |
| 278 | // Mailbox present mode is the ideal for all scenarios. If it is not available, | 288 | const auto requested_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed); |
| 279 | // A different present mode is needed to support unlocked FPS above the monitor's refresh rate. | 289 | return present_mode != requested_mode; |
| 280 | return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged(); | ||
| 281 | } | 290 | } |
| 282 | 291 | ||
| 283 | } // namespace Vulkan | 292 | } // namespace Vulkan |
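The rewritten ChooseSwapPresentMode maps the vsync setting onto whatever the surface actually supports, upgrading FIFO to Mailbox or Immediate when the speed limit is off and falling back to FIFO when the requested mode is unavailable. A condensed sketch of that fallback idea (FIFO is the only mode the Vulkan spec guarantees; `want_unlocked` stands in for the speed-limit setting):

```cpp
#include <vulkan/vulkan.h>

VkPresentModeKHR PickPresentMode(bool has_immediate, bool has_mailbox,
                                 bool want_unlocked) {
    if (want_unlocked) {
        if (has_mailbox) {
            return VK_PRESENT_MODE_MAILBOX_KHR;   // low latency, never tears
        }
        if (has_immediate) {
            return VK_PRESENT_MODE_IMMEDIATE_KHR; // unthrottled, may tear
        }
    }
    return VK_PRESENT_MODE_FIFO_KHR; // always supported, vsync-locked
}
```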
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index caf1ff32b..bf1ea7254 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -27,7 +27,7 @@ public: | |||
| 27 | void Create(u32 width, u32 height, bool srgb); | 27 | void Create(u32 width, u32 height, bool srgb); |
| 28 | 28 | ||
| 29 | /// Acquires the next image in the swapchain, waits as needed. | 29 | /// Acquires the next image in the swapchain, waits as needed. |
| 30 | void AcquireNextImage(); | 30 | bool AcquireNextImage(); |
| 31 | 31 | ||
| 32 | /// Presents the rendered image to the swapchain. | 32 | /// Presents the rendered image to the swapchain. |
| 33 | void Present(VkSemaphore render_semaphore); | 33 | void Present(VkSemaphore render_semaphore); |
| @@ -52,6 +52,11 @@ public: | |||
| 52 | return is_suboptimal; | 52 | return is_suboptimal; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | /// Returns true when the swapchain format is in the srgb color space | ||
| 56 | bool IsSrgb() const { | ||
| 57 | return current_srgb; | ||
| 58 | } | ||
| 59 | |||
| 55 | VkExtent2D GetSize() const { | 60 | VkExtent2D GetSize() const { |
| 56 | return extent; | 61 | return extent; |
| 57 | } | 62 | } |
| @@ -64,22 +69,34 @@ public: | |||
| 64 | return image_index; | 69 | return image_index; |
| 65 | } | 70 | } |
| 66 | 71 | ||
| 72 | std::size_t GetFrameIndex() const { | ||
| 73 | return frame_index; | ||
| 74 | } | ||
| 75 | |||
| 67 | VkImage GetImageIndex(std::size_t index) const { | 76 | VkImage GetImageIndex(std::size_t index) const { |
| 68 | return images[index]; | 77 | return images[index]; |
| 69 | } | 78 | } |
| 70 | 79 | ||
| 71 | VkImageView GetImageViewIndex(std::size_t index) const { | 80 | VkImage CurrentImage() const { |
| 72 | return *image_views[index]; | 81 | return images[image_index]; |
| 73 | } | 82 | } |
| 74 | 83 | ||
| 75 | VkFormat GetImageViewFormat() const { | 84 | VkFormat GetImageViewFormat() const { |
| 76 | return image_view_format; | 85 | return image_view_format; |
| 77 | } | 86 | } |
| 78 | 87 | ||
| 88 | VkFormat GetImageFormat() const { | ||
| 89 | return surface_format.format; | ||
| 90 | } | ||
| 91 | |||
| 79 | VkSemaphore CurrentPresentSemaphore() const { | 92 | VkSemaphore CurrentPresentSemaphore() const { |
| 80 | return *present_semaphores[frame_index]; | 93 | return *present_semaphores[frame_index]; |
| 81 | } | 94 | } |
| 82 | 95 | ||
| 96 | VkSemaphore CurrentRenderSemaphore() const { | ||
| 97 | return *render_semaphores[frame_index]; | ||
| 98 | } | ||
| 99 | |||
| 83 | u32 GetWidth() const { | 100 | u32 GetWidth() const { |
| 84 | return width; | 101 | return width; |
| 85 | } | 102 | } |
| @@ -88,6 +105,10 @@ public: | |||
| 88 | return height; | 105 | return height; |
| 89 | } | 106 | } |
| 90 | 107 | ||
| 108 | VkExtent2D GetExtent() const { | ||
| 109 | return extent; | ||
| 110 | } | ||
| 111 | |||
| 91 | private: | 112 | private: |
| 92 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); | 113 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); |
| 93 | void CreateSemaphores(); | 114 | void CreateSemaphores(); |
| @@ -95,8 +116,6 @@ private: | |||
| 95 | 116 | ||
| 96 | void Destroy(); | 117 | void Destroy(); |
| 97 | 118 | ||
| 98 | bool HasFpsUnlockChanged() const; | ||
| 99 | |||
| 100 | bool NeedsPresentModeUpdate() const; | 119 | bool NeedsPresentModeUpdate() const; |
| 101 | 120 | ||
| 102 | const VkSurfaceKHR surface; | 121 | const VkSurfaceKHR surface; |
| @@ -107,10 +126,9 @@ private: | |||
| 107 | 126 | ||
| 108 | std::size_t image_count{}; | 127 | std::size_t image_count{}; |
| 109 | std::vector<VkImage> images; | 128 | std::vector<VkImage> images; |
| 110 | std::vector<vk::ImageView> image_views; | ||
| 111 | std::vector<vk::Framebuffer> framebuffers; | ||
| 112 | std::vector<u64> resource_ticks; | 129 | std::vector<u64> resource_ticks; |
| 113 | std::vector<vk::Semaphore> present_semaphores; | 130 | std::vector<vk::Semaphore> present_semaphores; |
| 131 | std::vector<vk::Semaphore> render_semaphores; | ||
| 114 | 132 | ||
| 115 | u32 width; | 133 | u32 width; |
| 116 | u32 height; | 134 | u32 height; |
| @@ -121,9 +139,12 @@ private: | |||
| 121 | VkFormat image_view_format{}; | 139 | VkFormat image_view_format{}; |
| 122 | VkExtent2D extent{}; | 140 | VkExtent2D extent{}; |
| 123 | VkPresentModeKHR present_mode{}; | 141 | VkPresentModeKHR present_mode{}; |
| 142 | VkSurfaceFormatKHR surface_format{}; | ||
| 143 | bool has_imm{false}; | ||
| 144 | bool has_mailbox{false}; | ||
| 145 | bool has_fifo_relaxed{false}; | ||
| 124 | 146 | ||
| 125 | bool current_srgb{}; | 147 | bool current_srgb{}; |
| 126 | bool current_fps_unlocked{}; | ||
| 127 | bool is_outdated{}; | 148 | bool is_outdated{}; |
| 128 | bool is_suboptimal{}; | 149 | bool is_suboptimal{}; |
| 129 | }; | 150 | }; |
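AcquireNextImage now reports whether the swapchain is suboptimal or out of date, so the presenter can rebuild it before rendering into a stale image. A hedged usage sketch; PresentFrame and the recreation flow are illustrative, not the renderer's actual code:

```cpp
// Per-frame flow, assuming a Swapchain with the interface shown above.
void PresentFrame(Vulkan::Swapchain& swapchain, VkSemaphore render_semaphore,
                  u32 width, u32 height, bool srgb) {
    if (swapchain.AcquireNextImage()) {
        // Suboptimal or outdated: recreate, then acquire from the new swapchain.
        swapchain.Create(width, height, srgb);
        swapchain.AcquireNextImage();
    }
    // ... record and submit rendering that signals render_semaphore ...
    swapchain.Present(render_semaphore);
}
```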
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae15f6976..99dd1260a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <span> | 6 | #include <span> |
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 10 | #include "common/bit_util.h" | 11 | #include "common/bit_util.h" |
| @@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag | |||
| 1343 | 1344 | ||
| 1344 | void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | 1345 | void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, |
| 1345 | std::span<const VideoCommon::BufferImageCopy> copies) { | 1346 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 1347 | std::array buffer_handles{ | ||
| 1348 | buffer, | ||
| 1349 | }; | ||
| 1350 | std::array buffer_offsets{ | ||
| 1351 | offset, | ||
| 1352 | }; | ||
| 1353 | DownloadMemory(buffer_handles, buffer_offsets, copies); | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span, | ||
| 1357 | std::span<const VideoCommon::BufferImageCopy> copies) { | ||
| 1346 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | 1358 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); |
| 1347 | if (is_rescaled) { | 1359 | if (is_rescaled) { |
| 1348 | ScaleDown(); | 1360 | ScaleDown(); |
| 1349 | } | 1361 | } |
| 1350 | std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); | 1362 | boost::container::small_vector<VkBuffer, 1> buffers_vector{}; |
| 1363 | boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; | ||
| 1364 | for (size_t index = 0; index < buffers_span.size(); index++) { | ||
| 1365 | buffers_vector.emplace_back(buffers_span[index]); | ||
| 1366 | vk_copies.emplace_back( | ||
| 1367 | TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); | ||
| 1368 | } | ||
| 1351 | scheduler->RequestOutsideRenderPassOperationContext(); | 1369 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 1352 | scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask, | 1370 | scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, |
| 1353 | vk_copies](vk::CommandBuffer cmdbuf) { | 1371 | aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { |
| 1354 | const VkImageMemoryBarrier read_barrier{ | 1372 | const VkImageMemoryBarrier read_barrier{ |
| 1355 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 1373 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 1356 | .pNext = nullptr, | 1374 | .pNext = nullptr, |
| @@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1369 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 1387 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 1370 | }, | 1388 | }, |
| 1371 | }; | 1389 | }; |
| 1390 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1391 | 0, read_barrier); | ||
| 1392 | |||
| 1393 | for (size_t index = 0; index < buffers.size(); index++) { | ||
| 1394 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], | ||
| 1395 | vk_copies[index]); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | const VkMemoryBarrier memory_write_barrier{ | ||
| 1399 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1400 | .pNext = nullptr, | ||
| 1401 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1402 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1403 | }; | ||
| 1372 | const VkImageMemoryBarrier image_write_barrier{ | 1404 | const VkImageMemoryBarrier image_write_barrier{ |
| 1373 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 1405 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 1374 | .pNext = nullptr, | 1406 | .pNext = nullptr, |
| @@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1387 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 1419 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 1388 | }, | 1420 | }, |
| 1389 | }; | 1421 | }; |
| 1390 | const VkMemoryBarrier memory_write_barrier{ | ||
| 1391 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1392 | .pNext = nullptr, | ||
| 1393 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1394 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1395 | }; | ||
| 1396 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1397 | 0, read_barrier); | ||
| 1398 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||
| 1399 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | 1422 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 1400 | 0, memory_write_barrier, nullptr, image_write_barrier); | 1423 | 0, memory_write_barrier, nullptr, image_write_barrier); |
| 1401 | }); | 1424 | }); |
| @@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1405 | } | 1428 | } |
| 1406 | 1429 | ||
| 1407 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 1430 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 1408 | DownloadMemory(map.buffer, map.offset, copies); | 1431 | std::array buffers{ |
| 1432 | map.buffer, | ||
| 1433 | }; | ||
| 1434 | std::array offsets{ | ||
| 1435 | map.offset, | ||
| 1436 | }; | ||
| 1437 | DownloadMemory(buffers, offsets, copies); | ||
| 1409 | } | 1438 | } |
| 1410 | 1439 | ||
| 1411 | bool Image::IsRescaled() const noexcept { | 1440 | bool Image::IsRescaled() const noexcept { |
| @@ -1555,8 +1584,9 @@ bool Image::NeedsScaleHelper() const { | |||
| 1555 | 1584 | ||
| 1556 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 1585 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 1557 | ImageId image_id_, Image& image) | 1586 | ImageId image_id_, Image& image) |
| 1558 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, | 1587 | : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr}, |
| 1559 | image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) { | 1588 | device{&runtime.device}, image_handle{image.Handle()}, |
| 1589 | samples(ConvertSampleCount(image.info.num_samples)) { | ||
| 1560 | using Shader::TextureType; | 1590 | using Shader::TextureType; |
| 1561 | 1591 | ||
| 1562 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); | 1592 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); |
| @@ -1602,7 +1632,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 1602 | } | 1632 | } |
| 1603 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); | 1633 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); |
| 1604 | if (device->HasDebuggingToolAttached()) { | 1634 | if (device->HasDebuggingToolAttached()) { |
| 1605 | handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1635 | handle.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str()); |
| 1606 | } | 1636 | } |
| 1607 | image_views[static_cast<size_t>(tex_type)] = std::move(handle); | 1637 | image_views[static_cast<size_t>(tex_type)] = std::move(handle); |
| 1608 | }; | 1638 | }; |
| @@ -1643,7 +1673,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 1643 | 1673 | ||
| 1644 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 1674 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 1645 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | 1675 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) |
| 1646 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | 1676 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, |
| 1647 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | 1677 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} |
| 1648 | 1678 | ||
| 1649 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) | 1679 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) |
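Image::DownloadMemory now takes parallel spans of buffers and offsets so a single image readback can be copied into several destinations within one command stream; the single-buffer overloads simply wrap their argument in one-element arrays. A short usage sketch with illustrative handles:

```cpp
#include <array>
#include <span>
#include <vulkan/vulkan.h>

// Copy the same image contents into a staging buffer and a guest-visible buffer
// in one pass. `image` and the copy list come from the texture cache.
void DownloadTwice(Vulkan::Image& image, VkBuffer staging, VkBuffer guest,
                   VkDeviceSize staging_offset, VkDeviceSize guest_offset,
                   std::span<const VideoCommon::BufferImageCopy> copies) {
    std::array buffers{staging, guest};
    std::array offsets{staging_offset, guest_offset};
    image.DownloadMemory(buffers, offsets, copies);
}
```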
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d5ee23f8d..6f360177a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| @@ -141,6 +141,9 @@ public: | |||
| 141 | void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | 141 | void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, |
| 142 | std::span<const VideoCommon::BufferImageCopy> copies); | 142 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 143 | 143 | ||
| 144 | void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets, | ||
| 145 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 146 | |||
| 144 | void DownloadMemory(const StagingBufferRef& map, | 147 | void DownloadMemory(const StagingBufferRef& map, |
| 145 | std::span<const VideoCommon::BufferImageCopy> copies); | 148 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 146 | 149 | ||
| @@ -262,7 +265,6 @@ private: | |||
| 262 | VkImage image_handle = VK_NULL_HANDLE; | 265 | VkImage image_handle = VK_NULL_HANDLE; |
| 263 | VkImageView render_target = VK_NULL_HANDLE; | 266 | VkImageView render_target = VK_NULL_HANDLE; |
| 264 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 267 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
| 265 | GPUVAddr gpu_addr = 0; | ||
| 266 | u32 buffer_size = 0; | 268 | u32 buffer_size = 0; |
| 267 | }; | 269 | }; |
| 268 | 270 | ||
| @@ -371,6 +373,7 @@ struct TextureCacheParams { | |||
| 371 | using Sampler = Vulkan::Sampler; | 373 | using Sampler = Vulkan::Sampler; |
| 372 | using Framebuffer = Vulkan::Framebuffer; | 374 | using Framebuffer = Vulkan::Framebuffer; |
| 373 | using AsyncBuffer = Vulkan::StagingBufferRef; | 375 | using AsyncBuffer = Vulkan::StagingBufferRef; |
| 376 | using BufferType = VkBuffer; | ||
| 374 | }; | 377 | }; |
| 375 | 378 | ||
| 376 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | 379 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 009dab0b6..0630ebda5 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -14,13 +14,18 @@ namespace Vulkan { | |||
| 14 | 14 | ||
| 15 | UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) | 15 | UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) |
| 16 | : device{device_}, scheduler{scheduler_} { | 16 | : device{device_}, scheduler{scheduler_} { |
| 17 | payload_start = payload.data(); | ||
| 17 | payload_cursor = payload.data(); | 18 | payload_cursor = payload.data(); |
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; | 21 | UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; |
| 21 | 22 | ||
| 22 | void UpdateDescriptorQueue::TickFrame() { | 23 | void UpdateDescriptorQueue::TickFrame() { |
| 23 | payload_cursor = payload.data(); | 24 | if (++frame_index >= FRAMES_IN_FLIGHT) { |
| 25 | frame_index = 0; | ||
| 26 | } | ||
| 27 | payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE; | ||
| 28 | payload_cursor = payload_start; | ||
| 24 | } | 29 | } |
| 25 | 30 | ||
| 26 | void UpdateDescriptorQueue::Acquire() { | 31 | void UpdateDescriptorQueue::Acquire() { |
| @@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() { | |||
| 28 | // This is the maximum number of entries a single draw call might use. | 33 | // This is the maximum number of entries a single draw call might use. |
| 29 | static constexpr size_t MIN_ENTRIES = 0x400; | 34 | static constexpr size_t MIN_ENTRIES = 0x400; |
| 30 | 35 | ||
| 31 | if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { | 36 | if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) { |
| 32 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); | 37 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); |
| 33 | scheduler.WaitWorker(); | 38 | scheduler.WaitWorker(); |
| 34 | payload_cursor = payload.data(); | 39 | payload_cursor = payload_start; |
| 35 | } | 40 | } |
| 36 | upload_start = payload_cursor; | 41 | upload_start = payload_cursor; |
| 37 | } | 42 | } |
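TickFrame no longer rewinds to the start of the payload; the payload is split into FRAMES_IN_FLIGHT fixed-size slices and the cursor rotates through them, so descriptor data written for a frame that is still in flight is not overwritten. The slicing arithmetic in isolation (a generic sketch, not the class itself):

```cpp
#include <array>
#include <cstddef>

constexpr std::size_t kFramesInFlight = 5;
constexpr std::size_t kFramePayload = 0x10000;

struct Entry {}; // stand-in for a descriptor update payload entry

std::array<Entry, kFramesInFlight * kFramePayload> payload;
std::size_t frame_index = 0;

// Called once per frame: advance to the next slice and reset the cursor there.
Entry* BeginFrame() {
    frame_index = (frame_index + 1) % kFramesInFlight;
    return payload.data() + frame_index * kFramePayload; // new payload_start
}
```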
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 625bcc809..1c1a7020b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -29,6 +29,12 @@ struct DescriptorUpdateEntry { | |||
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | class UpdateDescriptorQueue final { | 31 | class UpdateDescriptorQueue final { |
| 32 | // This should be plenty for the vast majority of cases. Most desktop platforms only | ||
| 33 | // provide up to 3 swapchain images. | ||
| 34 | static constexpr size_t FRAMES_IN_FLIGHT = 5; | ||
| 35 | static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000; | ||
| 36 | static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT; | ||
| 37 | |||
| 32 | public: | 38 | public: |
| 33 | explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); | 39 | explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); |
| 34 | ~UpdateDescriptorQueue(); | 40 | ~UpdateDescriptorQueue(); |
| @@ -73,9 +79,11 @@ private: | |||
| 73 | const Device& device; | 79 | const Device& device; |
| 74 | Scheduler& scheduler; | 80 | Scheduler& scheduler; |
| 75 | 81 | ||
| 82 | size_t frame_index{0}; | ||
| 76 | DescriptorUpdateEntry* payload_cursor = nullptr; | 83 | DescriptorUpdateEntry* payload_cursor = nullptr; |
| 84 | DescriptorUpdateEntry* payload_start = nullptr; | ||
| 77 | const DescriptorUpdateEntry* upload_start = nullptr; | 85 | const DescriptorUpdateEntry* upload_start = nullptr; |
| 78 | std::array<DescriptorUpdateEntry, 0x10000> payload; | 86 | std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload; |
| 79 | }; | 87 | }; |
| 80 | 88 | ||
| 81 | } // namespace Vulkan | 89 | } // namespace Vulkan |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index d9482371b..c5213875b 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -228,14 +228,14 @@ const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu | |||
| 228 | auto info = std::make_unique<ShaderInfo>(); | 228 | auto info = std::make_unique<ShaderInfo>(); |
| 229 | if (const std::optional<u64> cached_hash{env.Analyze()}) { | 229 | if (const std::optional<u64> cached_hash{env.Analyze()}) { |
| 230 | info->unique_hash = *cached_hash; | 230 | info->unique_hash = *cached_hash; |
| 231 | info->size_bytes = env.CachedSize(); | 231 | info->size_bytes = env.CachedSizeBytes(); |
| 232 | } else { | 232 | } else { |
| 233 | // Slow path, not really hit on commercial games | 233 | // Slow path, not really hit on commercial games |
| 234 | // Build a control flow graph to get the real shader size | 234 | // Build a control flow graph to get the real shader size |
| 235 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | 235 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; |
| 236 | Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; | 236 | Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; |
| 237 | info->unique_hash = env.CalculateHash(); | 237 | info->unique_hash = env.CalculateHash(); |
| 238 | info->size_bytes = env.ReadSize(); | 238 | info->size_bytes = env.ReadSizeBytes(); |
| 239 | } | 239 | } |
| 240 | const size_t size_bytes{info->size_bytes}; | 240 | const size_t size_bytes{info->size_bytes}; |
| 241 | const ShaderInfo* const result{info.get()}; | 241 | const ShaderInfo* const result{info.get()}; |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 574760f80..c7cb56243 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp | |||
| @@ -170,15 +170,19 @@ std::optional<u64> GenericEnvironment::Analyze() { | |||
| 170 | void GenericEnvironment::SetCachedSize(size_t size_bytes) { | 170 | void GenericEnvironment::SetCachedSize(size_t size_bytes) { |
| 171 | cached_lowest = start_address; | 171 | cached_lowest = start_address; |
| 172 | cached_highest = start_address + static_cast<u32>(size_bytes); | 172 | cached_highest = start_address + static_cast<u32>(size_bytes); |
| 173 | code.resize(CachedSize()); | 173 | code.resize(CachedSizeWords()); |
| 174 | gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); | 174 | gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | size_t GenericEnvironment::CachedSize() const noexcept { | 177 | size_t GenericEnvironment::CachedSizeWords() const noexcept { |
| 178 | return cached_highest - cached_lowest + INST_SIZE; | 178 | return CachedSizeBytes() / INST_SIZE; |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | size_t GenericEnvironment::ReadSize() const noexcept { | 181 | size_t GenericEnvironment::CachedSizeBytes() const noexcept { |
| 182 | return static_cast<size_t>(cached_highest) - cached_lowest + INST_SIZE; | ||
| 183 | } | ||
| 184 | |||
| 185 | size_t GenericEnvironment::ReadSizeBytes() const noexcept { | ||
| 182 | return read_highest - read_lowest + INST_SIZE; | 186 | return read_highest - read_lowest + INST_SIZE; |
| 183 | } | 187 | } |
| 184 | 188 | ||
| @@ -187,7 +191,7 @@ bool GenericEnvironment::CanBeSerialized() const noexcept { | |||
| 187 | } | 191 | } |
| 188 | 192 | ||
| 189 | u64 GenericEnvironment::CalculateHash() const { | 193 | u64 GenericEnvironment::CalculateHash() const { |
| 190 | const size_t size{ReadSize()}; | 194 | const size_t size{ReadSizeBytes()}; |
| 191 | const auto data{std::make_unique<char[]>(size)}; | 195 | const auto data{std::make_unique<char[]>(size)}; |
| 192 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); | 196 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); |
| 193 | return Common::CityHash64(data.get(), size); | 197 | return Common::CityHash64(data.get(), size); |
| @@ -198,7 +202,7 @@ void GenericEnvironment::Dump(u64 hash) { | |||
| 198 | } | 202 | } |
| 199 | 203 | ||
| 200 | void GenericEnvironment::Serialize(std::ofstream& file) const { | 204 | void GenericEnvironment::Serialize(std::ofstream& file) const { |
| 201 | const u64 code_size{static_cast<u64>(CachedSize())}; | 205 | const u64 code_size{static_cast<u64>(CachedSizeBytes())}; |
| 202 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | 206 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; |
| 203 | const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; | 207 | const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; |
| 204 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | 208 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; |
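The rename separates the two units the old CachedSize conflated: the cached range measured in bytes versus in 64-bit instruction words. Since `code` is a vector of u64, sizing it by a byte count makes the word/byte distinction easy to get wrong. A small sketch of the intended relationship, assuming INST_SIZE is sizeof(u64) as the read-back in the hunk suggests:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::size_t INST_SIZE = sizeof(uint64_t); // one instruction word

std::size_t CachedSizeBytes(uint32_t lowest, uint32_t highest) {
    return static_cast<std::size_t>(highest) - lowest + INST_SIZE; // inclusive range
}

std::size_t CachedSizeWords(uint32_t lowest, uint32_t highest) {
    return CachedSizeBytes(lowest, highest) / INST_SIZE;
}

// The code buffer holds words, so it is sized in words and read back in bytes.
void Cache(std::vector<uint64_t>& code, uint32_t lowest, uint32_t highest) {
    code.resize(CachedSizeWords(lowest, highest));
    // ReadBlock(base + lowest, code.data(), code.size() * sizeof(uint64_t));
}
```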
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index d75987a52..a0f61cbda 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h | |||
| @@ -48,9 +48,11 @@ public: | |||
| 48 | 48 | ||
| 49 | void SetCachedSize(size_t size_bytes); | 49 | void SetCachedSize(size_t size_bytes); |
| 50 | 50 | ||
| 51 | [[nodiscard]] size_t CachedSize() const noexcept; | 51 | [[nodiscard]] size_t CachedSizeWords() const noexcept; |
| 52 | 52 | ||
| 53 | [[nodiscard]] size_t ReadSize() const noexcept; | 53 | [[nodiscard]] size_t CachedSizeBytes() const noexcept; |
| 54 | |||
| 55 | [[nodiscard]] size_t ReadSizeBytes() const noexcept; | ||
| 54 | 56 | ||
| 55 | [[nodiscard]] bool CanBeSerialized() const noexcept; | 57 | [[nodiscard]] bool CanBeSerialized() const noexcept; |
| 56 | 58 | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1a76d4178..cb51529e4 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -250,10 +250,13 @@ bool IsPixelFormatASTC(PixelFormat format) { | |||
| 250 | case PixelFormat::ASTC_2D_6X6_UNORM: | 250 | case PixelFormat::ASTC_2D_6X6_UNORM: |
| 251 | case PixelFormat::ASTC_2D_6X6_SRGB: | 251 | case PixelFormat::ASTC_2D_6X6_SRGB: |
| 252 | case PixelFormat::ASTC_2D_10X6_UNORM: | 252 | case PixelFormat::ASTC_2D_10X6_UNORM: |
| 253 | case PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 253 | case PixelFormat::ASTC_2D_10X5_UNORM: | 254 | case PixelFormat::ASTC_2D_10X5_UNORM: |
| 254 | case PixelFormat::ASTC_2D_10X5_SRGB: | 255 | case PixelFormat::ASTC_2D_10X5_SRGB: |
| 255 | case PixelFormat::ASTC_2D_10X10_UNORM: | 256 | case PixelFormat::ASTC_2D_10X10_UNORM: |
| 256 | case PixelFormat::ASTC_2D_10X10_SRGB: | 257 | case PixelFormat::ASTC_2D_10X10_SRGB: |
| 258 | case PixelFormat::ASTC_2D_12X10_UNORM: | ||
| 259 | case PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 257 | case PixelFormat::ASTC_2D_12X12_UNORM: | 260 | case PixelFormat::ASTC_2D_12X12_UNORM: |
| 258 | case PixelFormat::ASTC_2D_12X12_SRGB: | 261 | case PixelFormat::ASTC_2D_12X12_SRGB: |
| 259 | case PixelFormat::ASTC_2D_8X6_UNORM: | 262 | case PixelFormat::ASTC_2D_8X6_UNORM: |
| @@ -279,11 +282,13 @@ bool IsPixelFormatSRGB(PixelFormat format) { | |||
| 279 | case PixelFormat::ASTC_2D_8X5_SRGB: | 282 | case PixelFormat::ASTC_2D_8X5_SRGB: |
| 280 | case PixelFormat::ASTC_2D_5X4_SRGB: | 283 | case PixelFormat::ASTC_2D_5X4_SRGB: |
| 281 | case PixelFormat::ASTC_2D_5X5_SRGB: | 284 | case PixelFormat::ASTC_2D_5X5_SRGB: |
| 285 | case PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 282 | case PixelFormat::ASTC_2D_10X8_SRGB: | 286 | case PixelFormat::ASTC_2D_10X8_SRGB: |
| 283 | case PixelFormat::ASTC_2D_6X6_SRGB: | 287 | case PixelFormat::ASTC_2D_6X6_SRGB: |
| 284 | case PixelFormat::ASTC_2D_10X5_SRGB: | 288 | case PixelFormat::ASTC_2D_10X5_SRGB: |
| 285 | case PixelFormat::ASTC_2D_10X10_SRGB: | 289 | case PixelFormat::ASTC_2D_10X10_SRGB: |
| 286 | case PixelFormat::ASTC_2D_12X12_SRGB: | 290 | case PixelFormat::ASTC_2D_12X12_SRGB: |
| 291 | case PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 287 | case PixelFormat::ASTC_2D_8X6_SRGB: | 292 | case PixelFormat::ASTC_2D_8X6_SRGB: |
| 288 | case PixelFormat::ASTC_2D_6X5_SRGB: | 293 | case PixelFormat::ASTC_2D_6X5_SRGB: |
| 289 | return true; | 294 | return true; |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 44b79af20..0225d3287 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -95,10 +95,13 @@ enum class PixelFormat { | |||
| 95 | ASTC_2D_6X6_UNORM, | 95 | ASTC_2D_6X6_UNORM, |
| 96 | ASTC_2D_6X6_SRGB, | 96 | ASTC_2D_6X6_SRGB, |
| 97 | ASTC_2D_10X6_UNORM, | 97 | ASTC_2D_10X6_UNORM, |
| 98 | ASTC_2D_10X6_SRGB, | ||
| 98 | ASTC_2D_10X5_UNORM, | 99 | ASTC_2D_10X5_UNORM, |
| 99 | ASTC_2D_10X5_SRGB, | 100 | ASTC_2D_10X5_SRGB, |
| 100 | ASTC_2D_10X10_UNORM, | 101 | ASTC_2D_10X10_UNORM, |
| 101 | ASTC_2D_10X10_SRGB, | 102 | ASTC_2D_10X10_SRGB, |
| 103 | ASTC_2D_12X10_UNORM, | ||
| 104 | ASTC_2D_12X10_SRGB, | ||
| 102 | ASTC_2D_12X12_UNORM, | 105 | ASTC_2D_12X12_UNORM, |
| 103 | ASTC_2D_12X12_SRGB, | 106 | ASTC_2D_12X12_SRGB, |
| 104 | ASTC_2D_8X6_UNORM, | 107 | ASTC_2D_8X6_UNORM, |
| @@ -232,10 +235,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{ | |||
| 232 | 6, // ASTC_2D_6X6_UNORM | 235 | 6, // ASTC_2D_6X6_UNORM |
| 233 | 6, // ASTC_2D_6X6_SRGB | 236 | 6, // ASTC_2D_6X6_SRGB |
| 234 | 10, // ASTC_2D_10X6_UNORM | 237 | 10, // ASTC_2D_10X6_UNORM |
| 238 | 10, // ASTC_2D_10X6_SRGB | ||
| 235 | 10, // ASTC_2D_10X5_UNORM | 239 | 10, // ASTC_2D_10X5_UNORM |
| 236 | 10, // ASTC_2D_10X5_SRGB | 240 | 10, // ASTC_2D_10X5_SRGB |
| 237 | 10, // ASTC_2D_10X10_UNORM | 241 | 10, // ASTC_2D_10X10_UNORM |
| 238 | 10, // ASTC_2D_10X10_SRGB | 242 | 10, // ASTC_2D_10X10_SRGB |
| 243 | 12, // ASTC_2D_12X10_UNORM | ||
| 244 | 12, // ASTC_2D_12X10_SRGB | ||
| 239 | 12, // ASTC_2D_12X12_UNORM | 245 | 12, // ASTC_2D_12X12_UNORM |
| 240 | 12, // ASTC_2D_12X12_SRGB | 246 | 12, // ASTC_2D_12X12_SRGB |
| 241 | 8, // ASTC_2D_8X6_UNORM | 247 | 8, // ASTC_2D_8X6_UNORM |
| @@ -338,10 +344,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{ | |||
| 338 | 6, // ASTC_2D_6X6_UNORM | 344 | 6, // ASTC_2D_6X6_UNORM |
| 339 | 6, // ASTC_2D_6X6_SRGB | 345 | 6, // ASTC_2D_6X6_SRGB |
| 340 | 6, // ASTC_2D_10X6_UNORM | 346 | 6, // ASTC_2D_10X6_UNORM |
| 347 | 6, // ASTC_2D_10X6_SRGB | ||
| 341 | 5, // ASTC_2D_10X5_UNORM | 348 | 5, // ASTC_2D_10X5_UNORM |
| 342 | 5, // ASTC_2D_10X5_SRGB | 349 | 5, // ASTC_2D_10X5_SRGB |
| 343 | 10, // ASTC_2D_10X10_UNORM | 350 | 10, // ASTC_2D_10X10_UNORM |
| 344 | 10, // ASTC_2D_10X10_SRGB | 351 | 10, // ASTC_2D_10X10_SRGB |
| 352 | 10, // ASTC_2D_12X10_UNORM | ||
| 353 | 10, // ASTC_2D_12X10_SRGB | ||
| 345 | 12, // ASTC_2D_12X12_UNORM | 354 | 12, // ASTC_2D_12X12_UNORM |
| 346 | 12, // ASTC_2D_12X12_SRGB | 355 | 12, // ASTC_2D_12X12_SRGB |
| 347 | 6, // ASTC_2D_8X6_UNORM | 356 | 6, // ASTC_2D_8X6_UNORM |
| @@ -444,10 +453,13 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{ | |||
| 444 | 128, // ASTC_2D_6X6_UNORM | 453 | 128, // ASTC_2D_6X6_UNORM |
| 445 | 128, // ASTC_2D_6X6_SRGB | 454 | 128, // ASTC_2D_6X6_SRGB |
| 446 | 128, // ASTC_2D_10X6_UNORM | 455 | 128, // ASTC_2D_10X6_UNORM |
| 456 | 128, // ASTC_2D_10X6_SRGB | ||
| 447 | 128, // ASTC_2D_10X5_UNORM | 457 | 128, // ASTC_2D_10X5_UNORM |
| 448 | 128, // ASTC_2D_10X5_SRGB | 458 | 128, // ASTC_2D_10X5_SRGB |
| 449 | 128, // ASTC_2D_10X10_UNORM | 459 | 128, // ASTC_2D_10X10_UNORM |
| 450 | 128, // ASTC_2D_10X10_SRGB | 460 | 128, // ASTC_2D_10X10_SRGB |
| 461 | 128, // ASTC_2D_12X10_UNORM | ||
| 462 | 128, // ASTC_2D_12X10_SRGB | ||
| 451 | 128, // ASTC_2D_12X12_UNORM | 463 | 128, // ASTC_2D_12X12_UNORM |
| 452 | 128, // ASTC_2D_12X12_SRGB | 464 | 128, // ASTC_2D_12X12_SRGB |
| 453 | 128, // ASTC_2D_8X6_UNORM | 465 | 128, // ASTC_2D_8X6_UNORM |
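The new ASTC_2D_10X6_SRGB and ASTC_2D_12X10 entries extend three parallel tables: block width, block height, and bits per block. Every ASTC block is 128 bits, so the compressed size of a mip level follows directly from these tables; a small sketch under that assumption:

```cpp
#include <cstdint>

// Size in bytes of one ASTC-compressed mip level, given the per-format block
// dimensions from BLOCK_WIDTH_TABLE / BLOCK_HEIGHT_TABLE (bits per block is 128).
constexpr uint64_t AstcLevelSizeBytes(uint32_t width, uint32_t height,
                                      uint32_t block_width, uint32_t block_height) {
    const uint64_t blocks_x = (width + block_width - 1) / block_width;
    const uint64_t blocks_y = (height + block_height - 1) / block_height;
    return blocks_x * blocks_y * 16; // 128 bits = 16 bytes per block
}

// Example: a 100x60 ASTC_2D_12X10 image -> ceil(100/12) * ceil(60/10) * 16
// = 9 * 6 * 16 = 864 bytes.
static_assert(AstcLevelSizeBytes(100, 60, 12, 10) == 864);
```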
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 5fc2b2fec..11ced6c38 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -210,6 +210,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, | |||
| 210 | return PixelFormat::ASTC_2D_6X6_SRGB; | 210 | return PixelFormat::ASTC_2D_6X6_SRGB; |
| 211 | case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR): | 211 | case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR): |
| 212 | return PixelFormat::ASTC_2D_10X6_UNORM; | 212 | return PixelFormat::ASTC_2D_10X6_UNORM; |
| 213 | case Hash(TextureFormat::ASTC_2D_10X6, UNORM, SRGB): | ||
| 214 | return PixelFormat::ASTC_2D_10X6_SRGB; | ||
| 213 | case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR): | 215 | case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR): |
| 214 | return PixelFormat::ASTC_2D_10X5_UNORM; | 216 | return PixelFormat::ASTC_2D_10X5_UNORM; |
| 215 | case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB): | 217 | case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB): |
| @@ -218,6 +220,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, | |||
| 218 | return PixelFormat::ASTC_2D_10X10_UNORM; | 220 | return PixelFormat::ASTC_2D_10X10_UNORM; |
| 219 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): | 221 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): |
| 220 | return PixelFormat::ASTC_2D_10X10_SRGB; | 222 | return PixelFormat::ASTC_2D_10X10_SRGB; |
| 223 | case Hash(TextureFormat::ASTC_2D_12X10, UNORM, LINEAR): | ||
| 224 | return PixelFormat::ASTC_2D_12X10_UNORM; | ||
| 225 | case Hash(TextureFormat::ASTC_2D_12X10, UNORM, SRGB): | ||
| 226 | return PixelFormat::ASTC_2D_12X10_SRGB; | ||
| 221 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): | 227 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): |
| 222 | return PixelFormat::ASTC_2D_12X12_UNORM; | 228 | return PixelFormat::ASTC_2D_12X12_UNORM; |
| 223 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): | 229 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): |
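format_lookup_table.cpp resolves a texture descriptor to a PixelFormat by switching over a constexpr Hash of the format and its component/sRGB fields; the new cases wire ASTC_2D_10X6 sRGB and ASTC_2D_12X10 into that switch. A condensed sketch of the packing idea only — the enum values and bit widths here are placeholders, and the real Hash folds in more component fields than shown:

    #include <cstdint>

    enum class TextureFormat : std::uint32_t { ASTC_2D_10X6 = 0x79 };  // value is illustrative
    enum class ComponentType : std::uint32_t { UNORM = 1 };

    // Pack the distinguishing fields into one integer so a single switch can map
    // a descriptor to a pixel format.
    constexpr std::uint32_t Hash(TextureFormat format, ComponentType red, bool is_srgb) {
        return static_cast<std::uint32_t>(format) |
               (static_cast<std::uint32_t>(red) << 8) |
               (static_cast<std::uint32_t>(is_srgb) << 12);
    }

    static_assert(Hash(TextureFormat::ASTC_2D_10X6, ComponentType::UNORM, true) !=
                  Hash(TextureFormat::ASTC_2D_10X6, ComponentType::UNORM, false));

    int main() {
        return 0;
    }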
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index 30f72361d..6279d8e9e 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -46,7 +46,7 @@ std::string Name(const ImageBase& image) { | |||
| 46 | return "Invalid"; | 46 | return "Invalid"; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | std::string Name(const ImageViewBase& image_view) { | 49 | std::string Name(const ImageViewBase& image_view, GPUVAddr addr) { |
| 50 | const u32 width = image_view.size.width; | 50 | const u32 width = image_view.size.width; |
| 51 | const u32 height = image_view.size.height; | 51 | const u32 height = image_view.size.height; |
| 52 | const u32 depth = image_view.size.depth; | 52 | const u32 depth = image_view.size.depth; |
| @@ -56,23 +56,25 @@ std::string Name(const ImageViewBase& image_view) { | |||
| 56 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | 56 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; |
| 57 | switch (image_view.type) { | 57 | switch (image_view.type) { |
| 58 | case ImageViewType::e1D: | 58 | case ImageViewType::e1D: |
| 59 | return fmt::format("ImageView 1D {}{}", width, level); | 59 | return fmt::format("ImageView 1D 0x{:X} {}{}", addr, width, level); |
| 60 | case ImageViewType::e2D: | 60 | case ImageViewType::e2D: |
| 61 | return fmt::format("ImageView 2D {}x{}{}", width, height, level); | 61 | return fmt::format("ImageView 2D 0x{:X} {}x{}{}", addr, width, height, level); |
| 62 | case ImageViewType::Cube: | 62 | case ImageViewType::Cube: |
| 63 | return fmt::format("ImageView Cube {}x{}{}", width, height, level); | 63 | return fmt::format("ImageView Cube 0x{:X} {}x{}{}", addr, width, height, level); |
| 64 | case ImageViewType::e3D: | 64 | case ImageViewType::e3D: |
| 65 | return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); | 65 | return fmt::format("ImageView 3D 0x{:X} {}x{}x{}{}", addr, width, height, depth, level); |
| 66 | case ImageViewType::e1DArray: | 66 | case ImageViewType::e1DArray: |
| 67 | return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); | 67 | return fmt::format("ImageView 1DArray 0x{:X} {}{}|{}", addr, width, level, num_layers); |
| 68 | case ImageViewType::e2DArray: | 68 | case ImageViewType::e2DArray: |
| 69 | return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); | 69 | return fmt::format("ImageView 2DArray 0x{:X} {}x{}{}|{}", addr, width, height, level, |
| 70 | num_layers); | ||
| 70 | case ImageViewType::CubeArray: | 71 | case ImageViewType::CubeArray: |
| 71 | return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); | 72 | return fmt::format("ImageView CubeArray 0x{:X} {}x{}{}|{}", addr, width, height, level, |
| 73 | num_layers); | ||
| 72 | case ImageViewType::Rect: | 74 | case ImageViewType::Rect: |
| 73 | return fmt::format("ImageView Rect {}x{}{}", width, height, level); | 75 | return fmt::format("ImageView Rect 0x{:X} {}x{}{}", addr, width, height, level); |
| 74 | case ImageViewType::Buffer: | 76 | case ImageViewType::Buffer: |
| 75 | return fmt::format("BufferView {}", width); | 77 | return fmt::format("BufferView 0x{:X} {}", addr, width); |
| 76 | } | 78 | } |
| 77 | return "Invalid"; | 79 | return "Invalid"; |
| 78 | } | 80 | } |
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index f1f0a057b..9ee57a076 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -179,6 +179,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str | |||
| 179 | return "ASTC_2D_6X6_SRGB"; | 179 | return "ASTC_2D_6X6_SRGB"; |
| 180 | case PixelFormat::ASTC_2D_10X6_UNORM: | 180 | case PixelFormat::ASTC_2D_10X6_UNORM: |
| 181 | return "ASTC_2D_10X6_UNORM"; | 181 | return "ASTC_2D_10X6_UNORM"; |
| 182 | case PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 183 | return "ASTC_2D_10X6_SRGB"; | ||
| 182 | case PixelFormat::ASTC_2D_10X5_UNORM: | 184 | case PixelFormat::ASTC_2D_10X5_UNORM: |
| 183 | return "ASTC_2D_10X5_UNORM"; | 185 | return "ASTC_2D_10X5_UNORM"; |
| 184 | case PixelFormat::ASTC_2D_10X5_SRGB: | 186 | case PixelFormat::ASTC_2D_10X5_SRGB: |
| @@ -187,6 +189,10 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str | |||
| 187 | return "ASTC_2D_10X10_UNORM"; | 189 | return "ASTC_2D_10X10_UNORM"; |
| 188 | case PixelFormat::ASTC_2D_10X10_SRGB: | 190 | case PixelFormat::ASTC_2D_10X10_SRGB: |
| 189 | return "ASTC_2D_10X10_SRGB"; | 191 | return "ASTC_2D_10X10_SRGB"; |
| 192 | case PixelFormat::ASTC_2D_12X10_UNORM: | ||
| 193 | return "ASTC_2D_12X10_UNORM"; | ||
| 194 | case PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 195 | return "ASTC_2D_12X10_SRGB"; | ||
| 190 | case PixelFormat::ASTC_2D_12X12_UNORM: | 196 | case PixelFormat::ASTC_2D_12X12_UNORM: |
| 191 | return "ASTC_2D_12X12_UNORM"; | 197 | return "ASTC_2D_12X12_UNORM"; |
| 192 | case PixelFormat::ASTC_2D_12X12_SRGB: | 198 | case PixelFormat::ASTC_2D_12X12_SRGB: |
| @@ -268,7 +274,7 @@ struct RenderTargets; | |||
| 268 | 274 | ||
| 269 | [[nodiscard]] std::string Name(const ImageBase& image); | 275 | [[nodiscard]] std::string Name(const ImageBase& image); |
| 270 | 276 | ||
| 271 | [[nodiscard]] std::string Name(const ImageViewBase& image_view); | 277 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, GPUVAddr addr); |
| 272 | 278 | ||
| 273 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | 279 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); |
| 274 | 280 | ||
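The formatter.h hunks extend yuzu's fmt::formatter<PixelFormat> specialization with names for the newly added formats. For reference, the general shape of such a specialization, shown here with a stand-in enum rather than the real PixelFormat:

    #include <fmt/format.h>

    enum class DemoFormat { ASTC_2D_10X6_SRGB, ASTC_2D_12X10_UNORM };

    template <>
    struct fmt::formatter<DemoFormat> : fmt::formatter<fmt::string_view> {
        // parse() is inherited from formatter<string_view>.
        auto format(DemoFormat value, fmt::format_context& ctx) const {
            fmt::string_view name = "Invalid";
            switch (value) {
            case DemoFormat::ASTC_2D_10X6_SRGB:
                name = "ASTC_2D_10X6_SRGB";
                break;
            case DemoFormat::ASTC_2D_12X10_UNORM:
                name = "ASTC_2D_12X10_UNORM";
                break;
            }
            return fmt::formatter<fmt::string_view>::format(name, ctx);
        }
    };

    int main() {
        fmt::print("{}\n", DemoFormat::ASTC_2D_12X10_UNORM);
    }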
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 04fb84bfa..bcad40353 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -16,8 +16,8 @@ | |||
| 16 | namespace VideoCommon { | 16 | namespace VideoCommon { |
| 17 | 17 | ||
| 18 | ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | 18 | ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, |
| 19 | ImageId image_id_) | 19 | ImageId image_id_, GPUVAddr addr) |
| 20 | : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, | 20 | : image_id{image_id_}, gpu_addr{addr}, format{info.format}, type{info.type}, range{info.range}, |
| 21 | size{ | 21 | size{ |
| 22 | .width = std::max(image_info.size.width >> range.base.level, 1u), | 22 | .width = std::max(image_info.size.width >> range.base.level, 1u), |
| 23 | .height = std::max(image_info.size.height >> range.base.level, 1u), | 23 | .height = std::max(image_info.size.height >> range.base.level, 1u), |
| @@ -35,8 +35,8 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i | |||
| 35 | } | 35 | } |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) | 38 | ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr) |
| 39 | : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, | 39 | : image_id{NULL_IMAGE_ID}, gpu_addr{addr}, format{info.format}, type{ImageViewType::Buffer}, |
| 40 | size{ | 40 | size{ |
| 41 | .width = info.size.width, | 41 | .width = info.size.width, |
| 42 | .height = 1, | 42 | .height = 1, |
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 69c9776e7..a25ae1d4a 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -24,9 +24,9 @@ enum class ImageViewFlagBits : u16 { | |||
| 24 | DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) | 24 | DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) |
| 25 | 25 | ||
| 26 | struct ImageViewBase { | 26 | struct ImageViewBase { |
| 27 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | 27 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id, |
| 28 | ImageId image_id); | 28 | GPUVAddr addr); |
| 29 | explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); | 29 | explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr); |
| 30 | explicit ImageViewBase(const NullImageViewParams&); | 30 | explicit ImageViewBase(const NullImageViewParams&); |
| 31 | 31 | ||
| 32 | [[nodiscard]] bool IsBuffer() const noexcept { | 32 | [[nodiscard]] bool IsBuffer() const noexcept { |
| @@ -34,6 +34,7 @@ struct ImageViewBase { | |||
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | ImageId image_id{}; | 36 | ImageId image_id{}; |
| 37 | GPUVAddr gpu_addr = 0; | ||
| 37 | PixelFormat format{}; | 38 | PixelFormat format{}; |
| 38 | ImageViewType type{}; | 39 | ImageViewType type{}; |
| 39 | SubresourceRange range; | 40 | SubresourceRange range; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a1c2cc1d8..b5297e76b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -1,9 +1,10 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <unordered_set> | 6 | #include <unordered_set> |
| 7 | #include <boost/container/small_vector.hpp> | ||
| 7 | 8 | ||
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 9 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| @@ -17,15 +18,10 @@ | |||
| 17 | 18 | ||
| 18 | namespace VideoCommon { | 19 | namespace VideoCommon { |
| 19 | 20 | ||
| 20 | using Tegra::Texture::SwizzleSource; | ||
| 21 | using Tegra::Texture::TextureType; | ||
| 22 | using Tegra::Texture::TICEntry; | 21 | using Tegra::Texture::TICEntry; |
| 23 | using Tegra::Texture::TSCEntry; | 22 | using Tegra::Texture::TSCEntry; |
| 24 | using VideoCore::Surface::GetFormatType; | 23 | using VideoCore::Surface::GetFormatType; |
| 25 | using VideoCore::Surface::IsCopyCompatible; | ||
| 26 | using VideoCore::Surface::PixelFormat; | 24 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 28 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 29 | using VideoCore::Surface::SurfaceType; | 25 | using VideoCore::Surface::SurfaceType; |
| 30 | using namespace Common::Literals; | 26 | using namespace Common::Literals; |
| 31 | 27 | ||
| @@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() { | |||
| 143 | runtime.TickFrame(); | 139 | runtime.TickFrame(); |
| 144 | critical_gc = 0; | 140 | critical_gc = 0; |
| 145 | ++frame_tick; | 141 | ++frame_tick; |
| 142 | |||
| 143 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 144 | for (auto& buffer : async_buffers_death_ring) { | ||
| 145 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 146 | } | ||
| 147 | async_buffers_death_ring.clear(); | ||
| 148 | } | ||
| 146 | } | 149 | } |
| 147 | 150 | ||
| 148 | template <class P> | 151 | template <class P> |
| @@ -661,25 +664,39 @@ template <class P> | |||
| 661 | void TextureCache<P>::CommitAsyncFlushes() { | 664 | void TextureCache<P>::CommitAsyncFlushes() { |
| 662 | // This is intentionally passing the value by copy | 665 | // This is intentionally passing the value by copy |
| 663 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 666 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 664 | const std::span<const ImageId> download_ids = uncommitted_downloads; | 667 | auto& download_ids = uncommitted_downloads; |
| 665 | if (download_ids.empty()) { | 668 | if (download_ids.empty()) { |
| 666 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 669 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 667 | uncommitted_downloads.clear(); | 670 | uncommitted_downloads.clear(); |
| 668 | async_buffers.emplace_back(std::optional<AsyncBuffer>{}); | 671 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 672 | uncommitted_async_buffers.clear(); | ||
| 669 | return; | 673 | return; |
| 670 | } | 674 | } |
| 671 | size_t total_size_bytes = 0; | 675 | size_t total_size_bytes = 0; |
| 672 | for (const ImageId image_id : download_ids) { | 676 | size_t last_async_buffer_id = uncommitted_async_buffers.size(); |
| 673 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 677 | bool any_none_dma = false; |
| 678 | for (PendingDownload& download_info : download_ids) { | ||
| 679 | if (download_info.is_swizzle) { | ||
| 680 | total_size_bytes += | ||
| 681 | Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); | ||
| 682 | any_none_dma = true; | ||
| 683 | download_info.async_buffer_id = last_async_buffer_id; | ||
| 684 | } | ||
| 674 | } | 685 | } |
| 675 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); | 686 | if (any_none_dma) { |
| 676 | for (const ImageId image_id : download_ids) { | 687 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 677 | Image& image = slot_images[image_id]; | 688 | for (const PendingDownload& download_info : download_ids) { |
| 678 | const auto copies = FullDownloadCopies(image.info); | 689 | if (download_info.is_swizzle) { |
| 679 | image.DownloadMemory(download_map, copies); | 690 | Image& image = slot_images[download_info.object_id]; |
| 680 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | 691 | const auto copies = FullDownloadCopies(image.info); |
| 692 | image.DownloadMemory(download_map, copies); | ||
| 693 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | ||
| 694 | } | ||
| 695 | } | ||
| 696 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 681 | } | 697 | } |
| 682 | async_buffers.emplace_back(download_map); | 698 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 699 | uncommitted_async_buffers.clear(); | ||
| 683 | } | 700 | } |
| 684 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 701 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 685 | uncommitted_downloads.clear(); | 702 | uncommitted_downloads.clear(); |
| @@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 691 | return; | 708 | return; |
| 692 | } | 709 | } |
| 693 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 710 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 694 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 711 | const auto& download_ids = committed_downloads.front(); |
| 695 | if (download_ids.empty()) { | 712 | if (download_ids.empty()) { |
| 696 | committed_downloads.pop_front(); | 713 | committed_downloads.pop_front(); |
| 697 | async_buffers.pop_front(); | 714 | async_buffers.pop_front(); |
| 698 | return; | 715 | return; |
| 699 | } | 716 | } |
| 700 | auto download_map = *async_buffers.front(); | 717 | auto download_map = std::move(async_buffers.front()); |
| 701 | std::span<u8> download_span = download_map.mapped_span; | ||
| 702 | for (size_t i = download_ids.size(); i > 0; i--) { | 718 | for (size_t i = download_ids.size(); i > 0; i--) { |
| 703 | const ImageBase& image = slot_images[download_ids[i - 1]]; | 719 | auto& download_info = download_ids[i - 1]; |
| 704 | const auto copies = FullDownloadCopies(image.info); | 720 | auto& download_buffer = download_map[download_info.async_buffer_id]; |
| 705 | download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); | 721 | if (download_info.is_swizzle) { |
| 706 | std::span<u8> download_span_alt = download_span.subspan(download_map.offset); | 722 | const ImageBase& image = slot_images[download_info.object_id]; |
| 707 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, | 723 | const auto copies = FullDownloadCopies(image.info); |
| 708 | swizzle_data_buffer); | 724 | download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); |
| 725 | std::span<u8> download_span = | ||
| 726 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 727 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||
| 728 | swizzle_data_buffer); | ||
| 729 | } else { | ||
| 730 | const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; | ||
| 731 | std::span<u8> download_span = | ||
| 732 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 733 | gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), | ||
| 734 | buffer_info.size); | ||
| 735 | slot_buffer_downloads.erase(download_info.object_id); | ||
| 736 | } | ||
| 737 | } | ||
| 738 | for (auto& download_buffer : download_map) { | ||
| 739 | async_buffers_death_ring.emplace_back(download_buffer); | ||
| 709 | } | 740 | } |
| 710 | runtime.FreeDeferredStagingBuffer(download_map); | ||
| 711 | committed_downloads.pop_front(); | 741 | committed_downloads.pop_front(); |
| 712 | async_buffers.pop_front(); | 742 | async_buffers.pop_front(); |
| 713 | } else { | 743 | } else { |
| 714 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 744 | const auto& download_ids = committed_downloads.front(); |
| 715 | if (download_ids.empty()) { | 745 | if (download_ids.empty()) { |
| 716 | committed_downloads.pop_front(); | 746 | committed_downloads.pop_front(); |
| 717 | return; | 747 | return; |
| 718 | } | 748 | } |
| 719 | size_t total_size_bytes = 0; | 749 | size_t total_size_bytes = 0; |
| 720 | for (const ImageId image_id : download_ids) { | 750 | for (const PendingDownload& download_info : download_ids) { |
| 721 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 751 | if (download_info.is_swizzle) { |
| 752 | total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; | ||
| 753 | } | ||
| 722 | } | 754 | } |
| 723 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 755 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 724 | const size_t original_offset = download_map.offset; | 756 | const size_t original_offset = download_map.offset; |
| 725 | for (const ImageId image_id : download_ids) { | 757 | for (const PendingDownload& download_info : download_ids) { |
| 726 | Image& image = slot_images[image_id]; | 758 | if (!download_info.is_swizzle) { |
| 759 | continue; | ||
| 760 | } | ||
| 761 | Image& image = slot_images[download_info.object_id]; | ||
| 727 | const auto copies = FullDownloadCopies(image.info); | 762 | const auto copies = FullDownloadCopies(image.info); |
| 728 | image.DownloadMemory(download_map, copies); | 763 | image.DownloadMemory(download_map, copies); |
| 729 | download_map.offset += image.unswizzled_size_bytes; | 764 | download_map.offset += image.unswizzled_size_bytes; |
| @@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 732 | runtime.Finish(); | 767 | runtime.Finish(); |
| 733 | download_map.offset = original_offset; | 768 | download_map.offset = original_offset; |
| 734 | std::span<u8> download_span = download_map.mapped_span; | 769 | std::span<u8> download_span = download_map.mapped_span; |
| 735 | for (const ImageId image_id : download_ids) { | 770 | for (const PendingDownload& download_info : download_ids) { |
| 736 | const ImageBase& image = slot_images[image_id]; | 771 | if (!download_info.is_swizzle) { |
| 772 | continue; | ||
| 773 | } | ||
| 774 | const ImageBase& image = slot_images[download_info.object_id]; | ||
| 737 | const auto copies = FullDownloadCopies(image.info); | 775 | const auto copies = FullDownloadCopies(image.info); |
| 738 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | 776 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, |
| 739 | swizzle_data_buffer); | 777 | swizzle_data_buffer); |
| @@ -834,6 +872,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm | |||
| 834 | } | 872 | } |
| 835 | 873 | ||
| 836 | template <class P> | 874 | template <class P> |
| 875 | void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image, | ||
| 876 | typename TextureCache<P>::BufferType buffer, | ||
| 877 | size_t buffer_offset, | ||
| 878 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 879 | GPUVAddr address, size_t size) { | ||
| 880 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 881 | const BufferDownload new_buffer_download{address, size}; | ||
| 882 | auto slot = slot_buffer_downloads.insert(new_buffer_download); | ||
| 883 | const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; | ||
| 884 | uncommitted_downloads.emplace_back(new_download); | ||
| 885 | auto download_map = runtime.DownloadStagingBuffer(size, true); | ||
| 886 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 887 | std::array buffers{ | ||
| 888 | buffer, | ||
| 889 | download_map.buffer, | ||
| 890 | }; | ||
| 891 | std::array<u64, 2> buffer_offsets{ | ||
| 892 | buffer_offset, | ||
| 893 | download_map.offset, | ||
| 894 | }; | ||
| 895 | image->DownloadMemory(buffers, buffer_offsets, copies); | ||
| 896 | } else { | ||
| 897 | image->DownloadMemory(buffer, buffer_offset, copies); | ||
| 898 | } | ||
| 899 | } | ||
| 900 | |||
| 901 | template <class P> | ||
| 837 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | 902 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { |
| 838 | if (False(image.flags & ImageFlagBits::CpuModified)) { | 903 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 839 | // Only upload modified images | 904 | // Only upload modified images |
| @@ -2215,7 +2280,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) | |||
| 2215 | if (new_id) { | 2280 | if (new_id) { |
| 2216 | const ImageViewBase& old_view = slot_image_views[new_id]; | 2281 | const ImageViewBase& old_view = slot_image_views[new_id]; |
| 2217 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | 2282 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { |
| 2218 | uncommitted_downloads.push_back(old_view.image_id); | 2283 | const PendingDownload new_download{true, 0, old_view.image_id}; |
| 2284 | uncommitted_downloads.emplace_back(new_download); | ||
| 2219 | } | 2285 | } |
| 2220 | } | 2286 | } |
| 2221 | *old_id = new_id; | 2287 | *old_id = new_id; |
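The texture cache changes above replace the single optional AsyncBuffer per flush with per-flush vectors plus a death ring: PopAsyncFlushes parks consumed staging buffers instead of freeing them on the spot, and TickFrame releases everything parked once the frame has ticked over. A condensed, self-contained sketch of that lifetime handling, with placeholder Runtime and AsyncBuffer types standing in for the real ones:

    #include <cstdio>
    #include <deque>
    #include <vector>

    struct AsyncBuffer { int id; };

    struct Runtime {
        void FreeDeferredStagingBuffer(AsyncBuffer& b) { std::printf("free %d\n", b.id); }
    };

    struct Cache {
        Runtime runtime;
        std::deque<AsyncBuffer> async_buffers_death_ring;

        // PopAsyncFlushes: finished staging buffers are parked instead of freed
        // immediately, so work submitted this frame can still read them.
        void RetireBuffers(std::vector<AsyncBuffer>& finished) {
            for (auto& buffer : finished) {
                async_buffers_death_ring.emplace_back(buffer);
            }
            finished.clear();
        }

        // TickFrame: everything parked during the previous frame is now safe to release.
        void TickFrame() {
            for (auto& buffer : async_buffers_death_ring) {
                runtime.FreeDeferredStagingBuffer(buffer);
            }
            async_buffers_death_ring.clear();
        }
    };

    int main() {
        Cache cache;
        std::vector<AsyncBuffer> frame_buffers{{1}, {2}};
        cache.RetireBuffers(frame_buffers);
        cache.TickFrame();
    }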
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5a5b4179c..758b7e212 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| @@ -40,14 +40,9 @@ struct ChannelState; | |||
| 40 | 40 | ||
| 41 | namespace VideoCommon { | 41 | namespace VideoCommon { |
| 42 | 42 | ||
| 43 | using Tegra::Texture::SwizzleSource; | ||
| 44 | using Tegra::Texture::TICEntry; | 43 | using Tegra::Texture::TICEntry; |
| 45 | using Tegra::Texture::TSCEntry; | 44 | using Tegra::Texture::TSCEntry; |
| 46 | using VideoCore::Surface::GetFormatType; | ||
| 47 | using VideoCore::Surface::IsCopyCompatible; | ||
| 48 | using VideoCore::Surface::PixelFormat; | 45 | using VideoCore::Surface::PixelFormat; |
| 49 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 50 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 51 | using namespace Common::Literals; | 46 | using namespace Common::Literals; |
| 52 | 47 | ||
| 53 | struct ImageViewInOut { | 48 | struct ImageViewInOut { |
| @@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
| 119 | using Sampler = typename P::Sampler; | 114 | using Sampler = typename P::Sampler; |
| 120 | using Framebuffer = typename P::Framebuffer; | 115 | using Framebuffer = typename P::Framebuffer; |
| 121 | using AsyncBuffer = typename P::AsyncBuffer; | 116 | using AsyncBuffer = typename P::AsyncBuffer; |
| 117 | using BufferType = typename P::BufferType; | ||
| 122 | 118 | ||
| 123 | struct BlitImages { | 119 | struct BlitImages { |
| 124 | ImageId dst_id; | 120 | ImageId dst_id; |
| @@ -215,6 +211,10 @@ public: | |||
| 215 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, | 211 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, |
| 216 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); | 212 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); |
| 217 | 213 | ||
| 214 | void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, | ||
| 215 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 216 | GPUVAddr address = 0, size_t size = 0); | ||
| 217 | |||
| 218 | /// Return true when a CPU region is modified from the GPU | 218 | /// Return true when a CPU region is modified from the GPU |
| 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 220 | 220 | ||
| @@ -424,17 +424,32 @@ private: | |||
| 424 | u64 critical_memory; | 424 | u64 critical_memory; |
| 425 | size_t critical_gc; | 425 | size_t critical_gc; |
| 426 | 426 | ||
| 427 | struct BufferDownload { | ||
| 428 | GPUVAddr address; | ||
| 429 | size_t size; | ||
| 430 | }; | ||
| 431 | |||
| 432 | struct PendingDownload { | ||
| 433 | bool is_swizzle; | ||
| 434 | size_t async_buffer_id; | ||
| 435 | SlotId object_id; | ||
| 436 | }; | ||
| 437 | |||
| 427 | SlotVector<Image> slot_images; | 438 | SlotVector<Image> slot_images; |
| 428 | SlotVector<ImageMapView> slot_map_views; | 439 | SlotVector<ImageMapView> slot_map_views; |
| 429 | SlotVector<ImageView> slot_image_views; | 440 | SlotVector<ImageView> slot_image_views; |
| 430 | SlotVector<ImageAlloc> slot_image_allocs; | 441 | SlotVector<ImageAlloc> slot_image_allocs; |
| 431 | SlotVector<Sampler> slot_samplers; | 442 | SlotVector<Sampler> slot_samplers; |
| 432 | SlotVector<Framebuffer> slot_framebuffers; | 443 | SlotVector<Framebuffer> slot_framebuffers; |
| 444 | SlotVector<BufferDownload> slot_buffer_downloads; | ||
| 433 | 445 | ||
| 434 | // TODO: This data structure is not optimal and it should be reworked | 446 | // TODO: This data structure is not optimal and it should be reworked |
| 435 | std::vector<ImageId> uncommitted_downloads; | 447 | |
| 436 | std::deque<std::vector<ImageId>> committed_downloads; | 448 | std::vector<PendingDownload> uncommitted_downloads; |
| 437 | std::deque<std::optional<AsyncBuffer>> async_buffers; | 449 | std::deque<std::vector<PendingDownload>> committed_downloads; |
| 450 | std::vector<AsyncBuffer> uncommitted_async_buffers; | ||
| 451 | std::deque<std::vector<AsyncBuffer>> async_buffers; | ||
| 452 | std::deque<AsyncBuffer> async_buffers_death_ring; | ||
| 438 | 453 | ||
| 439 | struct LRUItemParams { | 454 | struct LRUItemParams { |
| 440 | using ObjectType = ImageId; | 455 | using ObjectType = ImageId; |
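texture_cache_base.h introduces the PendingDownload record that lets the same committed-download queue carry both image flushes (is_swizzle = true, object_id indexing slot_images) and raw buffer downloads (is_swizzle = false, object_id indexing slot_buffer_downloads). A minimal sketch of that tagged dispatch, with the cache's slot vectors reduced to plain std::vector and the work reduced to prints:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    using SlotId = std::size_t;

    struct PendingDownload {
        bool is_swizzle;             // true: image flush, false: raw buffer download
        std::size_t async_buffer_id;
        SlotId object_id;
    };

    struct Image { const char* name; };
    struct BufferDownload { unsigned long long address; std::size_t size; };

    // Stand-ins for the cache's slot vectors.
    std::vector<Image> slot_images{{"render target 0"}};
    std::vector<BufferDownload> slot_buffer_downloads{{0x100000ull, 4096}};

    void Process(const PendingDownload& download) {
        if (download.is_swizzle) {
            // Swizzle the downloaded texels back into guest memory.
            std::printf("swizzle image '%s'\n", slot_images[download.object_id].name);
        } else {
            // Write the staging data straight back to the guest buffer.
            const BufferDownload& info = slot_buffer_downloads[download.object_id];
            std::printf("write %zu bytes to 0x%llX\n", info.size, info.address);
        }
    }

    int main() {
        Process({true, 0, 0});
        Process({false, 0, 0});
    }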
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6f288b3f8..6ffca2af2 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -617,7 +617,9 @@ bool Device::ShouldBoostClocks() const { | |||
| 617 | 617 | ||
| 618 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; | 618 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; |
| 619 | 619 | ||
| 620 | return validated_driver && !is_steam_deck; | 620 | const bool is_debugging = this->HasDebuggingToolAttached(); |
| 621 | |||
| 622 | return validated_driver && !is_steam_deck && !is_debugging; | ||
| 621 | } | 623 | } |
| 622 | 624 | ||
| 623 | bool Device::GetSuitability(bool requires_swapchain) { | 625 | bool Device::GetSuitability(bool requires_swapchain) { |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 7d5018151..5f1c63ff9 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/settings.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 15 | ||
| 15 | // Define all features which may be used by the implementation here. | 16 | // Define all features which may be used by the implementation here. |
| @@ -510,7 +511,7 @@ public: | |||
| 510 | 511 | ||
| 511 | /// Returns true when a known debugging tool is attached. | 512 | /// Returns true when a known debugging tool is attached. |
| 512 | bool HasDebuggingToolAttached() const { | 513 | bool HasDebuggingToolAttached() const { |
| 513 | return has_renderdoc || has_nsight_graphics; | 514 | return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); |
| 514 | } | 515 | } |
| 515 | 516 | ||
| 516 | /// Returns true when the device does not properly support cube compatibility. | 517 | /// Returns true when the device does not properly support cube compatibility. |
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp index fa9bafa20..c34599365 100644 --- a/src/video_core/vulkan_common/vulkan_surface.cpp +++ b/src/video_core/vulkan_common/vulkan_surface.cpp | |||
| @@ -23,10 +23,10 @@ | |||
| 23 | 23 | ||
| 24 | namespace Vulkan { | 24 | namespace Vulkan { |
| 25 | 25 | ||
| 26 | vk::SurfaceKHR CreateSurface(const vk::Instance& instance, | 26 | vk::SurfaceKHR CreateSurface( |
| 27 | const Core::Frontend::EmuWindow& emu_window) { | 27 | const vk::Instance& instance, |
| 28 | [[maybe_unused]] const Core::Frontend::EmuWindow::WindowSystemInfo& window_info) { | ||
| 28 | [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); | 29 | [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); |
| 29 | [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo(); | ||
| 30 | VkSurfaceKHR unsafe_surface = nullptr; | 30 | VkSurfaceKHR unsafe_surface = nullptr; |
| 31 | 31 | ||
| 32 | #ifdef _WIN32 | 32 | #ifdef _WIN32 |
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h index 5725143e6..5e18c06c4 100644 --- a/src/video_core/vulkan_common/vulkan_surface.h +++ b/src/video_core/vulkan_common/vulkan_surface.h | |||
| @@ -3,15 +3,12 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "core/frontend/emu_window.h" | ||
| 6 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 7 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 7 | 8 | ||
| 8 | namespace Core::Frontend { | ||
| 9 | class EmuWindow; | ||
| 10 | } | ||
| 11 | |||
| 12 | namespace Vulkan { | 9 | namespace Vulkan { |
| 13 | 10 | ||
| 14 | [[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance, | 11 | [[nodiscard]] vk::SurfaceKHR CreateSurface( |
| 15 | const Core::Frontend::EmuWindow& emu_window); | 12 | const vk::Instance& instance, const Core::Frontend::EmuWindow::WindowSystemInfo& window_info); |
| 16 | 13 | ||
| 17 | } // namespace Vulkan | 14 | } // namespace Vulkan |
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 0f8c1e6a6..2d7b9ab65 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt | |||
| @@ -189,6 +189,8 @@ add_executable(yuzu | |||
| 189 | multiplayer/state.h | 189 | multiplayer/state.h |
| 190 | multiplayer/validation.h | 190 | multiplayer/validation.h |
| 191 | precompiled_headers.h | 191 | precompiled_headers.h |
| 192 | qt_common.cpp | ||
| 193 | qt_common.h | ||
| 192 | startup_checks.cpp | 194 | startup_checks.cpp |
| 193 | startup_checks.h | 195 | startup_checks.h |
| 194 | uisettings.cpp | 196 | uisettings.cpp |
diff --git a/src/yuzu/applets/qt_profile_select.cpp b/src/yuzu/applets/qt_profile_select.cpp index 2448e46b6..1f3f23038 100644 --- a/src/yuzu/applets/qt_profile_select.cpp +++ b/src/yuzu/applets/qt_profile_select.cpp | |||
| @@ -95,6 +95,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog( | |||
| 95 | scroll_area->setLayout(layout); | 95 | scroll_area->setLayout(layout); |
| 96 | 96 | ||
| 97 | connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser); | 97 | connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser); |
| 98 | connect(tree_view, &QTreeView::doubleClicked, this, &QtProfileSelectionDialog::accept); | ||
| 98 | connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, | 99 | connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, |
| 99 | [this](Qt::Key key) { | 100 | [this](Qt::Key key) { |
| 100 | if (!this->isActiveWindow()) { | 101 | if (!this->isActiveWindow()) { |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 4c7bf28d8..59d226113 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -1,36 +1,48 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2014 Citra Emulator Project | 1 | // SPDX-FileCopyrightText: 2014 Citra Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstring> | ||
| 8 | #include <string> | ||
| 9 | #include <tuple> | ||
| 10 | #include <type_traits> | ||
| 4 | #include <glad/glad.h> | 11 | #include <glad/glad.h> |
| 5 | 12 | ||
| 6 | #include <QApplication> | 13 | #include <QtCore/qglobal.h> |
| 7 | #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) && YUZU_USE_QT_MULTIMEDIA | 14 | #if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) && YUZU_USE_QT_MULTIMEDIA |
| 15 | #include <QCamera> | ||
| 8 | #include <QCameraImageCapture> | 16 | #include <QCameraImageCapture> |
| 9 | #include <QCameraInfo> | 17 | #include <QCameraInfo> |
| 10 | #endif | 18 | #endif |
| 19 | #include <QCursor> | ||
| 20 | #include <QEvent> | ||
| 21 | #include <QGuiApplication> | ||
| 11 | #include <QHBoxLayout> | 22 | #include <QHBoxLayout> |
| 23 | #include <QKeyEvent> | ||
| 24 | #include <QLayout> | ||
| 25 | #include <QList> | ||
| 12 | #include <QMessageBox> | 26 | #include <QMessageBox> |
| 13 | #include <QPainter> | ||
| 14 | #include <QScreen> | 27 | #include <QScreen> |
| 15 | #include <QString> | 28 | #include <QSize> |
| 16 | #include <QStringList> | 29 | #include <QStringLiteral> |
| 30 | #include <QSurfaceFormat> | ||
| 31 | #include <QTimer> | ||
| 17 | #include <QWindow> | 32 | #include <QWindow> |
| 33 | #include <QtCore/qobjectdefs.h> | ||
| 18 | 34 | ||
| 19 | #ifdef HAS_OPENGL | 35 | #ifdef HAS_OPENGL |
| 20 | #include <QOffscreenSurface> | 36 | #include <QOffscreenSurface> |
| 21 | #include <QOpenGLContext> | 37 | #include <QOpenGLContext> |
| 22 | #endif | 38 | #endif |
| 23 | 39 | ||
| 24 | #if !defined(WIN32) | ||
| 25 | #include <qpa/qplatformnativeinterface.h> | ||
| 26 | #endif | ||
| 27 | |||
| 28 | #include <fmt/format.h> | ||
| 29 | |||
| 30 | #include "common/assert.h" | ||
| 31 | #include "common/microprofile.h" | 40 | #include "common/microprofile.h" |
| 41 | #include "common/polyfill_thread.h" | ||
| 32 | #include "common/scm_rev.h" | 42 | #include "common/scm_rev.h" |
| 33 | #include "common/settings.h" | 43 | #include "common/settings.h" |
| 44 | #include "common/settings_input.h" | ||
| 45 | #include "common/thread.h" | ||
| 34 | #include "core/core.h" | 46 | #include "core/core.h" |
| 35 | #include "core/cpu_manager.h" | 47 | #include "core/cpu_manager.h" |
| 36 | #include "core/frontend/framebuffer_layout.h" | 48 | #include "core/frontend/framebuffer_layout.h" |
| @@ -40,11 +52,16 @@ | |||
| 40 | #include "input_common/drivers/tas_input.h" | 52 | #include "input_common/drivers/tas_input.h" |
| 41 | #include "input_common/drivers/touch_screen.h" | 53 | #include "input_common/drivers/touch_screen.h" |
| 42 | #include "input_common/main.h" | 54 | #include "input_common/main.h" |
| 55 | #include "video_core/gpu.h" | ||
| 56 | #include "video_core/rasterizer_interface.h" | ||
| 43 | #include "video_core/renderer_base.h" | 57 | #include "video_core/renderer_base.h" |
| 44 | #include "yuzu/bootmanager.h" | 58 | #include "yuzu/bootmanager.h" |
| 45 | #include "yuzu/main.h" | 59 | #include "yuzu/main.h" |
| 60 | #include "yuzu/qt_common.h" | ||
| 46 | 61 | ||
| 47 | static Core::Frontend::WindowSystemType GetWindowSystemType(); | 62 | class QObject; |
| 63 | class QPaintEngine; | ||
| 64 | class QSurface; | ||
| 48 | 65 | ||
| 49 | EmuThread::EmuThread(Core::System& system) : m_system{system} {} | 66 | EmuThread::EmuThread(Core::System& system) : m_system{system} {} |
| 50 | 67 | ||
| @@ -154,7 +171,10 @@ public: | |||
| 154 | 171 | ||
| 155 | // disable vsync for any shared contexts | 172 | // disable vsync for any shared contexts |
| 156 | auto format = share_context->format(); | 173 | auto format = share_context->format(); |
| 157 | format.setSwapInterval(main_surface ? Settings::values.use_vsync.GetValue() : 0); | 174 | const int swap_interval = |
| 175 | Settings::values.vsync_mode.GetValue() == Settings::VSyncMode::Immediate ? 0 : 1; | ||
| 176 | |||
| 177 | format.setSwapInterval(main_surface ? swap_interval : 0); | ||
| 158 | 178 | ||
| 159 | context = std::make_unique<QOpenGLContext>(); | 179 | context = std::make_unique<QOpenGLContext>(); |
| 160 | context->setShareContext(share_context); | 180 | context->setShareContext(share_context); |
| @@ -221,7 +241,7 @@ public: | |||
| 221 | explicit RenderWidget(GRenderWindow* parent) : QWidget(parent), render_window(parent) { | 241 | explicit RenderWidget(GRenderWindow* parent) : QWidget(parent), render_window(parent) { |
| 222 | setAttribute(Qt::WA_NativeWindow); | 242 | setAttribute(Qt::WA_NativeWindow); |
| 223 | setAttribute(Qt::WA_PaintOnScreen); | 243 | setAttribute(Qt::WA_PaintOnScreen); |
| 224 | if (GetWindowSystemType() == Core::Frontend::WindowSystemType::Wayland) { | 244 | if (QtCommon::GetWindowSystemType() == Core::Frontend::WindowSystemType::Wayland) { |
| 225 | setAttribute(Qt::WA_DontCreateNativeAncestors); | 245 | setAttribute(Qt::WA_DontCreateNativeAncestors); |
| 226 | } | 246 | } |
| 227 | } | 247 | } |
| @@ -259,46 +279,6 @@ struct NullRenderWidget : public RenderWidget { | |||
| 259 | explicit NullRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {} | 279 | explicit NullRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {} |
| 260 | }; | 280 | }; |
| 261 | 281 | ||
| 262 | static Core::Frontend::WindowSystemType GetWindowSystemType() { | ||
| 263 | // Determine WSI type based on Qt platform. | ||
| 264 | QString platform_name = QGuiApplication::platformName(); | ||
| 265 | if (platform_name == QStringLiteral("windows")) | ||
| 266 | return Core::Frontend::WindowSystemType::Windows; | ||
| 267 | else if (platform_name == QStringLiteral("xcb")) | ||
| 268 | return Core::Frontend::WindowSystemType::X11; | ||
| 269 | else if (platform_name == QStringLiteral("wayland")) | ||
| 270 | return Core::Frontend::WindowSystemType::Wayland; | ||
| 271 | else if (platform_name == QStringLiteral("wayland-egl")) | ||
| 272 | return Core::Frontend::WindowSystemType::Wayland; | ||
| 273 | else if (platform_name == QStringLiteral("cocoa")) | ||
| 274 | return Core::Frontend::WindowSystemType::Cocoa; | ||
| 275 | else if (platform_name == QStringLiteral("android")) | ||
| 276 | return Core::Frontend::WindowSystemType::Android; | ||
| 277 | |||
| 278 | LOG_CRITICAL(Frontend, "Unknown Qt platform {}!", platform_name.toStdString()); | ||
| 279 | return Core::Frontend::WindowSystemType::Windows; | ||
| 280 | } | ||
| 281 | |||
| 282 | static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { | ||
| 283 | Core::Frontend::EmuWindow::WindowSystemInfo wsi; | ||
| 284 | wsi.type = GetWindowSystemType(); | ||
| 285 | |||
| 286 | // Our Win32 Qt external doesn't have the private API. | ||
| 287 | #if defined(WIN32) || defined(__APPLE__) | ||
| 288 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 289 | #else | ||
| 290 | QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); | ||
| 291 | wsi.display_connection = pni->nativeResourceForWindow("display", window); | ||
| 292 | if (wsi.type == Core::Frontend::WindowSystemType::Wayland) | ||
| 293 | wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; | ||
| 294 | else | ||
| 295 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 296 | #endif | ||
| 297 | wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; | ||
| 298 | |||
| 299 | return wsi; | ||
| 300 | } | ||
| 301 | |||
| 302 | GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread_, | 282 | GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread_, |
| 303 | std::shared_ptr<InputCommon::InputSubsystem> input_subsystem_, | 283 | std::shared_ptr<InputCommon::InputSubsystem> input_subsystem_, |
| 304 | Core::System& system_) | 284 | Core::System& system_) |
| @@ -904,7 +884,7 @@ bool GRenderWindow::InitRenderTarget() { | |||
| 904 | } | 884 | } |
| 905 | 885 | ||
| 906 | // Update the Window System information with the new render target | 886 | // Update the Window System information with the new render target |
| 907 | window_info = GetWindowSystemInfo(child_widget->windowHandle()); | 887 | window_info = QtCommon::GetWindowSystemInfo(child_widget->windowHandle()); |
| 908 | 888 | ||
| 909 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | 889 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); |
| 910 | layout()->addWidget(child_widget); | 890 | layout()->addWidget(child_widget); |
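The large bootmanager.cpp hunk moves GetWindowSystemType/GetWindowSystemInfo out into yuzu/qt_common, so callers now go through QtCommon::GetWindowSystemInfo. The platform-name mapping the removed helper performed, condensed into plain C++ for reference (the real helper queries QGuiApplication::platformName() and lives in qt_common.cpp):

    #include <cstdio>
    #include <string>

    enum class WindowSystemType { Windows, X11, Wayland, Cocoa, Android };

    // Same mapping as the helper removed above; unknown platforms fall back to
    // Windows, matching the original LOG_CRITICAL path.
    WindowSystemType FromPlatformName(const std::string& platform_name) {
        if (platform_name == "windows")
            return WindowSystemType::Windows;
        if (platform_name == "xcb")
            return WindowSystemType::X11;
        if (platform_name == "wayland" || platform_name == "wayland-egl")
            return WindowSystemType::Wayland;
        if (platform_name == "cocoa")
            return WindowSystemType::Cocoa;
        if (platform_name == "android")
            return WindowSystemType::Android;
        return WindowSystemType::Windows;
    }

    int main() {
        std::printf("wayland-egl -> %d\n",
                    static_cast<int>(FromPlatformName("wayland-egl")));
    }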
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index bb4eca07f..4276be82b 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -5,27 +5,46 @@ | |||
| 5 | 5 | ||
| 6 | #include <atomic> | 6 | #include <atomic> |
| 7 | #include <condition_variable> | 7 | #include <condition_variable> |
| 8 | #include <cstddef> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <mutex> | 10 | #include <mutex> |
| 11 | #include <stop_token> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 10 | 14 | ||
| 15 | #include <QByteArray> | ||
| 11 | #include <QImage> | 16 | #include <QImage> |
| 17 | #include <QObject> | ||
| 18 | #include <QPoint> | ||
| 19 | #include <QString> | ||
| 12 | #include <QStringList> | 20 | #include <QStringList> |
| 13 | #include <QThread> | 21 | #include <QThread> |
| 14 | #include <QTouchEvent> | ||
| 15 | #include <QWidget> | 22 | #include <QWidget> |
| 23 | #include <qglobal.h> | ||
| 24 | #include <qnamespace.h> | ||
| 25 | #include <qobjectdefs.h> | ||
| 16 | 26 | ||
| 27 | #include "common/common_types.h" | ||
| 28 | #include "common/logging/log.h" | ||
| 17 | #include "common/polyfill_thread.h" | 29 | #include "common/polyfill_thread.h" |
| 18 | #include "common/thread.h" | 30 | #include "common/thread.h" |
| 19 | #include "core/frontend/emu_window.h" | 31 | #include "core/frontend/emu_window.h" |
| 20 | 32 | ||
| 21 | class GRenderWindow; | ||
| 22 | class GMainWindow; | 33 | class GMainWindow; |
| 23 | class QCamera; | 34 | class QCamera; |
| 24 | class QCameraImageCapture; | 35 | class QCameraImageCapture; |
| 36 | class QCloseEvent; | ||
| 37 | class QFocusEvent; | ||
| 25 | class QKeyEvent; | 38 | class QKeyEvent; |
| 39 | class QMouseEvent; | ||
| 40 | class QObject; | ||
| 41 | class QResizeEvent; | ||
| 42 | class QShowEvent; | ||
| 43 | class QTimer; | ||
| 44 | class QTouchEvent; | ||
| 45 | class QWheelEvent; | ||
| 26 | 46 | ||
| 27 | namespace Core { | 47 | namespace Core { |
| 28 | enum class SystemResultStatus : u32; | ||
| 29 | class System; | 48 | class System; |
| 30 | } // namespace Core | 49 | } // namespace Core |
| 31 | 50 | ||
| @@ -40,7 +59,6 @@ enum class TasState; | |||
| 40 | 59 | ||
| 41 | namespace VideoCore { | 60 | namespace VideoCore { |
| 42 | enum class LoadCallbackStage; | 61 | enum class LoadCallbackStage; |
| 43 | class RendererBase; | ||
| 44 | } // namespace VideoCore | 62 | } // namespace VideoCore |
| 45 | 63 | ||
| 46 | class EmuThread final : public QThread { | 64 | class EmuThread final : public QThread { |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index bb731276e..a85eb4687 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <QSettings> | 6 | #include <QSettings> |
| 7 | #include "common/fs/fs.h" | 7 | #include "common/fs/fs.h" |
| 8 | #include "common/fs/path_util.h" | 8 | #include "common/fs/path_util.h" |
| 9 | #include "common/settings.h" | ||
| 9 | #include "core/core.h" | 10 | #include "core/core.h" |
| 10 | #include "core/hle/service/acc/profile_manager.h" | 11 | #include "core/hle/service/acc/profile_manager.h" |
| 11 | #include "core/hle/service/hid/controllers/npad.h" | 12 | #include "core/hle/service/hid/controllers/npad.h" |
| @@ -497,7 +498,7 @@ void Config::ReadCoreValues() { | |||
| 497 | qt_config->beginGroup(QStringLiteral("Core")); | 498 | qt_config->beginGroup(QStringLiteral("Core")); |
| 498 | 499 | ||
| 499 | ReadGlobalSetting(Settings::values.use_multi_core); | 500 | ReadGlobalSetting(Settings::values.use_multi_core); |
| 500 | ReadGlobalSetting(Settings::values.use_extended_memory_layout); | 501 | ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout); |
| 501 | 502 | ||
| 502 | qt_config->endGroup(); | 503 | qt_config->endGroup(); |
| 503 | } | 504 | } |
| @@ -692,6 +693,7 @@ void Config::ReadRendererValues() { | |||
| 692 | qt_config->beginGroup(QStringLiteral("Renderer")); | 693 | qt_config->beginGroup(QStringLiteral("Renderer")); |
| 693 | 694 | ||
| 694 | ReadGlobalSetting(Settings::values.renderer_backend); | 695 | ReadGlobalSetting(Settings::values.renderer_backend); |
| 696 | ReadGlobalSetting(Settings::values.async_presentation); | ||
| 695 | ReadGlobalSetting(Settings::values.renderer_force_max_clock); | 697 | ReadGlobalSetting(Settings::values.renderer_force_max_clock); |
| 696 | ReadGlobalSetting(Settings::values.vulkan_device); | 698 | ReadGlobalSetting(Settings::values.vulkan_device); |
| 697 | ReadGlobalSetting(Settings::values.fullscreen_mode); | 699 | ReadGlobalSetting(Settings::values.fullscreen_mode); |
| @@ -708,17 +710,19 @@ void Config::ReadRendererValues() { | |||
| 708 | ReadGlobalSetting(Settings::values.nvdec_emulation); | 710 | ReadGlobalSetting(Settings::values.nvdec_emulation); |
| 709 | ReadGlobalSetting(Settings::values.accelerate_astc); | 711 | ReadGlobalSetting(Settings::values.accelerate_astc); |
| 710 | ReadGlobalSetting(Settings::values.async_astc); | 712 | ReadGlobalSetting(Settings::values.async_astc); |
| 711 | ReadGlobalSetting(Settings::values.use_vsync); | ||
| 712 | ReadGlobalSetting(Settings::values.shader_backend); | 713 | ReadGlobalSetting(Settings::values.shader_backend); |
| 713 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); | 714 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 714 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); | 715 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); |
| 715 | ReadGlobalSetting(Settings::values.use_pessimistic_flushes); | ||
| 716 | ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); | 716 | ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); |
| 717 | ReadGlobalSetting(Settings::values.bg_red); | 717 | ReadGlobalSetting(Settings::values.bg_red); |
| 718 | ReadGlobalSetting(Settings::values.bg_green); | 718 | ReadGlobalSetting(Settings::values.bg_green); |
| 719 | ReadGlobalSetting(Settings::values.bg_blue); | 719 | ReadGlobalSetting(Settings::values.bg_blue); |
| 720 | 720 | ||
| 721 | if (global) { | 721 | if (global) { |
| 722 | Settings::values.vsync_mode.SetValue(static_cast<Settings::VSyncMode>( | ||
| 723 | ReadSetting(QString::fromStdString(Settings::values.vsync_mode.GetLabel()), | ||
| 724 | static_cast<u32>(Settings::values.vsync_mode.GetDefault())) | ||
| 725 | .value<u32>())); | ||
| 722 | ReadBasicSetting(Settings::values.renderer_debug); | 726 | ReadBasicSetting(Settings::values.renderer_debug); |
| 723 | ReadBasicSetting(Settings::values.renderer_shader_feedback); | 727 | ReadBasicSetting(Settings::values.renderer_shader_feedback); |
| 724 | ReadBasicSetting(Settings::values.enable_nsight_aftermath); | 728 | ReadBasicSetting(Settings::values.enable_nsight_aftermath); |
| @@ -1161,7 +1165,7 @@ void Config::SaveCoreValues() { | |||
| 1161 | qt_config->beginGroup(QStringLiteral("Core")); | 1165 | qt_config->beginGroup(QStringLiteral("Core")); |
| 1162 | 1166 | ||
| 1163 | WriteGlobalSetting(Settings::values.use_multi_core); | 1167 | WriteGlobalSetting(Settings::values.use_multi_core); |
| 1164 | WriteGlobalSetting(Settings::values.use_extended_memory_layout); | 1168 | WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout); |
| 1165 | 1169 | ||
| 1166 | qt_config->endGroup(); | 1170 | qt_config->endGroup(); |
| 1167 | } | 1171 | } |
| @@ -1313,6 +1317,7 @@ void Config::SaveRendererValues() { | |||
| 1313 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), | 1317 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), |
| 1314 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), | 1318 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), |
| 1315 | Settings::values.renderer_backend.UsingGlobal()); | 1319 | Settings::values.renderer_backend.UsingGlobal()); |
| 1320 | WriteGlobalSetting(Settings::values.async_presentation); | ||
| 1316 | WriteGlobalSetting(Settings::values.renderer_force_max_clock); | 1321 | WriteGlobalSetting(Settings::values.renderer_force_max_clock); |
| 1317 | WriteGlobalSetting(Settings::values.vulkan_device); | 1322 | WriteGlobalSetting(Settings::values.vulkan_device); |
| 1318 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), | 1323 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), |
| @@ -1350,20 +1355,21 @@ void Config::SaveRendererValues() { | |||
| 1350 | Settings::values.nvdec_emulation.UsingGlobal()); | 1355 | Settings::values.nvdec_emulation.UsingGlobal()); |
| 1351 | WriteGlobalSetting(Settings::values.accelerate_astc); | 1356 | WriteGlobalSetting(Settings::values.accelerate_astc); |
| 1352 | WriteGlobalSetting(Settings::values.async_astc); | 1357 | WriteGlobalSetting(Settings::values.async_astc); |
| 1353 | WriteGlobalSetting(Settings::values.use_vsync); | ||
| 1354 | WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), | 1358 | WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), |
| 1355 | static_cast<u32>(Settings::values.shader_backend.GetValue(global)), | 1359 | static_cast<u32>(Settings::values.shader_backend.GetValue(global)), |
| 1356 | static_cast<u32>(Settings::values.shader_backend.GetDefault()), | 1360 | static_cast<u32>(Settings::values.shader_backend.GetDefault()), |
| 1357 | Settings::values.shader_backend.UsingGlobal()); | 1361 | Settings::values.shader_backend.UsingGlobal()); |
| 1358 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); | 1362 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 1359 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); | 1363 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); |
| 1360 | WriteGlobalSetting(Settings::values.use_pessimistic_flushes); | ||
| 1361 | WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); | 1364 | WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); |
| 1362 | WriteGlobalSetting(Settings::values.bg_red); | 1365 | WriteGlobalSetting(Settings::values.bg_red); |
| 1363 | WriteGlobalSetting(Settings::values.bg_green); | 1366 | WriteGlobalSetting(Settings::values.bg_green); |
| 1364 | WriteGlobalSetting(Settings::values.bg_blue); | 1367 | WriteGlobalSetting(Settings::values.bg_blue); |
| 1365 | 1368 | ||
| 1366 | if (global) { | 1369 | if (global) { |
| 1370 | WriteSetting(QString::fromStdString(Settings::values.vsync_mode.GetLabel()), | ||
| 1371 | static_cast<u32>(Settings::values.vsync_mode.GetValue()), | ||
| 1372 | static_cast<u32>(Settings::values.vsync_mode.GetDefault())); | ||
| 1367 | WriteBasicSetting(Settings::values.renderer_debug); | 1373 | WriteBasicSetting(Settings::values.renderer_debug); |
| 1368 | WriteBasicSetting(Settings::values.renderer_shader_feedback); | 1374 | WriteBasicSetting(Settings::values.renderer_shader_feedback); |
| 1369 | WriteBasicSetting(Settings::values.enable_nsight_aftermath); | 1375 | WriteBasicSetting(Settings::values.enable_nsight_aftermath); |
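config.cpp now persists vsync_mode by casting the enum through u32 on both the read and write paths, replacing the old boolean use_vsync setting. A small sketch of that round-trip against a generic key/value store — the store stands in for QSettings, and the non-Immediate enumerators are placeholders rather than yuzu's actual VSyncMode values:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>

    enum class VSyncMode : std::uint32_t { Immediate = 0, Mailbox = 1, Fifo = 2 };

    // Placeholder for the QSettings-backed store used by Config.
    std::map<std::string, std::uint32_t> store;

    void WriteVsync(VSyncMode mode) {
        store["Renderer/vsync_mode"] = static_cast<std::uint32_t>(mode);
    }

    VSyncMode ReadVsync(VSyncMode fallback) {
        const auto it = store.find("Renderer/vsync_mode");
        const std::uint32_t raw =
            it != store.end() ? it->second : static_cast<std::uint32_t>(fallback);
        return static_cast<VSyncMode>(raw);
    }

    int main() {
        WriteVsync(VSyncMode::Mailbox);
        std::printf("stored %u\n", static_cast<unsigned>(store["Renderer/vsync_mode"]));
        return ReadVsync(VSyncMode::Immediate) == VSyncMode::Mailbox ? 0 : 1;
    }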
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index 207bcdc4d..26258d744 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp | |||
| @@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() { | |||
| 35 | 35 | ||
| 36 | ui->use_multi_core->setEnabled(runtime_lock); | 36 | ui->use_multi_core->setEnabled(runtime_lock); |
| 37 | ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); | 37 | ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); |
| 38 | ui->use_extended_memory_layout->setEnabled(runtime_lock); | ||
| 39 | ui->use_extended_memory_layout->setChecked( | ||
| 40 | Settings::values.use_extended_memory_layout.GetValue()); | ||
| 41 | 38 | ||
| 42 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); | 39 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); |
| 43 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); | 40 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); |
| @@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() { | |||
| 79 | void ConfigureGeneral::ApplyConfiguration() { | 76 | void ConfigureGeneral::ApplyConfiguration() { |
| 80 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, | 77 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, |
| 81 | use_multi_core); | 78 | use_multi_core); |
| 82 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout, | ||
| 83 | ui->use_extended_memory_layout, | ||
| 84 | use_extended_memory_layout); | ||
| 85 | 79 | ||
| 86 | if (Settings::IsConfiguringGlobal()) { | 80 | if (Settings::IsConfiguringGlobal()) { |
| 87 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); | 81 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); |
| @@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() { | |||
| 141 | Settings::values.use_speed_limit, use_speed_limit); | 135 | Settings::values.use_speed_limit, use_speed_limit); |
| 142 | ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, | 136 | ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, |
| 143 | use_multi_core); | 137 | use_multi_core); |
| 144 | ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout, | ||
| 145 | Settings::values.use_extended_memory_layout, | ||
| 146 | use_extended_memory_layout); | ||
| 147 | 138 | ||
| 148 | connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { | 139 | connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { |
| 149 | ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && | 140 | ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && |
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h index a090c1a3f..7ff63f425 100644 --- a/src/yuzu/configuration/configure_general.h +++ b/src/yuzu/configuration/configure_general.h | |||
| @@ -47,7 +47,6 @@ private: | |||
| 47 | 47 | ||
| 48 | ConfigurationShared::CheckState use_speed_limit; | 48 | ConfigurationShared::CheckState use_speed_limit; |
| 49 | ConfigurationShared::CheckState use_multi_core; | 49 | ConfigurationShared::CheckState use_multi_core; |
| 50 | ConfigurationShared::CheckState use_extended_memory_layout; | ||
| 51 | 50 | ||
| 52 | const Core::System& system; | 51 | const Core::System& system; |
| 53 | }; | 52 | }; |
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index add110bb0..986a1625b 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui | |||
| @@ -62,13 +62,6 @@ | |||
| 62 | </widget> | 62 | </widget> |
| 63 | </item> | 63 | </item> |
| 64 | <item> | 64 | <item> |
| 65 | <widget class="QCheckBox" name="use_extended_memory_layout"> | ||
| 66 | <property name="text"> | ||
| 67 | <string>Extended memory layout (8GB DRAM)</string> | ||
| 68 | </property> | ||
| 69 | </widget> | ||
| 70 | </item> | ||
| 71 | <item> | ||
| 72 | <widget class="QCheckBox" name="toggle_check_exit"> | 65 | <widget class="QCheckBox" name="toggle_check_exit"> |
| 73 | <property name="text"> | 66 | <property name="text"> |
| 74 | <string>Confirm exit while emulation is running</string> | 67 | <string>Confirm exit while emulation is running</string> |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index e9388daad..76e5b7499 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -4,20 +4,76 @@ | |||
| 4 | // Include this early to include Vulkan headers how we want to | 4 | // Include this early to include Vulkan headers how we want to |
| 5 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 5 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 8 | #include <iosfwd> | ||
| 9 | #include <iterator> | ||
| 10 | #include <string> | ||
| 11 | #include <tuple> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | #include <QBoxLayout> | ||
| 15 | #include <QCheckBox> | ||
| 7 | #include <QColorDialog> | 16 | #include <QColorDialog> |
| 8 | #include <QVulkanInstance> | 17 | #include <QComboBox> |
| 18 | #include <QIcon> | ||
| 19 | #include <QLabel> | ||
| 20 | #include <QPixmap> | ||
| 21 | #include <QPushButton> | ||
| 22 | #include <QSlider> | ||
| 23 | #include <QStringLiteral> | ||
| 24 | #include <QtCore/qobjectdefs.h> | ||
| 25 | #include <qcoreevent.h> | ||
| 26 | #include <qglobal.h> | ||
| 27 | #include <vulkan/vulkan_core.h> | ||
| 9 | 28 | ||
| 10 | #include "common/common_types.h" | 29 | #include "common/common_types.h" |
| 30 | #include "common/dynamic_library.h" | ||
| 11 | #include "common/logging/log.h" | 31 | #include "common/logging/log.h" |
| 12 | #include "common/settings.h" | 32 | #include "common/settings.h" |
| 13 | #include "core/core.h" | 33 | #include "core/core.h" |
| 14 | #include "ui_configure_graphics.h" | 34 | #include "ui_configure_graphics.h" |
| 15 | #include "video_core/vulkan_common/vulkan_instance.h" | 35 | #include "video_core/vulkan_common/vulkan_instance.h" |
| 16 | #include "video_core/vulkan_common/vulkan_library.h" | 36 | #include "video_core/vulkan_common/vulkan_library.h" |
| 37 | #include "video_core/vulkan_common/vulkan_surface.h" | ||
| 17 | #include "yuzu/configuration/configuration_shared.h" | 38 | #include "yuzu/configuration/configuration_shared.h" |
| 18 | #include "yuzu/configuration/configure_graphics.h" | 39 | #include "yuzu/configuration/configure_graphics.h" |
| 40 | #include "yuzu/qt_common.h" | ||
| 19 | #include "yuzu/uisettings.h" | 41 | #include "yuzu/uisettings.h" |
| 20 | 42 | ||
| 43 | static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR, | ||
| 44 | VK_PRESENT_MODE_FIFO_KHR}; | ||
| 45 | |||
| 46 | // Converts a setting to a present mode (or vice versa) | ||
| 47 | static constexpr VkPresentModeKHR VSyncSettingToMode(Settings::VSyncMode mode) { | ||
| 48 | switch (mode) { | ||
| 49 | case Settings::VSyncMode::Immediate: | ||
| 50 | return VK_PRESENT_MODE_IMMEDIATE_KHR; | ||
| 51 | case Settings::VSyncMode::Mailbox: | ||
| 52 | return VK_PRESENT_MODE_MAILBOX_KHR; | ||
| 53 | case Settings::VSyncMode::FIFO: | ||
| 54 | return VK_PRESENT_MODE_FIFO_KHR; | ||
| 55 | case Settings::VSyncMode::FIFORelaxed: | ||
| 56 | return VK_PRESENT_MODE_FIFO_RELAXED_KHR; | ||
| 57 | default: | ||
| 58 | return VK_PRESENT_MODE_FIFO_KHR; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | static constexpr Settings::VSyncMode PresentModeToSetting(VkPresentModeKHR mode) { | ||
| 63 | switch (mode) { | ||
| 64 | case VK_PRESENT_MODE_IMMEDIATE_KHR: | ||
| 65 | return Settings::VSyncMode::Immediate; | ||
| 66 | case VK_PRESENT_MODE_MAILBOX_KHR: | ||
| 67 | return Settings::VSyncMode::Mailbox; | ||
| 68 | case VK_PRESENT_MODE_FIFO_KHR: | ||
| 69 | return Settings::VSyncMode::FIFO; | ||
| 70 | case VK_PRESENT_MODE_FIFO_RELAXED_KHR: | ||
| 71 | return Settings::VSyncMode::FIFORelaxed; | ||
| 72 | default: | ||
| 73 | return Settings::VSyncMode::FIFO; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 21 | ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* parent) | 77 | ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* parent) |
| 22 | : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, system{system_} { | 78 | : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, system{system_} { |
| 23 | vulkan_device = Settings::values.vulkan_device.GetValue(); | 79 | vulkan_device = Settings::values.vulkan_device.GetValue(); |
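Editor's note: VSyncSettingToMode and PresentModeToSetting above are near-inverses with a shared FIFO fallback, so a present mode the UI does not model (or a corrupted setting) collapses to FIFO rather than leaking an invalid value toward the swapchain. A quick round-trip sketch under that assumption (CheckVSyncRoundTrip is a hypothetical test helper; it only uses the two converters above plus vulkan_core.h):

    #include <cassert>
    #include <vulkan/vulkan_core.h>

    static void CheckVSyncRoundTrip() {
        // Every mode the UI models survives setting -> present mode -> setting.
        for (const auto setting :
             {Settings::VSyncMode::Immediate, Settings::VSyncMode::Mailbox,
              Settings::VSyncMode::FIFO, Settings::VSyncMode::FIFORelaxed}) {
            assert(PresentModeToSetting(VSyncSettingToMode(setting)) == setting);
        }
        // A mode the UI does not model falls back to FIFO instead of misbehaving.
        assert(PresentModeToSetting(VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR) ==
               Settings::VSyncMode::FIFO);
    }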
| @@ -39,13 +95,16 @@ ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* paren | |||
| 39 | 95 | ||
| 40 | connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { | 96 | connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { |
| 41 | UpdateAPILayout(); | 97 | UpdateAPILayout(); |
| 98 | PopulateVSyncModeSelection(); | ||
| 42 | if (!Settings::IsConfiguringGlobal()) { | 99 | if (!Settings::IsConfiguringGlobal()) { |
| 43 | ConfigurationShared::SetHighlight( | 100 | ConfigurationShared::SetHighlight( |
| 44 | ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); | 101 | ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); |
| 45 | } | 102 | } |
| 46 | }); | 103 | }); |
| 47 | connect(ui->device, qOverload<int>(&QComboBox::activated), this, | 104 | connect(ui->device, qOverload<int>(&QComboBox::activated), this, [this](int device) { |
| 48 | [this](int device) { UpdateDeviceSelection(device); }); | 105 | UpdateDeviceSelection(device); |
| 106 | PopulateVSyncModeSelection(); | ||
| 107 | }); | ||
| 49 | connect(ui->backend, qOverload<int>(&QComboBox::activated), this, | 108 | connect(ui->backend, qOverload<int>(&QComboBox::activated), this, |
| 50 | [this](int backend) { UpdateShaderBackendSelection(backend); }); | 109 | [this](int backend) { UpdateShaderBackendSelection(backend); }); |
| 51 | 110 | ||
| @@ -70,6 +129,43 @@ ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* paren | |||
| 70 | ui->fsr_sharpening_label->setVisible(Settings::IsConfiguringGlobal()); | 129 | ui->fsr_sharpening_label->setVisible(Settings::IsConfiguringGlobal()); |
| 71 | } | 130 | } |
| 72 | 131 | ||
| 132 | void ConfigureGraphics::PopulateVSyncModeSelection() { | ||
| 133 | const Settings::RendererBackend backend{GetCurrentGraphicsBackend()}; | ||
| 134 | if (backend == Settings::RendererBackend::Null) { | ||
| 135 | ui->vsync_mode_combobox->setEnabled(false); | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | ui->vsync_mode_combobox->setEnabled(true); | ||
| 139 | |||
| 140 | const int current_index = //< current selected vsync mode from combobox | ||
| 141 | ui->vsync_mode_combobox->currentIndex(); | ||
| 142 | const auto current_mode = //< current selected vsync mode as a VkPresentModeKHR | ||
| 143 | current_index == -1 ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue()) | ||
| 144 | : vsync_mode_combobox_enum_map[current_index]; | ||
| 145 | int index{}; | ||
| 146 | const int device{ui->device->currentIndex()}; //< current selected Vulkan device | ||
| 147 | const auto& present_modes = //< relevant vector of present modes for the selected device or API | ||
| 148 | backend == Settings::RendererBackend::Vulkan ? device_present_modes[device] | ||
| 149 | : default_present_modes; | ||
| 150 | |||
| 151 | ui->vsync_mode_combobox->clear(); | ||
| 152 | vsync_mode_combobox_enum_map.clear(); | ||
| 153 | vsync_mode_combobox_enum_map.reserve(present_modes.size()); | ||
| 154 | for (const auto present_mode : present_modes) { | ||
| 155 | const auto mode_name = TranslateVSyncMode(present_mode, backend); | ||
| 156 | if (mode_name.isEmpty()) { | ||
| 157 | continue; | ||
| 158 | } | ||
| 159 | |||
| 160 | ui->vsync_mode_combobox->insertItem(index, mode_name); | ||
| 161 | vsync_mode_combobox_enum_map.push_back(present_mode); | ||
| 162 | if (present_mode == current_mode) { | ||
| 163 | ui->vsync_mode_combobox->setCurrentIndex(index); | ||
| 164 | } | ||
| 165 | index++; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 73 | void ConfigureGraphics::UpdateDeviceSelection(int device) { | 169 | void ConfigureGraphics::UpdateDeviceSelection(int device) { |
| 74 | if (device == -1) { | 170 | if (device == -1) { |
| 75 | return; | 171 | return; |
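Editor's note: PopulateVSyncModeSelection rebuilds the combobox from the present modes the selected Vulkan device (or the non-Vulkan defaults) supports, and records those same modes in vsync_mode_combobox_enum_map so a row index can be mapped back to a VkPresentModeKHR later. A generic sketch of that parallel-vector pattern, with illustrative names (name_for stands in for TranslateVSyncMode):

    #include <functional>
    #include <vector>
    #include <QComboBox>
    #include <QString>
    #include <vulkan/vulkan_core.h>

    // Rebuild `box` from the supported modes; row i of the box corresponds to
    // index_to_mode[i]. Modes with no display name are skipped, as above.
    static void Repopulate(QComboBox& box, std::vector<VkPresentModeKHR>& index_to_mode,
                           const std::vector<VkPresentModeKHR>& supported,
                           const std::function<QString(VkPresentModeKHR)>& name_for) {
        box.clear();
        index_to_mode.clear();
        for (const VkPresentModeKHR mode : supported) {
            const QString name = name_for(mode);
            if (name.isEmpty()) {
                continue;
            }
            box.addItem(name);             // visible label for this row
            index_to_mode.push_back(mode); // same index as the row just added
        }
    }

An alternative would be QComboBox::addItem(text, userData) with the mode stored as an int in the QVariant and read back through currentData(); the parallel vector used here keeps the enum values in a plain std::vector instead.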
| @@ -99,6 +195,9 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 99 | ui->nvdec_emulation_widget->setEnabled(runtime_lock); | 195 | ui->nvdec_emulation_widget->setEnabled(runtime_lock); |
| 100 | ui->resolution_combobox->setEnabled(runtime_lock); | 196 | ui->resolution_combobox->setEnabled(runtime_lock); |
| 101 | ui->accelerate_astc->setEnabled(runtime_lock); | 197 | ui->accelerate_astc->setEnabled(runtime_lock); |
| 198 | ui->vsync_mode_layout->setEnabled(runtime_lock || | ||
| 199 | Settings::values.renderer_backend.GetValue() == | ||
| 200 | Settings::RendererBackend::Vulkan); | ||
| 102 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); | 201 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); |
| 103 | ui->use_asynchronous_gpu_emulation->setChecked( | 202 | ui->use_asynchronous_gpu_emulation->setChecked( |
| 104 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); | 203 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| @@ -170,7 +269,24 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 170 | Settings::values.bg_green.GetValue(), | 269 | Settings::values.bg_green.GetValue(), |
| 171 | Settings::values.bg_blue.GetValue())); | 270 | Settings::values.bg_blue.GetValue())); |
| 172 | UpdateAPILayout(); | 271 | UpdateAPILayout(); |
| 272 | PopulateVSyncModeSelection(); //< must happen after UpdateAPILayout | ||
| 173 | SetFSRIndicatorText(ui->fsr_sharpening_slider->sliderPosition()); | 273 | SetFSRIndicatorText(ui->fsr_sharpening_slider->sliderPosition()); |
| 274 | |||
| 275 | // VSync setting needs to be determined after populating the VSync combobox | ||
| 276 | if (Settings::IsConfiguringGlobal()) { | ||
| 277 | const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue(); | ||
| 278 | const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting); | ||
| 279 | int index{}; | ||
| 280 | for (const auto mode : vsync_mode_combobox_enum_map) { | ||
| 281 | if (mode == vsync_mode) { | ||
| 282 | break; | ||
| 283 | } | ||
| 284 | index++; | ||
| 285 | } | ||
| 286 | if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) { | ||
| 287 | ui->vsync_mode_combobox->setCurrentIndex(index); | ||
| 288 | } | ||
| 289 | } | ||
| 174 | } | 290 | } |
| 175 | 291 | ||
| 176 | void ConfigureGraphics::SetFSRIndicatorText(int percentage) { | 292 | void ConfigureGraphics::SetFSRIndicatorText(int percentage) { |
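Editor's note: in the hunk above, when configuring globally the saved mode's combobox row is found with a manual loop and an index bounds check. The same lookup expressed with <algorithm>, as a self-contained sketch (an equivalent formulation, not the code above):

    #include <algorithm>
    #include <iterator>
    #include <vector>
    #include <vulkan/vulkan_core.h>

    // Returns the combobox row for `wanted`, or -1 if the current device/API
    // does not expose that present mode.
    static int IndexOfMode(const std::vector<VkPresentModeKHR>& map, VkPresentModeKHR wanted) {
        const auto it = std::find(map.begin(), map.end(), wanted);
        return it == map.end() ? -1 : static_cast<int>(std::distance(map.begin(), it));
    }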
| @@ -178,6 +294,27 @@ void ConfigureGraphics::SetFSRIndicatorText(int percentage) { | |||
| 178 | tr("%1%", "FSR sharpening percentage (e.g. 50%)").arg(100 - (percentage / 2))); | 294 | tr("%1%", "FSR sharpening percentage (e.g. 50%)").arg(100 - (percentage / 2))); |
| 179 | } | 295 | } |
| 180 | 296 | ||
| 297 | const QString ConfigureGraphics::TranslateVSyncMode(VkPresentModeKHR mode, | ||
| 298 | Settings::RendererBackend backend) const { | ||
| 299 | switch (mode) { | ||
| 300 | case VK_PRESENT_MODE_IMMEDIATE_KHR: | ||
| 301 | return backend == Settings::RendererBackend::OpenGL | ||
| 302 | ? tr("Off") | ||
| 303 | : QStringLiteral("Immediate (%1)").arg(tr("VSync Off")); | ||
| 304 | case VK_PRESENT_MODE_MAILBOX_KHR: | ||
| 305 | return QStringLiteral("Mailbox (%1)").arg(tr("Recommended")); | ||
| 306 | case VK_PRESENT_MODE_FIFO_KHR: | ||
| 307 | return backend == Settings::RendererBackend::OpenGL | ||
| 308 | ? tr("On") | ||
| 309 | : QStringLiteral("FIFO (%1)").arg(tr("VSync On")); | ||
| 310 | case VK_PRESENT_MODE_FIFO_RELAXED_KHR: | ||
| 311 | return QStringLiteral("FIFO Relaxed"); | ||
| 312 | default: | ||
| 313 | return {}; | ||
| 314 | break; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 181 | void ConfigureGraphics::ApplyConfiguration() { | 318 | void ConfigureGraphics::ApplyConfiguration() { |
| 182 | const auto resolution_setup = static_cast<Settings::ResolutionSetup>( | 319 | const auto resolution_setup = static_cast<Settings::ResolutionSetup>( |
| 183 | ui->resolution_combobox->currentIndex() - | 320 | ui->resolution_combobox->currentIndex() - |
| @@ -232,6 +369,10 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 232 | Settings::values.anti_aliasing.SetValue(anti_aliasing); | 369 | Settings::values.anti_aliasing.SetValue(anti_aliasing); |
| 233 | } | 370 | } |
| 234 | Settings::values.fsr_sharpening_slider.SetValue(ui->fsr_sharpening_slider->value()); | 371 | Settings::values.fsr_sharpening_slider.SetValue(ui->fsr_sharpening_slider->value()); |
| 372 | |||
| 373 | const auto mode = vsync_mode_combobox_enum_map[ui->vsync_mode_combobox->currentIndex()]; | ||
| 374 | const auto vsync_mode = PresentModeToSetting(mode); | ||
| 375 | Settings::values.vsync_mode.SetValue(vsync_mode); | ||
| 235 | } else { | 376 | } else { |
| 236 | if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { | 377 | if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { |
| 237 | Settings::values.resolution_setup.SetGlobal(true); | 378 | Settings::values.resolution_setup.SetGlobal(true); |
| @@ -345,7 +486,9 @@ void ConfigureGraphics::UpdateAPILayout() { | |||
| 345 | ui->backend_widget->setVisible(true); | 486 | ui->backend_widget->setVisible(true); |
| 346 | break; | 487 | break; |
| 347 | case Settings::RendererBackend::Vulkan: | 488 | case Settings::RendererBackend::Vulkan: |
| 348 | ui->device->setCurrentIndex(vulkan_device); | 489 | if (static_cast<int>(vulkan_device) < ui->device->count()) { |
| 490 | ui->device->setCurrentIndex(vulkan_device); | ||
| 491 | } | ||
| 349 | ui->device_widget->setVisible(true); | 492 | ui->device_widget->setVisible(true); |
| 350 | ui->backend_widget->setVisible(false); | 493 | ui->backend_widget->setVisible(false); |
| 351 | break; | 494 | break; |
| @@ -363,16 +506,27 @@ void ConfigureGraphics::RetrieveVulkanDevices() try { | |||
| 363 | 506 | ||
| 364 | using namespace Vulkan; | 507 | using namespace Vulkan; |
| 365 | 508 | ||
| 509 | auto* window = this->window()->windowHandle(); | ||
| 510 | auto wsi = QtCommon::GetWindowSystemInfo(window); | ||
| 511 | |||
| 366 | vk::InstanceDispatch dld; | 512 | vk::InstanceDispatch dld; |
| 367 | const Common::DynamicLibrary library = OpenLibrary(); | 513 | const Common::DynamicLibrary library = OpenLibrary(); |
| 368 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_1); | 514 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_1, wsi.type); |
| 369 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | 515 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); |
| 516 | vk::SurfaceKHR surface = //< needed to view present modes for a device | ||
| 517 | CreateSurface(instance, wsi); | ||
| 370 | 518 | ||
| 371 | vulkan_devices.clear(); | 519 | vulkan_devices.clear(); |
| 372 | vulkan_devices.reserve(physical_devices.size()); | 520 | vulkan_devices.reserve(physical_devices.size()); |
| 521 | device_present_modes.clear(); | ||
| 522 | device_present_modes.reserve(physical_devices.size()); | ||
| 373 | for (const VkPhysicalDevice device : physical_devices) { | 523 | for (const VkPhysicalDevice device : physical_devices) { |
| 374 | const std::string name = vk::PhysicalDevice(device, dld).GetProperties().deviceName; | 524 | const auto physical_device = vk::PhysicalDevice(device, dld); |
| 525 | const std::string name = physical_device.GetProperties().deviceName; | ||
| 526 | const std::vector<VkPresentModeKHR> present_modes = | ||
| 527 | physical_device.GetSurfacePresentModesKHR(*surface); | ||
| 375 | vulkan_devices.push_back(QString::fromStdString(name)); | 528 | vulkan_devices.push_back(QString::fromStdString(name)); |
| 529 | device_present_modes.push_back(present_modes); | ||
| 376 | } | 530 | } |
| 377 | } catch (const Vulkan::vk::Exception& exception) { | 531 | } catch (const Vulkan::vk::Exception& exception) { |
| 378 | LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); | 532 | LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); |
| @@ -465,4 +619,6 @@ void ConfigureGraphics::SetupPerGameUI() { | |||
| 465 | ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); | 619 | ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); |
| 466 | ConfigurationShared::InsertGlobalItem( | 620 | ConfigurationShared::InsertGlobalItem( |
| 467 | ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true))); | 621 | ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true))); |
| 622 | |||
| 623 | ui->vsync_mode_layout->setVisible(false); | ||
| 468 | } | 624 | } |
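Editor's note: RetrieveVulkanDevices now also creates a temporary surface so each physical device's supported present modes can be cached alongside its name. Behind yuzu's vk:: wrapper, GetSurfacePresentModesKHR boils down to the standard Vulkan two-call enumeration; a raw-API sketch, ignoring VkResult handling and the fact that yuzu resolves these entry points dynamically through vk::InstanceDispatch:

    #include <vector>
    #include <vulkan/vulkan_core.h>

    // First call asks for the count, second call fills the vector.
    static std::vector<VkPresentModeKHR> QueryPresentModes(VkPhysicalDevice gpu,
                                                           VkSurfaceKHR surface) {
        uint32_t count = 0;
        vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &count, nullptr);
        std::vector<VkPresentModeKHR> modes(count);
        vkGetPhysicalDeviceSurfacePresentModesKHR(gpu, surface, &count, modes.data());
        return modes;
    }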
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index d98d6624e..901f604a5 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h | |||
| @@ -5,9 +5,21 @@ | |||
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include <QColor> | ||
| 8 | #include <QString> | 9 | #include <QString> |
| 9 | #include <QWidget> | 10 | #include <QWidget> |
| 10 | #include "common/settings.h" | 11 | #include <qobjectdefs.h> |
| 12 | #include <vulkan/vulkan_core.h> | ||
| 13 | #include "common/common_types.h" | ||
| 14 | |||
| 15 | class QEvent; | ||
| 16 | class QObject; | ||
| 17 | |||
| 18 | namespace Settings { | ||
| 19 | enum class NvdecEmulation : u32; | ||
| 20 | enum class RendererBackend : u32; | ||
| 21 | enum class ShaderBackend : u32; | ||
| 22 | } // namespace Settings | ||
| 11 | 23 | ||
| 12 | namespace Core { | 24 | namespace Core { |
| 13 | class System; | 25 | class System; |
| @@ -35,6 +47,7 @@ private: | |||
| 35 | void changeEvent(QEvent* event) override; | 47 | void changeEvent(QEvent* event) override; |
| 36 | void RetranslateUI(); | 48 | void RetranslateUI(); |
| 37 | 49 | ||
| 50 | void PopulateVSyncModeSelection(); | ||
| 38 | void UpdateBackgroundColorButton(QColor color); | 51 | void UpdateBackgroundColorButton(QColor color); |
| 39 | void UpdateAPILayout(); | 52 | void UpdateAPILayout(); |
| 40 | void UpdateDeviceSelection(int device); | 53 | void UpdateDeviceSelection(int device); |
| @@ -43,6 +56,10 @@ private: | |||
| 43 | void RetrieveVulkanDevices(); | 56 | void RetrieveVulkanDevices(); |
| 44 | 57 | ||
| 45 | void SetFSRIndicatorText(int percentage); | 58 | void SetFSRIndicatorText(int percentage); |
| 59 | /* Converts a Vulkan present mode into a user-facing, translatable string | ||
| 60 | * for display in the configuration UI */ | ||
| 61 | const QString TranslateVSyncMode(VkPresentModeKHR mode, | ||
| 62 | Settings::RendererBackend backend) const; | ||
| 46 | 63 | ||
| 47 | void SetupPerGameUI(); | 64 | void SetupPerGameUI(); |
| 48 | 65 | ||
| @@ -58,6 +75,10 @@ private: | |||
| 58 | ConfigurationShared::CheckState use_asynchronous_gpu_emulation; | 75 | ConfigurationShared::CheckState use_asynchronous_gpu_emulation; |
| 59 | 76 | ||
| 60 | std::vector<QString> vulkan_devices; | 77 | std::vector<QString> vulkan_devices; |
| 78 | std::vector<std::vector<VkPresentModeKHR>> device_present_modes; | ||
| 79 | std::vector<VkPresentModeKHR> | ||
| 80 | vsync_mode_combobox_enum_map; //< Keeps track of which present mode corresponds to which | ||
| 81 | // selection in the combobox | ||
| 61 | u32 vulkan_device{}; | 82 | u32 vulkan_device{}; |
| 62 | Settings::ShaderBackend shader_backend{}; | 83 | Settings::ShaderBackend shader_backend{}; |
| 63 | 84 | ||
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index a45ec69ec..39f70e406 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -189,6 +189,44 @@ | |||
| 189 | </widget> | 189 | </widget> |
| 190 | </item> | 190 | </item> |
| 191 | <item> | 191 | <item> |
| 192 | <widget class="QWidget" name="vsync_mode_layout" native="true"> | ||
| 193 | <layout class="QHBoxLayout" name="horizontalLayout_4"> | ||
| 194 | <property name="leftMargin"> | ||
| 195 | <number>0</number> | ||
| 196 | </property> | ||
| 197 | <property name="topMargin"> | ||
| 198 | <number>0</number> | ||
| 199 | </property> | ||
| 200 | <property name="rightMargin"> | ||
| 201 | <number>0</number> | ||
| 202 | </property> | ||
| 203 | <property name="bottomMargin"> | ||
| 204 | <number>0</number> | ||
| 205 | </property> | ||
| 206 | <item> | ||
| 207 | <widget class="QLabel" name="vsync_mode_label"> | ||
| 208 | <property name="text"> | ||
| 209 | <string>VSync Mode:</string> | ||
| 210 | </property> | ||
| 211 | </widget> | ||
| 212 | </item> | ||
| 213 | <item> | ||
| 214 | <widget class="QComboBox" name="vsync_mode_combobox"> | ||
| 215 | <property name="toolTip"> | ||
| 216 | <string>FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen refresh rate. | ||
| 217 | FIFO Relaxed is similar to FIFO but allows tearing as it recovers from a slow down. | ||
| 218 | Mailbox can have lower latency than FIFO and does not tear but may drop frames. | ||
| 219 | Immediate (no synchronization) just presents whatever is available and can exhibit tearing.</string> | ||
| 220 | </property> | ||
| 221 | <property name="currentText"> | ||
| 222 | <string/> | ||
| 223 | </property> | ||
| 224 | </widget> | ||
| 225 | </item> | ||
| 226 | </layout> | ||
| 227 | </widget> | ||
| 228 | </item> | ||
| 229 | <item> | ||
| 192 | <widget class="QWidget" name="nvdec_emulation_widget" native="true"> | 230 | <widget class="QWidget" name="nvdec_emulation_widget" native="true"> |
| 193 | <layout class="QHBoxLayout" name="nvdec_emulation_layout"> | 231 | <layout class="QHBoxLayout" name="nvdec_emulation_layout"> |
| 194 | <property name="leftMargin"> | 232 | <property name="leftMargin"> |
| @@ -366,7 +404,7 @@ | |||
| 366 | </item> | 404 | </item> |
| 367 | <item> | 405 | <item> |
| 368 | <property name="text"> | 406 | <property name="text"> |
| 369 | <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string> | 407 | <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string> |
| 370 | </property> | 408 | </property> |
| 371 | </item> | 409 | </item> |
| 372 | <item> | 410 | <item> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 59fb1b334..005b022ca 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -21,18 +21,17 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | |||
| 21 | 21 | ||
| 22 | void ConfigureGraphicsAdvanced::SetConfiguration() { | 22 | void ConfigureGraphicsAdvanced::SetConfiguration() { |
| 23 | const bool runtime_lock = !system.IsPoweredOn(); | 23 | const bool runtime_lock = !system.IsPoweredOn(); |
| 24 | ui->use_vsync->setEnabled(runtime_lock); | 24 | ui->async_present->setEnabled(runtime_lock); |
| 25 | ui->renderer_force_max_clock->setEnabled(runtime_lock); | 25 | ui->renderer_force_max_clock->setEnabled(runtime_lock); |
| 26 | ui->async_astc->setEnabled(runtime_lock); | 26 | ui->async_astc->setEnabled(runtime_lock); |
| 27 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); | 27 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); |
| 28 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | 28 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); |
| 29 | 29 | ||
| 30 | ui->async_present->setChecked(Settings::values.async_presentation.GetValue()); | ||
| 30 | ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); | 31 | ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); |
| 31 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); | ||
| 32 | ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); | 32 | ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); |
| 33 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); | 33 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); |
| 34 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); | 34 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); |
| 35 | ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); | ||
| 36 | ui->use_vulkan_driver_pipeline_cache->setChecked( | 35 | ui->use_vulkan_driver_pipeline_cache->setChecked( |
| 37 | Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); | 36 | Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); |
| 38 | 37 | ||
| @@ -54,12 +53,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 54 | 53 | ||
| 55 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | 54 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { |
| 56 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); | 55 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); |
| 56 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation, | ||
| 57 | ui->async_present, async_present); | ||
| 57 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, | 58 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, |
| 58 | ui->renderer_force_max_clock, | 59 | ui->renderer_force_max_clock, |
| 59 | renderer_force_max_clock); | 60 | renderer_force_max_clock); |
| 60 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, | 61 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, |
| 61 | ui->anisotropic_filtering_combobox); | 62 | ui->anisotropic_filtering_combobox); |
| 62 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); | ||
| 63 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, | 63 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, |
| 64 | async_astc); | 64 | async_astc); |
| 65 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, | 65 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, |
| @@ -67,8 +67,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 67 | use_asynchronous_shaders); | 67 | use_asynchronous_shaders); |
| 68 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, | 68 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, |
| 69 | ui->use_fast_gpu_time, use_fast_gpu_time); | 69 | ui->use_fast_gpu_time, use_fast_gpu_time); |
| 70 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, | ||
| 71 | ui->use_pessimistic_flushes, use_pessimistic_flushes); | ||
| 72 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, | 70 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, |
| 73 | ui->use_vulkan_driver_pipeline_cache, | 71 | ui->use_vulkan_driver_pipeline_cache, |
| 74 | use_vulkan_driver_pipeline_cache); | 72 | use_vulkan_driver_pipeline_cache); |
| @@ -90,15 +88,13 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 90 | // Disable if not global (only happens during game) | 88 | // Disable if not global (only happens during game) |
| 91 | if (Settings::IsConfiguringGlobal()) { | 89 | if (Settings::IsConfiguringGlobal()) { |
| 92 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); | 90 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); |
| 91 | ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal()); | ||
| 93 | ui->renderer_force_max_clock->setEnabled( | 92 | ui->renderer_force_max_clock->setEnabled( |
| 94 | Settings::values.renderer_force_max_clock.UsingGlobal()); | 93 | Settings::values.renderer_force_max_clock.UsingGlobal()); |
| 95 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); | ||
| 96 | ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); | 94 | ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); |
| 97 | ui->use_asynchronous_shaders->setEnabled( | 95 | ui->use_asynchronous_shaders->setEnabled( |
| 98 | Settings::values.use_asynchronous_shaders.UsingGlobal()); | 96 | Settings::values.use_asynchronous_shaders.UsingGlobal()); |
| 99 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); | 97 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); |
| 100 | ui->use_pessimistic_flushes->setEnabled( | ||
| 101 | Settings::values.use_pessimistic_flushes.UsingGlobal()); | ||
| 102 | ui->use_vulkan_driver_pipeline_cache->setEnabled( | 98 | ui->use_vulkan_driver_pipeline_cache->setEnabled( |
| 103 | Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); | 99 | Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); |
| 104 | ui->anisotropic_filtering_combobox->setEnabled( | 100 | ui->anisotropic_filtering_combobox->setEnabled( |
| @@ -107,10 +103,11 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 107 | return; | 103 | return; |
| 108 | } | 104 | } |
| 109 | 105 | ||
| 106 | ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation, | ||
| 107 | async_present); | ||
| 110 | ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, | 108 | ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, |
| 111 | Settings::values.renderer_force_max_clock, | 109 | Settings::values.renderer_force_max_clock, |
| 112 | renderer_force_max_clock); | 110 | renderer_force_max_clock); |
| 113 | ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); | ||
| 114 | ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc, | 111 | ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc, |
| 115 | async_astc); | 112 | async_astc); |
| 116 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, | 113 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, |
| @@ -118,9 +115,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 118 | use_asynchronous_shaders); | 115 | use_asynchronous_shaders); |
| 119 | ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, | 116 | ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, |
| 120 | Settings::values.use_fast_gpu_time, use_fast_gpu_time); | 117 | Settings::values.use_fast_gpu_time, use_fast_gpu_time); |
| 121 | ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, | ||
| 122 | Settings::values.use_pessimistic_flushes, | ||
| 123 | use_pessimistic_flushes); | ||
| 124 | ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, | 118 | ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, |
| 125 | Settings::values.use_vulkan_driver_pipeline_cache, | 119 | Settings::values.use_vulkan_driver_pipeline_cache, |
| 126 | use_vulkan_driver_pipeline_cache); | 120 | use_vulkan_driver_pipeline_cache); |
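Editor's note: the new async_present checkbox follows the same per-game pattern as the toggles around it: in the global dialog it is a plain on/off box, while per-game it becomes a tri-state where the third state means "use the global value". A plain-Qt sketch of that idea (illustrative helper, not yuzu's ConfigurationShared::SetColoredTristate):

    #include <QCheckBox>

    // PartiallyChecked stands for "use the global setting"; otherwise the box
    // reflects the per-game override.
    static void MakeTristate(QCheckBox* box, bool using_global, bool local_value) {
        box->setTristate(true);
        box->setCheckState(using_global ? Qt::PartiallyChecked
                                        : (local_value ? Qt::Checked : Qt::Unchecked));
    }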
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index bf1b04749..ff5060957 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -36,12 +36,12 @@ private: | |||
| 36 | 36 | ||
| 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; | 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; |
| 38 | 38 | ||
| 39 | ConfigurationShared::CheckState async_present; | ||
| 39 | ConfigurationShared::CheckState renderer_force_max_clock; | 40 | ConfigurationShared::CheckState renderer_force_max_clock; |
| 40 | ConfigurationShared::CheckState use_vsync; | 41 | ConfigurationShared::CheckState use_vsync; |
| 41 | ConfigurationShared::CheckState async_astc; | 42 | ConfigurationShared::CheckState async_astc; |
| 42 | ConfigurationShared::CheckState use_asynchronous_shaders; | 43 | ConfigurationShared::CheckState use_asynchronous_shaders; |
| 43 | ConfigurationShared::CheckState use_fast_gpu_time; | 44 | ConfigurationShared::CheckState use_fast_gpu_time; |
| 44 | ConfigurationShared::CheckState use_pessimistic_flushes; | ||
| 45 | ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; | 45 | ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; |
| 46 | 46 | ||
| 47 | const Core::System& system; | 47 | const Core::System& system; |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index a7dbdc18c..d073fe9b1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | <x>0</x> | 7 | <x>0</x> |
| 8 | <y>0</y> | 8 | <y>0</y> |
| 9 | <width>404</width> | 9 | <width>404</width> |
| 10 | <height>321</height> | 10 | <height>376</height> |
| 11 | </rect> | 11 | </rect> |
| 12 | </property> | 12 | </property> |
| 13 | <property name="windowTitle"> | 13 | <property name="windowTitle"> |
| @@ -70,22 +70,19 @@ | |||
| 70 | </widget> | 70 | </widget> |
| 71 | </item> | 71 | </item> |
| 72 | <item> | 72 | <item> |
| 73 | <widget class="QCheckBox" name="renderer_force_max_clock"> | 73 | <widget class="QCheckBox" name="async_present"> |
| 74 | <property name="toolTip"> | ||
| 75 | <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> | ||
| 76 | </property> | ||
| 77 | <property name="text"> | 74 | <property name="text"> |
| 78 | <string>Force maximum clocks (Vulkan only)</string> | 75 | <string>Enable asynchronous presentation (Vulkan only)</string> |
| 79 | </property> | 76 | </property> |
| 80 | </widget> | 77 | </widget> |
| 81 | </item> | 78 | </item> |
| 82 | <item> | 79 | <item> |
| 83 | <widget class="QCheckBox" name="use_vsync"> | 80 | <widget class="QCheckBox" name="renderer_force_max_clock"> |
| 84 | <property name="toolTip"> | 81 | <property name="toolTip"> |
| 85 | <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> | 82 | <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> |
| 86 | </property> | 83 | </property> |
| 87 | <property name="text"> | 84 | <property name="text"> |
| 88 | <string>Use VSync</string> | 85 | <string>Force maximum clocks (Vulkan only)</string> |
| 89 | </property> | 86 | </property> |
| 90 | </widget> | 87 | </widget> |
| 91 | </item> | 88 | </item> |
| @@ -112,7 +109,7 @@ | |||
| 112 | <item> | 109 | <item> |
| 113 | <widget class="QCheckBox" name="use_fast_gpu_time"> | 110 | <widget class="QCheckBox" name="use_fast_gpu_time"> |
| 114 | <property name="toolTip"> | 111 | <property name="toolTip"> |
| 115 | <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> | 112 | <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> |
| 116 | </property> | 113 | </property> |
| 117 | <property name="text"> | 114 | <property name="text"> |
| 118 | <string>Use Fast GPU Time (Hack)</string> | 115 | <string>Use Fast GPU Time (Hack)</string> |
| @@ -120,19 +117,9 @@ | |||
| 120 | </widget> | 117 | </widget> |
| 121 | </item> | 118 | </item> |
| 122 | <item> | 119 | <item> |
| 123 | <widget class="QCheckBox" name="use_pessimistic_flushes"> | ||
| 124 | <property name="toolTip"> | ||
| 125 | <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string> | ||
| 126 | </property> | ||
| 127 | <property name="text"> | ||
| 128 | <string>Use pessimistic buffer flushes (Hack)</string> | ||
| 129 | </property> | ||
| 130 | </widget> | ||
| 131 | </item> | ||
| 132 | <item> | ||
| 133 | <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> | 120 | <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> |
| 134 | <property name="toolTip"> | 121 | <property name="toolTip"> |
| 135 | <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> | 122 | <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> |
| 136 | </property> | 123 | </property> |
| 137 | <property name="text"> | 124 | <property name="text"> |
| 138 | <string>Use Vulkan pipeline cache</string> | 125 | <string>Use Vulkan pipeline cache</string> |
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 50b62293e..561a08dc5 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <QInputDialog> | 8 | #include <QInputDialog> |
| 9 | #include <QMenu> | 9 | #include <QMenu> |
| 10 | #include <QMessageBox> | 10 | #include <QMessageBox> |
| 11 | #include <QMouseEvent> | ||
| 11 | #include <QTimer> | 12 | #include <QTimer> |
| 12 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 13 | #include "common/param_package.h" | 14 | #include "common/param_package.h" |
| @@ -206,7 +207,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) { | |||
| 206 | } | 207 | } |
| 207 | if (param.Has("axis")) { | 208 | if (param.Has("axis")) { |
| 208 | const QString axis = QString::fromStdString(param.Get("axis", "")); | 209 | const QString axis = QString::fromStdString(param.Get("axis", "")); |
| 209 | return QObject::tr("%1%2Axis %3").arg(toggle, invert, axis); | 210 | return QObject::tr("%1%2%3Axis %4").arg(toggle, inverted, invert, axis); |
| 210 | } | 211 | } |
| 211 | if (param.Has("axis_x") && param.Has("axis_y") && param.Has("axis_z")) { | 212 | if (param.Has("axis_x") && param.Has("axis_y") && param.Has("axis_z")) { |
| 212 | const QString axis_x = QString::fromStdString(param.Get("axis_x", "")); | 213 | const QString axis_x = QString::fromStdString(param.Get("axis_x", "")); |
| @@ -229,7 +230,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) { | |||
| 229 | return QObject::tr("%1%2%3Hat %4").arg(turbo, toggle, inverted, button_name); | 230 | return QObject::tr("%1%2%3Hat %4").arg(turbo, toggle, inverted, button_name); |
| 230 | } | 231 | } |
| 231 | if (param.Has("axis")) { | 232 | if (param.Has("axis")) { |
| 232 | return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name); | 233 | return QObject::tr("%1%2%3Axis %4").arg(toggle, inverted, invert, button_name); |
| 233 | } | 234 | } |
| 234 | if (param.Has("motion")) { | 235 | if (param.Has("motion")) { |
| 235 | return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name); | 236 | return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name); |
| @@ -410,6 +411,12 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i | |||
| 410 | button_map[button_id]->setText(ButtonToText(param)); | 411 | button_map[button_id]->setText(ButtonToText(param)); |
| 411 | emulated_controller->SetButtonParam(button_id, param); | 412 | emulated_controller->SetButtonParam(button_id, param); |
| 412 | }); | 413 | }); |
| 414 | context_menu.addAction(tr("Invert button"), [&] { | ||
| 415 | const bool invert_value = !param.Get("inverted", false); | ||
| 416 | param.Set("inverted", invert_value); | ||
| 417 | button_map[button_id]->setText(ButtonToText(param)); | ||
| 418 | emulated_controller->SetButtonParam(button_id, param); | ||
| 419 | }); | ||
| 413 | context_menu.addAction(tr("Set threshold"), [&] { | 420 | context_menu.addAction(tr("Set threshold"), [&] { |
| 414 | const int button_threshold = | 421 | const int button_threshold = |
| 415 | static_cast<int>(param.Get("threshold", 0.5f) * 100.0f); | 422 | static_cast<int>(param.Get("threshold", 0.5f) * 100.0f); |
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp index c287220fc..fe1ee2289 100644 --- a/src/yuzu/configuration/configure_input_player_widget.cpp +++ b/src/yuzu/configuration/configure_input_player_widget.cpp | |||
| @@ -180,6 +180,10 @@ void PlayerControlPreview::ControllerUpdate(Core::HID::ControllerTriggerType typ | |||
| 180 | battery_values = controller->GetBatteryValues(); | 180 | battery_values = controller->GetBatteryValues(); |
| 181 | needs_redraw = true; | 181 | needs_redraw = true; |
| 182 | break; | 182 | break; |
| 183 | case Core::HID::ControllerTriggerType::Motion: | ||
| 184 | motion_values = controller->GetMotions(); | ||
| 185 | needs_redraw = true; | ||
| 186 | break; | ||
| 183 | default: | 187 | default: |
| 184 | break; | 188 | break; |
| 185 | } | 189 | } |
| @@ -313,6 +317,15 @@ void PlayerControlPreview::DrawLeftController(QPainter& p, const QPointF center) | |||
| 313 | DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0)); | 317 | DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0)); |
| 314 | } | 318 | } |
| 315 | 319 | ||
| 320 | { | ||
| 321 | // Draw motion cubes | ||
| 322 | using namespace Settings::NativeMotion; | ||
| 323 | p.setPen(colors.outline); | ||
| 324 | p.setBrush(colors.transparent); | ||
| 325 | Draw3dCube(p, center + QPointF(-140, 90), | ||
| 326 | motion_values[Settings::NativeMotion::MotionLeft].euler, 20.0f); | ||
| 327 | } | ||
| 328 | |||
| 316 | using namespace Settings::NativeButton; | 329 | using namespace Settings::NativeButton; |
| 317 | 330 | ||
| 318 | // D-pad constants | 331 | // D-pad constants |
| @@ -435,6 +448,15 @@ void PlayerControlPreview::DrawRightController(QPainter& p, const QPointF center | |||
| 435 | DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90)); | 448 | DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90)); |
| 436 | } | 449 | } |
| 437 | 450 | ||
| 451 | { | ||
| 452 | // Draw motion cubes | ||
| 453 | using namespace Settings::NativeMotion; | ||
| 454 | p.setPen(colors.outline); | ||
| 455 | p.setBrush(colors.transparent); | ||
| 456 | Draw3dCube(p, center + QPointF(140, 90), | ||
| 457 | motion_values[Settings::NativeMotion::MotionRight].euler, 20.0f); | ||
| 458 | } | ||
| 459 | |||
| 438 | using namespace Settings::NativeButton; | 460 | using namespace Settings::NativeButton; |
| 439 | 461 | ||
| 440 | // Face buttons constants | 462 | // Face buttons constants |
| @@ -555,6 +577,17 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center) | |||
| 555 | DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90)); | 577 | DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90)); |
| 556 | } | 578 | } |
| 557 | 579 | ||
| 580 | { | ||
| 581 | // Draw motion cubes | ||
| 582 | using namespace Settings::NativeMotion; | ||
| 583 | p.setPen(colors.outline); | ||
| 584 | p.setBrush(colors.transparent); | ||
| 585 | Draw3dCube(p, center + QPointF(-180, -5), | ||
| 586 | motion_values[Settings::NativeMotion::MotionLeft].euler, 20.0f); | ||
| 587 | Draw3dCube(p, center + QPointF(180, -5), | ||
| 588 | motion_values[Settings::NativeMotion::MotionRight].euler, 20.0f); | ||
| 589 | } | ||
| 590 | |||
| 558 | using namespace Settings::NativeButton; | 591 | using namespace Settings::NativeButton; |
| 559 | 592 | ||
| 560 | // Face buttons constants | 593 | // Face buttons constants |
| @@ -647,6 +680,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen | |||
| 647 | DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0)); | 680 | DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0)); |
| 648 | } | 681 | } |
| 649 | 682 | ||
| 683 | { | ||
| 684 | // Draw motion cubes | ||
| 685 | using namespace Settings::NativeMotion; | ||
| 686 | p.setPen(colors.outline); | ||
| 687 | p.setBrush(colors.transparent); | ||
| 688 | Draw3dCube(p, center + QPointF(0, -115), | ||
| 689 | motion_values[Settings::NativeMotion::MotionLeft].euler, 15.0f); | ||
| 690 | } | ||
| 691 | |||
| 650 | using namespace Settings::NativeButton; | 692 | using namespace Settings::NativeButton; |
| 651 | 693 | ||
| 652 | // Face buttons constants | 694 | // Face buttons constants |
| @@ -750,6 +792,15 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center) | |||
| 750 | DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105)); | 792 | DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105)); |
| 751 | } | 793 | } |
| 752 | 794 | ||
| 795 | { | ||
| 796 | // Draw motion cubes | ||
| 797 | using namespace Settings::NativeMotion; | ||
| 798 | p.setPen(colors.button); | ||
| 799 | p.setBrush(colors.transparent); | ||
| 800 | Draw3dCube(p, center + QPointF(0, -100), | ||
| 801 | motion_values[Settings::NativeMotion::MotionLeft].euler, 15.0f); | ||
| 802 | } | ||
| 803 | |||
| 753 | using namespace Settings::NativeButton; | 804 | using namespace Settings::NativeButton; |
| 754 | 805 | ||
| 755 | // Face buttons constants | 806 | // Face buttons constants |
| @@ -2871,6 +2922,46 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di | |||
| 2871 | DrawPolygon(p, arrow_symbol); | 2922 | DrawPolygon(p, arrow_symbol); |
| 2872 | } | 2923 | } |
| 2873 | 2924 | ||
| 2925 | // Draw motion functions | ||
| 2926 | void PlayerControlPreview::Draw3dCube(QPainter& p, QPointF center, const Common::Vec3f& euler, | ||
| 2927 | float size) { | ||
| 2928 | std::array<Common::Vec3f, 8> cube{ | ||
| 2929 | Common::Vec3f{-1, -1, -1}, | ||
| 2930 | {-1, 1, -1}, | ||
| 2931 | {1, 1, -1}, | ||
| 2932 | {1, -1, -1}, | ||
| 2933 | {-1, -1, 1}, | ||
| 2934 | {-1, 1, 1}, | ||
| 2935 | {1, 1, 1}, | ||
| 2936 | {1, -1, 1}, | ||
| 2937 | }; | ||
| 2938 | |||
| 2939 | for (Common::Vec3f& point : cube) { | ||
| 2940 | point.RotateFromOrigin(euler.x, euler.y, euler.z); | ||
| 2941 | point *= size; | ||
| 2942 | } | ||
| 2943 | |||
| 2944 | const std::array<QPointF, 4> front_face{ | ||
| 2945 | center + QPointF{cube[0].x, cube[0].y}, | ||
| 2946 | center + QPointF{cube[1].x, cube[1].y}, | ||
| 2947 | center + QPointF{cube[2].x, cube[2].y}, | ||
| 2948 | center + QPointF{cube[3].x, cube[3].y}, | ||
| 2949 | }; | ||
| 2950 | const std::array<QPointF, 4> back_face{ | ||
| 2951 | center + QPointF{cube[4].x, cube[4].y}, | ||
| 2952 | center + QPointF{cube[5].x, cube[5].y}, | ||
| 2953 | center + QPointF{cube[6].x, cube[6].y}, | ||
| 2954 | center + QPointF{cube[7].x, cube[7].y}, | ||
| 2955 | }; | ||
| 2956 | |||
| 2957 | DrawPolygon(p, front_face); | ||
| 2958 | DrawPolygon(p, back_face); | ||
| 2959 | p.drawLine(center + QPointF{cube[0].x, cube[0].y}, center + QPointF{cube[4].x, cube[4].y}); | ||
| 2960 | p.drawLine(center + QPointF{cube[1].x, cube[1].y}, center + QPointF{cube[5].x, cube[5].y}); | ||
| 2961 | p.drawLine(center + QPointF{cube[2].x, cube[2].y}, center + QPointF{cube[6].x, cube[6].y}); | ||
| 2962 | p.drawLine(center + QPointF{cube[3].x, cube[3].y}, center + QPointF{cube[7].x, cube[7].y}); | ||
| 2963 | } | ||
| 2964 | |||
| 2874 | template <size_t N> | 2965 | template <size_t N> |
| 2875 | void PlayerControlPreview::DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon) { | 2966 | void PlayerControlPreview::DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon) { |
| 2876 | p.drawPolygon(polygon.data(), static_cast<int>(polygon.size())); | 2967 | p.drawPolygon(polygon.data(), static_cast<int>(polygon.size())); |
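Editor's note: Draw3dCube above takes the eight corners of a unit cube, rotates them by the controller's Euler angles via Common::Vec3f::RotateFromOrigin, scales them, and draws the front face, the back face and the four connecting edges using only the x/y components, i.e. a simple orthographic projection with no perspective divide. A standalone sketch of the per-axis rotation this assumes; the axis order and sign convention here are an assumption, the real definition lives in common/vector_math.h:

    #include <cmath>

    struct Vec3 {
        float x, y, z;
    };

    // Rotate `p` about the X, then Y, then Z axis by the given angles (radians).
    static Vec3 RotateXYZ(Vec3 p, float rx, float ry, float rz) {
        // X axis
        float y = p.y * std::cos(rx) - p.z * std::sin(rx);
        float z = p.y * std::sin(rx) + p.z * std::cos(rx);
        p.y = y;
        p.z = z;
        // Y axis
        float x = p.x * std::cos(ry) + p.z * std::sin(ry);
        z = -p.x * std::sin(ry) + p.z * std::cos(ry);
        p.x = x;
        p.z = z;
        // Z axis
        x = p.x * std::cos(rz) - p.y * std::sin(rz);
        y = p.x * std::sin(rz) + p.y * std::cos(rz);
        p.x = x;
        p.y = y;
        return p;
    }
    // The cube is then projected orthographically: only x and y feed the QPointF
    // corners handed to DrawPolygon and drawLine.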
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h index 267d134de..a16943c3c 100644 --- a/src/yuzu/configuration/configure_input_player_widget.h +++ b/src/yuzu/configuration/configure_input_player_widget.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/input.h" | 10 | #include "common/input.h" |
| 11 | #include "common/settings_input.h" | 11 | #include "common/settings_input.h" |
| 12 | #include "common/vector_math.h" | ||
| 12 | #include "core/hid/emulated_controller.h" | 13 | #include "core/hid/emulated_controller.h" |
| 13 | #include "core/hid/hid_types.h" | 14 | #include "core/hid/hid_types.h" |
| 14 | 15 | ||
| @@ -193,6 +194,9 @@ private: | |||
| 193 | void DrawSymbol(QPainter& p, QPointF center, Symbol symbol, float icon_size); | 194 | void DrawSymbol(QPainter& p, QPointF center, Symbol symbol, float icon_size); |
| 194 | void DrawArrow(QPainter& p, QPointF center, Direction direction, float size); | 195 | void DrawArrow(QPainter& p, QPointF center, Direction direction, float size); |
| 195 | 196 | ||
| 197 | // Draw motion functions | ||
| 198 | void Draw3dCube(QPainter& p, QPointF center, const Common::Vec3f& euler, float size); | ||
| 199 | |||
| 196 | // Draw primitive types | 200 | // Draw primitive types |
| 197 | template <size_t N> | 201 | template <size_t N> |
| 198 | void DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon); | 202 | void DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon); |
| @@ -222,4 +226,5 @@ private: | |||
| 222 | Core::HID::SticksValues stick_values{}; | 226 | Core::HID::SticksValues stick_values{}; |
| 223 | Core::HID::TriggerValues trigger_values{}; | 227 | Core::HID::TriggerValues trigger_values{}; |
| 224 | Core::HID::BatteryValues battery_values{}; | 228 | Core::HID::BatteryValues battery_values{}; |
| 229 | Core::HID::MotionState motion_values{}; | ||
| 225 | }; | 230 | }; |
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index 6af34f793..286ccc5cd 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp | |||
| @@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() { | |||
| 111 | ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); | 111 | ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); |
| 112 | ui->device_name_edit->setText( | 112 | ui->device_name_edit->setText( |
| 113 | QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); | 113 | QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); |
| 114 | ui->use_unsafe_extended_memory_layout->setEnabled(enabled); | ||
| 115 | ui->use_unsafe_extended_memory_layout->setChecked( | ||
| 116 | Settings::values.use_unsafe_extended_memory_layout.GetValue()); | ||
| 114 | 117 | ||
| 115 | if (Settings::IsConfiguringGlobal()) { | 118 | if (Settings::IsConfiguringGlobal()) { |
| 116 | ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); | 119 | ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); |
| @@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() { | |||
| 160 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); | 163 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); |
| 161 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, | 164 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, |
| 162 | ui->combo_time_zone); | 165 | ui->combo_time_zone); |
| 166 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout, | ||
| 167 | ui->use_unsafe_extended_memory_layout, | ||
| 168 | use_unsafe_extended_memory_layout); | ||
| 163 | 169 | ||
| 164 | if (Settings::IsConfiguringGlobal()) { | 170 | if (Settings::IsConfiguringGlobal()) { |
| 165 | // Guard if during game and set to game-specific value | 171 | // Guard if during game and set to game-specific value |
| @@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() { | |||
| 215 | Settings::values.rng_seed.GetValue().has_value(), | 221 | Settings::values.rng_seed.GetValue().has_value(), |
| 216 | Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); | 222 | Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); |
| 217 | 223 | ||
| 224 | ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout, | ||
| 225 | Settings::values.use_unsafe_extended_memory_layout, | ||
| 226 | use_unsafe_extended_memory_layout); | ||
| 227 | |||
| 218 | ui->custom_rtc_checkbox->setVisible(false); | 228 | ui->custom_rtc_checkbox->setVisible(false); |
| 219 | ui->custom_rtc_edit->setVisible(false); | 229 | ui->custom_rtc_edit->setVisible(false); |
| 220 | } | 230 | } |
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index ec28724a1..ce1a91601 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h | |||
| @@ -41,6 +41,7 @@ private: | |||
| 41 | bool enabled = false; | 41 | bool enabled = false; |
| 42 | 42 | ||
| 43 | ConfigurationShared::CheckState use_rng_seed; | 43 | ConfigurationShared::CheckState use_rng_seed; |
| 44 | ConfigurationShared::CheckState use_unsafe_extended_memory_layout; | ||
| 44 | 45 | ||
| 45 | Core::System& system; | 46 | Core::System& system; |
| 46 | }; | 47 | }; |
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui index 9e7bc3b93..e0caecd5e 100644 --- a/src/yuzu/configuration/configure_system.ui +++ b/src/yuzu/configuration/configure_system.ui | |||
| @@ -478,6 +478,13 @@ | |||
| 478 | </property> | 478 | </property> |
| 479 | </widget> | 479 | </widget> |
| 480 | </item> | 480 | </item> |
| 481 | <item row="7" column="0"> | ||
| 482 | <widget class="QCheckBox" name="use_unsafe_extended_memory_layout"> | ||
| 483 | <property name="text"> | ||
| 484 | <string>Unsafe extended memory layout (8GB DRAM)</string> | ||
| 485 | </property> | ||
| 486 | </widget> | ||
| 487 | </item> | ||
| 481 | </layout> | 488 | </layout> |
| 482 | </item> | 489 | </item> |
| 483 | </layout> | 490 | </layout> |
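Between configure_system.cpp, configure_system.h and configure_system.ui above, the new checkbox is wired through the per-game tristate helpers in three steps. A condensed sketch of that recipe, reusing the identifiers from this diff (the helper signatures are taken as shown in the hunks and not re-checked against configuration_shared.h):

    // 1. Track the tristate for the checkbox in the dialog (configure_system.h):
    ConfigurationShared::CheckState use_unsafe_extended_memory_layout;

    // 2. Register the checkbox as a colored tristate in SetupPerGameUI():
    ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout,
                                            Settings::values.use_unsafe_extended_memory_layout,
                                            use_unsafe_extended_memory_layout);

    // 3. Resolve the global vs. per-game value in ApplyConfiguration():
    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout,
                                             ui->use_unsafe_extended_memory_layout,
                                             use_unsafe_extended_memory_layout);

SetConfiguration() additionally seeds the widget from the current global value (setEnabled/setChecked), as shown at the top of the configure_system.cpp hunk.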
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b79409a68..ba9eece1d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "configuration/configure_input.h" | 27 | #include "configuration/configure_input.h" |
| 28 | #include "configuration/configure_per_game.h" | 28 | #include "configuration/configure_per_game.h" |
| 29 | #include "configuration/configure_tas.h" | 29 | #include "configuration/configure_tas.h" |
| 30 | #include "core/file_sys/romfs_factory.h" | ||
| 30 | #include "core/file_sys/vfs.h" | 31 | #include "core/file_sys/vfs.h" |
| 31 | #include "core/file_sys/vfs_real.h" | 32 | #include "core/file_sys/vfs_real.h" |
| 32 | #include "core/frontend/applets/cabinet.h" | 33 | #include "core/frontend/applets/cabinet.h" |
| @@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4171 | } | 4172 | } |
| 4172 | 4173 | ||
| 4173 | Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); | 4174 | Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); |
| 4175 | bool all_keys_present{true}; | ||
| 4176 | |||
| 4174 | if (keys.BaseDeriveNecessary()) { | 4177 | if (keys.BaseDeriveNecessary()) { |
| 4175 | Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; | 4178 | Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; |
| 4176 | 4179 | ||
| @@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4195 | errors += tr(" - Missing PRODINFO"); | 4198 | errors += tr(" - Missing PRODINFO"); |
| 4196 | } | 4199 | } |
| 4197 | if (!errors.isEmpty()) { | 4200 | if (!errors.isEmpty()) { |
| 4201 | all_keys_present = false; | ||
| 4198 | QMessageBox::warning( | 4202 | QMessageBox::warning( |
| 4199 | this, tr("Derivation Components Missing"), | 4203 | this, tr("Derivation Components Missing"), |
| 4200 | tr("Encryption keys are missing. " | 4204 | tr("Encryption keys are missing. " |
| @@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4222 | 4226 | ||
| 4223 | system->GetFileSystemController().CreateFactories(*vfs); | 4227 | system->GetFileSystemController().CreateFactories(*vfs); |
| 4224 | 4228 | ||
| 4229 | if (all_keys_present && !this->CheckSystemArchiveDecryption()) { | ||
| 4230 | LOG_WARNING(Frontend, "Mii model decryption failed"); | ||
| 4231 | QMessageBox::warning( | ||
| 4232 | this, tr("System Archive Decryption Failed"), | ||
| 4233 | tr("Encryption keys failed to decrypt firmware. " | ||
| 4234 | "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu " | ||
| 4235 | "quickstart guide</a> to get all your keys, firmware and " | ||
| 4236 | "games.")); | ||
| 4237 | } | ||
| 4238 | |||
| 4225 | if (behavior == ReinitializeKeyBehavior::Warning) { | 4239 | if (behavior == ReinitializeKeyBehavior::Warning) { |
| 4226 | game_list->PopulateAsync(UISettings::values.game_dirs); | 4240 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 4227 | } | 4241 | } |
| 4228 | } | 4242 | } |
| 4229 | 4243 | ||
| 4244 | bool GMainWindow::CheckSystemArchiveDecryption() { | ||
| 4245 | constexpr u64 MiiModelId = 0x0100000000000802; | ||
| 4246 | |||
| 4247 | auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); | ||
| 4248 | if (!bis_system) { | ||
| 4249 | // Not having system BIS files is not an error. | ||
| 4250 | return true; | ||
| 4251 | } | ||
| 4252 | |||
| 4253 | auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data); | ||
| 4254 | if (!mii_nca) { | ||
| 4255 | // Not having the Mii model is not an error. | ||
| 4256 | return true; | ||
| 4257 | } | ||
| 4258 | |||
| 4259 | // Return whether we are able to decrypt the RomFS of the Mii model. | ||
| 4260 | return mii_nca->GetRomFS().get() != nullptr; | ||
| 4261 | } | ||
| 4262 | |||
| 4230 | std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, | 4263 | std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, |
| 4231 | u64 program_id) { | 4264 | u64 program_id) { |
| 4232 | const auto dlc_entries = | 4265 | const auto dlc_entries = |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 8b5c1d747..3bbc31ada 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -392,6 +392,7 @@ private: | |||
| 392 | void LoadTranslation(); | 392 | void LoadTranslation(); |
| 393 | void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); | 393 | void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); |
| 394 | bool CheckDarkMode(); | 394 | bool CheckDarkMode(); |
| 395 | bool CheckSystemArchiveDecryption(); | ||
| 395 | 396 | ||
| 396 | QString GetTasStateDescription() const; | 397 | QString GetTasStateDescription() const; |
| 397 | bool CreateShortcut(const std::string& shortcut_path, const std::string& title, | 398 | bool CreateShortcut(const std::string& shortcut_path, const std::string& title, |

diff --git a/src/yuzu/qt_common.cpp b/src/yuzu/qt_common.cpp new file mode 100644 index 000000000..5ac9fe310 --- /dev/null +++ b/src/yuzu/qt_common.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <QGuiApplication> | ||
| 5 | #include <QStringLiteral> | ||
| 6 | #include <QWindow> | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/frontend/emu_window.h" | ||
| 9 | #include "yuzu/qt_common.h" | ||
| 10 | |||
| 11 | #ifdef __linux__ | ||
| 12 | #include <qpa/qplatformnativeinterface.h> | ||
| 13 | #endif | ||
| 14 | |||
| 15 | namespace QtCommon { | ||
| 16 | Core::Frontend::WindowSystemType GetWindowSystemType() { | ||
| 17 | // Determine WSI type based on Qt platform. | ||
| 18 | QString platform_name = QGuiApplication::platformName(); | ||
| 19 | if (platform_name == QStringLiteral("windows")) | ||
| 20 | return Core::Frontend::WindowSystemType::Windows; | ||
| 21 | else if (platform_name == QStringLiteral("xcb")) | ||
| 22 | return Core::Frontend::WindowSystemType::X11; | ||
| 23 | else if (platform_name == QStringLiteral("wayland")) | ||
| 24 | return Core::Frontend::WindowSystemType::Wayland; | ||
| 25 | else if (platform_name == QStringLiteral("wayland-egl")) | ||
| 26 | return Core::Frontend::WindowSystemType::Wayland; | ||
| 27 | else if (platform_name == QStringLiteral("cocoa")) | ||
| 28 | return Core::Frontend::WindowSystemType::Cocoa; | ||
| 29 | else if (platform_name == QStringLiteral("android")) | ||
| 30 | return Core::Frontend::WindowSystemType::Android; | ||
| 31 | |||
| 32 | LOG_CRITICAL(Frontend, "Unknown Qt platform {}!", platform_name.toStdString()); | ||
| 33 | return Core::Frontend::WindowSystemType::Windows; | ||
| 34 | } | ||
| 35 | |||
| 36 | Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { | ||
| 37 | Core::Frontend::EmuWindow::WindowSystemInfo wsi; | ||
| 38 | wsi.type = GetWindowSystemType(); | ||
| 39 | |||
| 40 | // Our Win32 Qt external doesn't have the private API. | ||
| 41 | #if defined(WIN32) || defined(__APPLE__) | ||
| 42 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 43 | #else | ||
| 44 | QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); | ||
| 45 | wsi.display_connection = pni->nativeResourceForWindow("display", window); | ||
| 46 | if (wsi.type == Core::Frontend::WindowSystemType::Wayland) | ||
| 47 | wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; | ||
| 48 | else | ||
| 49 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 50 | #endif | ||
| 51 | wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; | ||
| 52 | |||
| 53 | return wsi; | ||
| 54 | } | ||
| 55 | } // namespace QtCommon | ||
diff --git a/src/yuzu/qt_common.h b/src/yuzu/qt_common.h new file mode 100644 index 000000000..9c63f08f3 --- /dev/null +++ b/src/yuzu/qt_common.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <QWindow> | ||
| 7 | #include "core/frontend/emu_window.h" | ||
| 8 | |||
| 9 | namespace QtCommon { | ||
| 10 | |||
| 11 | Core::Frontend::WindowSystemType GetWindowSystemType(); | ||
| 12 | |||
| 13 | Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window); | ||
| 14 | |||
| 15 | } // namespace QtCommon | ||
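The two new files above centralize the window-system-integration plumbing so it can be shared between the main render window and any other Qt surface. A hypothetical caller (the function name below is illustrative and not part of this commit) would simply hand its QWindow over to the helper:

    // Illustrative only: a Qt render widget handing its native window over to the core.
    #include <QWindow>
    #include "core/frontend/emu_window.h"
    #include "yuzu/qt_common.h"

    Core::Frontend::EmuWindow::WindowSystemInfo QueryWindowInfo(QWindow* render_window) {
        // GetWindowSystemInfo resolves the platform type (X11/Wayland/Windows/Cocoa/Android)
        // and the matching native surface handle, so callers avoid per-platform #ifdefs.
        return QtCommon::GetWindowSystemInfo(render_window);
    }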
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 464da3231..a6418e693 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -4,18 +4,8 @@ | |||
| 4 | #include <memory> | 4 | #include <memory> |
| 5 | #include <optional> | 5 | #include <optional> |
| 6 | #include <sstream> | 6 | #include <sstream> |
| 7 | |||
| 8 | // Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307 | ||
| 9 | #ifdef __clang__ | ||
| 10 | #pragma clang diagnostic push | ||
| 11 | #pragma clang diagnostic ignored "-Wimplicit-fallthrough" | ||
| 12 | #endif | ||
| 13 | #include <SDL.h> | ||
| 14 | #ifdef __clang__ | ||
| 15 | #pragma clang diagnostic pop | ||
| 16 | #endif | ||
| 17 | |||
| 18 | #include <INIReader.h> | 7 | #include <INIReader.h> |
| 8 | #include <SDL.h> | ||
| 19 | #include "common/fs/file.h" | 9 | #include "common/fs/file.h" |
| 20 | #include "common/fs/fs.h" | 10 | #include "common/fs/fs.h" |
| 21 | #include "common/fs/path_util.h" | 11 | #include "common/fs/path_util.h" |
| @@ -274,7 +264,7 @@ void Config::ReadValues() { | |||
| 274 | 264 | ||
| 275 | // Core | 265 | // Core |
| 276 | ReadSetting("Core", Settings::values.use_multi_core); | 266 | ReadSetting("Core", Settings::values.use_multi_core); |
| 277 | ReadSetting("Core", Settings::values.use_extended_memory_layout); | 267 | ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout); |
| 278 | 268 | ||
| 279 | // Cpu | 269 | // Cpu |
| 280 | ReadSetting("Cpu", Settings::values.cpu_accuracy); | 270 | ReadSetting("Cpu", Settings::values.cpu_accuracy); |
| @@ -300,6 +290,7 @@ void Config::ReadValues() { | |||
| 300 | 290 | ||
| 301 | // Renderer | 291 | // Renderer |
| 302 | ReadSetting("Renderer", Settings::values.renderer_backend); | 292 | ReadSetting("Renderer", Settings::values.renderer_backend); |
| 293 | ReadSetting("Renderer", Settings::values.async_presentation); | ||
| 303 | ReadSetting("Renderer", Settings::values.renderer_force_max_clock); | 294 | ReadSetting("Renderer", Settings::values.renderer_force_max_clock); |
| 304 | ReadSetting("Renderer", Settings::values.renderer_debug); | 295 | ReadSetting("Renderer", Settings::values.renderer_debug); |
| 305 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); | 296 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); |
| @@ -319,14 +310,13 @@ void Config::ReadValues() { | |||
| 319 | ReadSetting("Renderer", Settings::values.use_disk_shader_cache); | 310 | ReadSetting("Renderer", Settings::values.use_disk_shader_cache); |
| 320 | ReadSetting("Renderer", Settings::values.gpu_accuracy); | 311 | ReadSetting("Renderer", Settings::values.gpu_accuracy); |
| 321 | ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); | 312 | ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); |
| 322 | ReadSetting("Renderer", Settings::values.use_vsync); | 313 | ReadSetting("Renderer", Settings::values.vsync_mode); |
| 323 | ReadSetting("Renderer", Settings::values.shader_backend); | 314 | ReadSetting("Renderer", Settings::values.shader_backend); |
| 324 | ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); | 315 | ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); |
| 325 | ReadSetting("Renderer", Settings::values.nvdec_emulation); | 316 | ReadSetting("Renderer", Settings::values.nvdec_emulation); |
| 326 | ReadSetting("Renderer", Settings::values.accelerate_astc); | 317 | ReadSetting("Renderer", Settings::values.accelerate_astc); |
| 327 | ReadSetting("Renderer", Settings::values.async_astc); | 318 | ReadSetting("Renderer", Settings::values.async_astc); |
| 328 | ReadSetting("Renderer", Settings::values.use_fast_gpu_time); | 319 | ReadSetting("Renderer", Settings::values.use_fast_gpu_time); |
| 329 | ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); | ||
| 330 | ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); | 320 | ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); |
| 331 | 321 | ||
| 332 | ReadSetting("Renderer", Settings::values.bg_red); | 322 | ReadSetting("Renderer", Settings::values.bg_red); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 209cfc28a..086ed4cfa 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -163,9 +163,9 @@ keyboard_enabled = | |||
| 163 | # 0: Disabled, 1 (default): Enabled | 163 | # 0: Disabled, 1 (default): Enabled |
| 164 | use_multi_core = | 164 | use_multi_core = |
| 165 | 165 | ||
| 166 | # Enable extended guest system memory layout (8GB DRAM) | 166 | # Enable unsafe extended guest system memory layout (8GB DRAM) |
| 167 | # 0 (default): Disabled, 1: Enabled | 167 | # 0 (default): Disabled, 1: Enabled |
| 168 | use_extended_memory_layout = | 168 | use_unsafe_extended_memory_layout = |
| 169 | 169 | ||
| 170 | [Cpu] | 170 | [Cpu] |
| 171 | # Adjusts various optimizations. | 171 | # Adjusts various optimizations. |
| @@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor = | |||
| 264 | # 0: OpenGL, 1 (default): Vulkan | 264 | # 0: OpenGL, 1 (default): Vulkan |
| 265 | backend = | 265 | backend = |
| 266 | 266 | ||
| 267 | # Whether to enable asynchronous presentation (Vulkan only) | ||
| 268 | # 0 (default): Off, 1: On | ||
| 269 | async_presentation = | ||
| 270 | |||
| 267 | # Enable graphics API debugging mode. | 271 | # Enable graphics API debugging mode. |
| 268 | # 0 (default): Disabled, 1: Enabled | 272 | # 0 (default): Disabled, 1: Enabled |
| 269 | debug = | 273 | debug = |
| @@ -321,8 +325,14 @@ aspect_ratio = | |||
| 321 | # 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x | 325 | # 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x |
| 322 | max_anisotropy = | 326 | max_anisotropy = |
| 323 | 327 | ||
| 324 | # Whether to enable V-Sync (caps the framerate at 60FPS) or not. | 328 | # Whether to enable VSync or not. |
| 325 | # 0 (default): Off, 1: On | 329 | # OpenGL: Values other than 0 enable VSync |
| 330 | # Vulkan: FIFO is selected if the requested mode is not supported by the driver. | ||
| 331 | # FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen refresh rate. | ||
| 332 | # FIFO Relaxed is similar to FIFO but allows tearing as it recovers from a slowdown. | ||
| 333 | # Mailbox can have lower latency than FIFO and does not tear but may drop frames. | ||
| 334 | # Immediate (no synchronization) just presents whatever is available and can exhibit tearing. | ||
| 335 | # 0: Immediate (Off), 1: Mailbox, 2 (Default): FIFO (On), 3: FIFO Relaxed | ||
| 326 | use_vsync = | 336 | use_vsync = |
| 327 | 337 | ||
| 328 | # Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is | 338 | # Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is |
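The reworded use_vsync description above enumerates the four Vulkan present modes and the FIFO fallback. A hedged sketch of that mapping on the renderer side (the function name, the availability check, and the exact fallback policy are assumptions based on the comment, not code from this commit):

    // Illustrative mapping of the ini value described above onto VkPresentModeKHR.
    // FIFO is the only mode Vulkan guarantees, so it is the fallback when the
    // requested mode is not reported by the driver.
    #include <algorithm>
    #include <vector>
    #include <vulkan/vulkan_core.h>

    VkPresentModeKHR ChoosePresentMode(int use_vsync_value,
                                       const std::vector<VkPresentModeKHR>& supported) {
        const VkPresentModeKHR requested = [use_vsync_value] {
            switch (use_vsync_value) {
            case 0:
                return VK_PRESENT_MODE_IMMEDIATE_KHR;    // no synchronization, may tear
            case 1:
                return VK_PRESENT_MODE_MAILBOX_KHR;      // low latency, may drop frames
            case 3:
                return VK_PRESENT_MODE_FIFO_RELAXED_KHR; // tears only after a slowdown
            case 2:
            default:
                return VK_PRESENT_MODE_FIFO_KHR;         // classic VSync
            }
        }();
        if (std::find(supported.begin(), supported.end(), requested) != supported.end()) {
            return requested;
        }
        return VK_PRESENT_MODE_FIFO_KHR;
    }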
| @@ -370,10 +380,6 @@ use_asynchronous_gpu_emulation = | |||
| 370 | # 0: Off, 1 (default): On | 380 | # 0: Off, 1 (default): On |
| 371 | use_fast_gpu_time = | 381 | use_fast_gpu_time = |
| 372 | 382 | ||
| 373 | # Force unmodified buffers to be flushed, which can cost performance. | ||
| 374 | # 0: Off (default), 1: On | ||
| 375 | use_pessimistic_flushes = | ||
| 376 | |||
| 377 | # Whether to use garbage collection or not for GPU caches. | 383 | # Whether to use garbage collection or not for GPU caches. |
| 378 | # 0 (default): Off, 1: On | 384 | # 0 (default): Off, 1: On |
| 379 | use_caches_gc = | 385 | use_caches_gc = |