summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt3
-rw-r--r--src/audio_core/renderer/system.cpp5
-rw-r--r--src/audio_core/sink/sdl2_sink.cpp11
-rw-r--r--src/common/address_space.inc4
-rw-r--r--src/common/input.h2
-rw-r--r--src/common/intrusive_list.h631
-rw-r--r--src/common/settings.cpp8
-rw-r--r--src/common/settings.h15
-rw-r--r--src/common/vector_math.h14
-rw-r--r--src/core/core.cpp7
-rw-r--r--src/core/hid/emulated_controller.cpp10
-rw-r--r--src/core/hid/emulated_controller.h1
-rw-r--r--src/core/hid/input_converter.cpp8
-rw-r--r--src/core/hid/motion_input.cpp36
-rw-r--r--src/core/hid/motion_input.h2
-rw-r--r--src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp7
-rw-r--r--src/core/hle/kernel/k_auto_object.h4
-rw-r--r--src/core/hle/kernel/k_event_info.h5
-rw-r--r--src/core/hle/kernel/k_object_name.h8
-rw-r--r--src/core/hle/kernel/k_server_port.h4
-rw-r--r--src/core/hle/kernel/k_server_session.h7
-rw-r--r--src/core/hle/kernel/k_session_request.h4
-rw-r--r--src/core/hle/kernel/k_shared_memory_info.h4
-rw-r--r--src/core/hle/kernel/k_thread.h13
-rw-r--r--src/core/hle/kernel/kernel.cpp31
-rw-r--r--src/core/hle/service/audio/audout_u.cpp14
-rw-r--r--src/core/hle/service/ipc_helpers.h1
-rw-r--r--src/core/hle/service/kernel_helpers.cpp3
-rw-r--r--src/core/hle/service/mutex.cpp3
-rw-r--r--src/core/hle/service/server_manager.cpp6
-rw-r--r--src/core/hle/service/sm/sm.cpp3
-rw-r--r--src/core/hle/service/sm/sm_controller.cpp3
-rw-r--r--src/core/memory.cpp2
-rw-r--r--src/core/telemetry_session.cpp17
-rw-r--r--src/input_common/input_mapping.cpp3
-rw-r--r--src/input_common/input_poller.cpp1
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp3
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp3
-rw-r--r--src/tests/CMakeLists.txt2
-rw-r--r--src/tests/video_core/buffer_base.cpp549
-rw-r--r--src/tests/video_core/memory_tracker.cpp549
-rw-r--r--src/video_core/CMakeLists.txt7
-rw-r--r--src/video_core/buffer_cache/buffer_base.h518
-rw-r--r--src/video_core/buffer_cache/buffer_cache.cpp4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h1002
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h580
-rw-r--r--src/video_core/buffer_cache/memory_tracker_base.h273
-rw-r--r--src/video_core/buffer_cache/word_manager.h462
-rw-r--r--src/video_core/compatible_formats.cpp20
-rw-r--r--src/video_core/engines/maxwell_3d.cpp13
-rw-r--r--src/video_core/fence_manager.h143
-rw-r--r--src/video_core/memory_manager.cpp13
-rw-r--r--src/video_core/memory_manager.h4
-rw-r--r--src/video_core/query_cache.h137
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache_base.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp54
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h5
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp3
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp53
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp224
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.cpp457
-rw-r--r--src/video_core/renderer_vulkan/vk_present_manager.h83
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp139
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h37
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp66
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h10
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/shader_environment.cpp16
-rw-r--r--src/video_core/shader_environment.h6
-rw-r--r--src/video_core/surface.cpp5
-rw-r--r--src/video_core/surface.h12
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp6
-rw-r--r--src/video_core/texture_cache/formatter.cpp22
-rw-r--r--src/video_core/texture_cache/formatter.h8
-rw-r--r--src/video_core/texture_cache/image_view_base.cpp8
-rw-r--r--src/video_core/texture_cache/image_view_base.h7
-rw-r--r--src/video_core/texture_cache/texture_cache.h142
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h33
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp4
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h3
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.cpp6
-rw-r--r--src/video_core/vulkan_common/vulkan_surface.h9
-rw-r--r--src/yuzu/CMakeLists.txt2
-rw-r--r--src/yuzu/applets/qt_profile_select.cpp1
-rw-r--r--src/yuzu/bootmanager.cpp90
-rw-r--r--src/yuzu/bootmanager.h26
-rw-r--r--src/yuzu/configuration/config.cpp18
-rw-r--r--src/yuzu/configuration/configure_general.cpp9
-rw-r--r--src/yuzu/configuration/configure_general.h1
-rw-r--r--src/yuzu/configuration/configure_general.ui7
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp168
-rw-r--r--src/yuzu/configuration/configure_graphics.h23
-rw-r--r--src/yuzu/configuration/configure_graphics.ui40
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp20
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h2
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui29
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp11
-rw-r--r--src/yuzu/configuration/configure_input_player_widget.cpp91
-rw-r--r--src/yuzu/configuration/configure_input_player_widget.h5
-rw-r--r--src/yuzu/configuration/configure_system.cpp10
-rw-r--r--src/yuzu/configuration/configure_system.h1
-rw-r--r--src/yuzu/configuration/configure_system.ui7
-rw-r--r--src/yuzu/main.cpp33
-rw-r--r--src/yuzu/main.h1
-rw-r--r--src/yuzu/qt_common.cpp55
-rw-r--r--src/yuzu/qt_common.h15
-rw-r--r--src/yuzu_cmd/config.cpp18
-rw-r--r--src/yuzu_cmd/default_ini.h22
130 files changed, 5094 insertions, 2419 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 312a49f42..5e3a74c0f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -113,6 +113,9 @@ else()
113 113
114 $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init> 114 $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init>
115 $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field> 115 $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field>
116 $<$<CXX_COMPILER_ID:Clang>:-Werror=shadow-uncaptured-local>
117 $<$<CXX_COMPILER_ID:Clang>:-Werror=implicit-fallthrough>
118 $<$<CXX_COMPILER_ID:Clang>:-Werror=type-limits>
116 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init> 119 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init>
117 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field> 120 $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field>
118 ) 121 )
diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp
index ad869facb..53b258c4f 100644
--- a/src/audio_core/renderer/system.cpp
+++ b/src/audio_core/renderer/system.cpp
@@ -436,10 +436,7 @@ void System::Stop() {
436 } 436 }
437 437
438 if (execution_mode == ExecutionMode::Auto) { 438 if (execution_mode == ExecutionMode::Auto) {
439 // Should wait for the system to terminate here, but core timing (should have) already 439 terminate_event.Wait();
440 // stopped, so this isn't needed. Find a way to make this definite.
441
442 // terminate_event.Wait();
443 } 440 }
444} 441}
445 442
diff --git a/src/audio_core/sink/sdl2_sink.cpp b/src/audio_core/sink/sdl2_sink.cpp
index ee1a0652f..c1529d1f9 100644
--- a/src/audio_core/sink/sdl2_sink.cpp
+++ b/src/audio_core/sink/sdl2_sink.cpp
@@ -3,6 +3,7 @@
3 3
4#include <span> 4#include <span>
5#include <vector> 5#include <vector>
6#include <SDL.h>
6 7
7#include "audio_core/common/common.h" 8#include "audio_core/common/common.h"
8#include "audio_core/sink/sdl2_sink.h" 9#include "audio_core/sink/sdl2_sink.h"
@@ -10,16 +11,6 @@
10#include "common/logging/log.h" 11#include "common/logging/log.h"
11#include "core/core.h" 12#include "core/core.h"
12 13
13// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
14#ifdef __clang__
15#pragma clang diagnostic push
16#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
17#endif
18#include <SDL.h>
19#ifdef __clang__
20#pragma clang diagnostic pop
21#endif
22
23namespace AudioCore::Sink { 14namespace AudioCore::Sink {
24/** 15/**
25 * SDL sink stream, responsible for sinking samples to hardware. 16 * SDL sink stream, responsible for sinking samples to hardware.
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
index 2195dabd5..1ee82df53 100644
--- a/src/common/address_space.inc
+++ b/src/common/address_space.inc
@@ -72,7 +72,7 @@ MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInf
72 } 72 }
73 }()}; 73 }()};
74 74
75 if (block_end_predecessor->virt >= virt) { 75 if (block_end_predecessor != blocks.begin() && block_end_predecessor->virt >= virt) {
76 // If this block's start would be overlapped by the map then reuse it as a tail 76 // If this block's start would be overlapped by the map then reuse it as a tail
77 // block 77 // block
78 block_end_predecessor->virt = virt_end; 78 block_end_predecessor->virt = virt_end;
@@ -336,7 +336,7 @@ ALLOC_MEMBER(VaType)::Allocate(VaType size) {
336 ASSERT_MSG(false, "Unexpected allocator state!"); 336 ASSERT_MSG(false, "Unexpected allocator state!");
337 } 337 }
338 338
339 auto search_predecessor{this->blocks.begin()}; 339 auto search_predecessor{std::next(this->blocks.begin())};
340 auto search_successor{std::next(search_predecessor)}; 340 auto search_successor{std::next(search_predecessor)};
341 341
342 while (search_successor != this->blocks.end() && 342 while (search_successor != this->blocks.end() &&
diff --git a/src/common/input.h b/src/common/input.h
index 51b277c1f..66fb15f0a 100644
--- a/src/common/input.h
+++ b/src/common/input.h
@@ -111,6 +111,8 @@ struct AnalogProperties {
111 float offset{}; 111 float offset{};
112 // Invert direction of the sensor data 112 // Invert direction of the sensor data
113 bool inverted{}; 113 bool inverted{};
114 // Invert the state if it's converted to a button
115 bool inverted_button{};
114 // Press once to activate, press again to release 116 // Press once to activate, press again to release
115 bool toggle{}; 117 bool toggle{};
116}; 118};
diff --git a/src/common/intrusive_list.h b/src/common/intrusive_list.h
new file mode 100644
index 000000000..d330dc1c2
--- /dev/null
+++ b/src/common/intrusive_list.h
@@ -0,0 +1,631 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/parent_of_member.h"
8
9namespace Common {
10
11// Forward declare implementation class for Node.
12namespace impl {
13
14class IntrusiveListImpl;
15
16}
17
18class IntrusiveListNode {
19 YUZU_NON_COPYABLE(IntrusiveListNode);
20
21private:
22 friend class impl::IntrusiveListImpl;
23
24 IntrusiveListNode* m_prev;
25 IntrusiveListNode* m_next;
26
27public:
28 constexpr IntrusiveListNode() : m_prev(this), m_next(this) {}
29
30 constexpr bool IsLinked() const {
31 return m_next != this;
32 }
33
34private:
35 constexpr void LinkPrev(IntrusiveListNode* node) {
36 // We can't link an already linked node.
37 ASSERT(!node->IsLinked());
38 this->SplicePrev(node, node);
39 }
40
41 constexpr void SplicePrev(IntrusiveListNode* first, IntrusiveListNode* last) {
42 // Splice a range into the list.
43 auto last_prev = last->m_prev;
44 first->m_prev = m_prev;
45 last_prev->m_next = this;
46 m_prev->m_next = first;
47 m_prev = last_prev;
48 }
49
50 constexpr void LinkNext(IntrusiveListNode* node) {
51 // We can't link an already linked node.
52 ASSERT(!node->IsLinked());
53 return this->SpliceNext(node, node);
54 }
55
56 constexpr void SpliceNext(IntrusiveListNode* first, IntrusiveListNode* last) {
57 // Splice a range into the list.
58 auto last_prev = last->m_prev;
59 first->m_prev = this;
60 last_prev->m_next = m_next;
61 m_next->m_prev = last_prev;
62 m_next = first;
63 }
64
65 constexpr void Unlink() {
66 this->Unlink(m_next);
67 }
68
69 constexpr void Unlink(IntrusiveListNode* last) {
70 // Unlink a node from a next node.
71 auto last_prev = last->m_prev;
72 m_prev->m_next = last;
73 last->m_prev = m_prev;
74 last_prev->m_next = this;
75 m_prev = last_prev;
76 }
77
78 constexpr IntrusiveListNode* GetPrev() {
79 return m_prev;
80 }
81
82 constexpr const IntrusiveListNode* GetPrev() const {
83 return m_prev;
84 }
85
86 constexpr IntrusiveListNode* GetNext() {
87 return m_next;
88 }
89
90 constexpr const IntrusiveListNode* GetNext() const {
91 return m_next;
92 }
93};
94// DEPRECATED: static_assert(std::is_literal_type<IntrusiveListNode>::value);
95
96namespace impl {
97
98class IntrusiveListImpl {
99 YUZU_NON_COPYABLE(IntrusiveListImpl);
100
101private:
102 IntrusiveListNode m_root_node;
103
104public:
105 template <bool Const>
106 class Iterator;
107
108 using value_type = IntrusiveListNode;
109 using size_type = size_t;
110 using difference_type = ptrdiff_t;
111 using pointer = value_type*;
112 using const_pointer = const value_type*;
113 using reference = value_type&;
114 using const_reference = const value_type&;
115 using iterator = Iterator<false>;
116 using const_iterator = Iterator<true>;
117 using reverse_iterator = std::reverse_iterator<iterator>;
118 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
119
120 template <bool Const>
121 class Iterator {
122 public:
123 using iterator_category = std::bidirectional_iterator_tag;
124 using value_type = typename IntrusiveListImpl::value_type;
125 using difference_type = typename IntrusiveListImpl::difference_type;
126 using pointer =
127 std::conditional_t<Const, IntrusiveListImpl::const_pointer, IntrusiveListImpl::pointer>;
128 using reference = std::conditional_t<Const, IntrusiveListImpl::const_reference,
129 IntrusiveListImpl::reference>;
130
131 private:
132 pointer m_node;
133
134 public:
135 constexpr explicit Iterator(pointer n) : m_node(n) {}
136
137 constexpr bool operator==(const Iterator& rhs) const {
138 return m_node == rhs.m_node;
139 }
140
141 constexpr pointer operator->() const {
142 return m_node;
143 }
144
145 constexpr reference operator*() const {
146 return *m_node;
147 }
148
149 constexpr Iterator& operator++() {
150 m_node = m_node->m_next;
151 return *this;
152 }
153
154 constexpr Iterator& operator--() {
155 m_node = m_node->m_prev;
156 return *this;
157 }
158
159 constexpr Iterator operator++(int) {
160 const Iterator it{*this};
161 ++(*this);
162 return it;
163 }
164
165 constexpr Iterator operator--(int) {
166 const Iterator it{*this};
167 --(*this);
168 return it;
169 }
170
171 constexpr operator Iterator<true>() const {
172 return Iterator<true>(m_node);
173 }
174
175 constexpr Iterator<false> GetNonConstIterator() const {
176 return Iterator<false>(const_cast<IntrusiveListImpl::pointer>(m_node));
177 }
178 };
179
180public:
181 constexpr IntrusiveListImpl() : m_root_node() {}
182
183 // Iterator accessors.
184 constexpr iterator begin() {
185 return iterator(m_root_node.GetNext());
186 }
187
188 constexpr const_iterator begin() const {
189 return const_iterator(m_root_node.GetNext());
190 }
191
192 constexpr iterator end() {
193 return iterator(std::addressof(m_root_node));
194 }
195
196 constexpr const_iterator end() const {
197 return const_iterator(std::addressof(m_root_node));
198 }
199
200 constexpr iterator iterator_to(reference v) {
201 // Only allow iterator_to for values in lists.
202 ASSERT(v.IsLinked());
203 return iterator(std::addressof(v));
204 }
205
206 constexpr const_iterator iterator_to(const_reference v) const {
207 // Only allow iterator_to for values in lists.
208 ASSERT(v.IsLinked());
209 return const_iterator(std::addressof(v));
210 }
211
212 // Content management.
213 constexpr bool empty() const {
214 return !m_root_node.IsLinked();
215 }
216
217 constexpr size_type size() const {
218 return static_cast<size_type>(std::distance(this->begin(), this->end()));
219 }
220
221 constexpr reference back() {
222 return *m_root_node.GetPrev();
223 }
224
225 constexpr const_reference back() const {
226 return *m_root_node.GetPrev();
227 }
228
229 constexpr reference front() {
230 return *m_root_node.GetNext();
231 }
232
233 constexpr const_reference front() const {
234 return *m_root_node.GetNext();
235 }
236
237 constexpr void push_back(reference node) {
238 m_root_node.LinkPrev(std::addressof(node));
239 }
240
241 constexpr void push_front(reference node) {
242 m_root_node.LinkNext(std::addressof(node));
243 }
244
245 constexpr void pop_back() {
246 m_root_node.GetPrev()->Unlink();
247 }
248
249 constexpr void pop_front() {
250 m_root_node.GetNext()->Unlink();
251 }
252
253 constexpr iterator insert(const_iterator pos, reference node) {
254 pos.GetNonConstIterator()->LinkPrev(std::addressof(node));
255 return iterator(std::addressof(node));
256 }
257
258 constexpr void splice(const_iterator pos, IntrusiveListImpl& o) {
259 splice_impl(pos, o.begin(), o.end());
260 }
261
262 constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first) {
263 const_iterator last(first);
264 std::advance(last, 1);
265 splice_impl(pos, first, last);
266 }
267
268 constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first,
269 const_iterator last) {
270 splice_impl(pos, first, last);
271 }
272
273 constexpr iterator erase(const_iterator pos) {
274 if (pos == this->end()) {
275 return this->end();
276 }
277 iterator it(pos.GetNonConstIterator());
278 (it++)->Unlink();
279 return it;
280 }
281
282 constexpr void clear() {
283 while (!this->empty()) {
284 this->pop_front();
285 }
286 }
287
288private:
289 constexpr void splice_impl(const_iterator _pos, const_iterator _first, const_iterator _last) {
290 if (_first == _last) {
291 return;
292 }
293 iterator pos(_pos.GetNonConstIterator());
294 iterator first(_first.GetNonConstIterator());
295 iterator last(_last.GetNonConstIterator());
296 first->Unlink(std::addressof(*last));
297 pos->SplicePrev(std::addressof(*first), std::addressof(*first));
298 }
299};
300
301} // namespace impl
302
303template <class T, class Traits>
304class IntrusiveList {
305 YUZU_NON_COPYABLE(IntrusiveList);
306
307private:
308 impl::IntrusiveListImpl m_impl;
309
310public:
311 template <bool Const>
312 class Iterator;
313
314 using value_type = T;
315 using size_type = size_t;
316 using difference_type = ptrdiff_t;
317 using pointer = value_type*;
318 using const_pointer = const value_type*;
319 using reference = value_type&;
320 using const_reference = const value_type&;
321 using iterator = Iterator<false>;
322 using const_iterator = Iterator<true>;
323 using reverse_iterator = std::reverse_iterator<iterator>;
324 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
325
326 template <bool Const>
327 class Iterator {
328 public:
329 friend class Common::IntrusiveList<T, Traits>;
330
331 using ImplIterator =
332 std::conditional_t<Const, Common::impl::IntrusiveListImpl::const_iterator,
333 Common::impl::IntrusiveListImpl::iterator>;
334
335 using iterator_category = std::bidirectional_iterator_tag;
336 using value_type = typename IntrusiveList::value_type;
337 using difference_type = typename IntrusiveList::difference_type;
338 using pointer =
339 std::conditional_t<Const, IntrusiveList::const_pointer, IntrusiveList::pointer>;
340 using reference =
341 std::conditional_t<Const, IntrusiveList::const_reference, IntrusiveList::reference>;
342
343 private:
344 ImplIterator m_iterator;
345
346 private:
347 constexpr explicit Iterator(ImplIterator it) : m_iterator(it) {}
348
349 constexpr ImplIterator GetImplIterator() const {
350 return m_iterator;
351 }
352
353 public:
354 constexpr bool operator==(const Iterator& rhs) const {
355 return m_iterator == rhs.m_iterator;
356 }
357
358 constexpr pointer operator->() const {
359 return std::addressof(Traits::GetParent(*m_iterator));
360 }
361
362 constexpr reference operator*() const {
363 return Traits::GetParent(*m_iterator);
364 }
365
366 constexpr Iterator& operator++() {
367 ++m_iterator;
368 return *this;
369 }
370
371 constexpr Iterator& operator--() {
372 --m_iterator;
373 return *this;
374 }
375
376 constexpr Iterator operator++(int) {
377 const Iterator it{*this};
378 ++m_iterator;
379 return it;
380 }
381
382 constexpr Iterator operator--(int) {
383 const Iterator it{*this};
384 --m_iterator;
385 return it;
386 }
387
388 constexpr operator Iterator<true>() const {
389 return Iterator<true>(m_iterator);
390 }
391 };
392
393private:
394 static constexpr IntrusiveListNode& GetNode(reference ref) {
395 return Traits::GetNode(ref);
396 }
397
398 static constexpr IntrusiveListNode const& GetNode(const_reference ref) {
399 return Traits::GetNode(ref);
400 }
401
402 static constexpr reference GetParent(IntrusiveListNode& node) {
403 return Traits::GetParent(node);
404 }
405
406 static constexpr const_reference GetParent(IntrusiveListNode const& node) {
407 return Traits::GetParent(node);
408 }
409
410public:
411 constexpr IntrusiveList() : m_impl() {}
412
413 // Iterator accessors.
414 constexpr iterator begin() {
415 return iterator(m_impl.begin());
416 }
417
418 constexpr const_iterator begin() const {
419 return const_iterator(m_impl.begin());
420 }
421
422 constexpr iterator end() {
423 return iterator(m_impl.end());
424 }
425
426 constexpr const_iterator end() const {
427 return const_iterator(m_impl.end());
428 }
429
430 constexpr const_iterator cbegin() const {
431 return this->begin();
432 }
433
434 constexpr const_iterator cend() const {
435 return this->end();
436 }
437
438 constexpr reverse_iterator rbegin() {
439 return reverse_iterator(this->end());
440 }
441
442 constexpr const_reverse_iterator rbegin() const {
443 return const_reverse_iterator(this->end());
444 }
445
446 constexpr reverse_iterator rend() {
447 return reverse_iterator(this->begin());
448 }
449
450 constexpr const_reverse_iterator rend() const {
451 return const_reverse_iterator(this->begin());
452 }
453
454 constexpr const_reverse_iterator crbegin() const {
455 return this->rbegin();
456 }
457
458 constexpr const_reverse_iterator crend() const {
459 return this->rend();
460 }
461
462 constexpr iterator iterator_to(reference v) {
463 return iterator(m_impl.iterator_to(GetNode(v)));
464 }
465
466 constexpr const_iterator iterator_to(const_reference v) const {
467 return const_iterator(m_impl.iterator_to(GetNode(v)));
468 }
469
470 // Content management.
471 constexpr bool empty() const {
472 return m_impl.empty();
473 }
474
475 constexpr size_type size() const {
476 return m_impl.size();
477 }
478
479 constexpr reference back() {
480 return GetParent(m_impl.back());
481 }
482
483 constexpr const_reference back() const {
484 return GetParent(m_impl.back());
485 }
486
487 constexpr reference front() {
488 return GetParent(m_impl.front());
489 }
490
491 constexpr const_reference front() const {
492 return GetParent(m_impl.front());
493 }
494
495 constexpr void push_back(reference ref) {
496 m_impl.push_back(GetNode(ref));
497 }
498
499 constexpr void push_front(reference ref) {
500 m_impl.push_front(GetNode(ref));
501 }
502
503 constexpr void pop_back() {
504 m_impl.pop_back();
505 }
506
507 constexpr void pop_front() {
508 m_impl.pop_front();
509 }
510
511 constexpr iterator insert(const_iterator pos, reference ref) {
512 return iterator(m_impl.insert(pos.GetImplIterator(), GetNode(ref)));
513 }
514
515 constexpr void splice(const_iterator pos, IntrusiveList& o) {
516 m_impl.splice(pos.GetImplIterator(), o.m_impl);
517 }
518
519 constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first) {
520 m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator());
521 }
522
523 constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first,
524 const_iterator last) {
525 m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator(),
526 last.GetImplIterator());
527 }
528
529 constexpr iterator erase(const_iterator pos) {
530 return iterator(m_impl.erase(pos.GetImplIterator()));
531 }
532
533 constexpr void clear() {
534 m_impl.clear();
535 }
536};
537
538template <auto T, class Derived = Common::impl::GetParentType<T>>
539class IntrusiveListMemberTraits;
540
541template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
542class IntrusiveListMemberTraits<Member, Derived> {
543public:
544 using ListType = IntrusiveList<Derived, IntrusiveListMemberTraits>;
545
546private:
547 friend class IntrusiveList<Derived, IntrusiveListMemberTraits>;
548
549 static constexpr IntrusiveListNode& GetNode(Derived& parent) {
550 return parent.*Member;
551 }
552
553 static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
554 return parent.*Member;
555 }
556
557 static Derived& GetParent(IntrusiveListNode& node) {
558 return Common::GetParentReference<Member, Derived>(std::addressof(node));
559 }
560
561 static Derived const& GetParent(IntrusiveListNode const& node) {
562 return Common::GetParentReference<Member, Derived>(std::addressof(node));
563 }
564};
565
566template <auto T, class Derived = Common::impl::GetParentType<T>>
567class IntrusiveListMemberTraitsByNonConstexprOffsetOf;
568
569template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
570class IntrusiveListMemberTraitsByNonConstexprOffsetOf<Member, Derived> {
571public:
572 using ListType = IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
573
574private:
575 friend class IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
576
577 static constexpr IntrusiveListNode& GetNode(Derived& parent) {
578 return parent.*Member;
579 }
580
581 static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
582 return parent.*Member;
583 }
584
585 static Derived& GetParent(IntrusiveListNode& node) {
586 return *reinterpret_cast<Derived*>(reinterpret_cast<char*>(std::addressof(node)) -
587 GetOffset());
588 }
589
590 static Derived const& GetParent(IntrusiveListNode const& node) {
591 return *reinterpret_cast<const Derived*>(
592 reinterpret_cast<const char*>(std::addressof(node)) - GetOffset());
593 }
594
595 static uintptr_t GetOffset() {
596 return reinterpret_cast<uintptr_t>(std::addressof(reinterpret_cast<Derived*>(0)->*Member));
597 }
598};
599
600template <class Derived>
601class IntrusiveListBaseNode : public IntrusiveListNode {};
602
603template <class Derived>
604class IntrusiveListBaseTraits {
605public:
606 using ListType = IntrusiveList<Derived, IntrusiveListBaseTraits>;
607
608private:
609 friend class IntrusiveList<Derived, IntrusiveListBaseTraits>;
610
611 static constexpr IntrusiveListNode& GetNode(Derived& parent) {
612 return static_cast<IntrusiveListNode&>(
613 static_cast<IntrusiveListBaseNode<Derived>&>(parent));
614 }
615
616 static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
617 return static_cast<const IntrusiveListNode&>(
618 static_cast<const IntrusiveListBaseNode<Derived>&>(parent));
619 }
620
621 static constexpr Derived& GetParent(IntrusiveListNode& node) {
622 return static_cast<Derived&>(static_cast<IntrusiveListBaseNode<Derived>&>(node));
623 }
624
625 static constexpr Derived const& GetParent(IntrusiveListNode const& node) {
626 return static_cast<const Derived&>(
627 static_cast<const IntrusiveListBaseNode<Derived>&>(node));
628 }
629};
630
631} // namespace Common
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 84955030b..f1ee42ab2 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -45,6 +45,7 @@ void LogSettings() {
45 log_setting("System_LanguageIndex", values.language_index.GetValue()); 45 log_setting("System_LanguageIndex", values.language_index.GetValue());
46 log_setting("System_RegionIndex", values.region_index.GetValue()); 46 log_setting("System_RegionIndex", values.region_index.GetValue());
47 log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); 47 log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
48 log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue());
48 log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); 49 log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
49 log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); 50 log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
50 log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); 51 log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
@@ -60,7 +61,7 @@ void LogSettings() {
60 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); 61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
61 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); 62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
62 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue()); 63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
63 log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); 64 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
64 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); 65 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
65 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); 66 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
66 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); 67 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
@@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) {
191 192
192 // Core 193 // Core
193 values.use_multi_core.SetGlobal(true); 194 values.use_multi_core.SetGlobal(true);
194 values.use_extended_memory_layout.SetGlobal(true); 195 values.use_unsafe_extended_memory_layout.SetGlobal(true);
195 196
196 // CPU 197 // CPU
197 values.cpu_accuracy.SetGlobal(true); 198 values.cpu_accuracy.SetGlobal(true);
@@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) {
205 // Renderer 206 // Renderer
206 values.fsr_sharpening_slider.SetGlobal(true); 207 values.fsr_sharpening_slider.SetGlobal(true);
207 values.renderer_backend.SetGlobal(true); 208 values.renderer_backend.SetGlobal(true);
209 values.async_presentation.SetGlobal(true);
208 values.renderer_force_max_clock.SetGlobal(true); 210 values.renderer_force_max_clock.SetGlobal(true);
209 values.vulkan_device.SetGlobal(true); 211 values.vulkan_device.SetGlobal(true);
210 values.fullscreen_mode.SetGlobal(true); 212 values.fullscreen_mode.SetGlobal(true);
@@ -221,11 +223,9 @@ void RestoreGlobalState(bool is_powered_on) {
221 values.nvdec_emulation.SetGlobal(true); 223 values.nvdec_emulation.SetGlobal(true);
222 values.accelerate_astc.SetGlobal(true); 224 values.accelerate_astc.SetGlobal(true);
223 values.async_astc.SetGlobal(true); 225 values.async_astc.SetGlobal(true);
224 values.use_vsync.SetGlobal(true);
225 values.shader_backend.SetGlobal(true); 226 values.shader_backend.SetGlobal(true);
226 values.use_asynchronous_shaders.SetGlobal(true); 227 values.use_asynchronous_shaders.SetGlobal(true);
227 values.use_fast_gpu_time.SetGlobal(true); 228 values.use_fast_gpu_time.SetGlobal(true);
228 values.use_pessimistic_flushes.SetGlobal(true);
229 values.use_vulkan_driver_pipeline_cache.SetGlobal(true); 229 values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
230 values.bg_red.SetGlobal(true); 230 values.bg_red.SetGlobal(true);
231 values.bg_green.SetGlobal(true); 231 values.bg_green.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index b77a1580a..2bf191cef 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -16,6 +16,13 @@
16 16
17namespace Settings { 17namespace Settings {
18 18
19enum class VSyncMode : u32 {
20 Immediate = 0,
21 Mailbox = 1,
22 FIFO = 2,
23 FIFORelaxed = 3,
24};
25
19enum class RendererBackend : u32 { 26enum class RendererBackend : u32 {
20 OpenGL = 0, 27 OpenGL = 0,
21 Vulkan = 1, 28 Vulkan = 1,
@@ -388,7 +395,8 @@ struct Values {
388 395
389 // Core 396 // Core
390 SwitchableSetting<bool> use_multi_core{true, "use_multi_core"}; 397 SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
391 SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"}; 398 SwitchableSetting<bool> use_unsafe_extended_memory_layout{false,
399 "use_unsafe_extended_memory_layout"};
392 400
393 // Cpu 401 // Cpu
394 SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, 402 SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
@@ -422,6 +430,7 @@ struct Values {
422 // Renderer 430 // Renderer
423 SwitchableSetting<RendererBackend, true> renderer_backend{ 431 SwitchableSetting<RendererBackend, true> renderer_backend{
424 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; 432 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
433 SwitchableSetting<bool> async_presentation{false, "async_presentation"};
425 SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"}; 434 SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
426 Setting<bool> renderer_debug{false, "debug"}; 435 Setting<bool> renderer_debug{false, "debug"};
427 Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; 436 Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
@@ -454,12 +463,12 @@ struct Values {
454 SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; 463 SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
455 SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"}; 464 SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
456 SwitchableSetting<bool> async_astc{false, "async_astc"}; 465 SwitchableSetting<bool> async_astc{false, "async_astc"};
457 SwitchableSetting<bool> use_vsync{true, "use_vsync"}; 466 Setting<VSyncMode, true> vsync_mode{VSyncMode::FIFO, VSyncMode::Immediate,
467 VSyncMode::FIFORelaxed, "use_vsync"};
458 SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL, 468 SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL,
459 ShaderBackend::SPIRV, "shader_backend"}; 469 ShaderBackend::SPIRV, "shader_backend"};
460 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; 470 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
461 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; 471 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
462 SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
463 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, 472 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
464 "use_vulkan_driver_pipeline_cache"}; 473 "use_vulkan_driver_pipeline_cache"};
465 474
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 0e2095c45..b4885835d 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -259,6 +259,20 @@ public:
259 return *this; 259 return *this;
260 } 260 }
261 261
262 void RotateFromOrigin(float roll, float pitch, float yaw) {
263 float temp = y;
264 y = std::cos(roll) * y - std::sin(roll) * z;
265 z = std::sin(roll) * temp + std::cos(roll) * z;
266
267 temp = x;
268 x = std::cos(pitch) * x + std::sin(pitch) * z;
269 z = -std::sin(pitch) * temp + std::cos(pitch) * z;
270
271 temp = x;
272 x = std::cos(yaw) * x - std::sin(yaw) * y;
273 y = std::sin(yaw) * temp + std::cos(yaw) * y;
274 }
275
262 [[nodiscard]] constexpr T Length2() const { 276 [[nodiscard]] constexpr T Length2() const {
263 return x * x + y * y + z * z; 277 return x * x + y * y + z * z;
264 } 278 }
diff --git a/src/core/core.cpp b/src/core/core.cpp
index caa6a77be..06fba4ce5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -137,7 +137,7 @@ struct System::Impl {
137 device_memory = std::make_unique<Core::DeviceMemory>(); 137 device_memory = std::make_unique<Core::DeviceMemory>();
138 138
139 is_multicore = Settings::values.use_multi_core.GetValue(); 139 is_multicore = Settings::values.use_multi_core.GetValue();
140 extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue(); 140 extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
141 141
142 core_timing.SetMulticore(is_multicore); 142 core_timing.SetMulticore(is_multicore);
143 core_timing.Initialize([&system]() { system.RegisterHostThread(); }); 143 core_timing.Initialize([&system]() { system.RegisterHostThread(); });
@@ -169,7 +169,7 @@ struct System::Impl {
169 void ReinitializeIfNecessary(System& system) { 169 void ReinitializeIfNecessary(System& system) {
170 const bool must_reinitialize = 170 const bool must_reinitialize =
171 is_multicore != Settings::values.use_multi_core.GetValue() || 171 is_multicore != Settings::values.use_multi_core.GetValue() ||
172 extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue(); 172 extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue();
173 173
174 if (!must_reinitialize) { 174 if (!must_reinitialize) {
175 return; 175 return;
@@ -178,7 +178,7 @@ struct System::Impl {
178 LOG_DEBUG(Kernel, "Re-initializing"); 178 LOG_DEBUG(Kernel, "Re-initializing");
179 179
180 is_multicore = Settings::values.use_multi_core.GetValue(); 180 is_multicore = Settings::values.use_multi_core.GetValue();
181 extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue(); 181 extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
182 182
183 Initialize(system); 183 Initialize(system);
184 } 184 }
@@ -293,6 +293,7 @@ struct System::Impl {
293 ASSERT(Kernel::KProcess::Initialize(main_process, system, "main", 293 ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
294 Kernel::KProcess::ProcessType::Userland, resource_limit) 294 Kernel::KProcess::ProcessType::Userland, resource_limit)
295 .IsSuccess()); 295 .IsSuccess());
296 Kernel::KProcess::Register(system.Kernel(), main_process);
296 kernel.MakeApplicationProcess(main_process); 297 kernel.MakeApplicationProcess(main_process);
297 const auto [load_result, load_parameters] = app_loader->Load(*main_process, system); 298 const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
298 if (load_result != Loader::ResultStatus::Success) { 299 if (load_result != Loader::ResultStatus::Success) {
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index a70f8807c..ecab85893 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -376,6 +376,7 @@ void EmulatedController::ReloadInput() {
376 motion.accel = emulated_motion.GetAcceleration(); 376 motion.accel = emulated_motion.GetAcceleration();
377 motion.gyro = emulated_motion.GetGyroscope(); 377 motion.gyro = emulated_motion.GetGyroscope();
378 motion.rotation = emulated_motion.GetRotations(); 378 motion.rotation = emulated_motion.GetRotations();
379 motion.euler = emulated_motion.GetEulerAngles();
379 motion.orientation = emulated_motion.GetOrientation(); 380 motion.orientation = emulated_motion.GetOrientation();
380 motion.is_at_rest = !emulated_motion.IsMoving(motion_sensitivity); 381 motion.is_at_rest = !emulated_motion.IsMoving(motion_sensitivity);
381 } 382 }
@@ -551,6 +552,8 @@ void EmulatedController::EnableSystemButtons() {
551void EmulatedController::DisableSystemButtons() { 552void EmulatedController::DisableSystemButtons() {
552 std::scoped_lock lock{mutex}; 553 std::scoped_lock lock{mutex};
553 system_buttons_enabled = false; 554 system_buttons_enabled = false;
555 controller.home_button_state.raw = 0;
556 controller.capture_button_state.raw = 0;
554} 557}
555 558
556void EmulatedController::ResetSystemButtons() { 559void EmulatedController::ResetSystemButtons() {
@@ -734,6 +737,8 @@ void EmulatedController::SetButton(const Common::Input::CallbackStatus& callback
734 if (is_configuring) { 737 if (is_configuring) {
735 controller.npad_button_state.raw = NpadButton::None; 738 controller.npad_button_state.raw = NpadButton::None;
736 controller.debug_pad_button_state.raw = 0; 739 controller.debug_pad_button_state.raw = 0;
740 controller.home_button_state.raw = 0;
741 controller.capture_button_state.raw = 0;
737 lock.unlock(); 742 lock.unlock();
738 TriggerOnChange(ControllerTriggerType::Button, false); 743 TriggerOnChange(ControllerTriggerType::Button, false);
739 return; 744 return;
@@ -976,14 +981,11 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
976 emulated.UpdateOrientation(raw_status.delta_timestamp); 981 emulated.UpdateOrientation(raw_status.delta_timestamp);
977 force_update_motion = raw_status.force_update; 982 force_update_motion = raw_status.force_update;
978 983
979 if (is_configuring) {
980 return;
981 }
982
983 auto& motion = controller.motion_state[index]; 984 auto& motion = controller.motion_state[index];
984 motion.accel = emulated.GetAcceleration(); 985 motion.accel = emulated.GetAcceleration();
985 motion.gyro = emulated.GetGyroscope(); 986 motion.gyro = emulated.GetGyroscope();
986 motion.rotation = emulated.GetRotations(); 987 motion.rotation = emulated.GetRotations();
988 motion.euler = emulated.GetEulerAngles();
987 motion.orientation = emulated.GetOrientation(); 989 motion.orientation = emulated.GetOrientation();
988 motion.is_at_rest = !emulated.IsMoving(motion_sensitivity); 990 motion.is_at_rest = !emulated.IsMoving(motion_sensitivity);
989} 991}
diff --git a/src/core/hid/emulated_controller.h b/src/core/hid/emulated_controller.h
index 429655355..6e01f4e12 100644
--- a/src/core/hid/emulated_controller.h
+++ b/src/core/hid/emulated_controller.h
@@ -106,6 +106,7 @@ struct ControllerMotion {
106 Common::Vec3f accel{}; 106 Common::Vec3f accel{};
107 Common::Vec3f gyro{}; 107 Common::Vec3f gyro{};
108 Common::Vec3f rotation{}; 108 Common::Vec3f rotation{};
109 Common::Vec3f euler{};
109 std::array<Common::Vec3f, 3> orientation{}; 110 std::array<Common::Vec3f, 3> orientation{};
110 bool is_at_rest{}; 111 bool is_at_rest{};
111}; 112};
diff --git a/src/core/hid/input_converter.cpp b/src/core/hid/input_converter.cpp
index 7cee39a53..53b00b1f9 100644
--- a/src/core/hid/input_converter.cpp
+++ b/src/core/hid/input_converter.cpp
@@ -54,6 +54,7 @@ Common::Input::ButtonStatus TransformToButton(const Common::Input::CallbackStatu
54 case Common::Input::InputType::Analog: 54 case Common::Input::InputType::Analog:
55 status.value = TransformToTrigger(callback).pressed.value; 55 status.value = TransformToTrigger(callback).pressed.value;
56 status.toggle = callback.analog_status.properties.toggle; 56 status.toggle = callback.analog_status.properties.toggle;
57 status.inverted = callback.analog_status.properties.inverted_button;
57 break; 58 break;
58 case Common::Input::InputType::Trigger: 59 case Common::Input::InputType::Trigger:
59 status.value = TransformToTrigger(callback).pressed.value; 60 status.value = TransformToTrigger(callback).pressed.value;
@@ -61,6 +62,9 @@ Common::Input::ButtonStatus TransformToButton(const Common::Input::CallbackStatu
61 case Common::Input::InputType::Button: 62 case Common::Input::InputType::Button:
62 status = callback.button_status; 63 status = callback.button_status;
63 break; 64 break;
65 case Common::Input::InputType::Motion:
66 status.value = std::abs(callback.motion_status.gyro.x.raw_value) > 1.0f;
67 break;
64 default: 68 default:
65 LOG_ERROR(Input, "Conversion from type {} to button not implemented", callback.type); 69 LOG_ERROR(Input, "Conversion from type {} to button not implemented", callback.type);
66 break; 70 break;
@@ -226,6 +230,10 @@ Common::Input::TriggerStatus TransformToTrigger(const Common::Input::CallbackSta
226 status = callback.trigger_status; 230 status = callback.trigger_status;
227 calculate_button_value = false; 231 calculate_button_value = false;
228 break; 232 break;
233 case Common::Input::InputType::Motion:
234 status.analog.properties.range = 1.0f;
235 raw_value = callback.motion_status.accel.x.raw_value;
236 break;
229 default: 237 default:
230 LOG_ERROR(Input, "Conversion from type {} to trigger not implemented", callback.type); 238 LOG_ERROR(Input, "Conversion from type {} to trigger not implemented", callback.type);
231 break; 239 break;
diff --git a/src/core/hid/motion_input.cpp b/src/core/hid/motion_input.cpp
index 0dd66c1cc..b60478dbb 100644
--- a/src/core/hid/motion_input.cpp
+++ b/src/core/hid/motion_input.cpp
@@ -1,6 +1,8 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <cmath>
5
4#include "common/math_util.h" 6#include "common/math_util.h"
5#include "core/hid/motion_input.h" 7#include "core/hid/motion_input.h"
6 8
@@ -51,6 +53,20 @@ void MotionInput::SetQuaternion(const Common::Quaternion<f32>& quaternion) {
51 quat = quaternion; 53 quat = quaternion;
52} 54}
53 55
56void MotionInput::SetEulerAngles(const Common::Vec3f& euler_angles) {
57 const float cr = std::cos(euler_angles.x * 0.5f);
58 const float sr = std::sin(euler_angles.x * 0.5f);
59 const float cp = std::cos(euler_angles.y * 0.5f);
60 const float sp = std::sin(euler_angles.y * 0.5f);
61 const float cy = std::cos(euler_angles.z * 0.5f);
62 const float sy = std::sin(euler_angles.z * 0.5f);
63
64 quat.w = cr * cp * cy + sr * sp * sy;
65 quat.xyz.x = sr * cp * cy - cr * sp * sy;
66 quat.xyz.y = cr * sp * cy + sr * cp * sy;
67 quat.xyz.z = cr * cp * sy - sr * sp * cy;
68}
69
54void MotionInput::SetGyroBias(const Common::Vec3f& bias) { 70void MotionInput::SetGyroBias(const Common::Vec3f& bias) {
55 gyro_bias = bias; 71 gyro_bias = bias;
56} 72}
@@ -222,6 +238,26 @@ Common::Vec3f MotionInput::GetRotations() const {
222 return rotations; 238 return rotations;
223} 239}
224 240
241Common::Vec3f MotionInput::GetEulerAngles() const {
242 // roll (x-axis rotation)
243 const float sinr_cosp = 2 * (quat.w * quat.xyz.x + quat.xyz.y * quat.xyz.z);
244 const float cosr_cosp = 1 - 2 * (quat.xyz.x * quat.xyz.x + quat.xyz.y * quat.xyz.y);
245
246 // pitch (y-axis rotation)
247 const float sinp = std::sqrt(1 + 2 * (quat.w * quat.xyz.y - quat.xyz.x * quat.xyz.z));
248 const float cosp = std::sqrt(1 - 2 * (quat.w * quat.xyz.y - quat.xyz.x * quat.xyz.z));
249
250 // yaw (z-axis rotation)
251 const float siny_cosp = 2 * (quat.w * quat.xyz.z + quat.xyz.x * quat.xyz.y);
252 const float cosy_cosp = 1 - 2 * (quat.xyz.y * quat.xyz.y + quat.xyz.z * quat.xyz.z);
253
254 return {
255 std::atan2(sinr_cosp, cosr_cosp),
256 2 * std::atan2(sinp, cosp) - Common::PI / 2,
257 std::atan2(siny_cosp, cosy_cosp),
258 };
259}
260
225void MotionInput::ResetOrientation() { 261void MotionInput::ResetOrientation() {
226 if (!reset_enabled || only_accelerometer) { 262 if (!reset_enabled || only_accelerometer) {
227 return; 263 return;
diff --git a/src/core/hid/motion_input.h b/src/core/hid/motion_input.h
index 9f3fc1cf7..482719359 100644
--- a/src/core/hid/motion_input.h
+++ b/src/core/hid/motion_input.h
@@ -35,6 +35,7 @@ public:
35 void SetAcceleration(const Common::Vec3f& acceleration); 35 void SetAcceleration(const Common::Vec3f& acceleration);
36 void SetGyroscope(const Common::Vec3f& gyroscope); 36 void SetGyroscope(const Common::Vec3f& gyroscope);
37 void SetQuaternion(const Common::Quaternion<f32>& quaternion); 37 void SetQuaternion(const Common::Quaternion<f32>& quaternion);
38 void SetEulerAngles(const Common::Vec3f& euler_angles);
38 void SetGyroBias(const Common::Vec3f& bias); 39 void SetGyroBias(const Common::Vec3f& bias);
39 void SetGyroThreshold(f32 threshold); 40 void SetGyroThreshold(f32 threshold);
40 41
@@ -54,6 +55,7 @@ public:
54 [[nodiscard]] Common::Vec3f GetGyroBias() const; 55 [[nodiscard]] Common::Vec3f GetGyroBias() const;
55 [[nodiscard]] Common::Vec3f GetRotations() const; 56 [[nodiscard]] Common::Vec3f GetRotations() const;
56 [[nodiscard]] Common::Quaternion<f32> GetQuaternion() const; 57 [[nodiscard]] Common::Quaternion<f32> GetQuaternion() const;
58 [[nodiscard]] Common::Vec3f GetEulerAngles() const;
57 59
58 [[nodiscard]] bool IsMoving(f32 sensitivity) const; 60 [[nodiscard]] bool IsMoving(f32 sensitivity) const;
59 [[nodiscard]] bool IsCalibrated(f32 sensitivity) const; 61 [[nodiscard]] bool IsCalibrated(f32 sensitivity) const;
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 36d0d20d2..49bdc671e 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -35,12 +35,13 @@ namespace {
35using namespace Common::Literals; 35using namespace Common::Literals;
36 36
37u32 GetMemorySizeForInit() { 37u32 GetMemorySizeForInit() {
38 return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB; 38 return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB
39 : Smc::MemorySize_4GB;
39} 40}
40 41
41Smc::MemoryArrangement GetMemoryArrangeForInit() { 42Smc::MemoryArrangement GetMemoryArrangeForInit() {
42 return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB 43 return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB
43 : Smc::MemoryArrangement_4GB; 44 : Smc::MemoryArrangement_4GB;
44} 45}
45} // namespace 46} // namespace
46 47
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h
index 9b71fe371..f384b1568 100644
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -182,8 +182,8 @@ public:
182 explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {} 182 explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}
183 183
184 static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) { 184 static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
185 const u64 lid = lhs.GetId(); 185 const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
186 const u64 rid = rhs.GetId(); 186 const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));
187 187
188 if (lid < rid) { 188 if (lid < rid) {
189 return -1; 189 return -1;
diff --git a/src/core/hle/kernel/k_event_info.h b/src/core/hle/kernel/k_event_info.h
index 25b3ff594..eacfa5dc6 100644
--- a/src/core/hle/kernel/k_event_info.h
+++ b/src/core/hle/kernel/k_event_info.h
@@ -5,14 +5,15 @@
5 5
6#include <array> 6#include <array>
7 7
8#include <boost/intrusive/list.hpp> 8#include "common/intrusive_list.h"
9 9
10#include "core/hle/kernel/slab_helpers.h" 10#include "core/hle/kernel/slab_helpers.h"
11#include "core/hle/kernel/svc_types.h" 11#include "core/hle/kernel/svc_types.h"
12 12
13namespace Kernel { 13namespace Kernel {
14 14
15class KEventInfo : public KSlabAllocated<KEventInfo>, public boost::intrusive::list_base_hook<> { 15class KEventInfo : public KSlabAllocated<KEventInfo>,
16 public Common::IntrusiveListBaseNode<KEventInfo> {
16public: 17public:
17 struct InfoCreateThread { 18 struct InfoCreateThread {
18 u32 thread_id{}; 19 u32 thread_id{};
diff --git a/src/core/hle/kernel/k_object_name.h b/src/core/hle/kernel/k_object_name.h
index 2d97fc777..a8876fe37 100644
--- a/src/core/hle/kernel/k_object_name.h
+++ b/src/core/hle/kernel/k_object_name.h
@@ -5,7 +5,8 @@
5 5
6#include <array> 6#include <array>
7#include <memory> 7#include <memory>
8#include <boost/intrusive/list.hpp> 8
9#include "common/intrusive_list.h"
9 10
10#include "core/hle/kernel/k_light_lock.h" 11#include "core/hle/kernel/k_light_lock.h"
11#include "core/hle/kernel/slab_helpers.h" 12#include "core/hle/kernel/slab_helpers.h"
@@ -15,13 +16,14 @@ namespace Kernel {
15 16
16class KObjectNameGlobalData; 17class KObjectNameGlobalData;
17 18
18class KObjectName : public KSlabAllocated<KObjectName>, public boost::intrusive::list_base_hook<> { 19class KObjectName : public KSlabAllocated<KObjectName>,
20 public Common::IntrusiveListBaseNode<KObjectName> {
19public: 21public:
20 explicit KObjectName(KernelCore&) {} 22 explicit KObjectName(KernelCore&) {}
21 virtual ~KObjectName() = default; 23 virtual ~KObjectName() = default;
22 24
23 static constexpr size_t NameLengthMax = 12; 25 static constexpr size_t NameLengthMax = 12;
24 using List = boost::intrusive::list<KObjectName>; 26 using List = Common::IntrusiveListBaseTraits<KObjectName>::ListType;
25 27
26 static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name); 28 static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name);
27 static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name); 29 static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name);
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 21c040e62..625280290 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -7,7 +7,7 @@
7#include <string> 7#include <string>
8#include <utility> 8#include <utility>
9 9
10#include <boost/intrusive/list.hpp> 10#include "common/intrusive_list.h"
11 11
12#include "core/hle/kernel/k_server_session.h" 12#include "core/hle/kernel/k_server_session.h"
13#include "core/hle/kernel/k_synchronization_object.h" 13#include "core/hle/kernel/k_synchronization_object.h"
@@ -42,7 +42,7 @@ public:
42 bool IsSignaled() const override; 42 bool IsSignaled() const override;
43 43
44private: 44private:
45 using SessionList = boost::intrusive::list<KServerSession>; 45 using SessionList = Common::IntrusiveListBaseTraits<KServerSession>::ListType;
46 46
47 void CleanupSessions(); 47 void CleanupSessions();
48 48
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h
index 5ee02f556..403891919 100644
--- a/src/core/hle/kernel/k_server_session.h
+++ b/src/core/hle/kernel/k_server_session.h
@@ -8,7 +8,7 @@
8#include <string> 8#include <string>
9#include <utility> 9#include <utility>
10 10
11#include <boost/intrusive/list.hpp> 11#include "common/intrusive_list.h"
12 12
13#include "core/hle/kernel/k_light_lock.h" 13#include "core/hle/kernel/k_light_lock.h"
14#include "core/hle/kernel/k_session_request.h" 14#include "core/hle/kernel/k_session_request.h"
@@ -27,7 +27,7 @@ class KSession;
27class KThread; 27class KThread;
28 28
29class KServerSession final : public KSynchronizationObject, 29class KServerSession final : public KSynchronizationObject,
30 public boost::intrusive::list_base_hook<> { 30 public Common::IntrusiveListBaseNode<KServerSession> {
31 KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject); 31 KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject);
32 32
33 friend class ServiceThread; 33 friend class ServiceThread;
@@ -67,7 +67,8 @@ private:
67 KSession* m_parent{}; 67 KSession* m_parent{};
68 68
69 /// List of threads which are pending a reply. 69 /// List of threads which are pending a reply.
70 boost::intrusive::list<KSessionRequest> m_request_list{}; 70 using RequestList = Common::IntrusiveListBaseTraits<KSessionRequest>::ListType;
71 RequestList m_request_list{};
71 KSessionRequest* m_current_request{}; 72 KSessionRequest* m_current_request{};
72 73
73 KLightLock m_lock; 74 KLightLock m_lock;
diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h
index b5f04907b..283669e0a 100644
--- a/src/core/hle/kernel/k_session_request.h
+++ b/src/core/hle/kernel/k_session_request.h
@@ -5,6 +5,8 @@
5 5
6#include <array> 6#include <array>
7 7
8#include "common/intrusive_list.h"
9
8#include "core/hle/kernel/k_auto_object.h" 10#include "core/hle/kernel/k_auto_object.h"
9#include "core/hle/kernel/k_event.h" 11#include "core/hle/kernel/k_event.h"
10#include "core/hle/kernel/k_memory_block.h" 12#include "core/hle/kernel/k_memory_block.h"
@@ -16,7 +18,7 @@ namespace Kernel {
16 18
17class KSessionRequest final : public KSlabAllocated<KSessionRequest>, 19class KSessionRequest final : public KSlabAllocated<KSessionRequest>,
18 public KAutoObject, 20 public KAutoObject,
19 public boost::intrusive::list_base_hook<> { 21 public Common::IntrusiveListBaseNode<KSessionRequest> {
20 KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject); 22 KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject);
21 23
22public: 24public:
diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h
index 75b73ba39..2d8ff20d6 100644
--- a/src/core/hle/kernel/k_shared_memory_info.h
+++ b/src/core/hle/kernel/k_shared_memory_info.h
@@ -3,7 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <boost/intrusive/list.hpp> 6#include "common/intrusive_list.h"
7 7
8#include "core/hle/kernel/slab_helpers.h" 8#include "core/hle/kernel/slab_helpers.h"
9 9
@@ -12,7 +12,7 @@ namespace Kernel {
12class KSharedMemory; 12class KSharedMemory;
13 13
14class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>, 14class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>,
15 public boost::intrusive::list_base_hook<> { 15 public Common::IntrusiveListBaseNode<KSharedMemoryInfo> {
16 16
17public: 17public:
18 explicit KSharedMemoryInfo(KernelCore&) {} 18 explicit KSharedMemoryInfo(KernelCore&) {}
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index 9c1a41128..f9814ac8f 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -12,7 +12,7 @@
12#include <utility> 12#include <utility>
13#include <vector> 13#include <vector>
14 14
15#include <boost/intrusive/list.hpp> 15#include "common/intrusive_list.h"
16 16
17#include "common/intrusive_red_black_tree.h" 17#include "common/intrusive_red_black_tree.h"
18#include "common/spin_lock.h" 18#include "common/spin_lock.h"
@@ -119,7 +119,7 @@ s32 GetCurrentCoreId(KernelCore& kernel);
119Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel); 119Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel);
120 120
121class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>, 121class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>,
122 public boost::intrusive::list_base_hook<>, 122 public Common::IntrusiveListBaseNode<KThread>,
123 public KTimerTask { 123 public KTimerTask {
124 KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject); 124 KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject);
125 125
@@ -138,7 +138,7 @@ public:
138public: 138public:
139 using ThreadContext32 = Core::ARM_Interface::ThreadContext32; 139 using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
140 using ThreadContext64 = Core::ARM_Interface::ThreadContext64; 140 using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
141 using WaiterList = boost::intrusive::list<KThread>; 141 using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType;
142 142
143 /** 143 /**
144 * Gets the thread's current priority 144 * Gets the thread's current priority
@@ -750,8 +750,9 @@ private:
750 ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>; 750 ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>;
751 751
752public: 752public:
753 class LockWithPriorityInheritanceInfo : public KSlabAllocated<LockWithPriorityInheritanceInfo>, 753 class LockWithPriorityInheritanceInfo
754 public boost::intrusive::list_base_hook<> { 754 : public KSlabAllocated<LockWithPriorityInheritanceInfo>,
755 public Common::IntrusiveListBaseNode<LockWithPriorityInheritanceInfo> {
755 public: 756 public:
756 explicit LockWithPriorityInheritanceInfo(KernelCore&) {} 757 explicit LockWithPriorityInheritanceInfo(KernelCore&) {}
757 758
@@ -839,7 +840,7 @@ public:
839 840
840private: 841private:
841 using LockWithPriorityInheritanceInfoList = 842 using LockWithPriorityInheritanceInfoList =
842 boost::intrusive::list<LockWithPriorityInheritanceInfo>; 843 Common::IntrusiveListBaseTraits<LockWithPriorityInheritanceInfo>::ListType;
843 844
844 ConditionVariableThreadTree* m_condvar_tree{}; 845 ConditionVariableThreadTree* m_condvar_tree{};
845 u64 m_condvar_key{}; 846 u64 m_condvar_key{};
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4f3366c9d..f33600ca5 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -95,7 +95,7 @@ struct KernelCore::Impl {
95 pt_heap_region.GetSize()); 95 pt_heap_region.GetSize());
96 } 96 }
97 97
98 InitializeHackSharedMemory(); 98 InitializeHackSharedMemory(kernel);
99 RegisterHostThread(nullptr); 99 RegisterHostThread(nullptr);
100 } 100 }
101 101
@@ -216,10 +216,12 @@ struct KernelCore::Impl {
216 auto* main_thread{Kernel::KThread::Create(system.Kernel())}; 216 auto* main_thread{Kernel::KThread::Create(system.Kernel())};
217 main_thread->SetCurrentCore(core); 217 main_thread->SetCurrentCore(core);
218 ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess()); 218 ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
219 KThread::Register(system.Kernel(), main_thread);
219 220
220 auto* idle_thread{Kernel::KThread::Create(system.Kernel())}; 221 auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
221 idle_thread->SetCurrentCore(core); 222 idle_thread->SetCurrentCore(core);
222 ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess()); 223 ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
224 KThread::Register(system.Kernel(), idle_thread);
223 225
224 schedulers[i]->Initialize(main_thread, idle_thread, core); 226 schedulers[i]->Initialize(main_thread, idle_thread, core);
225 } 227 }
@@ -230,6 +232,7 @@ struct KernelCore::Impl {
230 const Core::Timing::CoreTiming& core_timing) { 232 const Core::Timing::CoreTiming& core_timing) {
231 system_resource_limit = KResourceLimit::Create(system.Kernel()); 233 system_resource_limit = KResourceLimit::Create(system.Kernel());
232 system_resource_limit->Initialize(&core_timing); 234 system_resource_limit->Initialize(&core_timing);
235 KResourceLimit::Register(kernel, system_resource_limit);
233 236
234 const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()}; 237 const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
235 const auto total_size{sizes.first}; 238 const auto total_size{sizes.first};
@@ -355,6 +358,7 @@ struct KernelCore::Impl {
355 ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {}, 358 ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
356 core_id) 359 core_id)
357 .IsSuccess()); 360 .IsSuccess());
361 KThread::Register(system.Kernel(), shutdown_threads[core_id]);
358 } 362 }
359 } 363 }
360 364
@@ -729,7 +733,7 @@ struct KernelCore::Impl {
729 memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); 733 memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
730 } 734 }
731 735
732 void InitializeHackSharedMemory() { 736 void InitializeHackSharedMemory(KernelCore& kernel) {
733 // Setup memory regions for emulated processes 737 // Setup memory regions for emulated processes
734 // TODO(bunnei): These should not be hardcoded regions initialized within the kernel 738 // TODO(bunnei): These should not be hardcoded regions initialized within the kernel
735 constexpr std::size_t hid_size{0x40000}; 739 constexpr std::size_t hid_size{0x40000};
@@ -746,14 +750,23 @@ struct KernelCore::Impl {
746 750
747 hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, 751 hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
748 Svc::MemoryPermission::Read, hid_size); 752 Svc::MemoryPermission::Read, hid_size);
753 KSharedMemory::Register(kernel, hid_shared_mem);
754
749 font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, 755 font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
750 Svc::MemoryPermission::Read, font_size); 756 Svc::MemoryPermission::Read, font_size);
757 KSharedMemory::Register(kernel, font_shared_mem);
758
751 irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, 759 irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
752 Svc::MemoryPermission::Read, irs_size); 760 Svc::MemoryPermission::Read, irs_size);
761 KSharedMemory::Register(kernel, irs_shared_mem);
762
753 time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, 763 time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
754 Svc::MemoryPermission::Read, time_size); 764 Svc::MemoryPermission::Read, time_size);
765 KSharedMemory::Register(kernel, time_shared_mem);
766
755 hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None, 767 hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
756 Svc::MemoryPermission::Read, hidbus_size); 768 Svc::MemoryPermission::Read, hidbus_size);
769 KSharedMemory::Register(kernel, hidbus_shared_mem);
757 } 770 }
758 771
759 std::mutex registered_objects_lock; 772 std::mutex registered_objects_lock;
@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
1072 // Commit the thread reservation. 1085 // Commit the thread reservation.
1073 thread_reservation.Commit(); 1086 thread_reservation.Commit();
1074 1087
1088 // Register the thread.
1089 KThread::Register(kernel, thread);
1090
1075 return std::jthread( 1091 return std::jthread(
1076 [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] { 1092 [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
1077 // Set the thread name. 1093 // Set the thread name.
1078 Common::SetCurrentThreadName(thread_name.c_str()); 1094 Common::SetCurrentThreadName(thread_name.c_str());
1079 1095
1080 // Register the thread. 1096 // Set the thread as current.
1081 kernel.RegisterHostThread(thread); 1097 kernel.RegisterHostThread(thread);
1082 1098
1083 // Run the callback. 1099 // Run the callback.
@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
1099 // Ensure that we don't hold onto any extra references. 1115 // Ensure that we don't hold onto any extra references.
1100 SCOPE_EXIT({ process->Close(); }); 1116 SCOPE_EXIT({ process->Close(); });
1101 1117
1118 // Register the new process.
1119 KProcess::Register(*this, process);
1120
1102 // Run the host thread. 1121 // Run the host thread.
1103 return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func)); 1122 return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
1104} 1123}
@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
1124 // Ensure that we don't hold onto any extra references. 1143 // Ensure that we don't hold onto any extra references.
1125 SCOPE_EXIT({ process->Close(); }); 1144 SCOPE_EXIT({ process->Close(); });
1126 1145
1146 // Register the new process.
1147 KProcess::Register(*this, process);
1148
1127 // Reserve a new thread from the process resource limit. 1149 // Reserve a new thread from the process resource limit.
1128 KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax); 1150 KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
1129 ASSERT(thread_reservation.Succeeded()); 1151 ASSERT(thread_reservation.Succeeded());
@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
1136 // Commit the thread reservation. 1158 // Commit the thread reservation.
1137 thread_reservation.Commit(); 1159 thread_reservation.Commit();
1138 1160
1161 // Register the new thread.
1162 KThread::Register(*this, thread);
1163
1139 // Begin running the thread. 1164 // Begin running the thread.
1140 ASSERT(R_SUCCEEDED(thread->Run())); 1165 ASSERT(R_SUCCEEDED(thread->Run()));
1141} 1166}
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 23b8be993..3e62fa4fc 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -49,12 +49,6 @@ public:
49 }; 49 };
50 // clang-format on 50 // clang-format on
51 RegisterHandlers(functions); 51 RegisterHandlers(functions);
52
53 if (impl->GetSystem()
54 .Initialize(device_name, in_params, handle, applet_resource_user_id)
55 .IsError()) {
56 LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
57 }
58 } 52 }
59 53
60 ~IAudioOut() override { 54 ~IAudioOut() override {
@@ -287,6 +281,14 @@ void AudOutU::OpenAudioOut(HLERequestContext& ctx) {
287 281
288 auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name, 282 auto audio_out = std::make_shared<IAudioOut>(system, *impl, new_session_id, device_name,
289 in_params, handle, applet_resource_user_id); 283 in_params, handle, applet_resource_user_id);
284 result = audio_out->GetImpl()->GetSystem().Initialize(device_name, in_params, handle,
285 applet_resource_user_id);
286 if (result.IsError()) {
287 LOG_ERROR(Service_Audio, "Failed to initialize the AudioOut System!");
288 IPC::ResponseBuilder rb{ctx, 2};
289 rb.Push(result);
290 return;
291 }
290 292
291 impl->sessions[new_session_id] = audio_out->GetImpl(); 293 impl->sessions[new_session_id] = audio_out->GetImpl();
292 impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id; 294 impl->applet_resource_user_ids[new_session_id] = applet_resource_user_id;
diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h
index e4cb4e1f2..0e222362e 100644
--- a/src/core/hle/service/ipc_helpers.h
+++ b/src/core/hle/service/ipc_helpers.h
@@ -156,6 +156,7 @@ public:
156 156
157 auto* session = Kernel::KSession::Create(kernel); 157 auto* session = Kernel::KSession::Create(kernel);
158 session->Initialize(nullptr, 0); 158 session->Initialize(nullptr, 0);
159 Kernel::KSession::Register(kernel, session);
159 160
160 auto next_manager = std::make_shared<Service::SessionRequestManager>( 161 auto next_manager = std::make_shared<Service::SessionRequestManager>(
161 kernel, manager->GetServerManager()); 162 kernel, manager->GetServerManager());
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
index a39ce5212..6a313a03b 100644
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
25 Kernel::KProcess::ProcessType::KernelInternal, 25 Kernel::KProcess::ProcessType::KernelInternal,
26 kernel.GetSystemResourceLimit()) 26 kernel.GetSystemResourceLimit())
27 .IsSuccess()); 27 .IsSuccess());
28
29 // Register the process.
30 Kernel::KProcess::Register(kernel, process);
28 process_created = true; 31 process_created = true;
29} 32}
30 33
diff --git a/src/core/hle/service/mutex.cpp b/src/core/hle/service/mutex.cpp
index 07589a0f0..b0ff71d1b 100644
--- a/src/core/hle/service/mutex.cpp
+++ b/src/core/hle/service/mutex.cpp
@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
12 m_event = Kernel::KEvent::Create(system.Kernel()); 12 m_event = Kernel::KEvent::Create(system.Kernel());
13 m_event->Initialize(nullptr); 13 m_event->Initialize(nullptr);
14 14
15 // Register the event.
16 Kernel::KEvent::Register(system.Kernel(), m_event);
17
15 ASSERT(R_SUCCEEDED(m_event->Signal())); 18 ASSERT(R_SUCCEEDED(m_event->Signal()));
16} 19}
17 20
diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp
index 6b4a1291e..156bc27d8 100644
--- a/src/core/hle/service/server_manager.cpp
+++ b/src/core/hle/service/server_manager.cpp
@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
33 // Initialize event. 33 // Initialize event.
34 m_event = Kernel::KEvent::Create(system.Kernel()); 34 m_event = Kernel::KEvent::Create(system.Kernel());
35 m_event->Initialize(nullptr); 35 m_event->Initialize(nullptr);
36
37 // Register event.
38 Kernel::KEvent::Register(system.Kernel(), m_event);
36} 39}
37 40
38ServerManager::~ServerManager() { 41ServerManager::~ServerManager() {
@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
160 // Initialize the event. 163 // Initialize the event.
161 m_deferral_event->Initialize(nullptr); 164 m_deferral_event->Initialize(nullptr);
162 165
166 // Register the event.
167 Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);
168
163 // Set the output. 169 // Set the output.
164 *out_event = m_deferral_event; 170 *out_event = m_deferral_event;
165 171
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index c45be5726..1608fa24c 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
64 auto* port = Kernel::KPort::Create(kernel); 64 auto* port = Kernel::KPort::Create(kernel);
65 port->Initialize(ServerSessionCountMax, false, 0); 65 port->Initialize(ServerSessionCountMax, false, 0);
66 66
67 // Register the port.
68 Kernel::KPort::Register(kernel, port);
69
67 service_ports.emplace(name, port); 70 service_ports.emplace(name, port);
68 registered_services.emplace(name, handler); 71 registered_services.emplace(name, handler);
69 if (deferral_event) { 72 if (deferral_event) {
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
index 419c1df2b..7dce28fe0 100644
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
49 // Commit the session reservation. 49 // Commit the session reservation.
50 session_reservation.Commit(); 50 session_reservation.Commit();
51 51
52 // Register the session.
53 Kernel::KSession::Register(system.Kernel(), session);
54
52 // Register with server manager. 55 // Register with server manager.
53 session_manager->GetServerManager().RegisterSession(&session->GetServerSession(), 56 session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
54 session_manager); 57 session_manager);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 432310632..a9667463f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -462,7 +462,7 @@ struct Memory::Impl {
462 } 462 }
463 463
464 if (Settings::IsFastmemEnabled()) { 464 if (Settings::IsFastmemEnabled()) {
465 const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; 465 const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
466 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); 466 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
467 } 467 }
468 468
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 9178b00ca..7a2f3c90a 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -85,6 +85,20 @@ static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
85 return "Unknown"; 85 return "Unknown";
86} 86}
87 87
88static constexpr const char* TranslateVSyncMode(Settings::VSyncMode mode) {
89 switch (mode) {
90 case Settings::VSyncMode::Immediate:
91 return "Immediate";
92 case Settings::VSyncMode::Mailbox:
93 return "Mailbox";
94 case Settings::VSyncMode::FIFO:
95 return "FIFO";
96 case Settings::VSyncMode::FIFORelaxed:
97 return "FIFO Relaxed";
98 }
99 return "Unknown";
100}
101
88u64 GetTelemetryId() { 102u64 GetTelemetryId() {
89 u64 telemetry_id{}; 103 u64 telemetry_id{};
90 const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; 104 const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@@ -241,7 +255,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
241 AddField(field_type, "Renderer_NvdecEmulation", 255 AddField(field_type, "Renderer_NvdecEmulation",
242 TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); 256 TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue()));
243 AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); 257 AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
244 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); 258 AddField(field_type, "Renderer_UseVsync",
259 TranslateVSyncMode(Settings::values.vsync_mode.GetValue()));
245 AddField(field_type, "Renderer_ShaderBackend", 260 AddField(field_type, "Renderer_ShaderBackend",
246 static_cast<u32>(Settings::values.shader_backend.GetValue())); 261 static_cast<u32>(Settings::values.shader_backend.GetValue()));
247 AddField(field_type, "Renderer_UseAsynchronousShaders", 262 AddField(field_type, "Renderer_UseAsynchronousShaders",
diff --git a/src/input_common/input_mapping.cpp b/src/input_common/input_mapping.cpp
index 9361b00c5..8c2ee4eb3 100644
--- a/src/input_common/input_mapping.cpp
+++ b/src/input_common/input_mapping.cpp
@@ -82,6 +82,9 @@ void MappingFactory::RegisterButton(const MappingData& data) {
82 new_input.Set("axis", data.index); 82 new_input.Set("axis", data.index);
83 new_input.Set("threshold", 0.5f); 83 new_input.Set("threshold", 0.5f);
84 break; 84 break;
85 case EngineInputType::Motion:
86 new_input.Set("motion", data.index);
87 break;
85 default: 88 default:
86 return; 89 return;
87 } 90 }
diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp
index 8c6a6521a..5c2c4a463 100644
--- a/src/input_common/input_poller.cpp
+++ b/src/input_common/input_poller.cpp
@@ -939,6 +939,7 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateAnalogDevice(
939 .threshold = std::clamp(params.Get("threshold", 0.5f), 0.0f, 1.0f), 939 .threshold = std::clamp(params.Get("threshold", 0.5f), 0.0f, 1.0f),
940 .offset = std::clamp(params.Get("offset", 0.0f), -1.0f, 1.0f), 940 .offset = std::clamp(params.Get("offset", 0.0f), -1.0f, 1.0f),
941 .inverted = params.Get("invert", "+") == "-", 941 .inverted = params.Get("invert", "+") == "-",
942 .inverted_button = params.Get("inverted", false) != 0,
942 .toggle = params.Get("toggle", false) != 0, 943 .toggle = params.Get("toggle", false) != 0,
943 }; 944 };
944 input_engine->PreSetController(identifier); 945 input_engine->PreSetController(identifier);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 0cd87a48f..fee510f7b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
473} 473}
474 474
475void EmitSetSampleMask(EmitContext& ctx, Id value) { 475void EmitSetSampleMask(EmitContext& ctx, Id value) {
476 ctx.OpStore(ctx.sample_mask, value); 476 const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
477 ctx.OpStore(pointer, value);
477} 478}
478 479
479void EmitSetFragDepth(EmitContext& ctx, Id value) { 480void EmitSetFragDepth(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index d48d4860e..47739794f 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
1572 Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); 1572 Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
1573 } 1573 }
1574 if (info.stores_sample_mask) { 1574 if (info.stores_sample_mask) {
1575 sample_mask = DefineOutput(*this, U32[1], std::nullopt); 1575 const Id array_type{TypeArray(U32[1], Const(1U))};
1576 sample_mask = DefineOutput(*this, array_type, std::nullopt);
1576 Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); 1577 Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
1577 } 1578 }
1578 break; 1579 break;
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 39b774c98..1e158f375 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_executable(tests
15 core/core_timing.cpp 15 core/core_timing.cpp
16 core/internal_network/network.cpp 16 core/internal_network/network.cpp
17 precompiled_headers.h 17 precompiled_headers.h
18 video_core/buffer_base.cpp 18 video_core/memory_tracker.cpp
19 input_common/calibration_configuration_job.cpp 19 input_common/calibration_configuration_job.cpp
20) 20)
21 21
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
deleted file mode 100644
index 734dbf4b6..000000000
--- a/src/tests/video_core/buffer_base.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stdexcept>
5#include <unordered_map>
6
7#include <catch2/catch_test_macros.hpp>
8
9#include "common/alignment.h"
10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_base.h"
12
13namespace {
14using VideoCommon::BufferBase;
15using Range = std::pair<u64, u64>;
16
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
19
20constexpr VAddr c = 0x1328914000;
21
22class RasterizerInterface {
23public:
24 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
25 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
26 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
27 Core::Memory::YUZU_PAGEBITS};
28 for (u64 page = page_start; page < page_end; ++page) {
29 int& value = page_table[page];
30 value += delta;
31 if (value < 0) {
32 throw std::logic_error{"negative page"};
33 }
34 if (value == 0) {
35 page_table.erase(page);
36 }
37 }
38 }
39
40 [[nodiscard]] int Count(VAddr addr) const noexcept {
41 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
42 return it == page_table.end() ? 0 : it->second;
43 }
44
45 [[nodiscard]] unsigned Count() const noexcept {
46 unsigned count = 0;
47 for (const auto& [index, value] : page_table) {
48 count += value;
49 }
50 return count;
51 }
52
53private:
54 std::unordered_map<u64, int> page_table;
55};
56} // Anonymous namespace
57
58TEST_CASE("BufferBase: Small buffer", "[video_core]") {
59 RasterizerInterface rasterizer;
60 BufferBase buffer(rasterizer, c, WORD);
61 REQUIRE(rasterizer.Count() == 0);
62 buffer.UnmarkRegionAsCpuModified(c, WORD);
63 REQUIRE(rasterizer.Count() == WORD / PAGE);
64 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0});
65
66 buffer.MarkRegionAsCpuModified(c + PAGE, 1);
67 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2});
68}
69
70TEST_CASE("BufferBase: Large buffer", "[video_core]") {
71 RasterizerInterface rasterizer;
72 BufferBase buffer(rasterizer, c, WORD * 32);
73 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
74 buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4);
75 REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2});
76 REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8});
77 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE});
78 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE});
79 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
80 Range{WORD * 3 + PAGE * 63, WORD * 4});
81
82 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
83 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
84 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
85 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9});
86
87 buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
88 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
89 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7});
90
91 buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
92 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32});
93
94 buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
95 buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
96
97 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
98 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
99}
100
101TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") {
102 RasterizerInterface rasterizer;
103 BufferBase buffer(rasterizer, c, PAGE * 2);
104 REQUIRE(rasterizer.Count() == 0);
105 buffer.UnmarkRegionAsCpuModified(c, PAGE);
106 REQUIRE(rasterizer.Count() == 1);
107 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
108 REQUIRE(rasterizer.Count() == 0);
109 buffer.UnmarkRegionAsCpuModified(c, PAGE);
110 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
111 REQUIRE(rasterizer.Count() == 2);
112 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
113 REQUIRE(rasterizer.Count() == 0);
114}
115
116TEST_CASE("BufferBase: Basic range", "[video_core]") {
117 RasterizerInterface rasterizer;
118 BufferBase buffer(rasterizer, c, WORD);
119 buffer.UnmarkRegionAsCpuModified(c, WORD);
120 buffer.MarkRegionAsCpuModified(c, PAGE);
121 int num = 0;
122 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
123 REQUIRE(offset == 0U);
124 REQUIRE(size == PAGE);
125 ++num;
126 });
127 REQUIRE(num == 1U);
128}
129
130TEST_CASE("BufferBase: Border upload", "[video_core]") {
131 RasterizerInterface rasterizer;
132 BufferBase buffer(rasterizer, c, WORD * 2);
133 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
134 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
135 buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
136 REQUIRE(offset == WORD - PAGE);
137 REQUIRE(size == PAGE * 2);
138 });
139}
140
141TEST_CASE("BufferBase: Border upload range", "[video_core]") {
142 RasterizerInterface rasterizer;
143 BufferBase buffer(rasterizer, c, WORD * 2);
144 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
145 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
146 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
147 REQUIRE(offset == WORD - PAGE);
148 REQUIRE(size == PAGE * 2);
149 });
150 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
151 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
152 REQUIRE(offset == WORD - PAGE);
153 REQUIRE(size == PAGE);
154 });
155 buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
156 REQUIRE(offset == WORD);
157 REQUIRE(size == PAGE);
158 });
159}
160
161TEST_CASE("BufferBase: Border upload partial range", "[video_core]") {
162 RasterizerInterface rasterizer;
163 BufferBase buffer(rasterizer, c, WORD * 2);
164 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
165 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
166 buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
167 REQUIRE(offset == WORD - PAGE);
168 REQUIRE(size == PAGE * 2);
169 });
170 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
171 buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
172 REQUIRE(offset == WORD - PAGE);
173 REQUIRE(size == PAGE);
174 });
175 buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
176 REQUIRE(offset == WORD);
177 REQUIRE(size == PAGE);
178 });
179}
180
181TEST_CASE("BufferBase: Partial word uploads", "[video_core]") {
182 RasterizerInterface rasterizer;
183 BufferBase buffer(rasterizer, c, 0x9d000);
184 int num = 0;
185 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
186 REQUIRE(offset == 0U);
187 REQUIRE(size == WORD);
188 ++num;
189 });
190 REQUIRE(num == 1);
191 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
192 REQUIRE(offset == WORD);
193 REQUIRE(size == WORD);
194 ++num;
195 });
196 REQUIRE(num == 2);
197 buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
198 REQUIRE(offset == WORD * 2);
199 REQUIRE(size == PAGE * 0x1d);
200 ++num;
201 });
202 REQUIRE(num == 3);
203}
204
205TEST_CASE("BufferBase: Partial page upload", "[video_core]") {
206 RasterizerInterface rasterizer;
207 BufferBase buffer(rasterizer, c, WORD);
208 buffer.UnmarkRegionAsCpuModified(c, WORD);
209 int num = 0;
210 buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
211 buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
212 buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
213 REQUIRE(offset == PAGE * 2);
214 REQUIRE(size == PAGE);
215 ++num;
216 });
217 REQUIRE(num == 1);
218 buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
219 REQUIRE(offset == PAGE * 9);
220 REQUIRE(size == PAGE);
221 ++num;
222 });
223 REQUIRE(num == 2);
224}
225
226TEST_CASE("BufferBase: Partial page upload with multiple words on the right") {
227 RasterizerInterface rasterizer;
228 BufferBase buffer(rasterizer, c, WORD * 8);
229 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
230 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
231 int num = 0;
232 buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
233 REQUIRE(offset == PAGE * 13);
234 REQUIRE(size == WORD * 7 - PAGE * 3);
235 ++num;
236 });
237 REQUIRE(num == 1);
238 buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
239 REQUIRE(offset == WORD * 7 + PAGE * 10);
240 REQUIRE(size == PAGE * 3);
241 ++num;
242 });
243 REQUIRE(num == 2);
244}
245
246TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") {
247 RasterizerInterface rasterizer;
248 BufferBase buffer(rasterizer, c, WORD * 8);
249 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
250 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
251 int num = 0;
252 buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
253 REQUIRE(offset == PAGE * 16);
254 REQUIRE(size == WORD * 7 - PAGE * 3);
255 ++num;
256 });
257 REQUIRE(num == 1);
258 buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
259 REQUIRE(offset == PAGE * 13);
260 REQUIRE(size == PAGE * 3);
261 ++num;
262 });
263 REQUIRE(num == 2);
264}
265
266TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") {
267 RasterizerInterface rasterizer;
268 BufferBase buffer(rasterizer, c, WORD * 8);
269 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
270 buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
271 int num = 0;
272 buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
273 REQUIRE(offset == PAGE * 16);
274 REQUIRE(size == WORD);
275 ++num;
276 });
277 REQUIRE(num == 1);
278 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
279 REQUIRE(offset == PAGE * 13);
280 REQUIRE(size == PAGE * 3);
281 ++num;
282 });
283 REQUIRE(num == 2);
284 buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
285 REQUIRE(offset == WORD + PAGE * 16);
286 REQUIRE(size == PAGE * 73);
287 ++num;
288 });
289 REQUIRE(num == 3);
290}
291
292TEST_CASE("BufferBase: Empty right bits", "[video_core]") {
293 RasterizerInterface rasterizer;
294 BufferBase buffer(rasterizer, c, WORD * 2048);
295 buffer.UnmarkRegionAsCpuModified(c, WORD * 2048);
296 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
297 buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
298 REQUIRE(offset == WORD - PAGE);
299 REQUIRE(size == PAGE * 2);
300 });
301}
302
303TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") {
304 RasterizerInterface rasterizer;
305 BufferBase buffer(rasterizer, c, WORD);
306 buffer.UnmarkRegionAsCpuModified(c, WORD);
307 buffer.MarkRegionAsCpuModified(c, PAGE);
308 int num = 0;
309 buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
310 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
311 buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
312 REQUIRE(num == 0);
313 buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
314 REQUIRE(num == 1);
315 buffer.MarkRegionAsCpuModified(c, WORD);
316 REQUIRE(rasterizer.Count() == 0);
317}
318
319TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") {
320 RasterizerInterface rasterizer;
321 BufferBase buffer(rasterizer, c, 0x22000);
322 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
323 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
324 REQUIRE(rasterizer.Count() == 0);
325 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
326 REQUIRE(rasterizer.Count() == 1);
327 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2));
328 buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2);
329 buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2);
330 REQUIRE(rasterizer.Count() == 2);
331}
332
333TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") {
334 RasterizerInterface rasterizer;
335 BufferBase buffer(rasterizer, c, 0x310720);
336 buffer.UnmarkRegionAsCpuModified(c, 0x310720);
337 REQUIRE(rasterizer.Count(c) == 1);
338 REQUIRE(rasterizer.Count(c + PAGE) == 1);
339 REQUIRE(rasterizer.Count(c + WORD) == 1);
340 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
341}
342
343TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") {
344 RasterizerInterface rasterizer;
345 BufferBase buffer(rasterizer, c, WORD);
346 buffer.UnmarkRegionAsCpuModified(c, WORD);
347 buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
348 buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
349 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
350 static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3};
351 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
352 REQUIRE(offset == offsets.at(i));
353 REQUIRE(size == sizes.at(i));
354 ++i;
355 });
356}
357
358TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") {
359 RasterizerInterface rasterizer;
360 BufferBase buffer(rasterizer, c, 0x22000);
361 buffer.UnmarkRegionAsCpuModified(c, 0x22000);
362 REQUIRE(rasterizer.Count() == 0x22);
363 buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
364 buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
365 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
366 static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21};
367 static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
368 REQUIRE(offset == offsets.at(i));
369 REQUIRE(size == sizes.at(i));
370 ++i;
371 });
372}
373
374TEST_CASE("BufferBase: Single page modified range", "[video_core]") {
375 RasterizerInterface rasterizer;
376 BufferBase buffer(rasterizer, c, PAGE);
377 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
378 buffer.UnmarkRegionAsCpuModified(c, PAGE);
379 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
380}
381
382TEST_CASE("BufferBase: Two page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 BufferBase buffer(rasterizer, c, PAGE * 2);
385 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
386 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
387 REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2));
388 buffer.UnmarkRegionAsCpuModified(c, PAGE);
389 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
390}
391
392TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") {
393 for (int offset = 0; offset < 4; ++offset) {
394 const VAddr address = c + WORD * offset;
395 RasterizerInterface rasterizer;
396 BufferBase buffer(rasterizer, address, WORD * 4);
397 REQUIRE(buffer.IsRegionCpuModified(address, PAGE));
398 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE));
399 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE));
400
401 buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
402 REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD));
403 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE));
404 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE));
405 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE));
406 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
407 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
408
409 buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
410 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
411 }
412}
413
414TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") {
415 RasterizerInterface rasterizer;
416 BufferBase buffer(rasterizer, c, WORD * 16);
417 buffer.UnmarkRegionAsCpuModified(c, WORD * 16);
418 REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16));
419
420 buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
421 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16));
422 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2));
423 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2));
424 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2));
425 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
426 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
427 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
428 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
429 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
430}
431
432TEST_CASE("BufferBase: Out of bounds region query") {
433 RasterizerInterface rasterizer;
434 BufferBase buffer(rasterizer, c, WORD * 16);
435 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE));
436 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE));
437 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE));
438 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64));
439 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64));
440}
441
442TEST_CASE("BufferBase: Wrap word regions") {
443 RasterizerInterface rasterizer;
444 BufferBase buffer(rasterizer, c, WORD * 2);
445 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
446 buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
447 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2));
448 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE));
449 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE));
450 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE));
451 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
452 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
453 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
454
455 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
457 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
458 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE));
459 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE));
460 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
461 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16));
462}
463
464TEST_CASE("BufferBase: Unaligned page region query") {
465 RasterizerInterface rasterizer;
466 BufferBase buffer(rasterizer, c, WORD);
467 buffer.UnmarkRegionAsCpuModified(c, WORD);
468 buffer.MarkRegionAsCpuModified(c + 4000, 1000);
469 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
470 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
473}
474
475TEST_CASE("BufferBase: Cached write") {
476 RasterizerInterface rasterizer;
477 BufferBase buffer(rasterizer, c, WORD);
478 buffer.UnmarkRegionAsCpuModified(c, WORD);
479 buffer.CachedCpuWrite(c + PAGE, PAGE);
480 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
481 buffer.FlushCachedWrites();
482 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
483 buffer.MarkRegionAsCpuModified(c, WORD);
484 REQUIRE(rasterizer.Count() == 0);
485}
486
487TEST_CASE("BufferBase: Multiple cached write") {
488 RasterizerInterface rasterizer;
489 BufferBase buffer(rasterizer, c, WORD);
490 buffer.UnmarkRegionAsCpuModified(c, WORD);
491 buffer.CachedCpuWrite(c + PAGE, PAGE);
492 buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
493 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
494 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
495 buffer.FlushCachedWrites();
496 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
497 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
498 buffer.MarkRegionAsCpuModified(c, WORD);
499 REQUIRE(rasterizer.Count() == 0);
500}
501
502TEST_CASE("BufferBase: Cached write unmarked") {
503 RasterizerInterface rasterizer;
504 BufferBase buffer(rasterizer, c, WORD);
505 buffer.UnmarkRegionAsCpuModified(c, WORD);
506 buffer.CachedCpuWrite(c + PAGE, PAGE);
507 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
508 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
509 buffer.FlushCachedWrites();
510 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
511 buffer.MarkRegionAsCpuModified(c, WORD);
512 REQUIRE(rasterizer.Count() == 0);
513}
514
515TEST_CASE("BufferBase: Cached write iterated") {
516 RasterizerInterface rasterizer;
517 BufferBase buffer(rasterizer, c, WORD);
518 buffer.UnmarkRegionAsCpuModified(c, WORD);
519 buffer.CachedCpuWrite(c + PAGE, PAGE);
520 int num = 0;
521 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
522 REQUIRE(num == 0);
523 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
524 buffer.FlushCachedWrites();
525 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
526 buffer.MarkRegionAsCpuModified(c, WORD);
527 REQUIRE(rasterizer.Count() == 0);
528}
529
530TEST_CASE("BufferBase: Cached write downloads") {
531 RasterizerInterface rasterizer;
532 BufferBase buffer(rasterizer, c, WORD);
533 buffer.UnmarkRegionAsCpuModified(c, WORD);
534 REQUIRE(rasterizer.Count() == 64);
535 buffer.CachedCpuWrite(c + PAGE, PAGE);
536 REQUIRE(rasterizer.Count() == 63);
537 buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
538 int num = 0;
539 buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites();
545 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
547 buffer.MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
new file mode 100644
index 000000000..3981907a2
--- /dev/null
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -0,0 +1,549 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <memory>
5#include <stdexcept>
6#include <unordered_map>
7
8#include <catch2/catch_test_macros.hpp>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/buffer_cache/memory_tracker_base.h"
13
14namespace {
// Result type of MemoryTracker::ModifiedCpuRegion: a [begin, end) address pair.
15using Range = std::pair<u64, u64>;
16
// Tracking granularity used throughout these tests: 4 KiB pages, grouped into
// 64-page words.
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
// NOTE(review): 22 bits (4 MiB) presumably mirrors the tracker's top-level
// page size — confirm against video_core/buffer_cache/memory_tracker_base.h.
19constexpr u64 HIGH_PAGE_BITS = 22;
20constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS;
21
// Base address for every test; offset several high pages in so probes below
// `c` (used by the out-of-bound tests) remain at positive addresses.
22constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
23
24class RasterizerInterface {
25public:
26 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
27 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
28 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
29 Core::Memory::YUZU_PAGEBITS};
30 for (u64 page = page_start; page < page_end; ++page) {
31 int& value = page_table[page];
32 value += delta;
33 if (value < 0) {
34 throw std::logic_error{"negative page"};
35 }
36 if (value == 0) {
37 page_table.erase(page);
38 }
39 }
40 }
41
42 [[nodiscard]] int Count(VAddr addr) const noexcept {
43 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
44 return it == page_table.end() ? 0 : it->second;
45 }
46
47 [[nodiscard]] unsigned Count() const noexcept {
48 unsigned count = 0;
49 for (const auto& [index, value] : page_table) {
50 count += value;
51 }
52 return count;
53 }
54
55private:
56 std::unordered_map<u64, int> page_table;
57};
58} // Anonymous namespace
59
// The tracker under test, instantiated with the mock rasterizer above.
60using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>;
61
62TEST_CASE("MemoryTracker: Small region", "[video_core]") {
63 RasterizerInterface rasterizer;
64 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
65 REQUIRE(rasterizer.Count() == 0);
66 memory_track->UnmarkRegionAsCpuModified(c, WORD);
67 REQUIRE(rasterizer.Count() == WORD / PAGE);
68 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0});
69
70 memory_track->MarkRegionAsCpuModified(c + PAGE, 1);
71 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2});
72}
73
74TEST_CASE("MemoryTracker: Large region", "[video_core]") {
75 RasterizerInterface rasterizer;
76 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
77 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
78 memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4);
79 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) ==
80 Range{c + PAGE, c + WORD + PAGE * 2});
81 REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) ==
82 Range{c + PAGE * 2, c + PAGE * 8});
83 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE});
84 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) ==
85 Range{c + WORD * 4, c + WORD * 4 + PAGE});
86 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
87 Range{c + WORD * 3 + PAGE * 63, c + WORD * 4});
88
89 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
90 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
91 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
92 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9});
93
94 memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
95 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
96 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7});
97
98 memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
99 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32});
100
101 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
102 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
103
104 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
105 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
106}
107
108TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") {
109 RasterizerInterface rasterizer;
110 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
111 REQUIRE(rasterizer.Count() == 0);
112 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
113 REQUIRE(rasterizer.Count() == 1);
114 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
115 REQUIRE(rasterizer.Count() == 0);
116 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
117 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
118 REQUIRE(rasterizer.Count() == 2);
119 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
120 REQUIRE(rasterizer.Count() == 0);
121}
122
123TEST_CASE("MemoryTracker: Basic range", "[video_core]") {
124 RasterizerInterface rasterizer;
125 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
126 memory_track->UnmarkRegionAsCpuModified(c, WORD);
127 memory_track->MarkRegionAsCpuModified(c, PAGE);
128 int num = 0;
129 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
130 REQUIRE(offset == c);
131 REQUIRE(size == PAGE);
132 ++num;
133 });
134 REQUIRE(num == 1U);
135}
136
137TEST_CASE("MemoryTracker: Border upload", "[video_core]") {
138 RasterizerInterface rasterizer;
139 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
140 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
141 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
142 memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
143 REQUIRE(offset == c + WORD - PAGE);
144 REQUIRE(size == PAGE * 2);
145 });
146}
147
148TEST_CASE("MemoryTracker: Border upload range", "[video_core]") {
149 RasterizerInterface rasterizer;
150 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
151 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
152 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
153 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
154 REQUIRE(offset == c + WORD - PAGE);
155 REQUIRE(size == PAGE * 2);
156 });
157 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
158 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
159 REQUIRE(offset == c + WORD - PAGE);
160 REQUIRE(size == PAGE);
161 });
162 memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
163 REQUIRE(offset == c + WORD);
164 REQUIRE(size == PAGE);
165 });
166}
167
168TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") {
169 RasterizerInterface rasterizer;
170 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
171 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
172 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
173 memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
174 REQUIRE(offset == c + WORD - PAGE);
175 REQUIRE(size == PAGE * 2);
176 });
177 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
178 memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
179 REQUIRE(offset == c + WORD - PAGE);
180 REQUIRE(size == PAGE);
181 });
182 memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
183 REQUIRE(offset == c + WORD);
184 REQUIRE(size == PAGE);
185 });
186}
187
188TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") {
189 RasterizerInterface rasterizer;
190 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
191 int num = 0;
192 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
193 REQUIRE(offset == c);
194 REQUIRE(size == WORD);
195 ++num;
196 });
197 REQUIRE(num == 1);
198 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
199 REQUIRE(offset == c + WORD);
200 REQUIRE(size == WORD);
201 ++num;
202 });
203 REQUIRE(num == 2);
204 memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
205 REQUIRE(offset == c + WORD * 2);
206 REQUIRE(size == PAGE * 0x1d);
207 ++num;
208 });
209 REQUIRE(num == 3);
210}
211
212TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") {
213 RasterizerInterface rasterizer;
214 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
215 memory_track->UnmarkRegionAsCpuModified(c, WORD);
216 int num = 0;
217 memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
218 memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
219 memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
220 REQUIRE(offset == c + PAGE * 2);
221 REQUIRE(size == PAGE);
222 ++num;
223 });
224 REQUIRE(num == 1);
225 memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
226 REQUIRE(offset == c + PAGE * 9);
227 REQUIRE(size == PAGE);
228 ++num;
229 });
230 REQUIRE(num == 2);
231}
232
233TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right") {
234 RasterizerInterface rasterizer;
235 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
236 memory_track->UnmarkRegionAsCpuModified(c, WORD * 9);
237 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
238 int num = 0;
239 memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
240 REQUIRE(offset == c + PAGE * 13);
241 REQUIRE(size == WORD * 7 - PAGE * 3);
242 ++num;
243 });
244 REQUIRE(num == 1);
245 memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
246 REQUIRE(offset == c + WORD * 7 + PAGE * 10);
247 REQUIRE(size == PAGE * 3);
248 ++num;
249 });
250 REQUIRE(num == 2);
251}
252
253TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") {
254 RasterizerInterface rasterizer;
255 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
256 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
257 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
258 int num = 0;
259 memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
260 REQUIRE(offset == c + PAGE * 16);
261 REQUIRE(size == WORD * 7 - PAGE * 3);
262 ++num;
263 });
264 REQUIRE(num == 1);
265 memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
266 REQUIRE(offset == c + PAGE * 13);
267 REQUIRE(size == PAGE * 3);
268 ++num;
269 });
270 REQUIRE(num == 2);
271}
272
273TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") {
274 RasterizerInterface rasterizer;
275 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
276 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
277 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
278 int num = 0;
279 memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
280 REQUIRE(offset == c + PAGE * 16);
281 REQUIRE(size == WORD);
282 ++num;
283 });
284 REQUIRE(num == 1);
285 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
286 REQUIRE(offset == c + PAGE * 13);
287 REQUIRE(size == PAGE * 3);
288 ++num;
289 });
290 REQUIRE(num == 2);
291 memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
292 REQUIRE(offset == c + WORD + PAGE * 16);
293 REQUIRE(size == PAGE * 73);
294 ++num;
295 });
296 REQUIRE(num == 3);
297}
298
299TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") {
300 RasterizerInterface rasterizer;
301 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
302 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048);
303 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
304 memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
305 REQUIRE(offset == c + WORD - PAGE);
306 REQUIRE(size == PAGE * 2);
307 });
308}
309
310TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") {
311 RasterizerInterface rasterizer;
312 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
313 memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD);
314 memory_track->MarkRegionAsCpuModified(c, PAGE);
315 REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE);
316 int num = 0;
317 memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
318 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
319 memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
320 REQUIRE(num == 0);
321 memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
322 REQUIRE(num == 1);
323 memory_track->MarkRegionAsCpuModified(c, WORD);
324 REQUIRE(rasterizer.Count() == 2 * WORD / PAGE);
325}
326
327TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") {
328 RasterizerInterface rasterizer;
329 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
330 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
331 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
332 REQUIRE(rasterizer.Count() == 2);
333 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
334 REQUIRE(rasterizer.Count() == 3);
335 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2));
336 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2);
337 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2);
338 REQUIRE(rasterizer.Count() == 7);
339}
340
341TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") {
342 RasterizerInterface rasterizer;
343 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
344 memory_track->UnmarkRegionAsCpuModified(c, 0x310720);
345 REQUIRE(rasterizer.Count(c) == 1);
346 REQUIRE(rasterizer.Count(c + PAGE) == 1);
347 REQUIRE(rasterizer.Count(c + WORD) == 1);
348 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
349}
350
351TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") {
352 RasterizerInterface rasterizer;
353 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
354 memory_track->UnmarkRegionAsCpuModified(c, WORD);
355 memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
356 memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
357 memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
358 static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3};
359 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
360 REQUIRE(offset == offsets.at(i));
361 REQUIRE(size == sizes.at(i));
362 ++i;
363 });
364}
365
366TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") {
367 RasterizerInterface rasterizer;
368 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
369 memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23);
370 REQUIRE(rasterizer.Count() == 0x23);
371 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
372 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
373 memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable {
374 static constexpr std::array<u64, 3> offsets{c + PAGE * 0x1B, c + PAGE * 0x21};
375 static constexpr std::array<u64, 3> sizes{PAGE, PAGE};
376 REQUIRE(offset == offsets.at(i));
377 REQUIRE(size == sizes.at(i));
378 ++i;
379 });
380}
381
382TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
385 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
386 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
387 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
388}
389
390TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") {
391 RasterizerInterface rasterizer;
392 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
393 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
394 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
395 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2));
396 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
397 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
398}
399
400TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") {
401 for (int offset = 0; offset < 4; ++offset) {
402 const VAddr address = c + WORD * offset;
403 RasterizerInterface rasterizer;
404 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
405 REQUIRE(memory_track->IsRegionCpuModified(address, PAGE));
406 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE));
407 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE));
408
409 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
410 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD));
411 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE));
412 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE));
413 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE));
414 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
415 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
416
417 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
418 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
419 }
420}
421
422TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") {
423 RasterizerInterface rasterizer;
424 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
425 memory_track->UnmarkRegionAsCpuModified(c, WORD * 16);
426 REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16));
427
428 memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
429 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16));
430 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2));
431 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2));
432 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2));
433 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
434 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
435 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
436 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
437 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
438}
439
440TEST_CASE("MemoryTracker: Wrap word regions") {
441 RasterizerInterface rasterizer;
442 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
443 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
444 memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
445 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2));
446 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE));
447 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE));
448 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE));
449 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
450 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
451 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
452
453 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
454 memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
455 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE));
457 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE));
458 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
459 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16));
460}
461
462TEST_CASE("MemoryTracker: Unaligned page region query") {
463 RasterizerInterface rasterizer;
464 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
465 memory_track->UnmarkRegionAsCpuModified(c, WORD);
466 memory_track->MarkRegionAsCpuModified(c + 4000, 1000);
467 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
468 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
469 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000));
470 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1));
471}
472
473TEST_CASE("MemoryTracker: Cached write") {
474 RasterizerInterface rasterizer;
475 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
476 memory_track->UnmarkRegionAsCpuModified(c, WORD);
477 memory_track->CachedCpuWrite(c + PAGE, c + PAGE);
478 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
479 memory_track->FlushCachedWrites();
480 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
481 memory_track->MarkRegionAsCpuModified(c, WORD);
482 REQUIRE(rasterizer.Count() == 0);
483}
484
485TEST_CASE("MemoryTracker: Multiple cached write") {
486 RasterizerInterface rasterizer;
487 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
488 memory_track->UnmarkRegionAsCpuModified(c, WORD);
489 memory_track->CachedCpuWrite(c + PAGE, PAGE);
490 memory_track->CachedCpuWrite(c + PAGE * 3, PAGE);
491 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
492 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
493 memory_track->FlushCachedWrites();
494 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
495 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
496 memory_track->MarkRegionAsCpuModified(c, WORD);
497 REQUIRE(rasterizer.Count() == 0);
498}
499
500TEST_CASE("MemoryTracker: Cached write unmarked") {
501 RasterizerInterface rasterizer;
502 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
503 memory_track->UnmarkRegionAsCpuModified(c, WORD);
504 memory_track->CachedCpuWrite(c + PAGE, PAGE);
505 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
506 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
507 memory_track->FlushCachedWrites();
508 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
509 memory_track->MarkRegionAsCpuModified(c, WORD);
510 REQUIRE(rasterizer.Count() == 0);
511}
512
513TEST_CASE("MemoryTracker: Cached write iterated") {
514 RasterizerInterface rasterizer;
515 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
516 memory_track->UnmarkRegionAsCpuModified(c, WORD);
517 memory_track->CachedCpuWrite(c + PAGE, PAGE);
518 int num = 0;
519 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
520 REQUIRE(num == 0);
521 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
522 memory_track->FlushCachedWrites();
523 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
524 memory_track->MarkRegionAsCpuModified(c, WORD);
525 REQUIRE(rasterizer.Count() == 0);
526}
527
528TEST_CASE("MemoryTracker: Cached write downloads") {
529 RasterizerInterface rasterizer;
530 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
531 memory_track->UnmarkRegionAsCpuModified(c, WORD);
532 REQUIRE(rasterizer.Count() == 64);
533 memory_track->CachedCpuWrite(c + PAGE, PAGE);
534 REQUIRE(rasterizer.Count() == 63);
535 memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
536 int num = 0;
537 memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
538 REQUIRE(num == 1);
539 num = 0;
540 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
544 memory_track->FlushCachedWrites();
545 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
547 memory_track->MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549} \ No newline at end of file
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e904573d7..a0009a36f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -11,8 +11,11 @@ endif()
11 11
12add_library(video_core STATIC 12add_library(video_core STATIC
13 buffer_cache/buffer_base.h 13 buffer_cache/buffer_base.h
14 buffer_cache/buffer_cache_base.h
14 buffer_cache/buffer_cache.cpp 15 buffer_cache/buffer_cache.cpp
15 buffer_cache/buffer_cache.h 16 buffer_cache/buffer_cache.h
17 buffer_cache/memory_tracker_base.h
18 buffer_cache/word_manager.h
16 cache_types.h 19 cache_types.h
17 cdma_pusher.cpp 20 cdma_pusher.cpp
18 cdma_pusher.h 21 cdma_pusher.h
@@ -104,6 +107,7 @@ add_library(video_core STATIC
104 renderer_null/renderer_null.h 107 renderer_null/renderer_null.h
105 renderer_opengl/blit_image.cpp 108 renderer_opengl/blit_image.cpp
106 renderer_opengl/blit_image.h 109 renderer_opengl/blit_image.h
110 renderer_opengl/gl_buffer_cache_base.cpp
107 renderer_opengl/gl_buffer_cache.cpp 111 renderer_opengl/gl_buffer_cache.cpp
108 renderer_opengl/gl_buffer_cache.h 112 renderer_opengl/gl_buffer_cache.h
109 renderer_opengl/gl_compute_pipeline.cpp 113 renderer_opengl/gl_compute_pipeline.cpp
@@ -154,6 +158,7 @@ add_library(video_core STATIC
154 renderer_vulkan/renderer_vulkan.cpp 158 renderer_vulkan/renderer_vulkan.cpp
155 renderer_vulkan/vk_blit_screen.cpp 159 renderer_vulkan/vk_blit_screen.cpp
156 renderer_vulkan/vk_blit_screen.h 160 renderer_vulkan/vk_blit_screen.h
161 renderer_vulkan/vk_buffer_cache_base.cpp
157 renderer_vulkan/vk_buffer_cache.cpp 162 renderer_vulkan/vk_buffer_cache.cpp
158 renderer_vulkan/vk_buffer_cache.h 163 renderer_vulkan/vk_buffer_cache.h
159 renderer_vulkan/vk_command_pool.cpp 164 renderer_vulkan/vk_command_pool.cpp
@@ -174,6 +179,8 @@ add_library(video_core STATIC
174 renderer_vulkan/vk_master_semaphore.h 179 renderer_vulkan/vk_master_semaphore.h
175 renderer_vulkan/vk_pipeline_cache.cpp 180 renderer_vulkan/vk_pipeline_cache.cpp
176 renderer_vulkan/vk_pipeline_cache.h 181 renderer_vulkan/vk_pipeline_cache.h
182 renderer_vulkan/vk_present_manager.cpp
183 renderer_vulkan/vk_present_manager.h
177 renderer_vulkan/vk_query_cache.cpp 184 renderer_vulkan/vk_query_cache.cpp
178 renderer_vulkan/vk_query_cache.h 185 renderer_vulkan/vk_query_cache.h
179 renderer_vulkan/vk_rasterizer.cpp 186 renderer_vulkan/vk_rasterizer.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 1b4d63616..9cbd95c4b 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
@@ -11,9 +11,7 @@
11#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/common_funcs.h" 12#include "common/common_funcs.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/div_ceil.h" 14#include "video_core/buffer_cache/word_manager.h"
15#include "common/settings.h"
16#include "core/memory.h"
17 15
18namespace VideoCommon { 16namespace VideoCommon {
19 17
@@ -36,116 +34,12 @@ struct NullBufferParams {};
36 */ 34 */
37template <class RasterizerInterface> 35template <class RasterizerInterface>
38class BufferBase { 36class BufferBase {
39 static constexpr u64 PAGES_PER_WORD = 64;
40 static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
42
43 /// Vector tracking modified pages tightly packed with small vector optimization
44 union WordsArray {
45 /// Returns the pointer to the words state
46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
47 return is_short ? &stack : heap;
48 }
49
50 /// Returns the pointer to the words state
51 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
52 return is_short ? &stack : heap;
53 }
54
55 u64 stack = 0; ///< Small buffers storage
56 u64* heap; ///< Not-small buffers pointer to the storage
57 };
58
59 struct Words {
60 explicit Words() = default;
61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
62 if (IsShort()) {
63 cpu.stack = ~u64{0};
64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
67 } else {
68 // Share allocation between CPU and GPU pages and set their default values
69 const size_t num_words = NumWords();
70 u64* const alloc = new u64[num_words * 4];
71 cpu.heap = alloc;
72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
75 std::fill_n(cpu.heap, num_words, ~u64{0});
76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
79 }
80 // Clean up tailing bits
81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
84 const u64 last_word = (~u64{0} << shift) >> shift;
85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
87 }
88
89 ~Words() {
90 Release();
91 }
92
93 Words& operator=(Words&& rhs) noexcept {
94 Release();
95 size_bytes = rhs.size_bytes;
96 cpu = rhs.cpu;
97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
100 rhs.cpu.heap = nullptr;
101 return *this;
102 }
103
104 Words(Words&& rhs) noexcept
105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
107 rhs.cpu.heap = nullptr;
108 }
109
110 Words& operator=(const Words&) = delete;
111 Words(const Words&) = delete;
112
113 /// Returns true when the buffer fits in the small vector optimization
114 [[nodiscard]] bool IsShort() const noexcept {
115 return size_bytes <= BYTES_PER_WORD;
116 }
117
118 /// Returns the number of words of the buffer
119 [[nodiscard]] size_t NumWords() const noexcept {
120 return Common::DivCeil(size_bytes, BYTES_PER_WORD);
121 }
122
123 /// Release buffer resources
124 void Release() {
125 if (!IsShort()) {
126 // CPU written words is the base for the heap allocation
127 delete[] cpu.heap;
128 }
129 }
130
131 u64 size_bytes = 0;
132 WordsArray cpu;
133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
143 };
144
145public: 37public:
146 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) 38 static constexpr u64 BASE_PAGE_BITS = 16;
147 : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
148 words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} 40
41 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_)
42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
149 43
150 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
151 45
@@ -155,100 +49,6 @@ public:
155 BufferBase& operator=(BufferBase&&) = default; 49 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default; 50 BufferBase(BufferBase&&) = default;
157 51
158 /// Returns the inclusive CPU modified range in a begin end pair
159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
160 u64 query_size) const noexcept {
161 const u64 offset = query_cpu_addr - cpu_addr;
162 return ModifiedRegion<Type::CPU>(offset, query_size);
163 }
164
165 /// Returns the inclusive GPU modified range in a begin end pair
166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
167 u64 query_size) const noexcept {
168 const u64 offset = query_cpu_addr - cpu_addr;
169 return ModifiedRegion<Type::GPU>(offset, query_size);
170 }
171
172 /// Returns true if a region has been modified from the CPU
173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
174 const u64 offset = query_cpu_addr - cpu_addr;
175 return IsRegionModified<Type::CPU>(offset, query_size);
176 }
177
178 /// Returns true if a region has been modified from the GPU
179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
180 const u64 offset = query_cpu_addr - cpu_addr;
181 return IsRegionModified<Type::GPU>(offset, query_size);
182 }
183
184 /// Mark region as CPU modified, notifying the rasterizer about this change
185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
187 }
188
189 /// Unmark region as CPU modified, notifying the rasterizer about this change
190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
192 }
193
194 /// Mark region as modified from the host GPU
195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
197 }
198
199 /// Unmark region as modified from the host GPU
200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU
205 /// but don't mark it as modified until FlusHCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 if (!Settings::values.use_pessimistic_flushes) {
224 cached_words[word_index] = 0;
225 }
226 }
227 }
228
229 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
230 template <typename Func>
231 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
232 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
233 }
234
235 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
236 template <typename Func>
237 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
238 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
239 }
240
241 template <typename Func>
242 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) {
243 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func);
244 }
245
246 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
247 template <typename Func>
248 void ForEachDownloadRange(Func&& func) {
249 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
250 }
251
252 /// Mark buffer as picked 52 /// Mark buffer as picked
253 void Pick() noexcept { 53 void Pick() noexcept {
254 flags |= BufferFlagBits::Picked; 54 flags |= BufferFlagBits::Picked;
@@ -295,11 +95,6 @@ public:
295 return static_cast<u32>(other_cpu_addr - cpu_addr); 95 return static_cast<u32>(other_cpu_addr - cpu_addr);
296 } 96 }
297 97
298 /// Returns the size in bytes of the buffer
299 [[nodiscard]] u64 SizeBytes() const noexcept {
300 return words.size_bytes;
301 }
302
303 size_t getLRUID() const noexcept { 98 size_t getLRUID() const noexcept {
304 return lru_id; 99 return lru_id;
305 } 100 }
@@ -308,305 +103,16 @@ public:
308 lru_id = lru_id_; 103 lru_id = lru_id_;
309 } 104 }
310 105
311private: 106 size_t SizeBytes() const {
312 template <Type type> 107 return size_bytes;
313 u64* Array() noexcept {
314 if constexpr (type == Type::CPU) {
315 return words.cpu.Pointer(IsShort());
316 } else if constexpr (type == Type::GPU) {
317 return words.gpu.Pointer(IsShort());
318 } else if constexpr (type == Type::CachedCPU) {
319 return words.cached_cpu.Pointer(IsShort());
320 } else if constexpr (type == Type::Untracked) {
321 return words.untracked.Pointer(IsShort());
322 }
323 }
324
325 template <Type type>
326 const u64* Array() const noexcept {
327 if constexpr (type == Type::CPU) {
328 return words.cpu.Pointer(IsShort());
329 } else if constexpr (type == Type::GPU) {
330 return words.gpu.Pointer(IsShort());
331 } else if constexpr (type == Type::CachedCPU) {
332 return words.cached_cpu.Pointer(IsShort());
333 } else if constexpr (type == Type::Untracked) {
334 return words.untracked.Pointer(IsShort());
335 }
336 }
337
338 /**
339 * Change the state of a range of pages
340 *
341 * @param dirty_addr Base address to mark or unmark as modified
342 * @param size Size in bytes to mark or unmark as modified
343 */
344 template <Type type, bool enable>
345 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
346 const s64 difference = dirty_addr - cpu_addr;
347 const u64 offset = std::max<s64>(difference, 0);
348 size += std::min<s64>(difference, 0);
349 if (offset >= SizeBytes() || size < 0) {
350 return;
351 }
352 u64* const untracked_words = Array<Type::Untracked>();
353 u64* const state_words = Array<type>();
354 const u64 offset_end = std::min(offset + size, SizeBytes());
355 const u64 begin_page_index = offset / BYTES_PER_PAGE;
356 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
357 const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE);
358 const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD);
359 u64 page_index = begin_page_index % PAGES_PER_WORD;
360 u64 word_index = begin_word_index;
361 while (word_index < end_word_index) {
362 const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD;
363 const u64 left_offset =
364 std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD;
365 const u64 right_offset = page_index;
366 u64 bits = ~u64{0};
367 bits = (bits >> right_offset) << right_offset;
368 bits = (bits << left_offset) >> left_offset;
369 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
370 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
371 }
372 if constexpr (enable) {
373 state_words[word_index] |= bits;
374 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
375 untracked_words[word_index] |= bits;
376 }
377 } else {
378 state_words[word_index] &= ~bits;
379 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
380 untracked_words[word_index] &= ~bits;
381 }
382 }
383 page_index = 0;
384 ++word_index;
385 }
386 }
387
388 /**
389 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
390 *
391 * @param word_index Index to the word to notify to the rasterizer
392 * @param current_bits Current state of the word
393 * @param new_bits New state of the word
394 *
395 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
396 */
397 template <bool add_to_rasterizer>
398 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
399 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
400 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
401 while (changed_bits != 0) {
402 const int empty_bits = std::countr_zero(changed_bits);
403 addr += empty_bits * BYTES_PER_PAGE;
404 changed_bits >>= empty_bits;
405
406 const u32 continuous_bits = std::countr_one(changed_bits);
407 const u64 size = continuous_bits * BYTES_PER_PAGE;
408 const VAddr begin_addr = addr;
409 addr += size;
410 changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0;
411 rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1);
412 }
413 }
414
415 /**
416 * Loop over each page in the given range, turn off those bits and notify the rasterizer if
417 * needed. Call the given function on each turned off range.
418 *
419 * @param query_cpu_range Base CPU address to loop over
420 * @param size Size in bytes of the CPU range to loop over
421 * @param func Function to call for each turned off region
422 */
423 template <Type type, typename Func>
424 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
425 static_assert(type != Type::Untracked);
426
427 const s64 difference = query_cpu_range - cpu_addr;
428 const u64 query_begin = std::max<s64>(difference, 0);
429 size += std::min<s64>(difference, 0);
430 if (query_begin >= SizeBytes() || size < 0) {
431 return;
432 }
433 u64* const untracked_words = Array<Type::Untracked>();
434 u64* const state_words = Array<type>();
435 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
436 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
437 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
438
439 const auto modified = [](u64 word) { return word != 0; };
440 const auto first_modified_word = std::find_if(words_begin, words_end, modified);
441 if (first_modified_word == words_end) {
442 // Exit early when the buffer is not modified
443 return;
444 }
445 const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified);
446
447 const u64 word_index_begin = std::distance(state_words, first_modified_word);
448 const u64 word_index_end = std::distance(state_words, last_modified_word);
449
450 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
451 const unsigned local_page_end =
452 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
453 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
454 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
455 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
456 const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE);
457 const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin);
458 const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end);
459 const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD;
460 const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1;
461
462 u64 page_begin = first_word_page_begin;
463 u64 current_base = 0;
464 u64 current_size = 0;
465 bool on_going = false;
466 for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) {
467 const bool is_last_word = word_index + 1 == word_index_end;
468 const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD;
469 const u64 right_offset = page_begin;
470 const u64 left_offset = PAGES_PER_WORD - page_end;
471 u64 bits = ~u64{0};
472 bits = (bits >> right_offset) << right_offset;
473 bits = (bits << left_offset) >> left_offset;
474
475 const u64 current_word = state_words[word_index] & bits;
476 if (clear) {
477 state_words[word_index] &= ~bits;
478 }
479
480 if constexpr (type == Type::CPU) {
481 const u64 current_bits = untracked_words[word_index] & bits;
482 untracked_words[word_index] &= ~bits;
483 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
484 }
485 // Exclude CPU modified pages when visiting GPU pages
486 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
487 u64 page = page_begin;
488 page_begin = 0;
489
490 while (page < page_end) {
491 const int empty_bits = std::countr_zero(word >> page);
492 if (on_going && empty_bits != 0) {
493 InvokeModifiedRange(func, current_size, current_base);
494 current_size = 0;
495 on_going = false;
496 }
497 if (empty_bits == PAGES_PER_WORD) {
498 break;
499 }
500 page += empty_bits;
501
502 const int continuous_bits = std::countr_one(word >> page);
503 if (!on_going && continuous_bits != 0) {
504 current_base = word_index * PAGES_PER_WORD + page;
505 on_going = true;
506 }
507 current_size += continuous_bits;
508 page += continuous_bits;
509 }
510 }
511 if (on_going && current_size > 0) {
512 InvokeModifiedRange(func, current_size, current_base);
513 }
514 }
515
516 template <typename Func>
517 void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) {
518 const u64 current_size_bytes = current_size * BYTES_PER_PAGE;
519 const u64 offset_begin = current_base * BYTES_PER_PAGE;
520 const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes());
521 func(offset_begin, offset_end - offset_begin);
522 } 108 }
523 109
524 /** 110private:
525 * Returns true when a region has been modified
526 *
527 * @param offset Offset in bytes from the start of the buffer
528 * @param size Size in bytes of the region to query for modifications
529 */
530 template <Type type>
531 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
532 static_assert(type != Type::Untracked);
533
534 const u64* const untracked_words = Array<Type::Untracked>();
535 const u64* const state_words = Array<type>();
536 const u64 num_query_words = size / BYTES_PER_WORD + 1;
537 const u64 word_begin = offset / BYTES_PER_WORD;
538 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
539 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
540 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
541 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
542 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
543 const u64 word = state_words[word_index] & ~off_word;
544 if (word == 0) {
545 continue;
546 }
547 const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit);
548 const u64 local_page_end = page_end % PAGES_PER_WORD;
549 const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD;
550 if (((word >> page_index) << page_index) << page_end_shift != 0) {
551 return true;
552 }
553 }
554 return false;
555 }
556
557 /**
558 * Returns a begin end pair with the inclusive modified region
559 *
560 * @param offset Offset in bytes from the start of the buffer
561 * @param size Size in bytes of the region to query for modifications
562 */
563 template <Type type>
564 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
565 static_assert(type != Type::Untracked);
566
567 const u64* const untracked_words = Array<Type::Untracked>();
568 const u64* const state_words = Array<type>();
569 const u64 num_query_words = size / BYTES_PER_WORD + 1;
570 const u64 word_begin = offset / BYTES_PER_WORD;
571 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
572 const u64 page_base = offset / BYTES_PER_PAGE;
573 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
574 u64 begin = std::numeric_limits<u64>::max();
575 u64 end = 0;
576 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
577 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
578 const u64 word = state_words[word_index] & ~off_word;
579 if (word == 0) {
580 continue;
581 }
582 const u64 local_page_begin = std::countr_zero(word);
583 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
584 const u64 page_index = word_index * PAGES_PER_WORD;
585 const u64 page_begin = std::max(page_index + local_page_begin, page_base);
586 const u64 page_end = std::min(page_index + local_page_end, page_limit);
587 begin = std::min(begin, page_begin);
588 end = std::max(end, page_end);
589 }
590 static constexpr std::pair<u64, u64> EMPTY{0, 0};
591 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
592 }
593
594 /// Returns the number of words of the buffer
595 [[nodiscard]] size_t NumWords() const noexcept {
596 return words.NumWords();
597 }
598
599 /// Returns true when the buffer fits in the small vector optimization
600 [[nodiscard]] bool IsShort() const noexcept {
601 return words.IsShort();
602 }
603
604 RasterizerInterface* rasterizer = nullptr;
605 VAddr cpu_addr = 0; 111 VAddr cpu_addr = 0;
606 Words words;
607 BufferFlagBits flags{}; 112 BufferFlagBits flags{};
608 int stream_score = 0; 113 int stream_score = 0;
609 size_t lru_id = SIZE_MAX; 114 size_t lru_id = SIZE_MAX;
115 size_t size_bytes = 0;
610}; 116};
611 117
612} // namespace VideoCommon 118} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index a16308b60..40db243d2 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include "common/microprofile.h" 4#include "common/microprofile.h"
5 5
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index abdc593df..e534e1e9c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1,485 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <algorithm> 6#include <algorithm>
7#include <array>
8#include <memory> 7#include <memory>
9#include <mutex>
10#include <numeric> 8#include <numeric>
11#include <span>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/icl/interval_set.hpp>
16
17#include "common/common_types.h"
18#include "common/div_ceil.h"
19#include "common/literals.h"
20#include "common/lru_cache.h"
21#include "common/microprofile.h"
22#include "common/polyfill_ranges.h"
23#include "common/scratch_buffer.h"
24#include "common/settings.h"
25#include "core/memory.h"
26#include "video_core/buffer_cache/buffer_base.h"
27#include "video_core/control/channel_state_cache.h"
28#include "video_core/delayed_destruction_ring.h"
29#include "video_core/dirty_flags.h"
30#include "video_core/engines/draw_manager.h"
31#include "video_core/engines/kepler_compute.h"
32#include "video_core/engines/maxwell_3d.h"
33#include "video_core/memory_manager.h"
34#include "video_core/rasterizer_interface.h"
35#include "video_core/surface.h"
36#include "video_core/texture_cache/slot_vector.h"
37#include "video_core/texture_cache/types.h"
38 9
39namespace VideoCommon { 10#include "video_core/buffer_cache/buffer_cache_base.h"
40
41MICROPROFILE_DECLARE(GPU_PrepareBuffers);
42MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
43MICROPROFILE_DECLARE(GPU_DownloadMemory);
44
45using BufferId = SlotId;
46
47using VideoCore::Surface::PixelFormat;
48using namespace Common::Literals;
49
50constexpr u32 NUM_VERTEX_BUFFERS = 32;
51constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
52constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
53constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
54constexpr u32 NUM_STORAGE_BUFFERS = 16;
55constexpr u32 NUM_TEXTURE_BUFFERS = 16;
56constexpr u32 NUM_STAGES = 5;
57
58enum class ObtainBufferSynchronize : u32 {
59 NoSynchronize = 0,
60 FullSynchronize = 1,
61 SynchronizeNoDirty = 2,
62};
63
64enum class ObtainBufferOperation : u32 {
65 DoNothing = 0,
66 MarkAsWritten = 1,
67 DiscardWrite = 2,
68 MarkQuery = 3,
69};
70
71using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
72using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
73
74template <typename P>
75class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
76
77 // Page size for caching purposes.
78 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
79 static constexpr u32 YUZU_PAGEBITS = 16;
80 static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS;
81
82 static constexpr bool IS_OPENGL = P::IS_OPENGL;
83 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
84 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
85 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
86 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
87 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
88 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
89 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
90 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
91
92 static constexpr BufferId NULL_BUFFER_ID{0};
93
94 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
95 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
96 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
97
98 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
99
100 using Runtime = typename P::Runtime;
101 using Buffer = typename P::Buffer;
102
103 using IntervalSet = boost::icl::interval_set<VAddr>;
104 using IntervalType = typename IntervalSet::interval_type;
105
106 struct Empty {};
107
108 struct OverlapResult {
109 std::vector<BufferId> ids;
110 VAddr begin;
111 VAddr end;
112 bool has_stream_leap = false;
113 };
114
115 struct Binding {
116 VAddr cpu_addr{};
117 u32 size{};
118 BufferId buffer_id;
119 };
120
121 struct TextureBufferBinding : Binding {
122 PixelFormat format;
123 };
124
125 static constexpr Binding NULL_BINDING{
126 .cpu_addr = 0,
127 .size = 0,
128 .buffer_id = NULL_BUFFER_ID,
129 };
130
131public:
132 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
133
134 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
135 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
136
137 void TickFrame();
138
139 void WriteMemory(VAddr cpu_addr, u64 size);
140
141 void CachedWriteMemory(VAddr cpu_addr, u64 size);
142
143 void DownloadMemory(VAddr cpu_addr, u64 size);
144
145 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
146
147 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
148
149 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
150
151 void UpdateGraphicsBuffers(bool is_indexed);
152
153 void UpdateComputeBuffers();
154
155 void BindHostGeometryBuffers(bool is_indexed);
156
157 void BindHostStageBuffers(size_t stage);
158
159 void BindHostComputeBuffers();
160
161 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
162 const UniformBufferSizes* sizes);
163
164 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
165
166 void UnbindGraphicsStorageBuffers(size_t stage);
167
168 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
169 bool is_written);
170
171 void UnbindGraphicsTextureBuffers(size_t stage);
172
173 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
174 PixelFormat format, bool is_written, bool is_image);
175
176 void UnbindComputeStorageBuffers();
177
178 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
179 bool is_written);
180
181 void UnbindComputeTextureBuffers();
182
183 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
184 bool is_written, bool is_image);
185
186 void FlushCachedWrites();
187
188 /// Return true when there are uncommitted buffers to be downloaded
189 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
190
191 void AccumulateFlushes();
192
193 /// Return true when the caller should wait for async downloads
194 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
195
196 /// Commit asynchronous downloads
197 void CommitAsyncFlushes();
198 void CommitAsyncFlushesHigh();
199
200 /// Pop asynchronous downloads
201 void PopAsyncFlushes();
202
203 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
204
205 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
206
207 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
208 ObtainBufferSynchronize sync_info,
209 ObtainBufferOperation post_op);
210
211 /// Return true when a CPU region is modified from the GPU
212 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
213
214 /// Return true when a region is registered on the cache
215 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
216
217 /// Return true when a CPU region is modified from the CPU
218 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
219
220 void SetDrawIndirect(
221 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
222 current_draw_indirect = current_draw_indirect_;
223 }
224
225 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
226
227 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
228
229 std::recursive_mutex mutex;
230 Runtime& runtime;
231
232private:
233 template <typename Func>
234 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
235 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
236 const int disabled_bits = std::countr_zero(enabled_mask);
237 index += disabled_bits;
238 enabled_mask >>= disabled_bits;
239 func(index);
240 }
241 }
242
243 template <typename Func>
244 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
245 const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE);
246 for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) {
247 const BufferId buffer_id = page_table[page];
248 if (!buffer_id) {
249 ++page;
250 continue;
251 }
252 Buffer& buffer = slot_buffers[buffer_id];
253 func(buffer_id, buffer);
254
255 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
256 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
257 }
258 }
259
    /// Invokes func(begin, end) for each intersection of the query range
    /// [cpu_addr, cpu_addr + size) with the intervals stored in common_ranges,
    /// clamping each reported interval to the query bounds.
    template <typename Func>
    void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
        const VAddr start_address = cpu_addr;
        const VAddr end_address = start_address + size;
        // NOTE(review): min with 0 means search_base is 0 unless the signed
        // cast of (start_address - size) goes negative, so lower_bound almost
        // always lands at the first interval — confirm this seed is intended.
        const VAddr search_base =
            static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
        const IntervalType search_interval{search_base, search_base + 1};
        auto it = common_ranges.lower_bound(search_interval);
        if (it == common_ranges.end()) {
            it = common_ranges.begin();
        }
        for (; it != common_ranges.end(); it++) {
            VAddr inter_addr_end = it->upper();
            VAddr inter_addr = it->lower();
            if (inter_addr >= end_address) {
                // Intervals are ordered; nothing past the query can overlap.
                break;
            }
            if (inter_addr_end <= start_address) {
                // Interval lies entirely before the query range.
                continue;
            }
            // Clamp the overlap to the query bounds before reporting it.
            if (inter_addr_end > end_address) {
                inter_addr_end = end_address;
            }
            if (inter_addr < start_address) {
                inter_addr = start_address;
            }
            func(inter_addr, inter_addr_end);
        }
    }
289
290 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
291 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
292 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
293 }
294
295 void RunGarbageCollector();
296
297 void BindHostIndexBuffer();
298
299 void BindHostVertexBuffers();
300
301 void BindHostDrawIndirectBuffers();
302
303 void BindHostGraphicsUniformBuffers(size_t stage);
304
305 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
306
307 void BindHostGraphicsStorageBuffers(size_t stage);
308
309 void BindHostGraphicsTextureBuffers(size_t stage);
310
311 void BindHostTransformFeedbackBuffers();
312
313 void BindHostComputeUniformBuffers();
314
315 void BindHostComputeStorageBuffers();
316
317 void BindHostComputeTextureBuffers();
318
319 void DoUpdateGraphicsBuffers(bool is_indexed);
320
321 void DoUpdateComputeBuffers();
322
323 void UpdateIndexBuffer();
324
325 void UpdateVertexBuffers();
326
327 void UpdateVertexBuffer(u32 index);
328
329 void UpdateDrawIndirect();
330
331 void UpdateUniformBuffers(size_t stage);
332
333 void UpdateStorageBuffers(size_t stage);
334
335 void UpdateTextureBuffers(size_t stage);
336
337 void UpdateTransformFeedbackBuffers();
338
339 void UpdateTransformFeedbackBuffer(u32 index);
340
341 void UpdateComputeUniformBuffers();
342
343 void UpdateComputeStorageBuffers();
344
345 void UpdateComputeTextureBuffers();
346
347 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
348
349 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
350
351 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
352
353 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
354
355 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
356
357 void Register(BufferId buffer_id);
358
359 void Unregister(BufferId buffer_id);
360
361 template <bool insert>
362 void ChangeRegister(BufferId buffer_id);
363
364 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
365
366 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
367
368 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
369
370 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
371 std::span<BufferCopy> copies);
372
373 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
374 std::span<const BufferCopy> copies);
375
376 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
377
378 void DownloadBufferMemory(Buffer& buffer_id);
379
380 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
381
382 void DeleteBuffer(BufferId buffer_id);
383
384 void NotifyBufferDeletion();
385
386 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
387 bool is_written = false) const;
388
389 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
390 PixelFormat format);
391
392 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
393
394 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
395
396 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
397
398 void ClearDownload(IntervalType subtract_interval);
399
400 VideoCore::RasterizerInterface& rasterizer;
401 Core::Memory::Memory& cpu_memory;
402
403 SlotVector<Buffer> slot_buffers;
404 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
405
406 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
407
408 u32 last_index_count = 0;
409
410 Binding index_buffer;
411 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
412 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
413 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
414 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
415 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
416 Binding count_buffer_binding;
417 Binding indirect_buffer_binding;
418
419 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
420 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
421 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
422
423 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
424 u32 enabled_compute_uniform_buffer_mask = 0;
425
426 const UniformBufferSizes* uniform_buffer_sizes{};
427 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
428
429 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
430 std::array<u32, NUM_STAGES> written_storage_buffers{};
431 u32 enabled_compute_storage_buffers = 0;
432 u32 written_compute_storage_buffers = 0;
433
434 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
435 std::array<u32, NUM_STAGES> written_texture_buffers{};
436 std::array<u32, NUM_STAGES> image_texture_buffers{};
437 u32 enabled_compute_texture_buffers = 0;
438 u32 written_compute_texture_buffers = 0;
439 u32 image_compute_texture_buffers = 0;
440
441 std::array<u32, 16> uniform_cache_hits{};
442 std::array<u32, 16> uniform_cache_shots{};
443
444 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
445
446 bool has_deleted_buffers = false;
447 11
448 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 12namespace VideoCommon {
449 dirty_uniform_buffers{};
450 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
451 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
452 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
453 uniform_buffer_binding_sizes{};
454
455 std::vector<BufferId> cached_write_buffer_ids;
456
457 IntervalSet uncommitted_ranges;
458 IntervalSet common_ranges;
459 std::deque<IntervalSet> committed_ranges;
460
461 Common::ScratchBuffer<u8> immediate_buffer_alloc;
462
463 struct LRUItemParams {
464 using ObjectType = BufferId;
465 using TickType = u64;
466 };
467 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
468 u64 frame_tick = 0;
469 u64 total_used_memory = 0;
470 u64 minimum_memory = 0;
471 u64 critical_memory = 0;
472 13
473 std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; 14using Core::Memory::YUZU_PAGESIZE;
474};
475 15
476template <class P> 16template <class P>
477BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, 17BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
478 Core::Memory::Memory& cpu_memory_, Runtime& runtime_) 18 Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
479 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { 19 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
20 rasterizer} {
480 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
481 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
482 common_ranges.clear(); 23 common_ranges.clear();
24 inline_buffer_id = NULL_BUFFER_ID;
25
26 active_async_buffers = !Settings::IsGPULevelHigh();
483 27
484 if (!runtime.CanReportMemoryUsage()) { 28 if (!runtime.CanReportMemoryUsage()) {
485 minimum_memory = DEFAULT_EXPECTED_MEMORY; 29 minimum_memory = DEFAULT_EXPECTED_MEMORY;
@@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() {
531 uniform_cache_hits[0] = 0; 75 uniform_cache_hits[0] = 0;
532 uniform_cache_shots[0] = 0; 76 uniform_cache_shots[0] = 0;
533 77
78 active_async_buffers = !Settings::IsGPULevelHigh();
79
534 const bool skip_preferred = hits * 256 < shots * 251; 80 const bool skip_preferred = hits * 256 < shots * 251;
535 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 81 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
536 82
@@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() {
543 } 89 }
544 ++frame_tick; 90 ++frame_tick;
545 delayed_destruction_ring.Tick(); 91 delayed_destruction_ring.Tick();
92
93 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
94 for (auto& buffer : async_buffers_death_ring) {
95 runtime.FreeDeferredStagingBuffer(buffer);
96 }
97 async_buffers_death_ring.clear();
98 }
546} 99}
547 100
548template <class P> 101template <class P>
549void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 102void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
550 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 103 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
551 buffer.MarkRegionAsCpuModified(cpu_addr, size); 104 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
552 }); 105 const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
106 ClearDownload(subtract_interval);
107 common_ranges.subtract(subtract_interval);
108 }
553} 109}
554 110
555template <class P> 111template <class P>
556void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 112void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
557 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 113 memory_tracker.CachedCpuWrite(cpu_addr, size);
558 if (!buffer.HasCachedWrites()) { 114 const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE),
559 cached_write_buffer_ids.push_back(buffer_id); 115 Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)};
560 } 116 cached_ranges.add(add_interval);
561 buffer.CachedCpuWrite(cpu_addr, size);
562 });
563} 117}
564 118
565template <class P> 119template <class P>
566void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 120void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
121 WaitOnAsyncFlushes(cpu_addr, size);
567 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 122 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
568 DownloadBufferMemory(buffer, cpu_addr, size); 123 DownloadBufferMemory(buffer, cpu_addr, size);
569 }); 124 });
570} 125}
571 126
572template <class P> 127template <class P>
128void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
129 bool must_wait = false;
130 ForEachInOverlapCounter(async_downloads, cpu_addr, size,
131 [&](VAddr, VAddr, int) { must_wait = true; });
132 bool must_release = false;
133 ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
134 if (must_release) {
135 std::function<void()> tmp([]() {});
136 rasterizer.SignalFence(std::move(tmp));
137 }
138 if (must_wait || must_release) {
139 rasterizer.ReleaseFences();
140 }
141}
142
143template <class P>
573void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { 144void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
145 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
574 uncommitted_ranges.subtract(subtract_interval); 146 uncommitted_ranges.subtract(subtract_interval);
147 pending_ranges.subtract(subtract_interval);
575 for (auto& interval_set : committed_ranges) { 148 for (auto& interval_set : committed_ranges) {
576 interval_set.subtract(subtract_interval); 149 interval_set.subtract(subtract_interval);
577 } 150 }
@@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
591 } 164 }
592 165
593 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; 166 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
167 WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
594 ClearDownload(subtract_interval); 168 ClearDownload(subtract_interval);
595 169
596 BufferId buffer_a; 170 BufferId buffer_a;
@@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
616 const VAddr diff = base_address - *cpu_src_address; 190 const VAddr diff = base_address - *cpu_src_address;
617 const VAddr new_base_address = *cpu_dest_address + diff; 191 const VAddr new_base_address = *cpu_dest_address + diff;
618 const IntervalType add_interval{new_base_address, new_base_address + size}; 192 const IntervalType add_interval{new_base_address, new_base_address + size};
619 uncommitted_ranges.add(add_interval);
620 tmp_intervals.push_back(add_interval); 193 tmp_intervals.push_back(add_interval);
194 uncommitted_ranges.add(add_interval);
195 pending_ranges.add(add_interval);
621 }; 196 };
622 ForEachWrittenRange(*cpu_src_address, amount, mirror); 197 ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
623 // This subtraction in this order is important for overlapping copies. 198 // This subtraction in this order is important for overlapping copies.
624 common_ranges.subtract(subtract_interval); 199 common_ranges.subtract(subtract_interval);
625 const bool has_new_downloads = tmp_intervals.size() != 0; 200 const bool has_new_downloads = tmp_intervals.size() != 0;
@@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
628 } 203 }
629 runtime.CopyBuffer(dest_buffer, src_buffer, copies); 204 runtime.CopyBuffer(dest_buffer, src_buffer, copies);
630 if (has_new_downloads) { 205 if (has_new_downloads) {
631 dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); 206 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
632 } 207 }
633 std::vector<u8> tmp_buffer(amount); 208 std::vector<u8> tmp_buffer(amount);
634 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 209 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
@@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
866 441
867template <class P> 442template <class P>
868void BufferCache<P>::FlushCachedWrites() { 443void BufferCache<P>::FlushCachedWrites() {
869 for (const BufferId buffer_id : cached_write_buffer_ids) {
870 slot_buffers[buffer_id].FlushCachedWrites();
871 }
872 cached_write_buffer_ids.clear(); 444 cached_write_buffer_ids.clear();
445 memory_tracker.FlushCachedWrites();
446 cached_ranges.clear();
873} 447}
874 448
875template <class P> 449template <class P>
@@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
879 453
880template <class P> 454template <class P>
881void BufferCache<P>::AccumulateFlushes() { 455void BufferCache<P>::AccumulateFlushes() {
882 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
883 uncommitted_ranges.clear();
884 return;
885 }
886 if (uncommitted_ranges.empty()) { 456 if (uncommitted_ranges.empty()) {
887 return; 457 return;
888 } 458 }
@@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() {
891 461
892template <class P> 462template <class P>
893bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { 463bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
894 return false; 464 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
465 return (!async_buffers.empty() && async_buffers.front().has_value());
466 } else {
467 return false;
468 }
895} 469}
896 470
897template <class P> 471template <class P>
@@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
899 AccumulateFlushes(); 473 AccumulateFlushes();
900 474
901 if (committed_ranges.empty()) { 475 if (committed_ranges.empty()) {
476 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
477 if (active_async_buffers) {
478 async_buffers.emplace_back(std::optional<Async_Buffer>{});
479 }
480 }
902 return; 481 return;
903 } 482 }
904 MICROPROFILE_SCOPE(GPU_DownloadMemory); 483 MICROPROFILE_SCOPE(GPU_DownloadMemory);
905 const bool is_accuracy_normal =
906 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
907 484
485 pending_ranges.clear();
908 auto it = committed_ranges.begin(); 486 auto it = committed_ranges.begin();
909 while (it != committed_ranges.end()) { 487 while (it != committed_ranges.end()) {
910 auto& current_intervals = *it; 488 auto& current_intervals = *it;
@@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
926 const std::size_t size = interval.upper() - interval.lower(); 504 const std::size_t size = interval.upper() - interval.lower();
927 const VAddr cpu_addr = interval.lower(); 505 const VAddr cpu_addr = interval.lower();
928 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 506 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
929 buffer.ForEachDownloadRangeAndClear( 507 const VAddr buffer_start = buffer.CpuAddr();
930 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 508 const VAddr buffer_end = buffer_start + buffer.SizeBytes();
931 if (is_accuracy_normal) { 509 const VAddr new_start = std::max(buffer_start, cpu_addr);
932 return; 510 const VAddr new_end = std::min(buffer_end, cpu_addr + size);
933 } 511 memory_tracker.ForEachDownloadRange(
512 new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
934 const VAddr buffer_addr = buffer.CpuAddr(); 513 const VAddr buffer_addr = buffer.CpuAddr();
935 const auto add_download = [&](VAddr start, VAddr end) { 514 const auto add_download = [&](VAddr start, VAddr end) {
936 const u64 new_offset = start - buffer_addr; 515 const u64 new_offset = start - buffer_addr;
@@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
944 buffer_id, 523 buffer_id,
945 }); 524 });
946 // Align up to avoid cache conflicts 525 // Align up to avoid cache conflicts
947 constexpr u64 align = 8ULL; 526 constexpr u64 align = 64ULL;
948 constexpr u64 mask = ~(align - 1ULL); 527 constexpr u64 mask = ~(align - 1ULL);
949 total_size_bytes += (new_size + align - 1) & mask; 528 total_size_bytes += (new_size + align - 1) & mask;
950 largest_copy = std::max(largest_copy, new_size); 529 largest_copy = std::max(largest_copy, new_size);
951 }; 530 };
952 531
953 const VAddr start_address = buffer_addr + range_offset; 532 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download);
954 const VAddr end_address = start_address + range_size;
955 ForEachWrittenRange(start_address, range_size, add_download);
956 const IntervalType subtract_interval{start_address, end_address};
957 common_ranges.subtract(subtract_interval);
958 }); 533 });
959 }); 534 });
960 } 535 }
961 } 536 }
962 committed_ranges.clear(); 537 committed_ranges.clear();
963 if (downloads.empty()) { 538 if (downloads.empty()) {
539 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
540 if (active_async_buffers) {
541 async_buffers.emplace_back(std::optional<Async_Buffer>{});
542 }
543 }
964 return; 544 return;
965 } 545 }
966 if constexpr (USE_MEMORY_MAPS) { 546 if (active_async_buffers) {
967 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 547 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
968 runtime.PreCopyBarrier(); 548 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
969 for (auto& [copy, buffer_id] : downloads) { 549 boost::container::small_vector<BufferCopy, 4> normalized_copies;
970 // Have in mind the staging buffer offset for the copy 550 IntervalSet new_async_range{};
971 copy.dst_offset += download_staging.offset; 551 runtime.PreCopyBarrier();
972 const std::array copies{copy}; 552 for (auto& [copy, buffer_id] : downloads) {
973 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); 553 copy.dst_offset += download_staging.offset;
974 } 554 const std::array copies{copy};
975 runtime.PostCopyBarrier(); 555 BufferCopy second_copy{copy};
976 runtime.Finish(); 556 Buffer& buffer = slot_buffers[buffer_id];
977 for (const auto& [copy, buffer_id] : downloads) { 557 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
978 const Buffer& buffer = slot_buffers[buffer_id]; 558 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
979 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 559 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
980 // Undo the modified offset 560 async_downloads += std::make_pair(base_interval, 1);
981 const u64 dst_offset = copy.dst_offset - download_staging.offset; 561 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
982 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 562 normalized_copies.push_back(second_copy);
983 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 563 }
564 runtime.PostCopyBarrier();
565 pending_downloads.emplace_back(std::move(normalized_copies));
566 async_buffers.emplace_back(download_staging);
567 } else {
568 committed_ranges.clear();
569 uncommitted_ranges.clear();
984 } 570 }
985 } else { 571 } else {
986 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 572 if constexpr (USE_MEMORY_MAPS) {
987 for (const auto& [copy, buffer_id] : downloads) { 573 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
988 Buffer& buffer = slot_buffers[buffer_id]; 574 runtime.PreCopyBarrier();
989 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 575 for (auto& [copy, buffer_id] : downloads) {
990 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 576 // Have in mind the staging buffer offset for the copy
991 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 577 copy.dst_offset += download_staging.offset;
578 const std::array copies{copy};
579 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
580 }
581 runtime.PostCopyBarrier();
582 runtime.Finish();
583 for (const auto& [copy, buffer_id] : downloads) {
584 const Buffer& buffer = slot_buffers[buffer_id];
585 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
586 // Undo the modified offset
587 const u64 dst_offset = copy.dst_offset - download_staging.offset;
588 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
589 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
590 }
591 } else {
592 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
593 for (const auto& [copy, buffer_id] : downloads) {
594 Buffer& buffer = slot_buffers[buffer_id];
595 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
596 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
597 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
598 }
992 } 599 }
993 } 600 }
994} 601}
995 602
996template <class P> 603template <class P>
997void BufferCache<P>::CommitAsyncFlushes() { 604void BufferCache<P>::CommitAsyncFlushes() {
998 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { 605 CommitAsyncFlushesHigh();
999 CommitAsyncFlushesHigh();
1000 } else {
1001 uncommitted_ranges.clear();
1002 committed_ranges.clear();
1003 }
1004} 606}
1005 607
1006template <class P> 608template <class P>
1007void BufferCache<P>::PopAsyncFlushes() {} 609void BufferCache<P>::PopAsyncFlushes() {
610 MICROPROFILE_SCOPE(GPU_DownloadMemory);
611 PopAsyncBuffers();
612}
1008 613
1009template <class P> 614template <class P>
1010bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 615void BufferCache<P>::PopAsyncBuffers() {
1011 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 616 if (async_buffers.empty()) {
1012 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 617 return;
1013 const BufferId image_id = page_table[page]; 618 }
1014 if (!image_id) { 619 if (!async_buffers.front().has_value()) {
1015 ++page; 620 async_buffers.pop_front();
1016 continue; 621 return;
1017 } 622 }
1018 Buffer& buffer = slot_buffers[image_id]; 623 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
1019 if (buffer.IsRegionGpuModified(addr, size)) { 624 auto& downloads = pending_downloads.front();
1020 return true; 625 auto& async_buffer = async_buffers.front();
626 u8* base = async_buffer->mapped_span.data();
627 const size_t base_offset = async_buffer->offset;
628 for (const auto& copy : downloads) {
629 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
630 const u64 dst_offset = copy.dst_offset - base_offset;
631 const u8* read_mapped_memory = base + dst_offset;
632 ForEachInOverlapCounter(
633 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
634 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
635 end - start);
636 if (count == 1) {
637 const IntervalType base_interval{start, end};
638 common_ranges.subtract(base_interval);
639 }
640 });
641 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
642 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
1021 } 643 }
1022 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 644 async_buffers_death_ring.emplace_back(*async_buffer);
1023 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 645 async_buffers.pop_front();
646 pending_downloads.pop_front();
1024 } 647 }
1025 return false; 648}
649
650template <class P>
651bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
652 bool is_dirty = false;
653 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
654 return is_dirty;
1026} 655}
1027 656
1028template <class P> 657template <class P>
1029bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 658bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1030 const VAddr end_addr = addr + size; 659 const VAddr end_addr = addr + size;
1031 const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); 660 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1032 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 661 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
1033 const BufferId buffer_id = page_table[page]; 662 const BufferId buffer_id = page_table[page];
1034 if (!buffer_id) { 663 if (!buffer_id) {
1035 ++page; 664 ++page;
@@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1041 if (buf_start_addr < end_addr && addr < buf_end_addr) { 670 if (buf_start_addr < end_addr && addr < buf_end_addr) {
1042 return true; 671 return true;
1043 } 672 }
1044 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 673 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1045 } 674 }
1046 return false; 675 return false;
1047} 676}
1048 677
1049template <class P> 678template <class P>
1050bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 679bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
1051 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 680 return memory_tracker.IsRegionCpuModified(addr, size);
1052 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
1053 const BufferId image_id = page_table[page];
1054 if (!image_id) {
1055 ++page;
1056 continue;
1057 }
1058 Buffer& buffer = slot_buffers[image_id];
1059 if (buffer.IsRegionCpuModified(addr, size)) {
1060 return true;
1061 }
1062 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
1063 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
1064 }
1065 return false;
1066} 681}
1067 682
1068template <class P> 683template <class P>
@@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
1072 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 687 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
1073 const u32 size = index_buffer.size; 688 const u32 size = index_buffer.size;
1074 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 689 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1075 if (!draw_state.inline_index_draw_indexes.empty()) { 690 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1076 if constexpr (USE_MEMORY_MAPS) { 691 if constexpr (USE_MEMORY_MAPS) {
1077 auto upload_staging = runtime.UploadStagingBuffer(size); 692 auto upload_staging = runtime.UploadStagingBuffer(size);
1078 std::array<BufferCopy, 1> copies{ 693 std::array<BufferCopy, 1> copies{
@@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
1155 TouchBuffer(buffer, binding.buffer_id); 770 TouchBuffer(buffer, binding.buffer_id);
1156 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 771 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
1157 size <= uniform_buffer_skip_cache_size && 772 size <= uniform_buffer_skip_cache_size &&
1158 !buffer.IsRegionGpuModified(cpu_addr, size); 773 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
1159 if (use_fast_buffer) { 774 if (use_fast_buffer) {
1160 if constexpr (IS_OPENGL) { 775 if constexpr (IS_OPENGL) {
1161 if (runtime.HasFastBufferSubData()) { 776 if (runtime.HasFastBufferSubData()) {
@@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() {
1378 // We have to check for the dirty flags and index count 993 // We have to check for the dirty flags and index count
1379 // The index count is currently changed without updating the dirty flags 994 // The index count is currently changed without updating the dirty flags
1380 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 995 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1381 const auto& index_array = draw_state.index_buffer; 996 const auto& index_buffer_ref = draw_state.index_buffer;
1382 auto& flags = maxwell3d->dirty.flags; 997 auto& flags = maxwell3d->dirty.flags;
1383 if (!flags[Dirty::IndexBuffer]) { 998 if (!flags[Dirty::IndexBuffer]) {
1384 return; 999 return;
1385 } 1000 }
1386 flags[Dirty::IndexBuffer] = false; 1001 flags[Dirty::IndexBuffer] = false;
1387 last_index_count = index_array.count; 1002 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1388 if (!draw_state.inline_index_draw_indexes.empty()) {
1389 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); 1003 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size());
1004 u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE);
1005 if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] {
1006 inline_buffer_id = CreateBuffer(0, buffer_size);
1007 }
1008 if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
1009 slot_buffers.erase(inline_buffer_id);
1010 inline_buffer_id = CreateBuffer(0, buffer_size);
1011 }
1390 index_buffer = Binding{ 1012 index_buffer = Binding{
1391 .cpu_addr = 0, 1013 .cpu_addr = 0,
1392 .size = inline_index_size, 1014 .size = inline_index_size,
1393 .buffer_id = CreateBuffer(0, inline_index_size), 1015 .buffer_id = inline_buffer_id,
1394 }; 1016 };
1395 return; 1017 return;
1396 } 1018 }
1397 const GPUVAddr gpu_addr_begin = index_array.StartAddress(); 1019
1398 const GPUVAddr gpu_addr_end = index_array.EndAddress(); 1020 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1021 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1399 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1022 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1400 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1023 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1401 const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); 1024 const u32 draw_size =
1025 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1402 const u32 size = std::min(address_size, draw_size); 1026 const u32 size = std::min(address_size, draw_size);
1403 if (size == 0 || !cpu_addr) { 1027 if (size == 0 || !cpu_addr) {
1404 index_buffer = NULL_BINDING; 1028 index_buffer = NULL_BINDING;
@@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1434 const GPUVAddr gpu_addr_begin = array.Address(); 1058 const GPUVAddr gpu_addr_begin = array.Address();
1435 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1059 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1436 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1060 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1437 u32 address_size = static_cast<u32>( 1061 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1438 std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); 1062 u32 size = address_size; // TODO: Analyze stride and number of vertices
1439 if (array.enable == 0 || address_size == 0 || !cpu_addr) { 1063 if (array.enable == 0 || size == 0 || !cpu_addr) {
1440 vertex_buffers[index] = NULL_BINDING; 1064 vertex_buffers[index] = NULL_BINDING;
1441 return; 1065 return;
1442 } 1066 }
1443 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1067 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1444 address_size = 1068 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1445 static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
1446 } 1069 }
1447 const u32 size = address_size; // TODO: Analyze stride and number of vertices
1448 vertex_buffers[index] = Binding{ 1070 vertex_buffers[index] = Binding{
1449 .cpu_addr = *cpu_addr, 1071 .cpu_addr = *cpu_addr,
1450 .size = size, 1072 .size = size,
@@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
1591 1213
1592template <class P> 1214template <class P>
1593void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1215void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
1594 Buffer& buffer = slot_buffers[buffer_id]; 1216 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
1595 buffer.MarkRegionAsGpuModified(cpu_addr, size); 1217
1218 if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
1219 SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
1220 }
1596 1221
1597 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1222 const IntervalType base_interval{cpu_addr, cpu_addr + size};
1598 common_ranges.add(base_interval); 1223 common_ranges.add(base_interval);
1599
1600 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
1601 if (!is_async) {
1602 return;
1603 }
1604 uncommitted_ranges.add(base_interval); 1224 uncommitted_ranges.add(base_interval);
1225 pending_ranges.add(base_interval);
1605} 1226}
1606 1227
1607template <class P> 1228template <class P>
@@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
1609 if (cpu_addr == 0) { 1230 if (cpu_addr == 0) {
1610 return NULL_BUFFER_ID; 1231 return NULL_BUFFER_ID;
1611 } 1232 }
1612 const u64 page = cpu_addr >> YUZU_PAGEBITS; 1233 const u64 page = cpu_addr >> CACHING_PAGEBITS;
1613 const BufferId buffer_id = page_table[page]; 1234 const BufferId buffer_id = page_table[page];
1614 if (!buffer_id) { 1235 if (!buffer_id) {
1615 return CreateBuffer(cpu_addr, size); 1236 return CreateBuffer(cpu_addr, size);
@@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1638 .has_stream_leap = has_stream_leap, 1259 .has_stream_leap = has_stream_leap,
1639 }; 1260 };
1640 } 1261 }
1641 for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); 1262 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1642 cpu_addr += YUZU_PAGESIZE) { 1263 cpu_addr += CACHING_PAGESIZE) {
1643 const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; 1264 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS];
1644 if (!overlap_id) { 1265 if (!overlap_id) {
1645 continue; 1266 continue;
1646 } 1267 }
@@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1666 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1287 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1667 has_stream_leap = true; 1288 has_stream_leap = true;
1668 if (expands_right) { 1289 if (expands_right) {
1669 begin -= YUZU_PAGESIZE * 256; 1290 begin -= CACHING_PAGESIZE * 256;
1670 cpu_addr = begin; 1291 cpu_addr = begin;
1671 } 1292 }
1672 if (expands_left) { 1293 if (expands_left) {
1673 end += YUZU_PAGESIZE * 256; 1294 end += CACHING_PAGESIZE * 256;
1674 } 1295 }
1675 } 1296 }
1676 } 1297 }
@@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1690 if (accumulate_stream_score) { 1311 if (accumulate_stream_score) {
1691 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); 1312 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
1692 } 1313 }
1693 std::vector<BufferCopy> copies; 1314 boost::container::small_vector<BufferCopy, 1> copies;
1694 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); 1315 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
1695 overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { 1316 copies.push_back(BufferCopy{
1696 copies.push_back(BufferCopy{ 1317 .src_offset = 0,
1697 .src_offset = begin, 1318 .dst_offset = dst_base_offset,
1698 .dst_offset = dst_base_offset + begin, 1319 .size = overlap.SizeBytes(),
1699 .size = range_size,
1700 });
1701 new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
1702 new_buffer.MarkRegionAsGpuModified(begin, range_size);
1703 }); 1320 });
1704 if (!copies.empty()) { 1321 runtime.CopyBuffer(new_buffer, overlap, copies);
1705 runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); 1322 DeleteBuffer(overlap_id, true);
1706 }
1707 DeleteBuffer(overlap_id);
1708} 1323}
1709 1324
1710template <class P> 1325template <class P>
1711BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1326BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1327 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE);
1328 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE);
1329 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr);
1712 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1330 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1713 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1331 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1714 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1332 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
@@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1718 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1336 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1719 } 1337 }
1720 Register(new_buffer_id); 1338 Register(new_buffer_id);
1721 TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); 1339 TouchBuffer(new_buffer, new_buffer_id);
1722 return new_buffer_id; 1340 return new_buffer_id;
1723} 1341}
1724 1342
@@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1746 } 1364 }
1747 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1365 const VAddr cpu_addr_begin = buffer.CpuAddr();
1748 const VAddr cpu_addr_end = cpu_addr_begin + size; 1366 const VAddr cpu_addr_end = cpu_addr_begin + size;
1749 const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; 1367 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE;
1750 const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); 1368 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE);
1751 for (u64 page = page_begin; page != page_end; ++page) { 1369 for (u64 page = page_begin; page != page_end; ++page) {
1752 if constexpr (insert) { 1370 if constexpr (insert) {
1753 page_table[page] = buffer_id; 1371 page_table[page] = buffer_id;
@@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1766 1384
1767template <class P> 1385template <class P>
1768bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1386bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1769 if (buffer.CpuAddr() == 0) {
1770 return true;
1771 }
1772 return SynchronizeBufferImpl(buffer, cpu_addr, size); 1387 return SynchronizeBufferImpl(buffer, cpu_addr, size);
1773} 1388}
1774 1389
@@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1777 boost::container::small_vector<BufferCopy, 4> copies; 1392 boost::container::small_vector<BufferCopy, 4> copies;
1778 u64 total_size_bytes = 0; 1393 u64 total_size_bytes = 0;
1779 u64 largest_copy = 0; 1394 u64 largest_copy = 0;
1780 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { 1395 VAddr buffer_start = buffer.CpuAddr();
1396 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1781 copies.push_back(BufferCopy{ 1397 copies.push_back(BufferCopy{
1782 .src_offset = total_size_bytes, 1398 .src_offset = total_size_bytes,
1783 .dst_offset = range_offset, 1399 .dst_offset = cpu_addr_out - buffer_start,
1784 .size = range_size, 1400 .size = range_size,
1785 }); 1401 });
1786 total_size_bytes += range_size; 1402 total_size_bytes += range_size;
@@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1795} 1411}
1796 1412
1797template <class P> 1413template <class P>
1414bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1415 boost::container::small_vector<BufferCopy, 4> copies;
1416 u64 total_size_bytes = 0;
1417 u64 largest_copy = 0;
1418 IntervalSet found_sets{};
1419 auto make_copies = [&] {
1420 for (auto& interval : found_sets) {
1421 const std::size_t sub_size = interval.upper() - interval.lower();
1422 const VAddr cpu_addr_ = interval.lower();
1423 copies.push_back(BufferCopy{
1424 .src_offset = total_size_bytes,
1425 .dst_offset = cpu_addr_ - buffer.CpuAddr(),
1426 .size = sub_size,
1427 });
1428 total_size_bytes += sub_size;
1429 largest_copy = std::max<u64>(largest_copy, sub_size);
1430 }
1431 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1432 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1433 };
1434 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1435 const VAddr base_adr = cpu_addr_out;
1436 const VAddr end_adr = base_adr + range_size;
1437 const IntervalType add_interval{base_adr, end_adr};
1438 found_sets.add(add_interval);
1439 });
1440 if (found_sets.empty()) {
1441 return true;
1442 }
1443 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1444 auto it = common_ranges.lower_bound(search_interval);
1445 auto it_end = common_ranges.upper_bound(search_interval);
1446 if (it == common_ranges.end()) {
1447 make_copies();
1448 return false;
1449 }
1450 while (it != it_end) {
1451 found_sets.subtract(*it);
1452 it++;
1453 }
1454 make_copies();
1455 return false;
1456}
1457
1458template <class P>
1798void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1459void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1799 std::span<BufferCopy> copies) { 1460 std::span<BufferCopy> copies) {
1800 if constexpr (USE_MEMORY_MAPS) { 1461 if constexpr (USE_MEMORY_MAPS) {
@@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg
1805} 1466}
1806 1467
1807template <class P> 1468template <class P>
1808void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, 1469void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1809 std::span<const BufferCopy> copies) { 1470 [[maybe_unused]] u64 largest_copy,
1810 std::span<u8> immediate_buffer; 1471 [[maybe_unused]] std::span<const BufferCopy> copies) {
1811 for (const BufferCopy& copy : copies) { 1472 if constexpr (!USE_MEMORY_MAPS) {
1812 std::span<const u8> upload_span; 1473 std::span<u8> immediate_buffer;
1813 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1474 for (const BufferCopy& copy : copies) {
1814 if (IsRangeGranular(cpu_addr, copy.size)) { 1475 std::span<const u8> upload_span;
1815 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); 1476 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1816 } else { 1477 if (IsRangeGranular(cpu_addr, copy.size)) {
1817 if (immediate_buffer.empty()) { 1478 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
1818 immediate_buffer = ImmediateBuffer(largest_copy); 1479 } else {
1480 if (immediate_buffer.empty()) {
1481 immediate_buffer = ImmediateBuffer(largest_copy);
1482 }
1483 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
1484 upload_span = immediate_buffer.subspan(0, copy.size);
1819 } 1485 }
1820 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 1486 buffer.ImmediateUpload(copy.dst_offset, upload_span);
1821 upload_span = immediate_buffer.subspan(0, copy.size);
1822 } 1487 }
1823 buffer.ImmediateUpload(copy.dst_offset, upload_span);
1824 } 1488 }
1825} 1489}
1826 1490
1827template <class P> 1491template <class P>
1828void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, 1492void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1829 std::span<BufferCopy> copies) { 1493 [[maybe_unused]] u64 total_size_bytes,
1830 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); 1494 [[maybe_unused]] std::span<BufferCopy> copies) {
1831 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1495 if constexpr (USE_MEMORY_MAPS) {
1832 for (BufferCopy& copy : copies) { 1496 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
1833 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1497 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1834 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1498 for (BufferCopy& copy : copies) {
1835 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1499 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1500 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1501 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
1836 1502
1837 // Apply the staging offset 1503 // Apply the staging offset
1838 copy.src_offset += upload_staging.offset; 1504 copy.src_offset += upload_staging.offset;
1505 }
1506 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1839 } 1507 }
1840 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1841} 1508}
1842 1509
1843template <class P> 1510template <class P>
@@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1847 if (!is_dirty) { 1514 if (!is_dirty) {
1848 return false; 1515 return false;
1849 } 1516 }
1850 if (!IsRegionGpuModified(dest_address, copy_size)) { 1517 VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
1518 VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
1519 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
1851 return false; 1520 return false;
1852 } 1521 }
1853 1522
@@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1886 boost::container::small_vector<BufferCopy, 1> copies; 1555 boost::container::small_vector<BufferCopy, 1> copies;
1887 u64 total_size_bytes = 0; 1556 u64 total_size_bytes = 0;
1888 u64 largest_copy = 0; 1557 u64 largest_copy = 0;
1889 buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { 1558 memory_tracker.ForEachDownloadRangeAndClear(
1890 const VAddr buffer_addr = buffer.CpuAddr(); 1559 cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1891 const auto add_download = [&](VAddr start, VAddr end) { 1560 const VAddr buffer_addr = buffer.CpuAddr();
1892 const u64 new_offset = start - buffer_addr; 1561 const auto add_download = [&](VAddr start, VAddr end) {
1893 const u64 new_size = end - start; 1562 const u64 new_offset = start - buffer_addr;
1894 copies.push_back(BufferCopy{ 1563 const u64 new_size = end - start;
1895 .src_offset = new_offset, 1564 copies.push_back(BufferCopy{
1896 .dst_offset = total_size_bytes, 1565 .src_offset = new_offset,
1897 .size = new_size, 1566 .dst_offset = total_size_bytes,
1898 }); 1567 .size = new_size,
1899 // Align up to avoid cache conflicts 1568 });
1900 constexpr u64 align = 256ULL; 1569 // Align up to avoid cache conflicts
1901 constexpr u64 mask = ~(align - 1ULL); 1570 constexpr u64 align = 64ULL;
1902 total_size_bytes += (new_size + align - 1) & mask; 1571 constexpr u64 mask = ~(align - 1ULL);
1903 largest_copy = std::max(largest_copy, new_size); 1572 total_size_bytes += (new_size + align - 1) & mask;
1904 }; 1573 largest_copy = std::max(largest_copy, new_size);
1905 1574 };
1906 const VAddr start_address = buffer_addr + range_offset; 1575
1907 const VAddr end_address = start_address + range_size; 1576 const VAddr start_address = cpu_addr_out;
1908 ForEachWrittenRange(start_address, range_size, add_download); 1577 const VAddr end_address = start_address + range_size;
1909 const IntervalType subtract_interval{start_address, end_address}; 1578 ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
1910 ClearDownload(subtract_interval); 1579 const IntervalType subtract_interval{start_address, end_address};
1911 common_ranges.subtract(subtract_interval); 1580 ClearDownload(subtract_interval);
1912 }); 1581 common_ranges.subtract(subtract_interval);
1582 });
1913 if (total_size_bytes == 0) { 1583 if (total_size_bytes == 0) {
1914 return; 1584 return;
1915 } 1585 }
@@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1943} 1613}
1944 1614
1945template <class P> 1615template <class P>
1946void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { 1616void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
1947 const auto scalar_replace = [buffer_id](Binding& binding) { 1617 const auto scalar_replace = [buffer_id](Binding& binding) {
1948 if (binding.buffer_id == buffer_id) { 1618 if (binding.buffer_id == buffer_id) {
1949 binding.buffer_id = BufferId{}; 1619 binding.buffer_id = BufferId{};
@@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
1962 std::erase(cached_write_buffer_ids, buffer_id); 1632 std::erase(cached_write_buffer_ids, buffer_id);
1963 1633
1964 // Mark the whole buffer as CPU written to stop tracking CPU writes 1634 // Mark the whole buffer as CPU written to stop tracking CPU writes
1965 Buffer& buffer = slot_buffers[buffer_id]; 1635 if (!do_not_mark) {
1966 buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); 1636 Buffer& buffer = slot_buffers[buffer_id];
1637 memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
1638 }
1967 1639
1968 Unregister(buffer_id); 1640 Unregister(buffer_id);
1969 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); 1641 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
@@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
2011 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1683 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
2012 return NULL_BINDING; 1684 return NULL_BINDING;
2013 } 1685 }
2014 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1686 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
2015 const Binding binding{ 1687 const Binding binding{
2016 .cpu_addr = *cpu_addr, 1688 .cpu_addr = *cpu_addr,
2017 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), 1689 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
new file mode 100644
index 000000000..656baa550
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -0,0 +1,580 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <array>
8#include <functional>
9#include <memory>
10#include <mutex>
11#include <numeric>
12#include <span>
13#include <unordered_map>
14#include <vector>
15
16#include <boost/container/small_vector.hpp>
17#define BOOST_NO_MT
18#include <boost/pool/detail/mutex.hpp>
19#undef BOOST_NO_MT
20#include <boost/icl/interval.hpp>
21#include <boost/icl/interval_base_set.hpp>
22#include <boost/icl/interval_set.hpp>
23#include <boost/icl/split_interval_map.hpp>
24#include <boost/pool/pool.hpp>
25#include <boost/pool/pool_alloc.hpp>
26#include <boost/pool/poolfwd.hpp>
27
28#include "common/common_types.h"
29#include "common/div_ceil.h"
30#include "common/literals.h"
31#include "common/lru_cache.h"
32#include "common/microprofile.h"
33#include "common/scope_exit.h"
34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h"
39#include "video_core/dirty_flags.h"
40#include "video_core/engines/draw_manager.h"
41#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h"
48
49namespace boost {
50template <typename T>
51class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
52}
53
54namespace VideoCommon {
55
56MICROPROFILE_DECLARE(GPU_PrepareBuffers);
57MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
58MICROPROFILE_DECLARE(GPU_DownloadMemory);
59
60using BufferId = SlotId;
61
62using VideoCore::Surface::PixelFormat;
63using namespace Common::Literals;
64
65constexpr u32 NUM_VERTEX_BUFFERS = 32;
66constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
67constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
68constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
69constexpr u32 NUM_STORAGE_BUFFERS = 16;
70constexpr u32 NUM_TEXTURE_BUFFERS = 16;
71constexpr u32 NUM_STAGES = 5;
72
73using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
74using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
75
76enum class ObtainBufferSynchronize : u32 {
77 NoSynchronize = 0,
78 FullSynchronize = 1,
79 SynchronizeNoDirty = 2,
80};
81
82enum class ObtainBufferOperation : u32 {
83 DoNothing = 0,
84 MarkAsWritten = 1,
85 DiscardWrite = 2,
86 MarkQuery = 3,
87};
88
89template <typename P>
90class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
91 // Page size for caching purposes.
92 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
93 static constexpr u32 CACHING_PAGEBITS = 16;
94 static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
95
96 static constexpr bool IS_OPENGL = P::IS_OPENGL;
97 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
98 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
99 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
100 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
101 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
102 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
103 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
104 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
105 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
106
107 static constexpr BufferId NULL_BUFFER_ID{0};
108
109 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
110 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
111 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
112
113 // Debug Flags.
114
115 static constexpr bool DISABLE_DOWNLOADS = true;
116
117 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
118
119 using Runtime = typename P::Runtime;
120 using Buffer = typename P::Buffer;
121 using Async_Buffer = typename P::Async_Buffer;
122 using MemoryTracker = typename P::MemoryTracker;
123
124 using IntervalCompare = std::less<VAddr>;
125 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>;
126 using IntervalAllocator = boost::fast_pool_allocator<VAddr>;
127 using IntervalSet = boost::icl::interval_set<VAddr>;
128 using IntervalType = typename IntervalSet::interval_type;
129
130 template <typename Type>
131 struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
132 // types
133 typedef counter_add_functor<Type> type;
134 typedef boost::icl::identity_based_inplace_combine<Type> base_type;
135
136 // public member functions
137 void operator()(Type& current, const Type& added) const {
138 current += added;
139 if (current < base_type::identity_element()) {
140 current = base_type::identity_element();
141 }
142 }
143
144 // public static functions
145 static void version(Type&){};
146 };
147
148 using OverlapCombine = counter_add_functor<int>;
149 using OverlapSection = boost::icl::inter_section<int>;
150 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
151
152 struct Empty {};
153
154 struct OverlapResult {
155 std::vector<BufferId> ids;
156 VAddr begin;
157 VAddr end;
158 bool has_stream_leap = false;
159 };
160
161 struct Binding {
162 VAddr cpu_addr{};
163 u32 size{};
164 BufferId buffer_id;
165 };
166
167 struct TextureBufferBinding : Binding {
168 PixelFormat format;
169 };
170
171 static constexpr Binding NULL_BINDING{
172 .cpu_addr = 0,
173 .size = 0,
174 .buffer_id = NULL_BUFFER_ID,
175 };
176
177public:
178 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
179
180 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
181 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
182
183 void TickFrame();
184
185 void WriteMemory(VAddr cpu_addr, u64 size);
186
187 void CachedWriteMemory(VAddr cpu_addr, u64 size);
188
189 void DownloadMemory(VAddr cpu_addr, u64 size);
190
191 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
192
193 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
194
195 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
196
197 void UpdateGraphicsBuffers(bool is_indexed);
198
199 void UpdateComputeBuffers();
200
201 void BindHostGeometryBuffers(bool is_indexed);
202
203 void BindHostStageBuffers(size_t stage);
204
205 void BindHostComputeBuffers();
206
207 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
208 const UniformBufferSizes* sizes);
209
210 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
211
212 void UnbindGraphicsStorageBuffers(size_t stage);
213
214 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
215 bool is_written);
216
217 void UnbindGraphicsTextureBuffers(size_t stage);
218
219 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
220 PixelFormat format, bool is_written, bool is_image);
221
222 void UnbindComputeStorageBuffers();
223
224 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
225 bool is_written);
226
227 void UnbindComputeTextureBuffers();
228
229 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
230 bool is_written, bool is_image);
231
232 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
233 ObtainBufferSynchronize sync_info,
234 ObtainBufferOperation post_op);
235 void FlushCachedWrites();
236
237 /// Return true when there are uncommitted buffers to be downloaded
238 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
239
240 void AccumulateFlushes();
241
242 /// Return true when the caller should wait for async downloads
243 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
244
245 /// Commit asynchronous downloads
246 void CommitAsyncFlushes();
247 void CommitAsyncFlushesHigh();
248
249 /// Pop asynchronous downloads
250 void PopAsyncFlushes();
251 void PopAsyncBuffers();
252
253 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
254
255 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
256
257 /// Return true when a CPU region is modified from the GPU
258 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
259
260 /// Return true when a region is registered on the cache
261 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
262
263 /// Return true when a CPU region is modified from the CPU
264 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
265
/// Records the indirect-draw parameters used by subsequent draws. The pointed-to
/// struct is owned by the caller and only referenced here.
/// NOTE(review): presumably reset (e.g. to nullptr) once the draw completes — confirm at call sites.
266 void SetDrawIndirect(
267 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
268 current_draw_indirect = current_draw_indirect_;
269 }
270
271 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
272
273 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
274
275 std::recursive_mutex mutex;
276 Runtime& runtime;
277
278private:
279 template <typename Func>
280 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
281 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
282 const int disabled_bits = std::countr_zero(enabled_mask);
283 index += disabled_bits;
284 enabled_mask >>= disabled_bits;
285 func(index);
286 }
287 }
288
289 template <typename Func>
290 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
291 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE);
292 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) {
293 const BufferId buffer_id = page_table[page];
294 if (!buffer_id) {
295 ++page;
296 continue;
297 }
298 Buffer& buffer = slot_buffers[buffer_id];
299 func(buffer_id, buffer);
300
301 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
302 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
303 }
304 }
305
/// Invokes func(begin, end) for every interval stored in current_range that overlaps the
/// query window [cpu_addr, cpu_addr + size); each reported pair is clamped to that window.
/// Relies on Boost.ICL lower_bound/upper_bound to visit only overlapping intervals.
306 template <typename Func>
307 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) {
308 const VAddr start_address = cpu_addr;
309 const VAddr end_address = start_address + size;
310 const IntervalType search_interval{start_address, end_address};
311 auto it = current_range.lower_bound(search_interval);
312 if (it == current_range.end()) {
313 return;
314 }
315 auto end_it = current_range.upper_bound(search_interval);
316 for (; it != end_it; it++) {
317 VAddr inter_addr_end = it->upper();
318 VAddr inter_addr = it->lower();
// Clamp the stored interval to the queried window before reporting it
319 if (inter_addr_end > end_address) {
320 inter_addr_end = end_address;
321 }
322 if (inter_addr < start_address) {
323 inter_addr = start_address;
324 }
325 func(inter_addr, inter_addr_end);
326 }
327 }
328
/// Invokes func(begin, end, count) for every counted interval of current_range that overlaps
/// [cpu_addr, cpu_addr + size). Like ForEachInRangeSet, but the map's mapped value (the
/// overlap counter) is forwarded as the third argument. Reported bounds are clamped to the query.
329 template <typename Func>
330 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
331 Func&& func) {
332 const VAddr start_address = cpu_addr;
333 const VAddr end_address = start_address + size;
334 const IntervalType search_interval{start_address, end_address};
335 auto it = current_range.lower_bound(search_interval);
336 if (it == current_range.end()) {
337 return;
338 }
339 auto end_it = current_range.upper_bound(search_interval);
340 for (; it != end_it; it++) {
// Interval maps store (interval, value) pairs; 'inter' is the key interval
341 auto& inter = it->first;
342 VAddr inter_addr_end = inter.upper();
343 VAddr inter_addr = inter.lower();
344 if (inter_addr_end > end_address) {
345 inter_addr_end = end_address;
346 }
347 if (inter_addr < start_address) {
348 inter_addr = start_address;
349 }
350 func(inter_addr, inter_addr_end, it->second);
351 }
352 }
353
/// Combines subtract_value into every counter overlapped by search_interval (presumably a
/// negative value — TODO confirm at call sites), then erases every entry whose counter
/// dropped to zero or below. The scan restarts after each erase because erasing
/// invalidates Boost.ICL iterators.
354 void RemoveEachInOverlapCounter(OverlapCounter& current_range,
355 const IntervalType search_interval, int subtract_value) {
356 bool any_removals = false;
// icl::interval_map::add() aggregates the value into all overlapped entries
357 current_range.add(std::make_pair(search_interval, subtract_value));
358 do {
359 any_removals = false;
360 auto it = current_range.lower_bound(search_interval);
361 if (it == current_range.end()) {
362 return;
363 }
364 auto end_it = current_range.upper_bound(search_interval);
365 for (; it != end_it; it++) {
366 if (it->second <= 0) {
// Erase invalidates the iterator range: flag a removal, break out, and rescan
367 any_removals = true;
368 current_range.erase(it);
369 break;
370 }
371 }
372 } while (any_removals);
373 }
374
375 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
376 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
377 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
378 }
379
380 void RunGarbageCollector();
381
382 void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size);
383
384 void BindHostIndexBuffer();
385
386 void BindHostVertexBuffers();
387
388 void BindHostDrawIndirectBuffers();
389
390 void BindHostGraphicsUniformBuffers(size_t stage);
391
392 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
393
394 void BindHostGraphicsStorageBuffers(size_t stage);
395
396 void BindHostGraphicsTextureBuffers(size_t stage);
397
398 void BindHostTransformFeedbackBuffers();
399
400 void BindHostComputeUniformBuffers();
401
402 void BindHostComputeStorageBuffers();
403
404 void BindHostComputeTextureBuffers();
405
406 void DoUpdateGraphicsBuffers(bool is_indexed);
407
408 void DoUpdateComputeBuffers();
409
410 void UpdateIndexBuffer();
411
412 void UpdateVertexBuffers();
413
414 void UpdateVertexBuffer(u32 index);
415
416 void UpdateDrawIndirect();
417
418 void UpdateUniformBuffers(size_t stage);
419
420 void UpdateStorageBuffers(size_t stage);
421
422 void UpdateTextureBuffers(size_t stage);
423
424 void UpdateTransformFeedbackBuffers();
425
426 void UpdateTransformFeedbackBuffer(u32 index);
427
428 void UpdateComputeUniformBuffers();
429
430 void UpdateComputeStorageBuffers();
431
432 void UpdateComputeTextureBuffers();
433
434 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
435
436 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
437
438 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
439
440 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
441
442 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
443
444 void Register(BufferId buffer_id);
445
446 void Unregister(BufferId buffer_id);
447
448 template <bool insert>
449 void ChangeRegister(BufferId buffer_id);
450
451 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
452
453 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
454
455 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
456
457 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
458
459 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
460 std::span<BufferCopy> copies);
461
462 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
463 std::span<const BufferCopy> copies);
464
465 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
466
467 void DownloadBufferMemory(Buffer& buffer_id);
468
469 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
470
471 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
472
473 void NotifyBufferDeletion();
474
475 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
476 bool is_written) const;
477
478 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
479 PixelFormat format);
480
481 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
482
483 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
484
485 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
486
487 void ClearDownload(IntervalType subtract_interval);
488
489 VideoCore::RasterizerInterface& rasterizer;
490 Core::Memory::Memory& cpu_memory;
491
492 SlotVector<Buffer> slot_buffers;
493 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
494
495 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
496
497 u32 last_index_count = 0;
498
499 Binding index_buffer;
500 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
501 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
502 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
503 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
504 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
505 Binding count_buffer_binding;
506 Binding indirect_buffer_binding;
507
508 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
509 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
510 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
511
512 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
513 u32 enabled_compute_uniform_buffer_mask = 0;
514
515 const UniformBufferSizes* uniform_buffer_sizes{};
516 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
517
518 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
519 std::array<u32, NUM_STAGES> written_storage_buffers{};
520 u32 enabled_compute_storage_buffers = 0;
521 u32 written_compute_storage_buffers = 0;
522
523 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
524 std::array<u32, NUM_STAGES> written_texture_buffers{};
525 std::array<u32, NUM_STAGES> image_texture_buffers{};
526 u32 enabled_compute_texture_buffers = 0;
527 u32 written_compute_texture_buffers = 0;
528 u32 image_compute_texture_buffers = 0;
529
530 std::array<u32, 16> uniform_cache_hits{};
531 std::array<u32, 16> uniform_cache_shots{};
532
533 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
534
535 bool has_deleted_buffers = false;
536
537 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
538 dirty_uniform_buffers{};
539 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
540 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
541 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
542 uniform_buffer_binding_sizes{};
543
544 std::vector<BufferId> cached_write_buffer_ids;
545
546 MemoryTracker memory_tracker;
547 IntervalSet uncommitted_ranges;
548 IntervalSet common_ranges;
549 IntervalSet cached_ranges;
550 IntervalSet pending_ranges;
551 std::deque<IntervalSet> committed_ranges;
552
553 // Async Buffers
554 OverlapCounter async_downloads;
555 std::deque<std::optional<Async_Buffer>> async_buffers;
556 std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
557 std::optional<Async_Buffer> current_buffer;
558
559 std::deque<Async_Buffer> async_buffers_death_ring;
560
561 size_t immediate_buffer_capacity = 0;
562 Common::ScratchBuffer<u8> immediate_buffer_alloc;
563
564 struct LRUItemParams {
565 using ObjectType = BufferId;
566 using TickType = u64;
567 };
568 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
569 u64 frame_tick = 0;
570 u64 total_used_memory = 0;
571 u64 minimum_memory = 0;
572 u64 critical_memory = 0;
573 BufferId inline_buffer_id;
574
575 bool active_async_buffers = false;
576
577 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
578};
579
580} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
new file mode 100644
index 000000000..dc4ebfcaa
--- /dev/null
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -0,0 +1,273 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <deque>
9#include <limits>
10#include <type_traits>
11#include <unordered_set>
12#include <utility>
13
14#include "common/alignment.h"
15#include "common/common_types.h"
16#include "video_core/buffer_cache/word_manager.h"
17
18namespace VideoCommon {
19
20template <class RasterizerInterface>
21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39;
23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>;
30
31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {}
33 ~MemoryTrackerBase() = default;
34
35 /// Returns the inclusive CPU modified range in a begin end pair
36 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
37 u64 query_size) noexcept {
38 return IteratePairs<true>(
39 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
40 return manager->template ModifiedRegion<Type::CPU>(offset, size);
41 });
42 }
43
44 /// Returns the inclusive GPU modified range in a begin end pair
45 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
46 u64 query_size) noexcept {
47 return IteratePairs<false>(
48 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
49 return manager->template ModifiedRegion<Type::GPU>(offset, size);
50 });
51 }
52
53 /// Returns true if a region has been modified from the CPU
54 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
55 return IteratePages<true>(
56 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
57 return manager->template IsRegionModified<Type::CPU>(offset, size);
58 });
59 }
60
61 /// Returns true if a region has been modified from the GPU
62 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
63 return IteratePages<false>(
64 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
65 return manager->template IsRegionModified<Type::GPU>(offset, size);
66 });
67 }
68
69 /// Mark region as CPU modified, notifying the rasterizer about this change
70 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
71 IteratePages<true>(dirty_cpu_addr, query_size,
72 [](Manager* manager, u64 offset, size_t size) {
73 manager->template ChangeRegionState<Type::CPU, true>(
74 manager->GetCpuAddr() + offset, size);
75 });
76 }
77
78 /// Unmark region as CPU modified, notifying the rasterizer about this change
79 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
80 IteratePages<true>(dirty_cpu_addr, query_size,
81 [](Manager* manager, u64 offset, size_t size) {
82 manager->template ChangeRegionState<Type::CPU, false>(
83 manager->GetCpuAddr() + offset, size);
84 });
85 }
86
87 /// Mark region as modified from the host GPU
88 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
89 IteratePages<true>(dirty_cpu_addr, query_size,
90 [](Manager* manager, u64 offset, size_t size) {
91 manager->template ChangeRegionState<Type::GPU, true>(
92 manager->GetCpuAddr() + offset, size);
93 });
94 }
95
96 /// Unmark region as modified from the host GPU
97 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
98 IteratePages<true>(dirty_cpu_addr, query_size,
99 [](Manager* manager, u64 offset, size_t size) {
100 manager->template ChangeRegionState<Type::GPU, false>(
101 manager->GetCpuAddr() + offset, size);
102 });
103 }
104
105 /// Mark region as modified from the CPU
106 /// but don't mark it as modified until FlusHCachedWrites is called.
107 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) {
108 IteratePages<true>(
109 dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) {
110 const VAddr cpu_address = manager->GetCpuAddr() + offset;
111 manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size);
112 cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS));
113 });
114 }
115
116 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
117 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
118 IteratePages<false>(query_cpu_addr, query_size,
119 [](Manager* manager, [[maybe_unused]] u64 offset,
120 [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); });
121 }
122
123 void FlushCachedWrites() noexcept {
124 for (auto id : cached_pages) {
125 top_tier[id]->FlushCachedWrites();
126 }
127 cached_pages.clear();
128 }
129
130 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
131 template <typename Func>
132 void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
133 IteratePages<true>(query_cpu_range, query_size,
134 [&func](Manager* manager, u64 offset, size_t size) {
135 manager->template ForEachModifiedRange<Type::CPU, true>(
136 manager->GetCpuAddr() + offset, size, func);
137 });
138 }
139
140 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
141 template <typename Func>
142 void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) {
143 IteratePages<false>(query_cpu_range, query_size,
144 [&func, clear](Manager* manager, u64 offset, size_t size) {
145 if (clear) {
146 manager->template ForEachModifiedRange<Type::GPU, true>(
147 manager->GetCpuAddr() + offset, size, func);
148 } else {
149 manager->template ForEachModifiedRange<Type::GPU, false>(
150 manager->GetCpuAddr() + offset, size, func);
151 }
152 });
153 }
154
155 template <typename Func>
156 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) {
157 IteratePages<false>(query_cpu_range, query_size,
158 [&func](Manager* manager, u64 offset, size_t size) {
159 manager->template ForEachModifiedRange<Type::GPU, true>(
160 manager->GetCpuAddr() + offset, size, func);
161 });
162 }
163
164private:
165 template <bool create_region_on_fail, typename Func>
166 bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
167 using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
168 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
169 std::size_t remaining_size{size};
170 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
171 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
172 while (remaining_size > 0) {
173 const std::size_t copy_amount{
174 std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
175 auto* manager{top_tier[page_index]};
176 if (manager) {
177 if constexpr (BOOL_BREAK) {
178 if (func(manager, page_offset, copy_amount)) {
179 return true;
180 }
181 } else {
182 func(manager, page_offset, copy_amount);
183 }
184 } else if constexpr (create_region_on_fail) {
185 CreateRegion(page_index);
186 manager = top_tier[page_index];
187 if constexpr (BOOL_BREAK) {
188 if (func(manager, page_offset, copy_amount)) {
189 return true;
190 }
191 } else {
192 func(manager, page_offset, copy_amount);
193 }
194 }
195 page_index++;
196 page_offset = 0;
197 remaining_size -= copy_amount;
198 }
199 return false;
200 }
201
202 template <bool create_region_on_fail, typename Func>
203 std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) {
204 std::size_t remaining_size{size};
205 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
206 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
207 u64 begin = std::numeric_limits<u64>::max();
208 u64 end = 0;
209 while (remaining_size > 0) {
210 const std::size_t copy_amount{
211 std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
212 auto* manager{top_tier[page_index]};
213 const auto execute = [&] {
214 auto [new_begin, new_end] = func(manager, page_offset, copy_amount);
215 if (new_begin != 0 || new_end != 0) {
216 const u64 base_address = page_index << HIGHER_PAGE_BITS;
217 begin = std::min(new_begin + base_address, begin);
218 end = std::max(new_end + base_address, end);
219 }
220 };
221 if (manager) {
222 execute();
223 } else if constexpr (create_region_on_fail) {
224 CreateRegion(page_index);
225 manager = top_tier[page_index];
226 execute();
227 }
228 page_index++;
229 page_offset = 0;
230 remaining_size -= copy_amount;
231 }
232 if (begin < end) {
233 return std::make_pair(begin, end);
234 } else {
235 return std::make_pair(0ULL, 0ULL);
236 }
237 }
238
239 void CreateRegion(std::size_t page_index) {
240 const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
241 top_tier[page_index] = GetNewManager(base_cpu_addr);
242 }
243
244 Manager* GetNewManager(VAddr base_cpu_addess) {
245 const auto on_return = [&] {
246 auto* new_manager = free_managers.front();
247 new_manager->SetCpuAddress(base_cpu_addess);
248 free_managers.pop_front();
249 return new_manager;
250 };
251 if (!free_managers.empty()) {
252 return on_return();
253 }
254 manager_pool.emplace_back();
255 auto& last_pool = manager_pool.back();
256 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
257 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE);
258 free_managers.push_back(&last_pool[i]);
259 }
260 return on_return();
261 }
262
263 std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
264 std::deque<Manager*> free_managers;
265
266 std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
267
268 std::unordered_set<u32> cached_pages;
269
270 RasterizerInterface* rasterizer = nullptr;
271};
272
273} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
new file mode 100644
index 000000000..a42455045
--- /dev/null
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -0,0 +1,462 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <limits>
9#include <span>
10#include <utility>
11
12#include "common/alignment.h"
13#include "common/common_funcs.h"
14#include "common/common_types.h"
15#include "common/div_ceil.h"
16#include "core/memory.h"
17
18namespace VideoCommon {
19
// One tracking bit per guest page; a u64 word therefore covers 64 pages
20constexpr u64 PAGES_PER_WORD = 64;
21constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
22constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
23
/// Selects which page-state bitmap an operation reads or mutates
24enum class Type {
25 CPU,
26 GPU,
27 CachedCPU,
28 Untracked,
29};
30
31/// Vector tracking modified pages tightly packed with small vector optimization
32template <size_t stack_words = 1>
33struct WordsArray {
34 /// Returns the pointer to the words state
35 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
36 return is_short ? stack.data() : heap;
37 }
38
39 /// Returns the pointer to the words state
40 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
41 return is_short ? stack.data() : heap;
42 }
43
44 std::array<u64, stack_words> stack{}; ///< Small buffers storage
45 u64* heap; ///< Not-small buffers pointer to the storage
46};
47
48template <size_t stack_words = 1>
49struct Words {
50 explicit Words() = default;
51 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
52 num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
53 if (IsShort()) {
54 cpu.stack.fill(~u64{0});
55 gpu.stack.fill(0);
56 cached_cpu.stack.fill(0);
57 untracked.stack.fill(~u64{0});
58 } else {
59 // Share allocation between CPU and GPU pages and set their default values
60 u64* const alloc = new u64[num_words * 4];
61 cpu.heap = alloc;
62 gpu.heap = alloc + num_words;
63 cached_cpu.heap = alloc + num_words * 2;
64 untracked.heap = alloc + num_words * 3;
65 std::fill_n(cpu.heap, num_words, ~u64{0});
66 std::fill_n(gpu.heap, num_words, 0);
67 std::fill_n(cached_cpu.heap, num_words, 0);
68 std::fill_n(untracked.heap, num_words, ~u64{0});
69 }
70 // Clean up tailing bits
71 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
72 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
73 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
74 const u64 last_word = (~u64{0} << shift) >> shift;
75 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
76 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
77 }
78
79 ~Words() {
80 Release();
81 }
82
83 Words& operator=(Words&& rhs) noexcept {
84 Release();
85 size_bytes = rhs.size_bytes;
86 num_words = rhs.num_words;
87 cpu = rhs.cpu;
88 gpu = rhs.gpu;
89 cached_cpu = rhs.cached_cpu;
90 untracked = rhs.untracked;
91 rhs.cpu.heap = nullptr;
92 return *this;
93 }
94
95 Words(Words&& rhs) noexcept
96 : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
97 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
98 rhs.cpu.heap = nullptr;
99 }
100
101 Words& operator=(const Words&) = delete;
102 Words(const Words&) = delete;
103
104 /// Returns true when the buffer fits in the small vector optimization
105 [[nodiscard]] bool IsShort() const noexcept {
106 return num_words <= stack_words;
107 }
108
109 /// Returns the number of words of the buffer
110 [[nodiscard]] size_t NumWords() const noexcept {
111 return num_words;
112 }
113
114 /// Release buffer resources
115 void Release() {
116 if (!IsShort()) {
117 // CPU written words is the base for the heap allocation
118 delete[] cpu.heap;
119 }
120 }
121
122 template <Type type>
123 std::span<u64> Span() noexcept {
124 if constexpr (type == Type::CPU) {
125 return std::span<u64>(cpu.Pointer(IsShort()), num_words);
126 } else if constexpr (type == Type::GPU) {
127 return std::span<u64>(gpu.Pointer(IsShort()), num_words);
128 } else if constexpr (type == Type::CachedCPU) {
129 return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words);
130 } else if constexpr (type == Type::Untracked) {
131 return std::span<u64>(untracked.Pointer(IsShort()), num_words);
132 }
133 }
134
135 template <Type type>
136 std::span<const u64> Span() const noexcept {
137 if constexpr (type == Type::CPU) {
138 return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
139 } else if constexpr (type == Type::GPU) {
140 return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
141 } else if constexpr (type == Type::CachedCPU) {
142 return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words);
143 } else if constexpr (type == Type::Untracked) {
144 return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
145 }
146 }
147
148 u64 size_bytes = 0;
149 size_t num_words = 0;
150 WordsArray<stack_words> cpu;
151 WordsArray<stack_words> gpu;
152 WordsArray<stack_words> cached_cpu;
153 WordsArray<stack_words> untracked;
154};
155
156template <class RasterizerInterface, size_t stack_words = 1>
157class WordManager {
158public:
/// Constructs a manager tracking size_bytes starting at guest address cpu_addr_.
159 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes)
160 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {}
161
/// Default-constructed managers track nothing until rebound (see MemoryTrackerBase pooling)
162 explicit WordManager() = default;
163
/// Rebinds this manager to a new base guest address; existing word state is kept
164 void SetCpuAddress(VAddr new_cpu_addr) {
165 cpu_addr = new_cpu_addr;
166 }
167
/// Returns the base guest address this manager tracks
168 VAddr GetCpuAddr() const {
169 return cpu_addr;
170 }
171
172 static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
173 constexpr size_t number_bits = sizeof(u64) * 8;
174 const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
175 u64 bits = (word >> page_start) << page_start;
176 bits = (bits << limit_page_end) >> limit_page_end;
177 return bits;
178 }
179
180 static std::pair<size_t, size_t> GetWordPage(VAddr address) {
181 const size_t converted_address = static_cast<size_t>(address);
182 const size_t word_number = converted_address / BYTES_PER_WORD;
183 const size_t amount_pages = converted_address % BYTES_PER_WORD;
184 return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
185 }
186
/// Iterates the tracking words overlapped by the byte range [offset, offset + size),
/// invoking func(word_index, mask) where mask selects the pages of that word inside the
/// range. If func returns bool, returning true stops the iteration early.
187 template <typename Func>
188 void IterateWords(size_t offset, size_t size, Func&& func) const {
189 using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
190 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
// Clamp to zero: offsets are computed as (address - cpu_addr) and may wrap negative
191 const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
192 const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
193 if (start >= SizeBytes() || end <= start) {
194 return;
195 }
// Convert the byte range into (word, page-within-word) coordinates; end is rounded up
196 auto [start_word, start_page] = GetWordPage(start);
197 auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
198 const size_t num_words = NumWords();
199 start_word = std::min(start_word, num_words);
200 end_word = std::min(end_word, num_words);
201 const size_t diff = end_word - start_word;
202 end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
203 end_word = std::min(end_word, num_words);
// Express end_page relative to start_word so the per-word mask math below stays simple
204 end_page += diff * PAGES_PER_WORD;
205 constexpr u64 base_mask{~0ULL};
206 for (size_t word_index = start_word; word_index < end_word; word_index++) {
207 const u64 mask = ExtractBits(base_mask, start_page, end_page);
// Only the first word can start mid-way; later words always begin at page 0
208 start_page = 0;
209 end_page -= PAGES_PER_WORD;
210 if constexpr (BOOL_BREAK) {
211 if (func(word_index, mask)) {
212 return;
213 }
214 } else {
215 func(word_index, mask);
216 }
217 }
218 }
219
/// Walks every contiguous run of set bits in 'mask', invoking func(page_offset, run_length).
220 template <typename Func>
221 void IteratePages(u64 mask, Func&& func) const {
222 size_t offset = 0;
223 while (mask != 0) {
// Skip the cleared pages in front of the next run
224 const size_t empty_bits = std::countr_zero(mask);
225 offset += empty_bits;
226 mask = mask >> empty_bits;
227
// Measure the run of consecutive set pages and report it
228 const size_t continuous_bits = std::countr_one(mask);
229 func(offset, continuous_bits);
// Guard: shifting a u64 by 64 is undefined, so a full-word run ends the loop explicitly
230 mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
231 offset += continuous_bits;
232 }
233 }
234
235 /**
236 * Change the state of a range of pages
237 *
238 * @param dirty_addr Base address to mark or unmark as modified
239 * @param size Size in bytes to mark or unmark as modified
240 */
241 template <Type type, bool enable>
242 void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
243 std::span<u64> state_words = words.template Span<type>();
244 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
245 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
246 IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
// CPU-side changes are reported to the rasterizer; NotifyRasterizer is defined later in
// this class (outside this view) — TODO confirm its exact tracking semantics
247 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
248 NotifyRasterizer<!enable>(index, untracked_words[index], mask);
249 }
250 if constexpr (enable) {
251 state_words[index] |= mask;
// CPU-side marking also flags the pages as untracked
252 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
253 untracked_words[index] |= mask;
254 }
// A direct CPU write supersedes any pending cached write on the same pages
255 if constexpr (type == Type::CPU) {
256 cached_words[index] &= ~mask;
257 }
258 } else {
259 if constexpr (type == Type::CPU) {
260 const u64 word = state_words[index] & mask;
261 cached_words[index] &= ~word;
262 }
263 state_words[index] &= ~mask;
264 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
265 untracked_words[index] &= ~mask;
266 }
267 }
268 });
269 }
270
271 /**
272 * Loop over each page in the given range, turn off those bits and notify the rasterizer if
273 * needed. Call the given function on each turned off range.
274 *
275 * @param query_cpu_range Base CPU address to loop over
276 * @param size Size in bytes of the CPU range to loop over
277 * @param func Function to call for each turned off region
278 */
// NOTE(review): bits are only cleared (and the rasterizer only notified) when the 'clear'
// template parameter is true; with clear == false this is a read-only enumeration.
279 template <Type type, bool clear, typename Func>
280 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
281 static_assert(type != Type::Untracked);
282
283 std::span<u64> state_words = words.template Span<type>();
284 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
285 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
286 const size_t offset = query_cpu_range - cpu_addr;
// Adjacent modified runs are coalesced into [pending_offset, pending_pointer) in pages
287 bool pending = false;
288 size_t pending_offset{};
289 size_t pending_pointer{};
// Reports the coalesced run to func, converted to guest address and byte size
290 const auto release = [&]() {
291 func(cpu_addr + pending_offset * BYTES_PER_PAGE,
292 (pending_pointer - pending_offset) * BYTES_PER_PAGE);
293 };
294 IterateWords(offset, size, [&](size_t index, u64 mask) {
295 const u64 word = state_words[index] & mask;
296 if constexpr (clear) {
297 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
298 NotifyRasterizer<true>(index, untracked_words[index], mask);
299 }
// Clear the visited bits, keeping the untracked/cached bitmaps in sync
300 state_words[index] &= ~mask;
301 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
302 untracked_words[index] &= ~mask;
303 }
304 if constexpr (type == Type::CPU) {
305 cached_words[index] &= ~word;
306 }
307 }
308 const size_t base_offset = index * PAGES_PER_WORD;
309 IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
310 const auto reset = [&]() {
311 pending_offset = base_offset + pages_offset;
312 pending_pointer = base_offset + pages_offset + pages_size;
313 };
314 if (!pending) {
315 reset();
316 pending = true;
317 return;
318 }
// Runs that touch the end of the pending one are merged instead of reported
319 if (pending_pointer == base_offset + pages_offset) {
320 pending_pointer += pages_size;
321 return;
322 }
323 release();
324 reset();
325 });
326 });
// Flush the final coalesced run, if any
327 if (pending) {
328 release();
329 }
330 }
331
332 /**
333 * Returns true when a region has been modified
334 *
335 * @param offset Offset in bytes from the start of the buffer
336 * @param size Size in bytes of the region to query for modifications
337 */
338 template <Type type>
339 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
340 static_assert(type != Type::Untracked);
341
342 const std::span<const u64> state_words = words.template Span<type>();
343 bool result = false;
344 IterateWords(offset, size, [&](size_t index, u64 mask) {
345 const u64 word = state_words[index] & mask;
346 if (word != 0) {
347 result = true;
348 return true;
349 }
350 return false;
351 });
352 return result;
353 }
354
355 /**
356 * Returns a begin end pair with the inclusive modified region
357 *
358 * @param offset Offset in bytes from the start of the buffer
359 * @param size Size in bytes of the region to query for modifications
360 */
361 template <Type type>
362 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
363 static_assert(type != Type::Untracked);
364 const std::span<const u64> state_words = words.template Span<type>();
365 u64 begin = std::numeric_limits<u64>::max();
366 u64 end = 0;
367 IterateWords(offset, size, [&](size_t index, u64 mask) {
368 const u64 word = state_words[index] & mask;
369 if (word == 0) {
370 return;
371 }
372 const u64 local_page_begin = std::countr_zero(word);
373 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
374 const u64 page_index = index * PAGES_PER_WORD;
375 begin = std::min(begin, page_index + local_page_begin);
376 end = page_index + local_page_end;
377 });
378 static constexpr std::pair<u64, u64> EMPTY{0, 0};
379 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
380 }
381
382 /// Returns the number of words of the manager
383 [[nodiscard]] size_t NumWords() const noexcept {
384 return words.NumWords();
385 }
386
387 /// Returns the size in bytes of the manager
388 [[nodiscard]] u64 SizeBytes() const noexcept {
389 return words.size_bytes;
390 }
391
392 /// Returns true when the buffer fits in the small vector optimization
393 [[nodiscard]] bool IsShort() const noexcept {
394 return words.IsShort();
395 }
396
397 void FlushCachedWrites() noexcept {
398 const u64 num_words = NumWords();
399 u64* const cached_words = Array<Type::CachedCPU>();
400 u64* const untracked_words = Array<Type::Untracked>();
401 u64* const cpu_words = Array<Type::CPU>();
402 for (u64 word_index = 0; word_index < num_words; ++word_index) {
403 const u64 cached_bits = cached_words[word_index];
404 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
405 untracked_words[word_index] |= cached_bits;
406 cpu_words[word_index] |= cached_bits;
407 cached_words[word_index] = 0;
408 }
409 }
410
411private:
412 template <Type type>
413 u64* Array() noexcept {
414 if constexpr (type == Type::CPU) {
415 return words.cpu.Pointer(IsShort());
416 } else if constexpr (type == Type::GPU) {
417 return words.gpu.Pointer(IsShort());
418 } else if constexpr (type == Type::CachedCPU) {
419 return words.cached_cpu.Pointer(IsShort());
420 } else if constexpr (type == Type::Untracked) {
421 return words.untracked.Pointer(IsShort());
422 }
423 }
424
425 template <Type type>
426 const u64* Array() const noexcept {
427 if constexpr (type == Type::CPU) {
428 return words.cpu.Pointer(IsShort());
429 } else if constexpr (type == Type::GPU) {
430 return words.gpu.Pointer(IsShort());
431 } else if constexpr (type == Type::CachedCPU) {
432 return words.cached_cpu.Pointer(IsShort());
433 } else if constexpr (type == Type::Untracked) {
434 return words.untracked.Pointer(IsShort());
435 }
436 }
437
438 /**
439 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
440 *
441 * @param word_index Index to the word to notify to the rasterizer
442 * @param current_bits Current state of the word
443 * @param new_bits New state of the word
444 *
445 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
446 */
447 template <bool add_to_rasterizer>
448 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
449 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
450 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
451 IteratePages(changed_bits, [&](size_t offset, size_t size) {
452 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
453 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1);
454 });
455 }
456
457 VAddr cpu_addr = 0;
458 RasterizerInterface* rasterizer = nullptr;
459 Words<stack_words> words;
460};
461
462} // namespace VideoCommon
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index 4e75f33ca..ab4f4d407 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -126,15 +126,14 @@ constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
126 PixelFormat::ASTC_2D_8X8_SRGB, 126 PixelFormat::ASTC_2D_8X8_SRGB,
127}; 127};
128 128
129// Missing formats: 129constexpr std::array VIEW_CLASS_ASTC_10x5_RGBA{
130// PixelFormat::ASTC_2D_10X5_UNORM 130 PixelFormat::ASTC_2D_10X5_UNORM,
131// PixelFormat::ASTC_2D_10X5_SRGB 131 PixelFormat::ASTC_2D_10X5_SRGB,
132 132};
133// Missing formats:
134// PixelFormat::ASTC_2D_10X6_SRGB
135 133
136constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{ 134constexpr std::array VIEW_CLASS_ASTC_10x6_RGBA{
137 PixelFormat::ASTC_2D_10X6_UNORM, 135 PixelFormat::ASTC_2D_10X6_UNORM,
136 PixelFormat::ASTC_2D_10X6_SRGB,
138}; 137};
139 138
140constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{ 139constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
@@ -147,9 +146,10 @@ constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
147 PixelFormat::ASTC_2D_10X10_SRGB, 146 PixelFormat::ASTC_2D_10X10_SRGB,
148}; 147};
149 148
150// Missing formats 149constexpr std::array VIEW_CLASS_ASTC_12x10_RGBA{
151// ASTC_2D_12X10_UNORM, 150 PixelFormat::ASTC_2D_12X10_UNORM,
152// ASTC_2D_12X10_SRGB, 151 PixelFormat::ASTC_2D_12X10_SRGB,
152};
153 153
154constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{ 154constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
155 PixelFormat::ASTC_2D_12X12_UNORM, 155 PixelFormat::ASTC_2D_12X12_UNORM,
@@ -229,9 +229,11 @@ constexpr Table MakeViewTable() {
229 EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA); 229 EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
230 EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA); 230 EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
231 EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA); 231 EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
232 EnableRange(view, VIEW_CLASS_ASTC_10x5_RGBA);
232 EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA); 233 EnableRange(view, VIEW_CLASS_ASTC_10x6_RGBA);
233 EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA); 234 EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
234 EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA); 235 EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
236 EnableRange(view, VIEW_CLASS_ASTC_12x10_RGBA);
235 EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA); 237 EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
236 return view; 238 return view;
237} 239}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 614d61db4..2f986097f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
4#include <cstring> 4#include <cstring>
5#include <optional> 5#include <optional>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/bit_util.h"
7#include "common/scope_exit.h" 8#include "common/scope_exit.h"
8#include "common/settings.h" 9#include "common/settings.h"
9#include "core/core.h" 10#include "core/core.h"
@@ -222,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
222} 223}
223 224
224void Maxwell3D::RefreshParametersImpl() { 225void Maxwell3D::RefreshParametersImpl() {
226 if (!Settings::IsGPULevelHigh()) {
227 return;
228 }
225 size_t current_index = 0; 229 size_t current_index = 0;
226 for (auto& segment : macro_segments) { 230 for (auto& segment : macro_segments) {
227 if (segment.first == 0) { 231 if (segment.first == 0) {
@@ -259,12 +263,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
259size_t Maxwell3D::EstimateIndexBufferSize() { 263size_t Maxwell3D::EstimateIndexBufferSize() {
260 GPUVAddr start_address = regs.index_buffer.StartAddress(); 264 GPUVAddr start_address = regs.index_buffer.StartAddress();
261 GPUVAddr end_address = regs.index_buffer.EndAddress(); 265 GPUVAddr end_address = regs.index_buffer.EndAddress();
262 static constexpr std::array<size_t, 4> max_sizes = { 266 static constexpr std::array<size_t, 3> max_sizes = {std::numeric_limits<u8>::max(),
263 std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), 267 std::numeric_limits<u16>::max(),
264 std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; 268 std::numeric_limits<u32>::max()};
265 const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); 269 const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
270 const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
266 return std::min<size_t>( 271 return std::min<size_t>(
267 memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / 272 memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
268 byte_size, 273 byte_size,
269 static_cast<size_t>(end_address - start_address)); 274 static_cast<size_t>(end_address - start_address));
270} 275}
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c390ac91b..3b2f6aab6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,13 +4,20 @@
4#pragma once 4#pragma once
5 5
6#include <algorithm> 6#include <algorithm>
7#include <condition_variable>
7#include <cstring> 8#include <cstring>
8#include <deque> 9#include <deque>
9#include <functional> 10#include <functional>
10#include <memory> 11#include <memory>
12#include <mutex>
13#include <thread>
11#include <queue> 14#include <queue>
12 15
13#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/microprofile.h"
18#include "common/scope_exit.h"
19#include "common/settings.h"
20#include "common/thread.h"
14#include "video_core/delayed_destruction_ring.h" 21#include "video_core/delayed_destruction_ring.h"
15#include "video_core/gpu.h" 22#include "video_core/gpu.h"
16#include "video_core/host1x/host1x.h" 23#include "video_core/host1x/host1x.h"
@@ -23,15 +30,26 @@ class FenceBase {
23public: 30public:
24 explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} 31 explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
25 32
33 bool IsStubbed() const {
34 return is_stubbed;
35 }
36
26protected: 37protected:
27 bool is_stubbed; 38 bool is_stubbed;
28}; 39};
29 40
30template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> 41template <typename Traits>
31class FenceManager { 42class FenceManager {
43 using TFence = typename Traits::FenceType;
44 using TTextureCache = typename Traits::TextureCacheType;
45 using TBufferCache = typename Traits::BufferCacheType;
46 using TQueryCache = typename Traits::QueryCacheType;
47 static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
48
32public: 49public:
33 /// Notify the fence manager about a new frame 50 /// Notify the fence manager about a new frame
34 void TickFrame() { 51 void TickFrame() {
52 std::unique_lock lock(ring_guard);
35 delayed_destruction_ring.Tick(); 53 delayed_destruction_ring.Tick();
36 } 54 }
37 55
@@ -46,17 +64,33 @@ public:
46 } 64 }
47 65
48 void SignalFence(std::function<void()>&& func) { 66 void SignalFence(std::function<void()>&& func) {
49 TryReleasePendingFences(); 67 rasterizer.InvalidateGPUCache();
68 bool delay_fence = Settings::IsGPULevelHigh();
69 if constexpr (!can_async_check) {
70 TryReleasePendingFences<false>();
71 }
50 const bool should_flush = ShouldFlush(); 72 const bool should_flush = ShouldFlush();
51 CommitAsyncFlushes(); 73 CommitAsyncFlushes();
52 uncommitted_operations.emplace_back(std::move(func));
53 CommitOperations();
54 TFence new_fence = CreateFence(!should_flush); 74 TFence new_fence = CreateFence(!should_flush);
55 fences.push(new_fence); 75 if constexpr (can_async_check) {
76 guard.lock();
77 }
78 if (delay_fence) {
79 uncommitted_operations.emplace_back(std::move(func));
80 }
81 pending_operations.emplace_back(std::move(uncommitted_operations));
56 QueueFence(new_fence); 82 QueueFence(new_fence);
83 if (!delay_fence) {
84 func();
85 }
86 fences.push(std::move(new_fence));
57 if (should_flush) { 87 if (should_flush) {
58 rasterizer.FlushCommands(); 88 rasterizer.FlushCommands();
59 } 89 }
90 if constexpr (can_async_check) {
91 guard.unlock();
92 cv.notify_all();
93 }
60 } 94 }
61 95
62 void SignalSyncPoint(u32 value) { 96 void SignalSyncPoint(u32 value) {
@@ -66,29 +100,30 @@ public:
66 } 100 }
67 101
68 void WaitPendingFences() { 102 void WaitPendingFences() {
69 while (!fences.empty()) { 103 if constexpr (!can_async_check) {
70 TFence& current_fence = fences.front(); 104 TryReleasePendingFences<true>();
71 if (ShouldWait()) {
72 WaitFence(current_fence);
73 }
74 PopAsyncFlushes();
75 auto operations = std::move(pending_operations.front());
76 pending_operations.pop_front();
77 for (auto& operation : operations) {
78 operation();
79 }
80 PopFence();
81 } 105 }
82 } 106 }
83 107
84protected: 108protected:
85 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 109 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
86 TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, 110 TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
87 TQueryCache& query_cache_) 111 TQueryCache& query_cache_)
88 : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, 112 : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
89 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} 113 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
114 if constexpr (can_async_check) {
115 fence_thread =
116 std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
117 }
118 }
90 119
91 virtual ~FenceManager() = default; 120 virtual ~FenceManager() {
121 if constexpr (can_async_check) {
122 fence_thread.request_stop();
123 cv.notify_all();
124 fence_thread.join();
125 }
126 }
92 127
93 /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is 128 /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
94 /// true 129 /// true
@@ -104,15 +139,20 @@ protected:
104 Tegra::GPU& gpu; 139 Tegra::GPU& gpu;
105 Tegra::Host1x::SyncpointManager& syncpoint_manager; 140 Tegra::Host1x::SyncpointManager& syncpoint_manager;
106 TTextureCache& texture_cache; 141 TTextureCache& texture_cache;
107 TTBufferCache& buffer_cache; 142 TBufferCache& buffer_cache;
108 TQueryCache& query_cache; 143 TQueryCache& query_cache;
109 144
110private: 145private:
146 template <bool force_wait>
111 void TryReleasePendingFences() { 147 void TryReleasePendingFences() {
112 while (!fences.empty()) { 148 while (!fences.empty()) {
113 TFence& current_fence = fences.front(); 149 TFence& current_fence = fences.front();
114 if (ShouldWait() && !IsFenceSignaled(current_fence)) { 150 if (ShouldWait() && !IsFenceSignaled(current_fence)) {
115 return; 151 if constexpr (force_wait) {
152 WaitFence(current_fence);
153 } else {
154 return;
155 }
116 } 156 }
117 PopAsyncFlushes(); 157 PopAsyncFlushes();
118 auto operations = std::move(pending_operations.front()); 158 auto operations = std::move(pending_operations.front());
@@ -120,7 +160,49 @@ private:
120 for (auto& operation : operations) { 160 for (auto& operation : operations) {
121 operation(); 161 operation();
122 } 162 }
123 PopFence(); 163 {
164 std::unique_lock lock(ring_guard);
165 delayed_destruction_ring.Push(std::move(current_fence));
166 }
167 fences.pop();
168 }
169 }
170
171 void ReleaseThreadFunc(std::stop_token stop_token) {
172 std::string name = "GPUFencingThread";
173 MicroProfileOnThreadCreate(name.c_str());
174
175 // Cleanup
176 SCOPE_EXIT({ MicroProfileOnThreadExit(); });
177
178 Common::SetCurrentThreadName(name.c_str());
179 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
180
181 TFence current_fence;
182 std::deque<std::function<void()>> current_operations;
183 while (!stop_token.stop_requested()) {
184 {
185 std::unique_lock lock(guard);
186 cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
187 if (stop_token.stop_requested()) [[unlikely]] {
188 return;
189 }
190 current_fence = std::move(fences.front());
191 current_operations = std::move(pending_operations.front());
192 fences.pop();
193 pending_operations.pop_front();
194 }
195 if (!current_fence->IsStubbed()) {
196 WaitFence(current_fence);
197 }
198 PopAsyncFlushes();
199 for (auto& operation : current_operations) {
200 operation();
201 }
202 {
203 std::unique_lock lock(ring_guard);
204 delayed_destruction_ring.Push(std::move(current_fence));
205 }
124 } 206 }
125 } 207 }
126 208
@@ -154,19 +236,16 @@ private:
154 query_cache.CommitAsyncFlushes(); 236 query_cache.CommitAsyncFlushes();
155 } 237 }
156 238
157 void PopFence() {
158 delayed_destruction_ring.Push(std::move(fences.front()));
159 fences.pop();
160 }
161
162 void CommitOperations() {
163 pending_operations.emplace_back(std::move(uncommitted_operations));
164 }
165
166 std::queue<TFence> fences; 239 std::queue<TFence> fences;
167 std::deque<std::function<void()>> uncommitted_operations; 240 std::deque<std::function<void()>> uncommitted_operations;
168 std::deque<std::deque<std::function<void()>>> pending_operations; 241 std::deque<std::deque<std::function<void()>>> pending_operations;
169 242
243 std::mutex guard;
244 std::mutex ring_guard;
245 std::condition_variable cv;
246
247 std::jthread fence_thread;
248
170 DelayedDestructionRing<TFence, 6> delayed_destruction_ring; 249 DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
171}; 250};
172 251
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 01fb5b546..7b2cde7a7 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -82,6 +82,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
82} 82}
83 83
84PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { 84PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
85 std::unique_lock<std::mutex> lock(guard);
85 return kind_map.GetValueAt(gpu_addr); 86 return kind_map.GetValueAt(gpu_addr);
86} 87}
87 88
@@ -160,7 +161,10 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
160 } 161 }
161 remaining_size -= big_page_size; 162 remaining_size -= big_page_size;
162 } 163 }
163 kind_map.Map(gpu_addr, gpu_addr + size, kind); 164 {
165 std::unique_lock<std::mutex> lock(guard);
166 kind_map.Map(gpu_addr, gpu_addr + size, kind);
167 }
164 return gpu_addr; 168 return gpu_addr;
165} 169}
166 170
@@ -553,6 +557,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
553} 557}
554 558
555size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { 559size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
560 std::unique_lock<std::mutex> lock(guard);
556 return kind_map.GetContinuousSizeFrom(gpu_addr); 561 return kind_map.GetContinuousSizeFrom(gpu_addr);
557} 562}
558 563
@@ -745,10 +750,10 @@ void MemoryManager::FlushCaching() {
745 return; 750 return;
746 } 751 }
747 accumulator->Callback([this](GPUVAddr addr, size_t size) { 752 accumulator->Callback([this](GPUVAddr addr, size_t size) {
748 GetSubmappedRangeImpl<false>(addr, size, page_stash); 753 GetSubmappedRangeImpl<false>(addr, size, page_stash2);
749 }); 754 });
750 rasterizer->InnerInvalidation(page_stash); 755 rasterizer->InnerInvalidation(page_stash2);
751 page_stash.clear(); 756 page_stash2.clear();
752 accumulator->Clear(); 757 accumulator->Clear();
753} 758}
754 759
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fbbe856c4..794535122 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -5,6 +5,7 @@
5 5
6#include <atomic> 6#include <atomic>
7#include <map> 7#include <map>
8#include <mutex>
8#include <optional> 9#include <optional>
9#include <vector> 10#include <vector>
10 11
@@ -215,6 +216,9 @@ private:
215 216
216 std::vector<u64> big_page_continuous; 217 std::vector<u64> big_page_continuous;
217 std::vector<std::pair<VAddr, std::size_t>> page_stash{}; 218 std::vector<std::pair<VAddr, std::size_t>> page_stash{};
219 std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
220
221 mutable std::mutex guard;
218 222
219 static constexpr size_t continuous_bits = 64; 223 static constexpr size_t continuous_bits = 64;
220 224
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 8906ba6d8..941de95c1 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -6,6 +6,7 @@
6#include <algorithm> 6#include <algorithm>
7#include <array> 7#include <array>
8#include <cstring> 8#include <cstring>
9#include <functional>
9#include <iterator> 10#include <iterator>
10#include <list> 11#include <list>
11#include <memory> 12#include <memory>
@@ -17,13 +18,19 @@
17 18
18#include "common/assert.h" 19#include "common/assert.h"
19#include "common/settings.h" 20#include "common/settings.h"
21#include "core/memory.h"
20#include "video_core/control/channel_state_cache.h" 22#include "video_core/control/channel_state_cache.h"
21#include "video_core/engines/maxwell_3d.h" 23#include "video_core/engines/maxwell_3d.h"
22#include "video_core/memory_manager.h" 24#include "video_core/memory_manager.h"
23#include "video_core/rasterizer_interface.h" 25#include "video_core/rasterizer_interface.h"
26#include "video_core/texture_cache/slot_vector.h"
24 27
25namespace VideoCommon { 28namespace VideoCommon {
26 29
30using AsyncJobId = SlotId;
31
32static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
33
27template <class QueryCache, class HostCounter> 34template <class QueryCache, class HostCounter>
28class CounterStreamBase { 35class CounterStreamBase {
29public: 36public:
@@ -93,9 +100,13 @@ private:
93template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> 100template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
94class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 101class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
95public: 102public:
96 explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_) 103 explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
97 : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this), 104 Core::Memory::Memory& cpu_memory_)
98 VideoCore::QueryType::SamplesPassed}}} {} 105 : rasterizer{rasterizer_},
106 cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
107 VideoCore::QueryType::SamplesPassed}}} {
108 (void)slot_async_jobs.insert(); // Null value
109 }
99 110
100 void InvalidateRegion(VAddr addr, std::size_t size) { 111 void InvalidateRegion(VAddr addr, std::size_t size) {
101 std::unique_lock lock{mutex}; 112 std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@ public:
126 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); 137 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
127 } 138 }
128 139
129 query->BindCounter(Stream(type).Current(), timestamp); 140 auto result = query->BindCounter(Stream(type).Current(), timestamp);
130 if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 141 if (result) {
131 AsyncFlushQuery(*cpu_addr); 142 auto async_job_id = query->GetAsyncJob();
143 auto& async_job = slot_async_jobs[async_job_id];
144 async_job.collected = true;
145 async_job.value = *result;
146 query->SetAsyncJob(NULL_ASYNC_JOB_ID);
132 } 147 }
148 AsyncFlushQuery(query, timestamp, lock);
133 } 149 }
134 150
135 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 151 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@ public:
173 } 189 }
174 190
175 void CommitAsyncFlushes() { 191 void CommitAsyncFlushes() {
192 std::unique_lock lock{mutex};
176 committed_flushes.push_back(uncommitted_flushes); 193 committed_flushes.push_back(uncommitted_flushes);
177 uncommitted_flushes.reset(); 194 uncommitted_flushes.reset();
178 } 195 }
179 196
180 bool HasUncommittedFlushes() const { 197 bool HasUncommittedFlushes() const {
198 std::unique_lock lock{mutex};
181 return uncommitted_flushes != nullptr; 199 return uncommitted_flushes != nullptr;
182 } 200 }
183 201
184 bool ShouldWaitAsyncFlushes() const { 202 bool ShouldWaitAsyncFlushes() const {
203 std::unique_lock lock{mutex};
185 if (committed_flushes.empty()) { 204 if (committed_flushes.empty()) {
186 return false; 205 return false;
187 } 206 }
@@ -189,6 +208,7 @@ public:
189 } 208 }
190 209
191 void PopAsyncFlushes() { 210 void PopAsyncFlushes() {
211 std::unique_lock lock{mutex};
192 if (committed_flushes.empty()) { 212 if (committed_flushes.empty()) {
193 return; 213 return;
194 } 214 }
@@ -197,15 +217,25 @@ public:
197 committed_flushes.pop_front(); 217 committed_flushes.pop_front();
198 return; 218 return;
199 } 219 }
200 for (VAddr query_address : *flush_list) { 220 for (AsyncJobId async_job_id : *flush_list) {
201 FlushAndRemoveRegion(query_address, 4); 221 AsyncJob& async_job = slot_async_jobs[async_job_id];
222 if (!async_job.collected) {
223 FlushAndRemoveRegion(async_job.query_location, 2, true);
224 }
202 } 225 }
203 committed_flushes.pop_front(); 226 committed_flushes.pop_front();
204 } 227 }
205 228
206private: 229private:
230 struct AsyncJob {
231 bool collected = false;
232 u64 value = 0;
233 VAddr query_location = 0;
234 std::optional<u64> timestamp{};
235 };
236
207 /// Flushes a memory range to guest memory and removes it from the cache. 237 /// Flushes a memory range to guest memory and removes it from the cache.
208 void FlushAndRemoveRegion(VAddr addr, std::size_t size) { 238 void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
209 const u64 addr_begin = addr; 239 const u64 addr_begin = addr;
210 const u64 addr_end = addr_begin + size; 240 const u64 addr_end = addr_begin + size;
211 const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { 241 const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@ private:
226 continue; 256 continue;
227 } 257 }
228 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); 258 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
229 query.Flush(); 259 AsyncJobId async_job_id = query.GetAsyncJob();
260 auto flush_result = query.Flush(async);
261 if (async_job_id == NULL_ASYNC_JOB_ID) {
262 ASSERT_MSG(false, "This should not be reachable at all");
263 continue;
264 }
265 AsyncJob& async_job = slot_async_jobs[async_job_id];
266 async_job.collected = true;
267 async_job.value = flush_result;
268 query.SetAsyncJob(NULL_ASYNC_JOB_ID);
230 } 269 }
231 std::erase_if(contents, in_range); 270 std::erase_if(contents, in_range);
232 } 271 }
@@ -253,26 +292,60 @@ private:
253 return found != std::end(contents) ? &*found : nullptr; 292 return found != std::end(contents) ? &*found : nullptr;
254 } 293 }
255 294
256 void AsyncFlushQuery(VAddr addr) { 295 void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
257 if (!uncommitted_flushes) { 296 std::unique_lock<std::recursive_mutex>& lock) {
258 uncommitted_flushes = std::make_shared<std::vector<VAddr>>(); 297 const AsyncJobId new_async_job_id = slot_async_jobs.insert();
298 {
299 AsyncJob& async_job = slot_async_jobs[new_async_job_id];
300 query->SetAsyncJob(new_async_job_id);
301 async_job.query_location = query->GetCpuAddr();
302 async_job.collected = false;
303
304 if (!uncommitted_flushes) {
305 uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
306 }
307 uncommitted_flushes->push_back(new_async_job_id);
259 } 308 }
260 uncommitted_flushes->push_back(addr); 309 lock.unlock();
310 std::function<void()> operation([this, new_async_job_id, timestamp] {
311 std::unique_lock local_lock{mutex};
312 AsyncJob& async_job = slot_async_jobs[new_async_job_id];
313 u64 value = async_job.value;
314 VAddr address = async_job.query_location;
315 slot_async_jobs.erase(new_async_job_id);
316 local_lock.unlock();
317 if (timestamp) {
318 u64 timestamp_value = *timestamp;
319 cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
320 cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
321 rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
322 VideoCommon::CacheType::NoQueryCache);
323 } else {
324 u32 small_value = static_cast<u32>(value);
325 cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
326 rasterizer.InvalidateRegion(address, sizeof(u32),
327 VideoCommon::CacheType::NoQueryCache);
328 }
329 });
330 rasterizer.SyncOperation(std::move(operation));
261 } 331 }
262 332
263 static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; 333 static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
264 static constexpr unsigned YUZU_PAGEBITS = 12; 334 static constexpr unsigned YUZU_PAGEBITS = 12;
265 335
336 SlotVector<AsyncJob> slot_async_jobs;
337
266 VideoCore::RasterizerInterface& rasterizer; 338 VideoCore::RasterizerInterface& rasterizer;
339 Core::Memory::Memory& cpu_memory;
267 340
268 std::recursive_mutex mutex; 341 mutable std::recursive_mutex mutex;
269 342
270 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; 343 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
271 344
272 std::array<CounterStream, VideoCore::NumQueryTypes> streams; 345 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
273 346
274 std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{}; 347 std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
275 std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes; 348 std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
276}; 349};
277 350
278template <class QueryCache, class HostCounter> 351template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@ public:
291 virtual ~HostCounterBase() = default; 364 virtual ~HostCounterBase() = default;
292 365
293 /// Returns the current value of the query. 366 /// Returns the current value of the query.
294 u64 Query() { 367 u64 Query(bool async = false) {
295 if (result) { 368 if (result) {
296 return *result; 369 return *result;
297 } 370 }
298 371
299 u64 value = BlockingQuery() + base_result; 372 u64 value = BlockingQuery(async) + base_result;
300 if (dependency) { 373 if (dependency) {
301 value += dependency->Query(); 374 value += dependency->Query();
302 dependency = nullptr; 375 dependency = nullptr;
@@ -317,7 +390,7 @@ public:
317 390
318protected: 391protected:
319 /// Returns the value of query from the backend API blocking as needed. 392 /// Returns the value of query from the backend API blocking as needed.
320 virtual u64 BlockingQuery() const = 0; 393 virtual u64 BlockingQuery(bool async = false) const = 0;
321 394
322private: 395private:
323 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. 396 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@ public:
340 CachedQueryBase& operator=(const CachedQueryBase&) = delete; 413 CachedQueryBase& operator=(const CachedQueryBase&) = delete;
341 414
342 /// Flushes the query to guest memory. 415 /// Flushes the query to guest memory.
343 virtual void Flush() { 416 virtual u64 Flush(bool async = false) {
344 // When counter is nullptr it means that it's just been reset. We are supposed to write a 417 // When counter is nullptr it means that it's just been reset. We are supposed to write a
345 // zero in these cases. 418 // zero in these cases.
346 const u64 value = counter ? counter->Query() : 0; 419 const u64 value = counter ? counter->Query(async) : 0;
420 if (async) {
421 return value;
422 }
347 std::memcpy(host_ptr, &value, sizeof(u64)); 423 std::memcpy(host_ptr, &value, sizeof(u64));
348 424
349 if (timestamp) { 425 if (timestamp) {
350 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); 426 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
351 } 427 }
428 return value;
352 } 429 }
353 430
354 /// Binds a counter to this query. 431 /// Binds a counter to this query.
355 void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { 432 std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
433 std::optional<u64> timestamp_) {
434 std::optional<u64> result{};
356 if (counter) { 435 if (counter) {
357 // If there's an old counter set it means the query is being rewritten by the game. 436 // If there's an old counter set it means the query is being rewritten by the game.
358 // To avoid losing the data forever, flush here. 437 // To avoid losing the data forever, flush here.
359 Flush(); 438 result = std::make_optional(Flush());
360 } 439 }
361 counter = std::move(counter_); 440 counter = std::move(counter_);
362 timestamp = timestamp_; 441 timestamp = timestamp_;
442 return result;
363 } 443 }
364 444
365 VAddr GetCpuAddr() const noexcept { 445 VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@ public:
374 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; 454 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
375 } 455 }
376 456
457 void SetAsyncJob(AsyncJobId assigned_async_job_) {
458 assigned_async_job = assigned_async_job_;
459 }
460
461 AsyncJobId GetAsyncJob() const {
462 return assigned_async_job;
463 }
464
377protected: 465protected:
378 /// Returns true when querying the counter may potentially block. 466 /// Returns true when querying the counter may potentially block.
379 bool WaitPending() const noexcept { 467 bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@ private:
389 u8* host_ptr; ///< Writable host pointer. 477 u8* host_ptr; ///< Writable host pointer.
390 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. 478 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
391 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. 479 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
480 AsyncJobId assigned_async_job;
392}; 481};
393 482
394} // namespace VideoCommon 483} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..18d3c3ac0 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -8,6 +8,7 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/buffer_cache/memory_tracker_base.h"
11#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_opengl/gl_device.h" 13#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -200,6 +201,8 @@ private:
200struct BufferCacheParams { 201struct BufferCacheParams {
201 using Runtime = OpenGL::BufferCacheRuntime; 202 using Runtime = OpenGL::BufferCacheRuntime;
202 using Buffer = OpenGL::Buffer; 203 using Buffer = OpenGL::Buffer;
204 using Async_Buffer = u32;
205 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
203 206
204 static constexpr bool IS_OPENGL = true; 207 static constexpr bool IS_OPENGL = true;
205 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; 208 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
@@ -208,6 +211,7 @@ struct BufferCacheParams {
208 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 211 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
209 static constexpr bool USE_MEMORY_MAPS = false; 212 static constexpr bool USE_MEMORY_MAPS = false;
210 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; 213 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
214 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
211}; 215};
212 216
213using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 217using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
new file mode 100644
index 000000000..f15ae8e25
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_opengl/gl_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>;
9}
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 22ed16ebf..400c21981 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -108,7 +108,8 @@ bool IsASTCSupported() {
108 108
109[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) { 109[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
110 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 110 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
111 return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); 111 return nsight || HasExtension(extensions, "GL_EXT_debug_tool") ||
112 Settings::values.renderer_debug.GetValue();
112} 113}
113} // Anonymous namespace 114} // Anonymous namespace
114 115
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index f1446e732..e21b19dcc 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -30,7 +30,17 @@ private:
30}; 30};
31 31
32using Fence = std::shared_ptr<GLInnerFence>; 32using Fence = std::shared_ptr<GLInnerFence>;
33using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; 33
34struct FenceManagerParams {
35 using FenceType = Fence;
36 using BufferCacheType = BufferCache;
37 using TextureCacheType = TextureCache;
38 using QueryCacheType = QueryCache;
39
40 static constexpr bool HAS_ASYNC_CHECK = false;
41};
42
43using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
34 44
35class FenceManagerOpenGL final : public GenericFenceManager { 45class FenceManagerOpenGL final : public GenericFenceManager {
36public: 46public:
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 5070db441..99d7347f5 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
26 26
27} // Anonymous namespace 27} // Anonymous namespace
28 28
29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_) 29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
30 : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {} 30 : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
31 31
32QueryCache::~QueryCache() = default; 32QueryCache::~QueryCache() = default;
33 33
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
74 glEndQuery(GetTarget(type)); 74 glEndQuery(GetTarget(type));
75} 75}
76 76
77u64 HostCounter::BlockingQuery() const { 77u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
78 GLint64 value; 78 GLint64 value;
79 glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); 79 glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
80 return static_cast<u64>(value); 80 return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
96 return *this; 96 return *this;
97} 97}
98 98
99void CachedQuery::Flush() { 99u64 CachedQuery::Flush([[maybe_unused]] bool async) {
100 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. 100 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
101 // To avoid this disable and re-enable keeping the dependency stream. 101 // To avoid this disable and re-enable keeping the dependency stream.
102 // But we only have to do this if we have pending waits to be done. 102 // But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
106 stream.Update(false); 106 stream.Update(false);
107 } 107 }
108 108
109 VideoCommon::CachedQueryBase<HostCounter>::Flush(); 109 auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
110 110
111 if (slice_counter) { 111 if (slice_counter) {
112 stream.Update(true); 112 stream.Update(true);
113 } 113 }
114
115 return result;
114} 116}
115 117
116} // namespace OpenGL 118} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 14ce59990..872513f22 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
28class QueryCache final 28class QueryCache final
29 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 29 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
30public: 30public:
31 explicit QueryCache(RasterizerOpenGL& rasterizer_); 31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
32 ~QueryCache(); 32 ~QueryCache();
33 33
34 OGLQuery AllocateQuery(VideoCore::QueryType type); 34 OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
51 void EndQuery(); 51 void EndQuery();
52 52
53private: 53private:
54 u64 BlockingQuery() const override; 54 u64 BlockingQuery(bool async = false) const override;
55 55
56 QueryCache& cache; 56 QueryCache& cache;
57 const VideoCore::QueryType type; 57 const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
70 CachedQuery(const CachedQuery&) = delete; 70 CachedQuery(const CachedQuery&) = delete;
71 CachedQuery& operator=(const CachedQuery&) = delete; 71 CachedQuery& operator=(const CachedQuery&) = delete;
72 72
73 void Flush() override; 73 u64 Flush(bool async = false) override;
74 74
75private: 75private:
76 QueryCache* cache; 76 QueryCache* cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 90e35e307..0089b4b27 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
63 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 63 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
64 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, 64 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
65 state_tracker, gpu.ShaderNotify()), 65 state_tracker, gpu.ShaderNotify()),
66 query_cache(*this), accelerate_dma(buffer_cache, texture_cache), 66 query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
67 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 67 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
68 blit_image(program_manager_) {} 68 blit_image(program_manager_) {}
69 69
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
1287 } 1287 }
1288 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); 1288 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
1289 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; 1289 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
1290 const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing 1290 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
1291 : VideoCommon::ObtainBufferOperation::MarkAsWritten;
1292 const auto [buffer, offset] = 1291 const auto [buffer, offset] =
1293 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); 1292 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
1294 1293
@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
1299 if constexpr (IS_IMAGE_UPLOAD) { 1298 if constexpr (IS_IMAGE_UPLOAD) {
1300 image->UploadMemory(buffer->Handle(), offset, copy_span); 1299 image->UploadMemory(buffer->Handle(), offset, copy_span);
1301 } else { 1300 } else {
1302 image->DownloadMemory(buffer->Handle(), offset, copy_span); 1301 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
1302 buffer_operand.address, buffer_size);
1303 } 1303 }
1304 return true; 1304 return true;
1305} 1305}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 0b9c4a904..052456f61 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map,
803 803
804void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, 804void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
805 std::span<const VideoCommon::BufferImageCopy> copies) { 805 std::span<const VideoCommon::BufferImageCopy> copies) {
806 std::array buffer_handles{buffer_handle};
807 std::array buffer_offsets{buffer_offset};
808 DownloadMemory(buffer_handles, buffer_offsets, copies);
809}
810
811void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets,
812 std::span<const VideoCommon::BufferImageCopy> copies) {
806 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); 813 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
807 if (is_rescaled) { 814 if (is_rescaled) {
808 ScaleDown(); 815 ScaleDown();
809 } 816 }
810 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 817 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
811 glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); 818 for (size_t i = 0; i < buffer_handles.size(); i++) {
812 glPixelStorei(GL_PACK_ALIGNMENT, 1); 819 auto& buffer_handle = buffer_handles[i];
820 glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle);
821 glPixelStorei(GL_PACK_ALIGNMENT, 1);
813 822
814 u32 current_row_length = std::numeric_limits<u32>::max(); 823 u32 current_row_length = std::numeric_limits<u32>::max();
815 u32 current_image_height = std::numeric_limits<u32>::max(); 824 u32 current_image_height = std::numeric_limits<u32>::max();
816 825
817 for (const VideoCommon::BufferImageCopy& copy : copies) { 826 for (const VideoCommon::BufferImageCopy& copy : copies) {
818 if (copy.image_subresource.base_level >= gl_num_levels) { 827 if (copy.image_subresource.base_level >= gl_num_levels) {
819 continue; 828 continue;
820 } 829 }
821 if (current_row_length != copy.buffer_row_length) { 830 if (current_row_length != copy.buffer_row_length) {
822 current_row_length = copy.buffer_row_length; 831 current_row_length = copy.buffer_row_length;
823 glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); 832 glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
824 } 833 }
825 if (current_image_height != copy.buffer_image_height) { 834 if (current_image_height != copy.buffer_image_height) {
826 current_image_height = copy.buffer_image_height; 835 current_image_height = copy.buffer_image_height;
827 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); 836 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
837 }
838 CopyImageToBuffer(copy, buffer_offsets[i]);
828 } 839 }
829 CopyImageToBuffer(copy, buffer_offset);
830 } 840 }
831 if (is_rescaled) { 841 if (is_rescaled) {
832 ScaleUp(true); 842 ScaleUp(true);
@@ -851,9 +861,12 @@ GLuint Image::StorageHandle() noexcept {
851 case PixelFormat::ASTC_2D_8X5_SRGB: 861 case PixelFormat::ASTC_2D_8X5_SRGB:
852 case PixelFormat::ASTC_2D_5X4_SRGB: 862 case PixelFormat::ASTC_2D_5X4_SRGB:
853 case PixelFormat::ASTC_2D_5X5_SRGB: 863 case PixelFormat::ASTC_2D_5X5_SRGB:
864 case PixelFormat::ASTC_2D_10X5_SRGB:
865 case PixelFormat::ASTC_2D_10X6_SRGB:
854 case PixelFormat::ASTC_2D_10X8_SRGB: 866 case PixelFormat::ASTC_2D_10X8_SRGB:
855 case PixelFormat::ASTC_2D_6X6_SRGB: 867 case PixelFormat::ASTC_2D_6X6_SRGB:
856 case PixelFormat::ASTC_2D_10X10_SRGB: 868 case PixelFormat::ASTC_2D_10X10_SRGB:
869 case PixelFormat::ASTC_2D_12X10_SRGB:
857 case PixelFormat::ASTC_2D_12X12_SRGB: 870 case PixelFormat::ASTC_2D_12X12_SRGB:
858 case PixelFormat::ASTC_2D_8X6_SRGB: 871 case PixelFormat::ASTC_2D_8X6_SRGB:
859 case PixelFormat::ASTC_2D_6X5_SRGB: 872 case PixelFormat::ASTC_2D_6X5_SRGB:
@@ -1113,7 +1126,8 @@ bool Image::ScaleDown(bool ignore) {
1113 1126
1114ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 1127ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
1115 ImageId image_id_, Image& image, const SlotVector<Image>&) 1128 ImageId image_id_, Image& image, const SlotVector<Image>&)
1116 : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} { 1129 : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
1130 views{runtime.null_image_views} {
1117 const Device& device = runtime.device; 1131 const Device& device = runtime.device;
1118 if (True(image.flags & ImageFlagBits::Converted)) { 1132 if (True(image.flags & ImageFlagBits::Converted)) {
1119 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 1133 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
@@ -1204,12 +1218,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1204 1218
1205ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, 1219ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1206 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) 1220 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
1207 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, 1221 : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
1208 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} 1222 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1209 1223
1210ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, 1224ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1211 const VideoCommon::ImageViewInfo& view_info) 1225 const VideoCommon::ImageViewInfo& view_info)
1212 : VideoCommon::ImageViewBase{info, view_info} {} 1226 : VideoCommon::ImageViewBase{info, view_info, 0} {}
1213 1227
1214ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) 1228ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
1215 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 1229 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
@@ -1269,7 +1283,7 @@ GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
1269 ApplySwizzle(view.handle, format, casted_swizzle); 1283 ApplySwizzle(view.handle, format, casted_swizzle);
1270 } 1284 }
1271 if (set_object_label) { 1285 if (set_object_label) {
1272 const std::string name = VideoCommon::Name(*this); 1286 const std::string name = VideoCommon::Name(*this, gpu_addr);
1273 glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data()); 1287 glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
1274 } 1288 }
1275 return view.handle; 1289 return view.handle;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 911e4607a..1190999a8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -215,6 +215,9 @@ public:
215 void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, 215 void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
216 std::span<const VideoCommon::BufferImageCopy> copies); 216 std::span<const VideoCommon::BufferImageCopy> copies);
217 217
218 void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
219 std::span<const VideoCommon::BufferImageCopy> copies);
220
218 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); 221 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
219 222
220 GLuint StorageHandle() noexcept; 223 GLuint StorageHandle() noexcept;
@@ -311,7 +314,6 @@ private:
311 std::unique_ptr<StorageViews> storage_views; 314 std::unique_ptr<StorageViews> storage_views;
312 GLenum internal_format = GL_NONE; 315 GLenum internal_format = GL_NONE;
313 GLuint default_handle = 0; 316 GLuint default_handle = 0;
314 GPUVAddr gpu_addr = 0;
315 u32 buffer_size = 0; 317 u32 buffer_size = 0;
316 GLuint original_texture = 0; 318 GLuint original_texture = 0;
317 int num_samples = 0; 319 int num_samples = 0;
@@ -376,6 +378,7 @@ struct TextureCacheParams {
376 using Sampler = OpenGL::Sampler; 378 using Sampler = OpenGL::Sampler;
377 using Framebuffer = OpenGL::Framebuffer; 379 using Framebuffer = OpenGL::Framebuffer;
378 using AsyncBuffer = u32; 380 using AsyncBuffer = u32;
381 using BufferType = GLuint;
379}; 382};
380 383
381using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; 384using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ef1190e1f..c7dc7e0a1 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,10 +100,13 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
100 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM 100 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB 101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
102 {GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM 102 {GL_COMPRESSED_RGBA_ASTC_10x6_KHR}, // ASTC_2D_10X6_UNORM
103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR}, // ASTC_2D_10X6_SRGB
103 {GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM 104 {GL_COMPRESSED_RGBA_ASTC_10x5_KHR}, // ASTC_2D_10X5_UNORM
104 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB 105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR}, // ASTC_2D_10X5_SRGB
105 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM 106 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
106 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB 107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
108 {GL_COMPRESSED_RGBA_ASTC_12x10_KHR}, // ASTC_2D_12X10_UNORM
109 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR}, // ASTC_2D_12X10_SRGB
107 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM 110 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
108 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB 111 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
109 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM 112 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 5dce51be8..8853cf0f7 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -197,10 +197,13 @@ struct FormatTuple {
197 {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM 197 {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM
198 {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB 198 {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
199 {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM 199 {VK_FORMAT_ASTC_10x6_UNORM_BLOCK}, // ASTC_2D_10X6_UNORM
200 {VK_FORMAT_ASTC_10x6_SRGB_BLOCK}, // ASTC_2D_10X6_SRGB
200 {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM 201 {VK_FORMAT_ASTC_10x5_UNORM_BLOCK}, // ASTC_2D_10X5_UNORM
201 {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB 202 {VK_FORMAT_ASTC_10x5_SRGB_BLOCK}, // ASTC_2D_10X5_SRGB
202 {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM 203 {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM
203 {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB 204 {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
205 {VK_FORMAT_ASTC_12x10_UNORM_BLOCK}, // ASTC_2D_12X10_UNORM
206 {VK_FORMAT_ASTC_12x10_SRGB_BLOCK}, // ASTC_2D_12X10_SRGB
204 {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM 207 {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM
205 {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB 208 {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
206 {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM 209 {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2a8d9e377..8e31eba34 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -88,13 +88,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
88 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, 88 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
89 Settings::values.renderer_debug.GetValue())), 89 Settings::values.renderer_debug.GetValue())),
90 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), 90 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
91 surface(CreateSurface(instance, render_window)), 91 surface(CreateSurface(instance, render_window.GetWindowInfo())),
92 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), 92 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
93 state_tracker(), scheduler(device, state_tracker), 93 state_tracker(), scheduler(device, state_tracker),
94 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, 94 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
95 render_window.GetFramebufferLayout().height, false), 95 render_window.GetFramebufferLayout().height, false),
96 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, 96 present_manager(render_window, device, memory_allocator, scheduler, swapchain),
97 screen_info), 97 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
98 scheduler, screen_info),
98 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 99 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
99 state_tracker, scheduler) { 100 state_tracker, scheduler) {
100 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { 101 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
121 return; 122 return;
122 } 123 }
123 // Update screen info if the framebuffer size has changed. 124 // Update screen info if the framebuffer size has changed.
124 if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) { 125 screen_info.width = framebuffer->width;
125 screen_info.width = framebuffer->width; 126 screen_info.height = framebuffer->height;
126 screen_info.height = framebuffer->height; 127
127 }
128 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 128 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
129 const bool use_accelerated = 129 const bool use_accelerated =
130 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 130 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
131 const bool is_srgb = use_accelerated && screen_info.is_srgb; 131 const bool is_srgb = use_accelerated && screen_info.is_srgb;
132 RenderScreenshot(*framebuffer, use_accelerated); 132 RenderScreenshot(*framebuffer, use_accelerated);
133 133
134 bool has_been_recreated = false; 134 Frame* frame = present_manager.GetRenderFrame();
135 const auto recreate_swapchain = [&](u32 width, u32 height) { 135 blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
136 if (!has_been_recreated) { 136 scheduler.Flush(*frame->render_ready);
137 has_been_recreated = true; 137 present_manager.Present(frame);
138 scheduler.Finish();
139 }
140 swapchain.Create(width, height, is_srgb);
141 };
142
143 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
144 if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
145 swapchain.GetHeight() != layout.height) {
146 recreate_swapchain(layout.width, layout.height);
147 }
148 bool is_outdated;
149 do {
150 swapchain.AcquireNextImage();
151 is_outdated = swapchain.IsOutDated();
152 if (is_outdated) {
153 recreate_swapchain(layout.width, layout.height);
154 }
155 } while (is_outdated);
156 if (has_been_recreated) {
157 blit_screen.Recreate();
158 }
159 const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
160 const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
161 scheduler.Flush(render_semaphore, present_semaphore);
162 scheduler.WaitWorker();
163 swapchain.Present(render_semaphore);
164 138
165 gpu.RendererFrameEndNotify(); 139 gpu.RendererFrameEndNotify();
166 rasterizer.TickFrame(); 140 rasterizer.TickFrame();
@@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
246 }); 220 });
247 const VkExtent2D render_area{.width = layout.width, .height = layout.height}; 221 const VkExtent2D render_area{.width = layout.width, .height = layout.height};
248 const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); 222 const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
249 // Since we're not rendering to the screen, ignore the render semaphore. 223 blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
250 void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
251 224
252 const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); 225 const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
253 const VkBufferCreateInfo dst_buffer_info{ 226 const VkBufferCreateInfo dst_buffer_info{
@@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
270 .pNext = nullptr, 243 .pNext = nullptr,
271 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, 244 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
272 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 245 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
273 .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 246 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
274 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 247 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
275 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 248 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
276 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 249 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 009e75e0d..f44367cb2 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,6 +9,7 @@
9#include "common/dynamic_library.h" 9#include "common/dynamic_library.h"
10#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
11#include "video_core/renderer_vulkan/vk_blit_screen.h" 11#include "video_core/renderer_vulkan/vk_blit_screen.h"
12#include "video_core/renderer_vulkan/vk_present_manager.h"
12#include "video_core/renderer_vulkan/vk_rasterizer.h" 13#include "video_core/renderer_vulkan/vk_rasterizer.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 15#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -76,6 +77,7 @@ private:
76 StateTracker state_tracker; 77 StateTracker state_tracker;
77 Scheduler scheduler; 78 Scheduler scheduler;
78 Swapchain swapchain; 79 Swapchain swapchain;
80 PresentManager present_manager;
79 BlitScreen blit_screen; 81 BlitScreen blit_screen;
80 RasterizerVulkan rasterizer; 82 RasterizerVulkan rasterizer;
81 std::optional<TurboMode> turbo_mode; 83 std::optional<TurboMode> turbo_mode;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2f0cc27e8..1e0fdd3d9 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
122 122
123BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, 123BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
124 const Device& device_, MemoryAllocator& memory_allocator_, 124 const Device& device_, MemoryAllocator& memory_allocator_,
125 Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_) 125 Swapchain& swapchain_, PresentManager& present_manager_,
126 Scheduler& scheduler_, const ScreenInfo& screen_info_)
126 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, 127 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
127 memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, 128 memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
128 image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 129 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
130 current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
129 resource_ticks.resize(image_count); 131 resource_ticks.resize(image_count);
130 132
131 CreateStaticResources(); 133 CreateStaticResources();
@@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
135BlitScreen::~BlitScreen() = default; 137BlitScreen::~BlitScreen() = default;
136 138
137void BlitScreen::Recreate() { 139void BlitScreen::Recreate() {
140 present_manager.WaitPresent();
141 scheduler.Finish();
142 device.GetLogical().WaitIdle();
138 CreateDynamicResources(); 143 CreateDynamicResources();
139} 144}
140 145
141VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, 146void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
142 const VkFramebuffer& host_framebuffer, 147 const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
143 const Layout::FramebufferLayout layout, VkExtent2D render_area, 148 VkExtent2D render_area, bool use_accelerated) {
144 bool use_accelerated) {
145 RefreshResources(framebuffer); 149 RefreshResources(framebuffer);
146 150
147 // Finish any pending renderpass 151 // Finish any pending renderpass
148 scheduler.RequestOutsideRenderPassOperationContext(); 152 scheduler.RequestOutsideRenderPassOperationContext();
149 153
150 if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
151 image_count = swapchain_images;
152 Recreate();
153 }
154
155 const std::size_t image_index = swapchain.GetImageIndex();
156
157 scheduler.Wait(resource_ticks[image_index]); 154 scheduler.Wait(resource_ticks[image_index]);
158 resource_ticks[image_index] = scheduler.CurrentTick(); 155 resource_ticks[image_index] = scheduler.CurrentTick();
159 156
@@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
169 std::memcpy(mapped_span.data(), &data, sizeof(data)); 166 std::memcpy(mapped_span.data(), &data, sizeof(data));
170 167
171 if (!use_accelerated) { 168 if (!use_accelerated) {
172 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 169 const u64 image_offset = GetRawImageOffset(framebuffer);
173 170
174 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 171 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
175 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 172 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
204 .depth = 1, 201 .depth = 1,
205 }, 202 },
206 }; 203 };
207 scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { 204 scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
208 const VkImage image = *raw_images[image_index]; 205 const VkImage image = *raw_images[index];
209 const VkImageMemoryBarrier base_barrier{ 206 const VkImageMemoryBarrier base_barrier{
210 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 207 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
211 .pNext = nullptr, 208 .pNext = nullptr,
@@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
245 242
246 const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); 243 const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
247 if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { 244 if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
248 UpdateAADescriptorSet(image_index, source_image_view, false); 245 UpdateAADescriptorSet(source_image_view, false);
249 const u32 up_scale = Settings::values.resolution_info.up_scale; 246 const u32 up_scale = Settings::values.resolution_info.up_scale;
250 const u32 down_shift = Settings::values.resolution_info.down_shift; 247 const u32 down_shift = Settings::values.resolution_info.down_shift;
251 VkExtent2D size{ 248 VkExtent2D size{
252 .width = (up_scale * framebuffer.width) >> down_shift, 249 .width = (up_scale * framebuffer.width) >> down_shift,
253 .height = (up_scale * framebuffer.height) >> down_shift, 250 .height = (up_scale * framebuffer.height) >> down_shift,
254 }; 251 };
255 scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { 252 scheduler.Record([this, index = image_index, size,
253 anti_alias_pass](vk::CommandBuffer cmdbuf) {
256 const VkImageMemoryBarrier base_barrier{ 254 const VkImageMemoryBarrier base_barrier{
257 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 255 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
258 .pNext = nullptr, 256 .pNext = nullptr,
@@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
326 324
327 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); 325 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
328 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, 326 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
329 aa_descriptor_sets[image_index], {}); 327 aa_descriptor_sets[index], {});
330 cmdbuf.Draw(4, 1, 0, 0); 328 cmdbuf.Draw(4, 1, 0, 0);
331 cmdbuf.EndRenderPass(); 329 cmdbuf.EndRenderPass();
332 330
@@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
369 }; 367 };
370 VkImageView fsr_image_view = 368 VkImageView fsr_image_view =
371 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); 369 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
372 UpdateDescriptorSet(image_index, fsr_image_view, true); 370 UpdateDescriptorSet(fsr_image_view, true);
373 } else { 371 } else {
374 const bool is_nn = 372 const bool is_nn =
375 Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; 373 Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
376 UpdateDescriptorSet(image_index, source_image_view, is_nn); 374 UpdateDescriptorSet(source_image_view, is_nn);
377 } 375 }
378 376
379 scheduler.Record( 377 scheduler.Record([this, host_framebuffer, index = image_index,
380 [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { 378 size = render_area](vk::CommandBuffer cmdbuf) {
381 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; 379 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
382 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; 380 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
383 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; 381 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
384 const VkClearValue clear_color{ 382 const VkClearValue clear_color{
385 .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, 383 .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
386 }; 384 };
387 const VkRenderPassBeginInfo renderpass_bi{ 385 const VkRenderPassBeginInfo renderpass_bi{
388 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 386 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
389 .pNext = nullptr, 387 .pNext = nullptr,
390 .renderPass = *renderpass, 388 .renderPass = *renderpass,
391 .framebuffer = host_framebuffer, 389 .framebuffer = host_framebuffer,
392 .renderArea = 390 .renderArea =
393 { 391 {
394 .offset = {0, 0}, 392 .offset = {0, 0},
395 .extent = size, 393 .extent = size,
396 }, 394 },
397 .clearValueCount = 1, 395 .clearValueCount = 1,
398 .pClearValues = &clear_color, 396 .pClearValues = &clear_color,
399 }; 397 };
400 const VkViewport viewport{ 398 const VkViewport viewport{
401 .x = 0.0f, 399 .x = 0.0f,
402 .y = 0.0f, 400 .y = 0.0f,
403 .width = static_cast<float>(size.width), 401 .width = static_cast<float>(size.width),
404 .height = static_cast<float>(size.height), 402 .height = static_cast<float>(size.height),
405 .minDepth = 0.0f, 403 .minDepth = 0.0f,
406 .maxDepth = 1.0f, 404 .maxDepth = 1.0f,
407 }; 405 };
408 const VkRect2D scissor{ 406 const VkRect2D scissor{
409 .offset = {0, 0}, 407 .offset = {0, 0},
410 .extent = size, 408 .extent = size,
411 }; 409 };
412 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 410 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
413 auto graphics_pipeline = [this]() { 411 auto graphics_pipeline = [this]() {
414 switch (Settings::values.scaling_filter.GetValue()) { 412 switch (Settings::values.scaling_filter.GetValue()) {
415 case Settings::ScalingFilter::NearestNeighbor: 413 case Settings::ScalingFilter::NearestNeighbor:
416 case Settings::ScalingFilter::Bilinear: 414 case Settings::ScalingFilter::Bilinear:
417 return *bilinear_pipeline; 415 return *bilinear_pipeline;
418 case Settings::ScalingFilter::Bicubic: 416 case Settings::ScalingFilter::Bicubic:
419 return *bicubic_pipeline; 417 return *bicubic_pipeline;
420 case Settings::ScalingFilter::Gaussian: 418 case Settings::ScalingFilter::Gaussian:
421 return *gaussian_pipeline; 419 return *gaussian_pipeline;
422 case Settings::ScalingFilter::ScaleForce: 420 case Settings::ScalingFilter::ScaleForce:
423 return *scaleforce_pipeline; 421 return *scaleforce_pipeline;
424 default: 422 default:
425 return *bilinear_pipeline; 423 return *bilinear_pipeline;
426 } 424 }
427 }(); 425 }();
428 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); 426 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
429 cmdbuf.SetViewport(0, viewport); 427 cmdbuf.SetViewport(0, viewport);
430 cmdbuf.SetScissor(0, scissor); 428 cmdbuf.SetScissor(0, scissor);
431 429
432 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); 430 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
433 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, 431 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
434 descriptor_sets[image_index], {}); 432 descriptor_sets[index], {});
435 cmdbuf.Draw(4, 1, 0, 0); 433 cmdbuf.Draw(4, 1, 0, 0);
436 cmdbuf.EndRenderPass(); 434 cmdbuf.EndRenderPass();
437 }); 435 });
438 return *semaphores[image_index];
439} 436}
440 437
441VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, 438void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
442 bool use_accelerated) { 439 bool use_accelerated, bool is_srgb) {
443 const std::size_t image_index = swapchain.GetImageIndex(); 440 // Recreate dynamic resources if the the image count or colorspace changed
444 const VkExtent2D render_area = swapchain.GetSize(); 441 if (const std::size_t swapchain_images = swapchain.GetImageCount();
442 swapchain_images != image_count || current_srgb != is_srgb) {
443 current_srgb = is_srgb;
444 image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
445 image_count = swapchain_images;
446 Recreate();
447 }
448
449 // Recreate the presentation frame if the dimensions of the window changed
445 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); 450 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
446 return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated); 451 if (layout.width != frame->width || layout.height != frame->height ||
452 is_srgb != frame->is_srgb) {
453 Recreate();
454 present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
455 image_view_format, *renderpass);
456 }
457
458 const VkExtent2D render_area{frame->width, frame->height};
459 Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
460 if (++image_index >= image_count) {
461 image_index = 0;
462 }
447} 463}
448 464
449vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { 465vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() {
471} 487}
472 488
473void BlitScreen::CreateDynamicResources() { 489void BlitScreen::CreateDynamicResources() {
474 CreateSemaphores();
475 CreateDescriptorPool(); 490 CreateDescriptorPool();
476 CreateDescriptorSetLayout(); 491 CreateDescriptorSetLayout();
477 CreateDescriptorSets(); 492 CreateDescriptorSets();
478 CreatePipelineLayout(); 493 CreatePipelineLayout();
479 CreateRenderPass(); 494 CreateRenderPass();
480 CreateFramebuffers();
481 CreateGraphicsPipeline(); 495 CreateGraphicsPipeline();
482 fsr.reset(); 496 fsr.reset();
483 smaa.reset(); 497 smaa.reset();
@@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() {
525 } 539 }
526} 540}
527 541
528void BlitScreen::CreateSemaphores() {
529 semaphores.resize(image_count);
530 std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
531}
532
533void BlitScreen::CreateDescriptorPool() { 542void BlitScreen::CreateDescriptorPool() {
534 const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ 543 const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
535 { 544 {
@@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() {
571} 580}
572 581
573void BlitScreen::CreateRenderPass() { 582void BlitScreen::CreateRenderPass() {
574 renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); 583 renderpass = CreateRenderPassImpl(image_view_format);
575} 584}
576 585
577vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { 586vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
578 const VkAttachmentDescription color_attachment{ 587 const VkAttachmentDescription color_attachment{
579 .flags = 0, 588 .flags = 0,
580 .format = format, 589 .format = format,
@@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
584 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, 593 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
585 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, 594 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
586 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 595 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
587 .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, 596 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
588 }; 597 };
589 598
590 const VkAttachmentReference color_attachment_ref{ 599 const VkAttachmentReference color_attachment_ref{
@@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() {
1052 nn_sampler = device.GetLogical().CreateSampler(ci_nn); 1061 nn_sampler = device.GetLogical().CreateSampler(ci_nn);
1053} 1062}
1054 1063
1055void BlitScreen::CreateFramebuffers() {
1056 const VkExtent2D size{swapchain.GetSize()};
1057 framebuffers.resize(image_count);
1058
1059 for (std::size_t i = 0; i < image_count; ++i) {
1060 const VkImageView image_view{swapchain.GetImageViewIndex(i)};
1061 framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
1062 }
1063}
1064
1065void BlitScreen::ReleaseRawImages() { 1064void BlitScreen::ReleaseRawImages() {
1066 for (const u64 tick : resource_ticks) { 1065 for (const u64 tick : resource_ticks) {
1067 scheduler.Wait(tick); 1066 scheduler.Wait(tick);
@@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
1175 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); 1174 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
1176 return; 1175 return;
1177 } 1176 }
1178 aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); 1177 aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
1179 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); 1178 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
1180 1179
1181 const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ 1180 const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
1319 aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); 1318 aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
1320} 1319}
1321 1320
1322void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, 1321void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
1323 bool nn) const {
1324 const VkDescriptorImageInfo image_info{ 1322 const VkDescriptorImageInfo image_info{
1325 .sampler = nn ? *nn_sampler : *sampler, 1323 .sampler = nn ? *nn_sampler : *sampler,
1326 .imageView = image_view, 1324 .imageView = image_view,
@@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
1356 device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); 1354 device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
1357} 1355}
1358 1356
1359void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, 1357void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
1360 bool nn) const {
1361 const VkDescriptorBufferInfo buffer_info{ 1358 const VkDescriptorBufferInfo buffer_info{
1362 .buffer = *buffer, 1359 .buffer = *buffer,
1363 .offset = offsetof(BufferData, uniform), 1360 .offset = offsetof(BufferData, uniform),
@@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
1480 return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; 1477 return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
1481} 1478}
1482 1479
1483u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, 1480u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
1484 std::size_t image_index) const {
1485 constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); 1481 constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
1486 return first_image_offset + GetSizeInBytes(framebuffer) * image_index; 1482 return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
1487} 1483}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index ebe10b08b..68ec20253 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,6 +5,7 @@
5 5
6#include <memory> 6#include <memory>
7 7
8#include "core/frontend/framebuffer_layout.h"
8#include "video_core/vulkan_common/vulkan_memory_allocator.h" 9#include "video_core/vulkan_common/vulkan_memory_allocator.h"
9#include "video_core/vulkan_common/vulkan_wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
10 11
@@ -42,6 +43,9 @@ class RasterizerVulkan;
42class Scheduler; 43class Scheduler;
43class SMAA; 44class SMAA;
44class Swapchain; 45class Swapchain;
46class PresentManager;
47
48struct Frame;
45 49
46struct ScreenInfo { 50struct ScreenInfo {
47 VkImage image{}; 51 VkImage image{};
@@ -55,18 +59,17 @@ class BlitScreen {
55public: 59public:
56 explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, 60 explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
57 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, 61 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
58 Scheduler& scheduler, const ScreenInfo& screen_info); 62 PresentManager& present_manager, Scheduler& scheduler,
63 const ScreenInfo& screen_info);
59 ~BlitScreen(); 64 ~BlitScreen();
60 65
61 void Recreate(); 66 void Recreate();
62 67
63 [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, 68 void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
64 const VkFramebuffer& host_framebuffer, 69 const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
65 const Layout::FramebufferLayout layout, VkExtent2D render_area,
66 bool use_accelerated);
67 70
68 [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, 71 void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
69 bool use_accelerated); 72 bool use_accelerated, bool is_srgb);
70 73
71 [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, 74 [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
72 VkExtent2D extent); 75 VkExtent2D extent);
@@ -79,10 +82,9 @@ private:
79 82
80 void CreateStaticResources(); 83 void CreateStaticResources();
81 void CreateShaders(); 84 void CreateShaders();
82 void CreateSemaphores();
83 void CreateDescriptorPool(); 85 void CreateDescriptorPool();
84 void CreateRenderPass(); 86 void CreateRenderPass();
85 vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); 87 vk::RenderPass CreateRenderPassImpl(VkFormat format);
86 void CreateDescriptorSetLayout(); 88 void CreateDescriptorSetLayout();
87 void CreateDescriptorSets(); 89 void CreateDescriptorSets();
88 void CreatePipelineLayout(); 90 void CreatePipelineLayout();
@@ -90,15 +92,14 @@ private:
90 void CreateSampler(); 92 void CreateSampler();
91 93
92 void CreateDynamicResources(); 94 void CreateDynamicResources();
93 void CreateFramebuffers();
94 95
95 void RefreshResources(const Tegra::FramebufferConfig& framebuffer); 96 void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
96 void ReleaseRawImages(); 97 void ReleaseRawImages();
97 void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); 98 void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
98 void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); 99 void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
99 100
100 void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; 101 void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
101 void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; 102 void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
102 void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; 103 void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
103 void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, 104 void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
104 const Layout::FramebufferLayout layout) const; 105 const Layout::FramebufferLayout layout) const;
@@ -107,16 +108,17 @@ private:
107 void CreateFSR(); 108 void CreateFSR();
108 109
109 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; 110 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
110 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, 111 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
111 std::size_t image_index) const;
112 112
113 Core::Memory::Memory& cpu_memory; 113 Core::Memory::Memory& cpu_memory;
114 Core::Frontend::EmuWindow& render_window; 114 Core::Frontend::EmuWindow& render_window;
115 const Device& device; 115 const Device& device;
116 MemoryAllocator& memory_allocator; 116 MemoryAllocator& memory_allocator;
117 Swapchain& swapchain; 117 Swapchain& swapchain;
118 PresentManager& present_manager;
118 Scheduler& scheduler; 119 Scheduler& scheduler;
119 std::size_t image_count; 120 std::size_t image_count;
121 std::size_t image_index{};
120 const ScreenInfo& screen_info; 122 const ScreenInfo& screen_info;
121 123
122 vk::ShaderModule vertex_shader; 124 vk::ShaderModule vertex_shader;
@@ -135,7 +137,6 @@ private:
135 vk::Pipeline gaussian_pipeline; 137 vk::Pipeline gaussian_pipeline;
136 vk::Pipeline scaleforce_pipeline; 138 vk::Pipeline scaleforce_pipeline;
137 vk::RenderPass renderpass; 139 vk::RenderPass renderpass;
138 std::vector<vk::Framebuffer> framebuffers;
139 vk::DescriptorSets descriptor_sets; 140 vk::DescriptorSets descriptor_sets;
140 vk::Sampler nn_sampler; 141 vk::Sampler nn_sampler;
141 vk::Sampler sampler; 142 vk::Sampler sampler;
@@ -145,7 +146,6 @@ private:
145 146
146 std::vector<u64> resource_ticks; 147 std::vector<u64> resource_ticks;
147 148
148 std::vector<vk::Semaphore> semaphores;
149 std::vector<vk::Image> raw_images; 149 std::vector<vk::Image> raw_images;
150 std::vector<vk::ImageView> raw_image_views; 150 std::vector<vk::ImageView> raw_image_views;
151 std::vector<MemoryCommit> raw_buffer_commits; 151 std::vector<MemoryCommit> raw_buffer_commits;
@@ -164,6 +164,8 @@ private:
164 u32 raw_width = 0; 164 u32 raw_width = 0;
165 u32 raw_height = 0; 165 u32 raw_height = 0;
166 Service::android::PixelFormat pixel_format{}; 166 Service::android::PixelFormat pixel_format{};
167 bool current_srgb;
168 VkFormat image_view_format;
167 169
168 std::unique_ptr<FSR> fsr; 170 std::unique_ptr<FSR> fsr;
169 std::unique_ptr<SMAA> smaa; 171 std::unique_ptr<SMAA> smaa;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 9cbcb3c8f..510602e8e 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
314 return staging_pool.Request(size, MemoryUsage::Upload); 314 return staging_pool.Request(size, MemoryUsage::Upload);
315} 315}
316 316
317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { 317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
318 return staging_pool.Request(size, MemoryUsage::Download); 318 return staging_pool.Request(size, MemoryUsage::Download, deferred);
319}
320
321void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
322 staging_pool.FreeDeferred(ref);
319} 323}
320 324
321u64 BufferCacheRuntime::GetDeviceLocalMemory() const { 325u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..879f1ed94 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,7 +3,8 @@
3 3
4#pragma once 4#pragma once
5 5
6#include "video_core/buffer_cache/buffer_cache.h" 6#include "video_core/buffer_cache/buffer_cache_base.h"
7#include "video_core/buffer_cache/memory_tracker_base.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
9#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -75,7 +76,9 @@ public:
75 76
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 77 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 78
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 79 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
80
81 void FreeDeferredStagingBuffer(StagingBufferRef& ref);
79 82
80 void PreCopyBarrier(); 83 void PreCopyBarrier();
81 84
@@ -142,6 +145,8 @@ private:
142struct BufferCacheParams { 145struct BufferCacheParams {
143 using Runtime = Vulkan::BufferCacheRuntime; 146 using Runtime = Vulkan::BufferCacheRuntime;
144 using Buffer = Vulkan::Buffer; 147 using Buffer = Vulkan::Buffer;
148 using Async_Buffer = Vulkan::StagingBufferRef;
149 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
145 150
146 static constexpr bool IS_OPENGL = false; 151 static constexpr bool IS_OPENGL = false;
147 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; 152 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
@@ -150,6 +155,7 @@ struct BufferCacheParams {
150 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 155 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
151 static constexpr bool USE_MEMORY_MAPS = true; 156 static constexpr bool USE_MEMORY_MAPS = true;
152 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; 157 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
158 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
153}; 159};
154 160
155using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 161using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
new file mode 100644
index 000000000..f9e271507
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_vulkan/vk_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>;
9}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0214b103a..fad9e3832 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -5,6 +5,7 @@
5 5
6#include "video_core/renderer_vulkan/vk_buffer_cache.h" 6#include "video_core/renderer_vulkan/vk_buffer_cache.h"
7#include "video_core/renderer_vulkan/vk_fence_manager.h" 7#include "video_core/renderer_vulkan/vk_fence_manager.h"
8#include "video_core/renderer_vulkan/vk_query_cache.h"
8#include "video_core/renderer_vulkan/vk_scheduler.h" 9#include "video_core/renderer_vulkan/vk_scheduler.h"
9#include "video_core/renderer_vulkan/vk_texture_cache.h" 10#include "video_core/renderer_vulkan/vk_texture_cache.h"
10#include "video_core/vulkan_common/vulkan_device.h" 11#include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7fe2afcd9..145359d4e 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -40,7 +40,16 @@ private:
40}; 40};
41using Fence = std::shared_ptr<InnerFence>; 41using Fence = std::shared_ptr<InnerFence>;
42 42
43using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; 43struct FenceManagerParams {
44 using FenceType = Fence;
45 using BufferCacheType = BufferCache;
46 using TextureCacheType = TextureCache;
47 using QueryCacheType = QueryCache;
48
49 static constexpr bool HAS_ASYNC_CHECK = true;
50};
51
52using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
44 53
45class FenceManager final : public GenericFenceManager { 54class FenceManager final : public GenericFenceManager {
46public: 55public:
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 985cc3203..a318d643e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -696,6 +696,13 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
696std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( 696std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
697 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, 697 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
698 PipelineStatistics* statistics, bool build_in_parallel) try { 698 PipelineStatistics* statistics, bool build_in_parallel) try {
699 // TODO: Remove this when Intel fixes their shader compiler.
700 // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
701 if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
702 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
703 return nullptr;
704 }
705
699 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); 706 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
700 707
701 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; 708 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
new file mode 100644
index 000000000..c49583013
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -0,0 +1,457 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/microprofile.h"
5#include "common/settings.h"
6#include "common/thread.h"
7#include "video_core/renderer_vulkan/vk_present_manager.h"
8#include "video_core/renderer_vulkan/vk_scheduler.h"
9#include "video_core/renderer_vulkan/vk_swapchain.h"
10#include "video_core/vulkan_common/vulkan_device.h"
11
12namespace Vulkan {
13
14MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
15MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
16
17namespace {
18
19bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
20 const VkFormatProperties props{physical_device.GetFormatProperties(format)};
21 return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);
22}
23
24[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
25 return VkImageSubresourceLayers{
26 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
27 .mipLevel = 0,
28 .baseArrayLayer = 0,
29 .layerCount = 1,
30 };
31}
32
33[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
34 s32 swapchain_height) {
35 return VkImageBlit{
36 .srcSubresource = MakeImageSubresourceLayers(),
37 .srcOffsets =
38 {
39 {
40 .x = 0,
41 .y = 0,
42 .z = 0,
43 },
44 {
45 .x = frame_width,
46 .y = frame_height,
47 .z = 1,
48 },
49 },
50 .dstSubresource = MakeImageSubresourceLayers(),
51 .dstOffsets =
52 {
53 {
54 .x = 0,
55 .y = 0,
56 .z = 0,
57 },
58 {
59 .x = swapchain_width,
60 .y = swapchain_height,
61 .z = 1,
62 },
63 },
64 };
65}
66
67[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
68 u32 swapchain_height) {
69 return VkImageCopy{
70 .srcSubresource = MakeImageSubresourceLayers(),
71 .srcOffset =
72 {
73 .x = 0,
74 .y = 0,
75 .z = 0,
76 },
77 .dstSubresource = MakeImageSubresourceLayers(),
78 .dstOffset =
79 {
80 .x = 0,
81 .y = 0,
82 .z = 0,
83 },
84 .extent =
85 {
86 .width = std::min(frame_width, swapchain_width),
87 .height = std::min(frame_height, swapchain_height),
88 .depth = 1,
89 },
90 };
91}
92
93} // Anonymous namespace
94
95PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
96 MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
97 Swapchain& swapchain_)
98 : render_window{render_window_}, device{device_},
99 memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
100 blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
101 use_present_thread{Settings::values.async_presentation.GetValue()},
102 image_count{swapchain.GetImageCount()} {
103
104 auto& dld = device.GetLogical();
105 cmdpool = dld.CreateCommandPool({
106 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
107 .pNext = nullptr,
108 .flags =
109 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
110 .queueFamilyIndex = device.GetGraphicsFamily(),
111 });
112 auto cmdbuffers = cmdpool.Allocate(image_count);
113
114 frames.resize(image_count);
115 for (u32 i = 0; i < frames.size(); i++) {
116 Frame& frame = frames[i];
117 frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()};
118 frame.render_ready = dld.CreateSemaphore({
119 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
120 .pNext = nullptr,
121 .flags = 0,
122 });
123 frame.present_done = dld.CreateFence({
124 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
125 .pNext = nullptr,
126 .flags = VK_FENCE_CREATE_SIGNALED_BIT,
127 });
128 free_queue.push(&frame);
129 }
130
131 if (use_present_thread) {
132 present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
133 }
134}
135
136PresentManager::~PresentManager() = default;
137
138Frame* PresentManager::GetRenderFrame() {
139 MICROPROFILE_SCOPE(Vulkan_WaitPresent);
140
141 // Wait for free presentation frames
142 std::unique_lock lock{free_mutex};
143 free_cv.wait(lock, [this] { return !free_queue.empty(); });
144
145 // Take the frame from the queue
146 Frame* frame = free_queue.front();
147 free_queue.pop();
148
149 // Wait for the presentation to be finished so all frame resources are free
150 frame->present_done.Wait();
151 frame->present_done.Reset();
152
153 return frame;
154}
155
156void PresentManager::Present(Frame* frame) {
157 if (!use_present_thread) {
158 scheduler.WaitWorker();
159 CopyToSwapchain(frame);
160 free_queue.push(frame);
161 return;
162 }
163
164 scheduler.Record([this, frame](vk::CommandBuffer) {
165 std::unique_lock lock{queue_mutex};
166 present_queue.push(frame);
167 frame_cv.notify_one();
168 });
169}
170
171void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
172 VkFormat image_view_format, VkRenderPass rd) {
173 auto& dld = device.GetLogical();
174
175 frame->width = width;
176 frame->height = height;
177 frame->is_srgb = is_srgb;
178
179 frame->image = dld.CreateImage({
180 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
181 .pNext = nullptr,
182 .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
183 .imageType = VK_IMAGE_TYPE_2D,
184 .format = swapchain.GetImageFormat(),
185 .extent =
186 {
187 .width = width,
188 .height = height,
189 .depth = 1,
190 },
191 .mipLevels = 1,
192 .arrayLayers = 1,
193 .samples = VK_SAMPLE_COUNT_1_BIT,
194 .tiling = VK_IMAGE_TILING_OPTIMAL,
195 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
196 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
197 .queueFamilyIndexCount = 0,
198 .pQueueFamilyIndices = nullptr,
199 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
200 });
201
202 frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
203
204 frame->image_view = dld.CreateImageView({
205 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
206 .pNext = nullptr,
207 .flags = 0,
208 .image = *frame->image,
209 .viewType = VK_IMAGE_VIEW_TYPE_2D,
210 .format = image_view_format,
211 .components =
212 {
213 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
214 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
215 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
216 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
217 },
218 .subresourceRange =
219 {
220 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
221 .baseMipLevel = 0,
222 .levelCount = 1,
223 .baseArrayLayer = 0,
224 .layerCount = 1,
225 },
226 });
227
228 const VkImageView image_view{*frame->image_view};
229 frame->framebuffer = dld.CreateFramebuffer({
230 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
231 .pNext = nullptr,
232 .flags = 0,
233 .renderPass = rd,
234 .attachmentCount = 1,
235 .pAttachments = &image_view,
236 .width = width,
237 .height = height,
238 .layers = 1,
239 });
240}
241
242void PresentManager::WaitPresent() {
243 if (!use_present_thread) {
244 return;
245 }
246
247 // Wait for the present queue to be empty
248 {
249 std::unique_lock queue_lock{queue_mutex};
250 frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
251 }
252
253 // The above condition will be satisfied when the last frame is taken from the queue.
254 // To ensure that frame has been presented as well take hold of the swapchain
255 // mutex.
256 std::scoped_lock swapchain_lock{swapchain_mutex};
257}
258
259void PresentManager::PresentThread(std::stop_token token) {
260 Common::SetCurrentThreadName("VulkanPresent");
261 while (!token.stop_requested()) {
262 std::unique_lock lock{queue_mutex};
263
264 // Wait for presentation frames
265 Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
266 if (token.stop_requested()) {
267 return;
268 }
269
270 // Take the frame and notify anyone waiting
271 Frame* frame = present_queue.front();
272 present_queue.pop();
273 frame_cv.notify_one();
274
275 // By exchanging the lock ownership we take the swapchain lock
276 // before the queue lock goes out of scope. This way the swapchain
277 // lock in WaitPresent is guaranteed to occur after here.
278 std::exchange(lock, std::unique_lock{swapchain_mutex});
279
280 CopyToSwapchain(frame);
281
282 // Free the frame for reuse
283 std::scoped_lock fl{free_mutex};
284 free_queue.push(frame);
285 free_cv.notify_one();
286 }
287}
288
289void PresentManager::CopyToSwapchain(Frame* frame) {
290 MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
291
292 const auto recreate_swapchain = [&] {
293 swapchain.Create(frame->width, frame->height, frame->is_srgb);
294 image_count = swapchain.GetImageCount();
295 };
296
297 // If the size or colorspace of the incoming frames has changed, recreate the swapchain
298 // to account for that.
299 const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
300 const bool size_changed =
301 swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
302 if (srgb_changed || size_changed) {
303 recreate_swapchain();
304 }
305
306 while (swapchain.AcquireNextImage()) {
307 recreate_swapchain();
308 }
309
310 const vk::CommandBuffer cmdbuf{frame->cmdbuf};
311 cmdbuf.Begin({
312 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
313 .pNext = nullptr,
314 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
315 .pInheritanceInfo = nullptr,
316 });
317
318 const VkImage image{swapchain.CurrentImage()};
319 const VkExtent2D extent = swapchain.GetExtent();
320 const std::array pre_barriers{
321 VkImageMemoryBarrier{
322 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
323 .pNext = nullptr,
324 .srcAccessMask = 0,
325 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
326 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
327 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
328 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
329 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
330 .image = image,
331 .subresourceRange{
332 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
333 .baseMipLevel = 0,
334 .levelCount = 1,
335 .baseArrayLayer = 0,
336 .layerCount = VK_REMAINING_ARRAY_LAYERS,
337 },
338 },
339 VkImageMemoryBarrier{
340 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
341 .pNext = nullptr,
342 .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
343 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
344 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
345 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
346 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
347 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
348 .image = *frame->image,
349 .subresourceRange{
350 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
351 .baseMipLevel = 0,
352 .levelCount = 1,
353 .baseArrayLayer = 0,
354 .layerCount = VK_REMAINING_ARRAY_LAYERS,
355 },
356 },
357 };
358 const std::array post_barriers{
359 VkImageMemoryBarrier{
360 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
361 .pNext = nullptr,
362 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
363 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
364 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
365 .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
366 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
367 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
368 .image = image,
369 .subresourceRange{
370 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
371 .baseMipLevel = 0,
372 .levelCount = 1,
373 .baseArrayLayer = 0,
374 .layerCount = VK_REMAINING_ARRAY_LAYERS,
375 },
376 },
377 VkImageMemoryBarrier{
378 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
379 .pNext = nullptr,
380 .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
381 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
382 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
383 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
384 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
385 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
386 .image = *frame->image,
387 .subresourceRange{
388 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
389 .baseMipLevel = 0,
390 .levelCount = 1,
391 .baseArrayLayer = 0,
392 .layerCount = VK_REMAINING_ARRAY_LAYERS,
393 },
394 },
395 };
396
397 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
398 {}, {}, pre_barriers);
399
400 if (blit_supported) {
401 cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
402 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
403 MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
404 VK_FILTER_LINEAR);
405 } else {
406 cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
407 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
408 MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
409 }
410
411 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
412 {}, {}, post_barriers);
413
414 cmdbuf.End();
415
416 const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
417 const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
418 const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
419
420 static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
421 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
422 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
423 };
424
425 const VkSubmitInfo submit_info{
426 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
427 .pNext = nullptr,
428 .waitSemaphoreCount = 2U,
429 .pWaitSemaphores = wait_semaphores.data(),
430 .pWaitDstStageMask = wait_stage_masks.data(),
431 .commandBufferCount = 1,
432 .pCommandBuffers = cmdbuf.address(),
433 .signalSemaphoreCount = 1U,
434 .pSignalSemaphores = &render_semaphore,
435 };
436
437 // Submit the image copy/blit to the swapchain
438 {
439 std::scoped_lock lock{scheduler.submit_mutex};
440 switch (const VkResult result =
441 device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
442 case VK_SUCCESS:
443 break;
444 case VK_ERROR_DEVICE_LOST:
445 device.ReportLoss();
446 [[fallthrough]];
447 default:
448 vk::Check(result);
449 break;
450 }
451 }
452
453 // Present
454 swapchain.Present(render_semaphore);
455}
456
457} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
new file mode 100644
index 000000000..420a775e2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -0,0 +1,83 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <condition_variable>
7#include <mutex>
8#include <queue>
9
10#include "common/common_types.h"
11#include "common/polyfill_thread.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14
15namespace Core::Frontend {
16class EmuWindow;
17} // namespace Core::Frontend
18
19namespace Vulkan {
20
21class Device;
22class Scheduler;
23class Swapchain;
24
25struct Frame {
26 u32 width;
27 u32 height;
28 bool is_srgb;
29 vk::Image image;
30 vk::ImageView image_view;
31 vk::Framebuffer framebuffer;
32 MemoryCommit image_commit;
33 vk::CommandBuffer cmdbuf;
34 vk::Semaphore render_ready;
35 vk::Fence present_done;
36};
37
38class PresentManager {
39public:
40 PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
41 MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
42 ~PresentManager();
43
44 /// Returns the last used presentation frame
45 Frame* GetRenderFrame();
46
47 /// Pushes a frame for presentation
48 void Present(Frame* frame);
49
50 /// Recreates the present frame to match the provided parameters
51 void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
52 VkFormat image_view_format, VkRenderPass rd);
53
54 /// Waits for the present thread to finish presenting all queued frames.
55 void WaitPresent();
56
57private:
58 void PresentThread(std::stop_token token);
59
60 void CopyToSwapchain(Frame* frame);
61
62private:
63 Core::Frontend::EmuWindow& render_window;
64 const Device& device;
65 MemoryAllocator& memory_allocator;
66 Scheduler& scheduler;
67 Swapchain& swapchain;
68 vk::CommandPool cmdpool;
69 std::vector<Frame> frames;
70 std::queue<Frame*> present_queue;
71 std::queue<Frame*> free_queue;
72 std::condition_variable_any frame_cv;
73 std::condition_variable free_cv;
74 std::mutex swapchain_mutex;
75 std::mutex queue_mutex;
76 std::mutex free_mutex;
77 std::jthread present_thread;
78 bool blit_supported;
79 bool use_present_thread;
80 std::size_t image_count;
81};
82
83} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 929c8ece6..d67490449 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
66 } 66 }
67} 67}
68 68
69QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, 69QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
70 Core::Memory::Memory& cpu_memory_, const Device& device_,
70 Scheduler& scheduler_) 71 Scheduler& scheduler_)
71 : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_}, 72 : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
72 query_pools{ 73 query_pools{
73 QueryPool{device_, scheduler_, QueryType::SamplesPassed}, 74 QueryPool{device_, scheduler_, QueryType::SamplesPassed},
74 } {} 75 } {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
98 query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { 99 query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
99 const vk::Device* logical = &cache.GetDevice().GetLogical(); 100 const vk::Device* logical = &cache.GetDevice().GetLogical();
100 cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { 101 cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
102 const bool use_precise = Settings::IsGPULevelHigh();
101 logical->ResetQueryPool(query.first, query.second, 1); 103 logical->ResetQueryPool(query.first, query.second, 1);
102 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); 104 cmdbuf.BeginQuery(query.first, query.second,
105 use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
103 }); 106 });
104} 107}
105 108
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
112 [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); 115 [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
113} 116}
114 117
115u64 HostCounter::BlockingQuery() const { 118u64 HostCounter::BlockingQuery(bool async) const {
116 cache.GetScheduler().Wait(tick); 119 if (!async) {
120 cache.GetScheduler().Wait(tick);
121 }
117 u64 data; 122 u64 data;
118 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( 123 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
119 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 124 query.first, query.second, 1, sizeof(data), &data, sizeof(data),
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 26762ee09..c1b9552eb 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,7 +52,8 @@ private:
52class QueryCache final 52class QueryCache final
53 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 53 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
54public: 54public:
55 explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, 55 explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, const Device& device_,
56 Scheduler& scheduler_); 57 Scheduler& scheduler_);
57 ~QueryCache(); 58 ~QueryCache();
58 59
@@ -83,7 +84,7 @@ public:
83 void EndQuery(); 84 void EndQuery();
84 85
85private: 86private:
86 u64 BlockingQuery() const override; 87 u64 BlockingQuery(bool async = false) const override;
87 88
88 QueryCache& cache; 89 QueryCache& cache;
89 const VideoCore::QueryType type; 90 const VideoCore::QueryType type;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 673ab478e..d1489fc95 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
172 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 172 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
173 pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, 173 pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
174 render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), 174 render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
175 query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler), 175 query_cache{*this, cpu_memory_, device, scheduler},
176 accelerate_dma(buffer_cache, texture_cache, scheduler),
176 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 177 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
177 wfi_event(device.GetLogical().CreateEvent()) { 178 wfi_event(device.GetLogical().CreateEvent()) {
178 scheduler.SetQueryCache(query_cache); 179 scheduler.SetQueryCache(query_cache);
@@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
675 const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; 676 const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
676 Maxwell::ReportSemaphore::Compare cmp; 677 Maxwell::ReportSemaphore::Compare cmp;
677 if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), 678 if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
678 VideoCommon::CacheType::BufferCache)) { 679 VideoCommon::CacheType::BufferCache |
680 VideoCommon::CacheType::QueryCache)) {
679 return true; 681 return true;
680 } 682 }
681 return false; 683 return false;
@@ -781,8 +783,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
781 } 783 }
782 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); 784 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
783 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; 785 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
784 const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing 786 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
785 : VideoCommon::ObtainBufferOperation::MarkAsWritten;
786 const auto [buffer, offset] = 787 const auto [buffer, offset] =
787 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); 788 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
788 789
@@ -793,7 +794,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
793 if constexpr (IS_IMAGE_UPLOAD) { 794 if constexpr (IS_IMAGE_UPLOAD) {
794 image->UploadMemory(buffer->Handle(), offset, copy_span); 795 image->UploadMemory(buffer->Handle(), offset, copy_span);
795 } else { 796 } else {
796 image->DownloadMemory(buffer->Handle(), offset, copy_span); 797 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
798 buffer_operand.address, buffer_size);
797 } 799 }
798 return true; 800 return true;
799} 801}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 057e16967..80455ec08 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
46 46
47Scheduler::~Scheduler() = default; 47Scheduler::~Scheduler() = default;
48 48
49void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 49u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
50 // When flushing, we only send data to the worker thread; no waiting is necessary. 50 // When flushing, we only send data to the worker thread; no waiting is necessary.
51 SubmitExecution(signal_semaphore, wait_semaphore); 51 const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
52 AllocateNewContext(); 52 AllocateNewContext();
53 return signal_value;
53} 54}
54 55
55void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 56void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() {
205 }); 206 });
206} 207}
207 208
208void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 209u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
209 EndPendingOperations(); 210 EndPendingOperations();
210 InvalidateState(); 211 InvalidateState();
211 212
@@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
217 on_submit(); 218 on_submit();
218 } 219 }
219 220
221 std::scoped_lock lock{submit_mutex};
220 switch (const VkResult result = master_semaphore->SubmitQueue( 222 switch (const VkResult result = master_semaphore->SubmitQueue(
221 cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { 223 cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
222 case VK_SUCCESS: 224 case VK_SUCCESS:
@@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
231 }); 233 });
232 chunk->MarkSubmit(); 234 chunk->MarkSubmit();
233 DispatchWork(); 235 DispatchWork();
236 return signal_value;
234} 237}
235 238
236void Scheduler::AllocateNewContext() { 239void Scheduler::AllocateNewContext() {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 8d75ce987..475c682eb 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,7 +34,7 @@ public:
34 ~Scheduler(); 34 ~Scheduler();
35 35
36 /// Sends the current execution context to the GPU. 36 /// Sends the current execution context to the GPU.
37 void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); 37 u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
38 38
39 /// Sends the current execution context to the GPU and waits for it to complete. 39 /// Sends the current execution context to the GPU and waits for it to complete.
40 void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); 40 void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -106,6 +106,8 @@ public:
106 return *master_semaphore; 106 return *master_semaphore;
107 } 107 }
108 108
109 std::mutex submit_mutex;
110
109private: 111private:
110 class Command { 112 class Command {
111 public: 113 public:
@@ -201,7 +203,7 @@ private:
201 203
202 void AllocateWorkerCommandBuffer(); 204 void AllocateWorkerCommandBuffer();
203 205
204 void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); 206 u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
205 207
206 void AllocateNewContext(); 208 void AllocateNewContext();
207 209
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 85fdce6e5..1e80ce463 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -14,6 +14,7 @@
14#include "video_core/renderer_vulkan/vk_swapchain.h" 14#include "video_core/renderer_vulkan/vk_swapchain.h"
15#include "video_core/vulkan_common/vulkan_device.h" 15#include "video_core/vulkan_common/vulkan_device.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h" 16#include "video_core/vulkan_common/vulkan_wrapper.h"
17#include "vulkan/vulkan_core.h"
17 18
18namespace Vulkan { 19namespace Vulkan {
19 20
@@ -33,23 +34,47 @@ VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats)
33 return found != formats.end() ? *found : formats[0]; 34 return found != formats.end() ? *found : formats[0];
34} 35}
35 36
36VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { 37static constexpr VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox,
37 // Mailbox (triple buffering) doesn't lock the application like fifo (vsync), 38 bool has_fifo_relaxed) {
38 // prefer it if vsync option is not selected 39 // Mailbox doesn't lock the application like FIFO (vsync)
39 const auto found_mailbox = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); 40 // FIFO present mode locks the framerate to the monitor's refresh rate
40 if (Settings::values.fullscreen_mode.GetValue() == Settings::FullscreenMode::Borderless && 41 Settings::VSyncMode setting = [has_imm, has_mailbox]() {
41 found_mailbox != modes.end() && !Settings::values.use_vsync.GetValue()) { 42 // Choose Mailbox or Immediate if unlocked and those modes are supported
42 return VK_PRESENT_MODE_MAILBOX_KHR; 43 const auto mode = Settings::values.vsync_mode.GetValue();
43 } 44 if (Settings::values.use_speed_limit.GetValue()) {
44 if (!Settings::values.use_speed_limit.GetValue()) { 45 return mode;
45 // FIFO present mode locks the framerate to the monitor's refresh rate, 46 }
46 // Find an alternative to surpass this limitation if FPS is unlocked. 47 switch (mode) {
47 const auto found_imm = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_IMMEDIATE_KHR); 48 case Settings::VSyncMode::FIFO:
48 if (found_imm != modes.end()) { 49 case Settings::VSyncMode::FIFORelaxed:
49 return VK_PRESENT_MODE_IMMEDIATE_KHR; 50 if (has_mailbox) {
51 return Settings::VSyncMode::Mailbox;
52 } else if (has_imm) {
53 return Settings::VSyncMode::Immediate;
54 }
55 [[fallthrough]];
56 default:
57 return mode;
50 } 58 }
59 }();
60 if ((setting == Settings::VSyncMode::Mailbox && !has_mailbox) ||
61 (setting == Settings::VSyncMode::Immediate && !has_imm) ||
62 (setting == Settings::VSyncMode::FIFORelaxed && !has_fifo_relaxed)) {
63 setting = Settings::VSyncMode::FIFO;
64 }
65
66 switch (setting) {
67 case Settings::VSyncMode::Immediate:
68 return VK_PRESENT_MODE_IMMEDIATE_KHR;
69 case Settings::VSyncMode::Mailbox:
70 return VK_PRESENT_MODE_MAILBOX_KHR;
71 case Settings::VSyncMode::FIFO:
72 return VK_PRESENT_MODE_FIFO_KHR;
73 case Settings::VSyncMode::FIFORelaxed:
74 return VK_PRESENT_MODE_FIFO_RELAXED_KHR;
75 default:
76 return VK_PRESENT_MODE_FIFO_KHR;
51 } 77 }
52 return VK_PRESENT_MODE_FIFO_KHR;
53} 78}
54 79
55VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { 80VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
@@ -65,6 +90,18 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
65 return extent; 90 return extent;
66} 91}
67 92
93VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& capabilities) {
94 if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) {
95 return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
96 } else if (capabilities.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
97 return VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR;
98 } else {
99 LOG_ERROR(Render_Vulkan, "Unknown composite alpha flags value {:#x}",
100 capabilities.supportedCompositeAlpha);
101 return VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
102 }
103}
104
68} // Anonymous namespace 105} // Anonymous namespace
69 106
70Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, 107Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_,
@@ -87,18 +124,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
87 return; 124 return;
88 } 125 }
89 126
90 device.GetLogical().WaitIdle();
91 Destroy(); 127 Destroy();
92 128
93 CreateSwapchain(capabilities, srgb); 129 CreateSwapchain(capabilities, srgb);
94 CreateSemaphores(); 130 CreateSemaphores();
95 CreateImageViews();
96 131
97 resource_ticks.clear(); 132 resource_ticks.clear();
98 resource_ticks.resize(image_count); 133 resource_ticks.resize(image_count);
99} 134}
100 135
101void Swapchain::AcquireNextImage() { 136bool Swapchain::AcquireNextImage() {
102 const VkResult result = device.GetLogical().AcquireNextImageKHR( 137 const VkResult result = device.GetLogical().AcquireNextImageKHR(
103 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], 138 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
104 VK_NULL_HANDLE, &image_index); 139 VK_NULL_HANDLE, &image_index);
@@ -115,8 +150,11 @@ void Swapchain::AcquireNextImage() {
115 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); 150 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
116 break; 151 break;
117 } 152 }
153
118 scheduler.Wait(resource_ticks[image_index]); 154 scheduler.Wait(resource_ticks[image_index]);
119 resource_ticks[image_index] = scheduler.CurrentTick(); 155 resource_ticks[image_index] = scheduler.CurrentTick();
156
157 return is_suboptimal || is_outdated;
120} 158}
121 159
122void Swapchain::Present(VkSemaphore render_semaphore) { 160void Swapchain::Present(VkSemaphore render_semaphore) {
@@ -131,6 +169,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
131 .pImageIndices = &image_index, 169 .pImageIndices = &image_index,
132 .pResults = nullptr, 170 .pResults = nullptr,
133 }; 171 };
172 std::scoped_lock lock{scheduler.submit_mutex};
134 switch (const VkResult result = present_queue.Present(present_info)) { 173 switch (const VkResult result = present_queue.Present(present_info)) {
135 case VK_SUCCESS: 174 case VK_SUCCESS:
136 break; 175 break;
@@ -153,10 +192,17 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
153void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) { 192void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb) {
154 const auto physical_device{device.GetPhysical()}; 193 const auto physical_device{device.GetPhysical()};
155 const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; 194 const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
156 const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; 195 const auto present_modes = physical_device.GetSurfacePresentModesKHR(surface);
196 has_mailbox = std::find(present_modes.begin(), present_modes.end(),
197 VK_PRESENT_MODE_MAILBOX_KHR) != present_modes.end();
198 has_imm = std::find(present_modes.begin(), present_modes.end(),
199 VK_PRESENT_MODE_IMMEDIATE_KHR) != present_modes.end();
200 has_fifo_relaxed = std::find(present_modes.begin(), present_modes.end(),
201 VK_PRESENT_MODE_FIFO_RELAXED_KHR) != present_modes.end();
157 202
158 const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; 203 const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
159 present_mode = ChooseSwapPresentMode(present_modes); 204 surface_format = ChooseSwapSurfaceFormat(formats);
205 present_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed);
160 206
161 u32 requested_image_count{capabilities.minImageCount + 1}; 207 u32 requested_image_count{capabilities.minImageCount + 1};
162 // Ensure Triple buffering if possible. 208 // Ensure Triple buffering if possible.
@@ -180,12 +226,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
180 .imageColorSpace = surface_format.colorSpace, 226 .imageColorSpace = surface_format.colorSpace,
181 .imageExtent = {}, 227 .imageExtent = {},
182 .imageArrayLayers = 1, 228 .imageArrayLayers = 1,
183 .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 229 .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
184 .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, 230 .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
185 .queueFamilyIndexCount = 0, 231 .queueFamilyIndexCount = 0,
186 .pQueueFamilyIndices = nullptr, 232 .pQueueFamilyIndices = nullptr,
187 .preTransform = capabilities.currentTransform, 233 .preTransform = capabilities.currentTransform,
188 .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, 234 .compositeAlpha = alpha_flags,
189 .presentMode = present_mode, 235 .presentMode = present_mode,
190 .clipped = VK_FALSE, 236 .clipped = VK_FALSE,
191 .oldSwapchain = nullptr, 237 .oldSwapchain = nullptr,
@@ -217,7 +263,6 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
217 263
218 extent = swapchain_ci.imageExtent; 264 extent = swapchain_ci.imageExtent;
219 current_srgb = srgb; 265 current_srgb = srgb;
220 current_fps_unlocked = !Settings::values.use_speed_limit.GetValue();
221 266
222 images = swapchain.GetImages(); 267 images = swapchain.GetImages();
223 image_count = static_cast<u32>(images.size()); 268 image_count = static_cast<u32>(images.size());
@@ -228,56 +273,20 @@ void Swapchain::CreateSemaphores() {
228 present_semaphores.resize(image_count); 273 present_semaphores.resize(image_count);
229 std::ranges::generate(present_semaphores, 274 std::ranges::generate(present_semaphores,
230 [this] { return device.GetLogical().CreateSemaphore(); }); 275 [this] { return device.GetLogical().CreateSemaphore(); });
231} 276 render_semaphores.resize(image_count);
232 277 std::ranges::generate(render_semaphores,
233void Swapchain::CreateImageViews() { 278 [this] { return device.GetLogical().CreateSemaphore(); });
234 VkImageViewCreateInfo ci{
235 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
236 .pNext = nullptr,
237 .flags = 0,
238 .image = {},
239 .viewType = VK_IMAGE_VIEW_TYPE_2D,
240 .format = image_view_format,
241 .components =
242 {
243 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
244 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
245 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
246 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
247 },
248 .subresourceRange =
249 {
250 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
251 .baseMipLevel = 0,
252 .levelCount = 1,
253 .baseArrayLayer = 0,
254 .layerCount = 1,
255 },
256 };
257
258 image_views.resize(image_count);
259 for (std::size_t i = 0; i < image_count; i++) {
260 ci.image = images[i];
261 image_views[i] = device.GetLogical().CreateImageView(ci);
262 }
263} 279}
264 280
265void Swapchain::Destroy() { 281void Swapchain::Destroy() {
266 frame_index = 0; 282 frame_index = 0;
267 present_semaphores.clear(); 283 present_semaphores.clear();
268 framebuffers.clear();
269 image_views.clear();
270 swapchain.reset(); 284 swapchain.reset();
271} 285}
272 286
273bool Swapchain::HasFpsUnlockChanged() const {
274 return current_fps_unlocked != !Settings::values.use_speed_limit.GetValue();
275}
276
277bool Swapchain::NeedsPresentModeUpdate() const { 287bool Swapchain::NeedsPresentModeUpdate() const {
278 // Mailbox present mode is the ideal for all scenarios. If it is not available, 288 const auto requested_mode = ChooseSwapPresentMode(has_imm, has_mailbox, has_fifo_relaxed);
279 // A different present mode is needed to support unlocked FPS above the monitor's refresh rate. 289 return present_mode != requested_mode;
280 return present_mode != VK_PRESENT_MODE_MAILBOX_KHR && HasFpsUnlockChanged();
281} 290}
282 291
283} // namespace Vulkan 292} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index caf1ff32b..bf1ea7254 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -27,7 +27,7 @@ public:
27 void Create(u32 width, u32 height, bool srgb); 27 void Create(u32 width, u32 height, bool srgb);
28 28
29 /// Acquires the next image in the swapchain, waits as needed. 29 /// Acquires the next image in the swapchain, waits as needed.
30 void AcquireNextImage(); 30 bool AcquireNextImage();
31 31
32 /// Presents the rendered image to the swapchain. 32 /// Presents the rendered image to the swapchain.
33 void Present(VkSemaphore render_semaphore); 33 void Present(VkSemaphore render_semaphore);
@@ -52,6 +52,11 @@ public:
52 return is_suboptimal; 52 return is_suboptimal;
53 } 53 }
54 54
55 /// Returns true when the swapchain format is in the srgb color space
56 bool IsSrgb() const {
57 return current_srgb;
58 }
59
55 VkExtent2D GetSize() const { 60 VkExtent2D GetSize() const {
56 return extent; 61 return extent;
57 } 62 }
@@ -64,22 +69,34 @@ public:
64 return image_index; 69 return image_index;
65 } 70 }
66 71
72 std::size_t GetFrameIndex() const {
73 return frame_index;
74 }
75
67 VkImage GetImageIndex(std::size_t index) const { 76 VkImage GetImageIndex(std::size_t index) const {
68 return images[index]; 77 return images[index];
69 } 78 }
70 79
71 VkImageView GetImageViewIndex(std::size_t index) const { 80 VkImage CurrentImage() const {
72 return *image_views[index]; 81 return images[image_index];
73 } 82 }
74 83
75 VkFormat GetImageViewFormat() const { 84 VkFormat GetImageViewFormat() const {
76 return image_view_format; 85 return image_view_format;
77 } 86 }
78 87
88 VkFormat GetImageFormat() const {
89 return surface_format.format;
90 }
91
79 VkSemaphore CurrentPresentSemaphore() const { 92 VkSemaphore CurrentPresentSemaphore() const {
80 return *present_semaphores[frame_index]; 93 return *present_semaphores[frame_index];
81 } 94 }
82 95
96 VkSemaphore CurrentRenderSemaphore() const {
97 return *render_semaphores[frame_index];
98 }
99
83 u32 GetWidth() const { 100 u32 GetWidth() const {
84 return width; 101 return width;
85 } 102 }
@@ -88,6 +105,10 @@ public:
88 return height; 105 return height;
89 } 106 }
90 107
108 VkExtent2D GetExtent() const {
109 return extent;
110 }
111
91private: 112private:
92 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); 113 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
93 void CreateSemaphores(); 114 void CreateSemaphores();
@@ -95,8 +116,6 @@ private:
95 116
96 void Destroy(); 117 void Destroy();
97 118
98 bool HasFpsUnlockChanged() const;
99
100 bool NeedsPresentModeUpdate() const; 119 bool NeedsPresentModeUpdate() const;
101 120
102 const VkSurfaceKHR surface; 121 const VkSurfaceKHR surface;
@@ -107,10 +126,9 @@ private:
107 126
108 std::size_t image_count{}; 127 std::size_t image_count{};
109 std::vector<VkImage> images; 128 std::vector<VkImage> images;
110 std::vector<vk::ImageView> image_views;
111 std::vector<vk::Framebuffer> framebuffers;
112 std::vector<u64> resource_ticks; 129 std::vector<u64> resource_ticks;
113 std::vector<vk::Semaphore> present_semaphores; 130 std::vector<vk::Semaphore> present_semaphores;
131 std::vector<vk::Semaphore> render_semaphores;
114 132
115 u32 width; 133 u32 width;
116 u32 height; 134 u32 height;
@@ -121,9 +139,12 @@ private:
121 VkFormat image_view_format{}; 139 VkFormat image_view_format{};
122 VkExtent2D extent{}; 140 VkExtent2D extent{};
123 VkPresentModeKHR present_mode{}; 141 VkPresentModeKHR present_mode{};
142 VkSurfaceFormatKHR surface_format{};
143 bool has_imm{false};
144 bool has_mailbox{false};
145 bool has_fifo_relaxed{false};
124 146
125 bool current_srgb{}; 147 bool current_srgb{};
126 bool current_fps_unlocked{};
127 bool is_outdated{}; 148 bool is_outdated{};
128 bool is_suboptimal{}; 149 bool is_suboptimal{};
129}; 150};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ae15f6976..99dd1260a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1,10 +1,11 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include <algorithm> 4#include <algorithm>
5#include <array> 5#include <array>
6#include <span> 6#include <span>
7#include <vector> 7#include <vector>
8#include <boost/container/small_vector.hpp>
8 9
9#include "common/bit_cast.h" 10#include "common/bit_cast.h"
10#include "common/bit_util.h" 11#include "common/bit_util.h"
@@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
1343 1344
1344void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, 1345void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
1345 std::span<const VideoCommon::BufferImageCopy> copies) { 1346 std::span<const VideoCommon::BufferImageCopy> copies) {
1347 std::array buffer_handles{
1348 buffer,
1349 };
1350 std::array buffer_offsets{
1351 offset,
1352 };
1353 DownloadMemory(buffer_handles, buffer_offsets, copies);
1354}
1355
1356void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
1357 std::span<const VideoCommon::BufferImageCopy> copies) {
1346 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); 1358 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
1347 if (is_rescaled) { 1359 if (is_rescaled) {
1348 ScaleDown(); 1360 ScaleDown();
1349 } 1361 }
1350 std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); 1362 boost::container::small_vector<VkBuffer, 1> buffers_vector{};
1363 boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
1364 for (size_t index = 0; index < buffers_span.size(); index++) {
1365 buffers_vector.emplace_back(buffers_span[index]);
1366 vk_copies.emplace_back(
1367 TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
1368 }
1351 scheduler->RequestOutsideRenderPassOperationContext(); 1369 scheduler->RequestOutsideRenderPassOperationContext();
1352 scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask, 1370 scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
1353 vk_copies](vk::CommandBuffer cmdbuf) { 1371 aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
1354 const VkImageMemoryBarrier read_barrier{ 1372 const VkImageMemoryBarrier read_barrier{
1355 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 1373 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
1356 .pNext = nullptr, 1374 .pNext = nullptr,
@@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
1369 .layerCount = VK_REMAINING_ARRAY_LAYERS, 1387 .layerCount = VK_REMAINING_ARRAY_LAYERS,
1370 }, 1388 },
1371 }; 1389 };
1390 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1391 0, read_barrier);
1392
1393 for (size_t index = 0; index < buffers.size(); index++) {
1394 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
1395 vk_copies[index]);
1396 }
1397
1398 const VkMemoryBarrier memory_write_barrier{
1399 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
1400 .pNext = nullptr,
1401 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
1402 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
1403 };
1372 const VkImageMemoryBarrier image_write_barrier{ 1404 const VkImageMemoryBarrier image_write_barrier{
1373 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 1405 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
1374 .pNext = nullptr, 1406 .pNext = nullptr,
@@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
1387 .layerCount = VK_REMAINING_ARRAY_LAYERS, 1419 .layerCount = VK_REMAINING_ARRAY_LAYERS,
1388 }, 1420 },
1389 }; 1421 };
1390 const VkMemoryBarrier memory_write_barrier{
1391 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
1392 .pNext = nullptr,
1393 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
1394 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
1395 };
1396 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1397 0, read_barrier);
1398 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies);
1399 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 1422 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
1400 0, memory_write_barrier, nullptr, image_write_barrier); 1423 0, memory_write_barrier, nullptr, image_write_barrier);
1401 }); 1424 });
@@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
1405} 1428}
1406 1429
1407void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { 1430void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
1408 DownloadMemory(map.buffer, map.offset, copies); 1431 std::array buffers{
1432 map.buffer,
1433 };
1434 std::array offsets{
1435 map.offset,
1436 };
1437 DownloadMemory(buffers, offsets, copies);
1409} 1438}
1410 1439
1411bool Image::IsRescaled() const noexcept { 1440bool Image::IsRescaled() const noexcept {
@@ -1555,8 +1584,9 @@ bool Image::NeedsScaleHelper() const {
1555 1584
1556ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 1585ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
1557 ImageId image_id_, Image& image) 1586 ImageId image_id_, Image& image)
1558 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, 1587 : VideoCommon::ImageViewBase{info, image.info, image_id_, image.gpu_addr},
1559 image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) { 1588 device{&runtime.device}, image_handle{image.Handle()},
1589 samples(ConvertSampleCount(image.info.num_samples)) {
1560 using Shader::TextureType; 1590 using Shader::TextureType;
1561 1591
1562 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); 1592 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1602,7 +1632,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1602 } 1632 }
1603 vk::ImageView handle = device->GetLogical().CreateImageView(ci); 1633 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
1604 if (device->HasDebuggingToolAttached()) { 1634 if (device->HasDebuggingToolAttached()) {
1605 handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); 1635 handle.SetObjectNameEXT(VideoCommon::Name(*this, gpu_addr).c_str());
1606 } 1636 }
1607 image_views[static_cast<size_t>(tex_type)] = std::move(handle); 1637 image_views[static_cast<size_t>(tex_type)] = std::move(handle);
1608 }; 1638 };
@@ -1643,7 +1673,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1643 1673
1644ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, 1674ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1645 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) 1675 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
1646 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, 1676 : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
1647 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} 1677 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1648 1678
1649ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) 1679ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d5ee23f8d..6f360177a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
@@ -141,6 +141,9 @@ public:
141 void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, 141 void DownloadMemory(VkBuffer buffer, VkDeviceSize offset,
142 std::span<const VideoCommon::BufferImageCopy> copies); 142 std::span<const VideoCommon::BufferImageCopy> copies);
143 143
144 void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
145 std::span<const VideoCommon::BufferImageCopy> copies);
146
144 void DownloadMemory(const StagingBufferRef& map, 147 void DownloadMemory(const StagingBufferRef& map,
145 std::span<const VideoCommon::BufferImageCopy> copies); 148 std::span<const VideoCommon::BufferImageCopy> copies);
146 149
@@ -262,7 +265,6 @@ private:
262 VkImage image_handle = VK_NULL_HANDLE; 265 VkImage image_handle = VK_NULL_HANDLE;
263 VkImageView render_target = VK_NULL_HANDLE; 266 VkImageView render_target = VK_NULL_HANDLE;
264 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; 267 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
265 GPUVAddr gpu_addr = 0;
266 u32 buffer_size = 0; 268 u32 buffer_size = 0;
267}; 269};
268 270
@@ -371,6 +373,7 @@ struct TextureCacheParams {
371 using Sampler = Vulkan::Sampler; 373 using Sampler = Vulkan::Sampler;
372 using Framebuffer = Vulkan::Framebuffer; 374 using Framebuffer = Vulkan::Framebuffer;
373 using AsyncBuffer = Vulkan::StagingBufferRef; 375 using AsyncBuffer = Vulkan::StagingBufferRef;
376 using BufferType = VkBuffer;
374}; 377};
375 378
376using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; 379using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 009dab0b6..0630ebda5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -14,13 +14,18 @@ namespace Vulkan {
14 14
15UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) 15UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
16 : device{device_}, scheduler{scheduler_} { 16 : device{device_}, scheduler{scheduler_} {
17 payload_start = payload.data();
17 payload_cursor = payload.data(); 18 payload_cursor = payload.data();
18} 19}
19 20
20UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; 21UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
21 22
22void UpdateDescriptorQueue::TickFrame() { 23void UpdateDescriptorQueue::TickFrame() {
23 payload_cursor = payload.data(); 24 if (++frame_index >= FRAMES_IN_FLIGHT) {
25 frame_index = 0;
26 }
27 payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
28 payload_cursor = payload_start;
24} 29}
25 30
26void UpdateDescriptorQueue::Acquire() { 31void UpdateDescriptorQueue::Acquire() {
@@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() {
28 // This is the maximum number of entries a single draw call might use. 33 // This is the maximum number of entries a single draw call might use.
29 static constexpr size_t MIN_ENTRIES = 0x400; 34 static constexpr size_t MIN_ENTRIES = 0x400;
30 35
31 if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { 36 if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
32 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); 37 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
33 scheduler.WaitWorker(); 38 scheduler.WaitWorker();
34 payload_cursor = payload.data(); 39 payload_cursor = payload_start;
35 } 40 }
36 upload_start = payload_cursor; 41 upload_start = payload_cursor;
37} 42}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 625bcc809..1c1a7020b 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
29}; 29};
30 30
31class UpdateDescriptorQueue final { 31class UpdateDescriptorQueue final {
32 // This should be plenty for the vast majority of cases. Most desktop platforms only
33 // provide up to 3 swapchain images.
34 static constexpr size_t FRAMES_IN_FLIGHT = 5;
35 static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000;
36 static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
37
32public: 38public:
33 explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); 39 explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
34 ~UpdateDescriptorQueue(); 40 ~UpdateDescriptorQueue();
@@ -73,9 +79,11 @@ private:
73 const Device& device; 79 const Device& device;
74 Scheduler& scheduler; 80 Scheduler& scheduler;
75 81
82 size_t frame_index{0};
76 DescriptorUpdateEntry* payload_cursor = nullptr; 83 DescriptorUpdateEntry* payload_cursor = nullptr;
84 DescriptorUpdateEntry* payload_start = nullptr;
77 const DescriptorUpdateEntry* upload_start = nullptr; 85 const DescriptorUpdateEntry* upload_start = nullptr;
78 std::array<DescriptorUpdateEntry, 0x10000> payload; 86 std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
79}; 87};
80 88
81} // namespace Vulkan 89} // namespace Vulkan
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index d9482371b..c5213875b 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -228,14 +228,14 @@ const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu
228 auto info = std::make_unique<ShaderInfo>(); 228 auto info = std::make_unique<ShaderInfo>();
229 if (const std::optional<u64> cached_hash{env.Analyze()}) { 229 if (const std::optional<u64> cached_hash{env.Analyze()}) {
230 info->unique_hash = *cached_hash; 230 info->unique_hash = *cached_hash;
231 info->size_bytes = env.CachedSize(); 231 info->size_bytes = env.CachedSizeBytes();
232 } else { 232 } else {
233 // Slow path, not really hit on commercial games 233 // Slow path, not really hit on commercial games
234 // Build a control flow graph to get the real shader size 234 // Build a control flow graph to get the real shader size
235 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; 235 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
236 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; 236 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
237 info->unique_hash = env.CalculateHash(); 237 info->unique_hash = env.CalculateHash();
238 info->size_bytes = env.ReadSize(); 238 info->size_bytes = env.ReadSizeBytes();
239 } 239 }
240 const size_t size_bytes{info->size_bytes}; 240 const size_t size_bytes{info->size_bytes};
241 const ShaderInfo* const result{info.get()}; 241 const ShaderInfo* const result{info.get()};
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 574760f80..c7cb56243 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -170,15 +170,19 @@ std::optional<u64> GenericEnvironment::Analyze() {
170void GenericEnvironment::SetCachedSize(size_t size_bytes) { 170void GenericEnvironment::SetCachedSize(size_t size_bytes) {
171 cached_lowest = start_address; 171 cached_lowest = start_address;
172 cached_highest = start_address + static_cast<u32>(size_bytes); 172 cached_highest = start_address + static_cast<u32>(size_bytes);
173 code.resize(CachedSize()); 173 code.resize(CachedSizeWords());
174 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); 174 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
175} 175}
176 176
177size_t GenericEnvironment::CachedSize() const noexcept { 177size_t GenericEnvironment::CachedSizeWords() const noexcept {
178 return cached_highest - cached_lowest + INST_SIZE; 178 return CachedSizeBytes() / INST_SIZE;
179} 179}
180 180
181size_t GenericEnvironment::ReadSize() const noexcept { 181size_t GenericEnvironment::CachedSizeBytes() const noexcept {
182 return static_cast<size_t>(cached_highest) - cached_lowest + INST_SIZE;
183}
184
185size_t GenericEnvironment::ReadSizeBytes() const noexcept {
182 return read_highest - read_lowest + INST_SIZE; 186 return read_highest - read_lowest + INST_SIZE;
183} 187}
184 188
@@ -187,7 +191,7 @@ bool GenericEnvironment::CanBeSerialized() const noexcept {
187} 191}
188 192
189u64 GenericEnvironment::CalculateHash() const { 193u64 GenericEnvironment::CalculateHash() const {
190 const size_t size{ReadSize()}; 194 const size_t size{ReadSizeBytes()};
191 const auto data{std::make_unique<char[]>(size)}; 195 const auto data{std::make_unique<char[]>(size)};
192 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); 196 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
193 return Common::CityHash64(data.get(), size); 197 return Common::CityHash64(data.get(), size);
@@ -198,7 +202,7 @@ void GenericEnvironment::Dump(u64 hash) {
198} 202}
199 203
200void GenericEnvironment::Serialize(std::ofstream& file) const { 204void GenericEnvironment::Serialize(std::ofstream& file) const {
201 const u64 code_size{static_cast<u64>(CachedSize())}; 205 const u64 code_size{static_cast<u64>(CachedSizeBytes())};
202 const u64 num_texture_types{static_cast<u64>(texture_types.size())}; 206 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
203 const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; 207 const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
204 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; 208 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index d75987a52..a0f61cbda 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -48,9 +48,11 @@ public:
48 48
49 void SetCachedSize(size_t size_bytes); 49 void SetCachedSize(size_t size_bytes);
50 50
51 [[nodiscard]] size_t CachedSize() const noexcept; 51 [[nodiscard]] size_t CachedSizeWords() const noexcept;
52 52
53 [[nodiscard]] size_t ReadSize() const noexcept; 53 [[nodiscard]] size_t CachedSizeBytes() const noexcept;
54
55 [[nodiscard]] size_t ReadSizeBytes() const noexcept;
54 56
55 [[nodiscard]] bool CanBeSerialized() const noexcept; 57 [[nodiscard]] bool CanBeSerialized() const noexcept;
56 58
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1a76d4178..cb51529e4 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -250,10 +250,13 @@ bool IsPixelFormatASTC(PixelFormat format) {
250 case PixelFormat::ASTC_2D_6X6_UNORM: 250 case PixelFormat::ASTC_2D_6X6_UNORM:
251 case PixelFormat::ASTC_2D_6X6_SRGB: 251 case PixelFormat::ASTC_2D_6X6_SRGB:
252 case PixelFormat::ASTC_2D_10X6_UNORM: 252 case PixelFormat::ASTC_2D_10X6_UNORM:
253 case PixelFormat::ASTC_2D_10X6_SRGB:
253 case PixelFormat::ASTC_2D_10X5_UNORM: 254 case PixelFormat::ASTC_2D_10X5_UNORM:
254 case PixelFormat::ASTC_2D_10X5_SRGB: 255 case PixelFormat::ASTC_2D_10X5_SRGB:
255 case PixelFormat::ASTC_2D_10X10_UNORM: 256 case PixelFormat::ASTC_2D_10X10_UNORM:
256 case PixelFormat::ASTC_2D_10X10_SRGB: 257 case PixelFormat::ASTC_2D_10X10_SRGB:
258 case PixelFormat::ASTC_2D_12X10_UNORM:
259 case PixelFormat::ASTC_2D_12X10_SRGB:
257 case PixelFormat::ASTC_2D_12X12_UNORM: 260 case PixelFormat::ASTC_2D_12X12_UNORM:
258 case PixelFormat::ASTC_2D_12X12_SRGB: 261 case PixelFormat::ASTC_2D_12X12_SRGB:
259 case PixelFormat::ASTC_2D_8X6_UNORM: 262 case PixelFormat::ASTC_2D_8X6_UNORM:
@@ -279,11 +282,13 @@ bool IsPixelFormatSRGB(PixelFormat format) {
279 case PixelFormat::ASTC_2D_8X5_SRGB: 282 case PixelFormat::ASTC_2D_8X5_SRGB:
280 case PixelFormat::ASTC_2D_5X4_SRGB: 283 case PixelFormat::ASTC_2D_5X4_SRGB:
281 case PixelFormat::ASTC_2D_5X5_SRGB: 284 case PixelFormat::ASTC_2D_5X5_SRGB:
285 case PixelFormat::ASTC_2D_10X6_SRGB:
282 case PixelFormat::ASTC_2D_10X8_SRGB: 286 case PixelFormat::ASTC_2D_10X8_SRGB:
283 case PixelFormat::ASTC_2D_6X6_SRGB: 287 case PixelFormat::ASTC_2D_6X6_SRGB:
284 case PixelFormat::ASTC_2D_10X5_SRGB: 288 case PixelFormat::ASTC_2D_10X5_SRGB:
285 case PixelFormat::ASTC_2D_10X10_SRGB: 289 case PixelFormat::ASTC_2D_10X10_SRGB:
286 case PixelFormat::ASTC_2D_12X12_SRGB: 290 case PixelFormat::ASTC_2D_12X12_SRGB:
291 case PixelFormat::ASTC_2D_12X10_SRGB:
287 case PixelFormat::ASTC_2D_8X6_SRGB: 292 case PixelFormat::ASTC_2D_8X6_SRGB:
288 case PixelFormat::ASTC_2D_6X5_SRGB: 293 case PixelFormat::ASTC_2D_6X5_SRGB:
289 return true; 294 return true;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 44b79af20..0225d3287 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -95,10 +95,13 @@ enum class PixelFormat {
95 ASTC_2D_6X6_UNORM, 95 ASTC_2D_6X6_UNORM,
96 ASTC_2D_6X6_SRGB, 96 ASTC_2D_6X6_SRGB,
97 ASTC_2D_10X6_UNORM, 97 ASTC_2D_10X6_UNORM,
98 ASTC_2D_10X6_SRGB,
98 ASTC_2D_10X5_UNORM, 99 ASTC_2D_10X5_UNORM,
99 ASTC_2D_10X5_SRGB, 100 ASTC_2D_10X5_SRGB,
100 ASTC_2D_10X10_UNORM, 101 ASTC_2D_10X10_UNORM,
101 ASTC_2D_10X10_SRGB, 102 ASTC_2D_10X10_SRGB,
103 ASTC_2D_12X10_UNORM,
104 ASTC_2D_12X10_SRGB,
102 ASTC_2D_12X12_UNORM, 105 ASTC_2D_12X12_UNORM,
103 ASTC_2D_12X12_SRGB, 106 ASTC_2D_12X12_SRGB,
104 ASTC_2D_8X6_UNORM, 107 ASTC_2D_8X6_UNORM,
@@ -232,10 +235,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
232 6, // ASTC_2D_6X6_UNORM 235 6, // ASTC_2D_6X6_UNORM
233 6, // ASTC_2D_6X6_SRGB 236 6, // ASTC_2D_6X6_SRGB
234 10, // ASTC_2D_10X6_UNORM 237 10, // ASTC_2D_10X6_UNORM
238 10, // ASTC_2D_10X6_SRGB
235 10, // ASTC_2D_10X5_UNORM 239 10, // ASTC_2D_10X5_UNORM
236 10, // ASTC_2D_10X5_SRGB 240 10, // ASTC_2D_10X5_SRGB
237 10, // ASTC_2D_10X10_UNORM 241 10, // ASTC_2D_10X10_UNORM
238 10, // ASTC_2D_10X10_SRGB 242 10, // ASTC_2D_10X10_SRGB
243 12, // ASTC_2D_12X10_UNORM
244 12, // ASTC_2D_12X10_SRGB
239 12, // ASTC_2D_12X12_UNORM 245 12, // ASTC_2D_12X12_UNORM
240 12, // ASTC_2D_12X12_SRGB 246 12, // ASTC_2D_12X12_SRGB
241 8, // ASTC_2D_8X6_UNORM 247 8, // ASTC_2D_8X6_UNORM
@@ -338,10 +344,13 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
338 6, // ASTC_2D_6X6_UNORM 344 6, // ASTC_2D_6X6_UNORM
339 6, // ASTC_2D_6X6_SRGB 345 6, // ASTC_2D_6X6_SRGB
340 6, // ASTC_2D_10X6_UNORM 346 6, // ASTC_2D_10X6_UNORM
347 6, // ASTC_2D_10X6_SRGB
341 5, // ASTC_2D_10X5_UNORM 348 5, // ASTC_2D_10X5_UNORM
342 5, // ASTC_2D_10X5_SRGB 349 5, // ASTC_2D_10X5_SRGB
343 10, // ASTC_2D_10X10_UNORM 350 10, // ASTC_2D_10X10_UNORM
344 10, // ASTC_2D_10X10_SRGB 351 10, // ASTC_2D_10X10_SRGB
352 10, // ASTC_2D_12X10_UNORM
353 10, // ASTC_2D_12X10_SRGB
345 12, // ASTC_2D_12X12_UNORM 354 12, // ASTC_2D_12X12_UNORM
346 12, // ASTC_2D_12X12_SRGB 355 12, // ASTC_2D_12X12_SRGB
347 6, // ASTC_2D_8X6_UNORM 356 6, // ASTC_2D_8X6_UNORM
@@ -444,10 +453,13 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
444 128, // ASTC_2D_6X6_UNORM 453 128, // ASTC_2D_6X6_UNORM
445 128, // ASTC_2D_6X6_SRGB 454 128, // ASTC_2D_6X6_SRGB
446 128, // ASTC_2D_10X6_UNORM 455 128, // ASTC_2D_10X6_UNORM
456 128, // ASTC_2D_10X6_SRGB
447 128, // ASTC_2D_10X5_UNORM 457 128, // ASTC_2D_10X5_UNORM
448 128, // ASTC_2D_10X5_SRGB 458 128, // ASTC_2D_10X5_SRGB
449 128, // ASTC_2D_10X10_UNORM 459 128, // ASTC_2D_10X10_UNORM
450 128, // ASTC_2D_10X10_SRGB 460 128, // ASTC_2D_10X10_SRGB
461 128, // ASTC_2D_12X10_UNORM
462 128, // ASTC_2D_12X10_SRGB
451 128, // ASTC_2D_12X12_UNORM 463 128, // ASTC_2D_12X12_UNORM
452 128, // ASTC_2D_12X12_SRGB 464 128, // ASTC_2D_12X12_SRGB
453 128, // ASTC_2D_8X6_UNORM 465 128, // ASTC_2D_8X6_UNORM
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 5fc2b2fec..11ced6c38 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -210,6 +210,8 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
210 return PixelFormat::ASTC_2D_6X6_SRGB; 210 return PixelFormat::ASTC_2D_6X6_SRGB;
211 case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR): 211 case Hash(TextureFormat::ASTC_2D_10X6, UNORM, LINEAR):
212 return PixelFormat::ASTC_2D_10X6_UNORM; 212 return PixelFormat::ASTC_2D_10X6_UNORM;
213 case Hash(TextureFormat::ASTC_2D_10X6, UNORM, SRGB):
214 return PixelFormat::ASTC_2D_10X6_SRGB;
213 case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR): 215 case Hash(TextureFormat::ASTC_2D_10X5, UNORM, LINEAR):
214 return PixelFormat::ASTC_2D_10X5_UNORM; 216 return PixelFormat::ASTC_2D_10X5_UNORM;
215 case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB): 217 case Hash(TextureFormat::ASTC_2D_10X5, UNORM, SRGB):
@@ -218,6 +220,10 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
218 return PixelFormat::ASTC_2D_10X10_UNORM; 220 return PixelFormat::ASTC_2D_10X10_UNORM;
219 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): 221 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
220 return PixelFormat::ASTC_2D_10X10_SRGB; 222 return PixelFormat::ASTC_2D_10X10_SRGB;
223 case Hash(TextureFormat::ASTC_2D_12X10, UNORM, LINEAR):
224 return PixelFormat::ASTC_2D_12X10_UNORM;
225 case Hash(TextureFormat::ASTC_2D_12X10, UNORM, SRGB):
226 return PixelFormat::ASTC_2D_12X10_SRGB;
221 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): 227 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
222 return PixelFormat::ASTC_2D_12X12_UNORM; 228 return PixelFormat::ASTC_2D_12X12_UNORM;
223 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): 229 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 30f72361d..6279d8e9e 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -46,7 +46,7 @@ std::string Name(const ImageBase& image) {
46 return "Invalid"; 46 return "Invalid";
47} 47}
48 48
49std::string Name(const ImageViewBase& image_view) { 49std::string Name(const ImageViewBase& image_view, GPUVAddr addr) {
50 const u32 width = image_view.size.width; 50 const u32 width = image_view.size.width;
51 const u32 height = image_view.size.height; 51 const u32 height = image_view.size.height;
52 const u32 depth = image_view.size.depth; 52 const u32 depth = image_view.size.depth;
@@ -56,23 +56,25 @@ std::string Name(const ImageViewBase& image_view) {
56 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; 56 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
57 switch (image_view.type) { 57 switch (image_view.type) {
58 case ImageViewType::e1D: 58 case ImageViewType::e1D:
59 return fmt::format("ImageView 1D {}{}", width, level); 59 return fmt::format("ImageView 1D 0x{:X} {}{}", addr, width, level);
60 case ImageViewType::e2D: 60 case ImageViewType::e2D:
61 return fmt::format("ImageView 2D {}x{}{}", width, height, level); 61 return fmt::format("ImageView 2D 0x{:X} {}x{}{}", addr, width, height, level);
62 case ImageViewType::Cube: 62 case ImageViewType::Cube:
63 return fmt::format("ImageView Cube {}x{}{}", width, height, level); 63 return fmt::format("ImageView Cube 0x{:X} {}x{}{}", addr, width, height, level);
64 case ImageViewType::e3D: 64 case ImageViewType::e3D:
65 return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); 65 return fmt::format("ImageView 3D 0x{:X} {}x{}x{}{}", addr, width, height, depth, level);
66 case ImageViewType::e1DArray: 66 case ImageViewType::e1DArray:
67 return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); 67 return fmt::format("ImageView 1DArray 0x{:X} {}{}|{}", addr, width, level, num_layers);
68 case ImageViewType::e2DArray: 68 case ImageViewType::e2DArray:
69 return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); 69 return fmt::format("ImageView 2DArray 0x{:X} {}x{}{}|{}", addr, width, height, level,
70 num_layers);
70 case ImageViewType::CubeArray: 71 case ImageViewType::CubeArray:
71 return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); 72 return fmt::format("ImageView CubeArray 0x{:X} {}x{}{}|{}", addr, width, height, level,
73 num_layers);
72 case ImageViewType::Rect: 74 case ImageViewType::Rect:
73 return fmt::format("ImageView Rect {}x{}{}", width, height, level); 75 return fmt::format("ImageView Rect 0x{:X} {}x{}{}", addr, width, height, level);
74 case ImageViewType::Buffer: 76 case ImageViewType::Buffer:
75 return fmt::format("BufferView {}", width); 77 return fmt::format("BufferView 0x{:X} {}", addr, width);
76 } 78 }
77 return "Invalid"; 79 return "Invalid";
78} 80}
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index f1f0a057b..9ee57a076 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -179,6 +179,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
179 return "ASTC_2D_6X6_SRGB"; 179 return "ASTC_2D_6X6_SRGB";
180 case PixelFormat::ASTC_2D_10X6_UNORM: 180 case PixelFormat::ASTC_2D_10X6_UNORM:
181 return "ASTC_2D_10X6_UNORM"; 181 return "ASTC_2D_10X6_UNORM";
182 case PixelFormat::ASTC_2D_10X6_SRGB:
183 return "ASTC_2D_10X6_SRGB";
182 case PixelFormat::ASTC_2D_10X5_UNORM: 184 case PixelFormat::ASTC_2D_10X5_UNORM:
183 return "ASTC_2D_10X5_UNORM"; 185 return "ASTC_2D_10X5_UNORM";
184 case PixelFormat::ASTC_2D_10X5_SRGB: 186 case PixelFormat::ASTC_2D_10X5_SRGB:
@@ -187,6 +189,10 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
187 return "ASTC_2D_10X10_UNORM"; 189 return "ASTC_2D_10X10_UNORM";
188 case PixelFormat::ASTC_2D_10X10_SRGB: 190 case PixelFormat::ASTC_2D_10X10_SRGB:
189 return "ASTC_2D_10X10_SRGB"; 191 return "ASTC_2D_10X10_SRGB";
192 case PixelFormat::ASTC_2D_12X10_UNORM:
193 return "ASTC_2D_12X10_UNORM";
194 case PixelFormat::ASTC_2D_12X10_SRGB:
195 return "ASTC_2D_12X10_SRGB";
190 case PixelFormat::ASTC_2D_12X12_UNORM: 196 case PixelFormat::ASTC_2D_12X12_UNORM:
191 return "ASTC_2D_12X12_UNORM"; 197 return "ASTC_2D_12X12_UNORM";
192 case PixelFormat::ASTC_2D_12X12_SRGB: 198 case PixelFormat::ASTC_2D_12X12_SRGB:
@@ -268,7 +274,7 @@ struct RenderTargets;
268 274
269[[nodiscard]] std::string Name(const ImageBase& image); 275[[nodiscard]] std::string Name(const ImageBase& image);
270 276
271[[nodiscard]] std::string Name(const ImageViewBase& image_view); 277[[nodiscard]] std::string Name(const ImageViewBase& image_view, GPUVAddr addr);
272 278
273[[nodiscard]] std::string Name(const RenderTargets& render_targets); 279[[nodiscard]] std::string Name(const RenderTargets& render_targets);
274 280
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
index 04fb84bfa..bcad40353 100644
--- a/src/video_core/texture_cache/image_view_base.cpp
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -16,8 +16,8 @@
16namespace VideoCommon { 16namespace VideoCommon {
17 17
18ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, 18ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
19 ImageId image_id_) 19 ImageId image_id_, GPUVAddr addr)
20 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, 20 : image_id{image_id_}, gpu_addr{addr}, format{info.format}, type{info.type}, range{info.range},
21 size{ 21 size{
22 .width = std::max(image_info.size.width >> range.base.level, 1u), 22 .width = std::max(image_info.size.width >> range.base.level, 1u),
23 .height = std::max(image_info.size.height >> range.base.level, 1u), 23 .height = std::max(image_info.size.height >> range.base.level, 1u),
@@ -35,8 +35,8 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
35 } 35 }
36} 36}
37 37
38ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) 38ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr)
39 : image_id{NULL_IMAGE_ID}, format{info.format}, type{ImageViewType::Buffer}, 39 : image_id{NULL_IMAGE_ID}, gpu_addr{addr}, format{info.format}, type{ImageViewType::Buffer},
40 size{ 40 size{
41 .width = info.size.width, 41 .width = info.size.width,
42 .height = 1, 42 .height = 1,
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
index 69c9776e7..a25ae1d4a 100644
--- a/src/video_core/texture_cache/image_view_base.h
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -24,9 +24,9 @@ enum class ImageViewFlagBits : u16 {
24DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) 24DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
25 25
26struct ImageViewBase { 26struct ImageViewBase {
27 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, 27 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id,
28 ImageId image_id); 28 GPUVAddr addr);
29 explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); 29 explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info, GPUVAddr addr);
30 explicit ImageViewBase(const NullImageViewParams&); 30 explicit ImageViewBase(const NullImageViewParams&);
31 31
32 [[nodiscard]] bool IsBuffer() const noexcept { 32 [[nodiscard]] bool IsBuffer() const noexcept {
@@ -34,6 +34,7 @@ struct ImageViewBase {
34 } 34 }
35 35
36 ImageId image_id{}; 36 ImageId image_id{};
37 GPUVAddr gpu_addr = 0;
37 PixelFormat format{}; 38 PixelFormat format{};
38 ImageViewType type{}; 39 ImageViewType type{};
39 SubresourceRange range; 40 SubresourceRange range;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ed5c768d8..b5297e76b 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,9 +1,10 @@
1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <unordered_set> 6#include <unordered_set>
7#include <boost/container/small_vector.hpp>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
9#include "common/settings.h" 10#include "common/settings.h"
@@ -17,15 +18,10 @@
17 18
18namespace VideoCommon { 19namespace VideoCommon {
19 20
20using Tegra::Texture::SwizzleSource;
21using Tegra::Texture::TextureType;
22using Tegra::Texture::TICEntry; 21using Tegra::Texture::TICEntry;
23using Tegra::Texture::TSCEntry; 22using Tegra::Texture::TSCEntry;
24using VideoCore::Surface::GetFormatType; 23using VideoCore::Surface::GetFormatType;
25using VideoCore::Surface::IsCopyCompatible;
26using VideoCore::Surface::PixelFormat; 24using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::PixelFormatFromDepthFormat;
28using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
29using VideoCore::Surface::SurfaceType; 25using VideoCore::Surface::SurfaceType;
30using namespace Common::Literals; 26using namespace Common::Literals;
31 27
@@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() {
143 runtime.TickFrame(); 139 runtime.TickFrame();
144 critical_gc = 0; 140 critical_gc = 0;
145 ++frame_tick; 141 ++frame_tick;
142
143 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
144 for (auto& buffer : async_buffers_death_ring) {
145 runtime.FreeDeferredStagingBuffer(buffer);
146 }
147 async_buffers_death_ring.clear();
148 }
146} 149}
147 150
148template <class P> 151template <class P>
@@ -661,25 +664,39 @@ template <class P>
661void TextureCache<P>::CommitAsyncFlushes() { 664void TextureCache<P>::CommitAsyncFlushes() {
662 // This is intentionally passing the value by copy 665 // This is intentionally passing the value by copy
663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 666 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
664 const std::span<const ImageId> download_ids = uncommitted_downloads; 667 auto& download_ids = uncommitted_downloads;
665 if (download_ids.empty()) { 668 if (download_ids.empty()) {
666 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 669 committed_downloads.emplace_back(std::move(uncommitted_downloads));
667 uncommitted_downloads.clear(); 670 uncommitted_downloads.clear();
668 async_buffers.emplace_back(std::optional<AsyncBuffer>{}); 671 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
672 uncommitted_async_buffers.clear();
669 return; 673 return;
670 } 674 }
671 size_t total_size_bytes = 0; 675 size_t total_size_bytes = 0;
672 for (const ImageId image_id : download_ids) { 676 size_t last_async_buffer_id = uncommitted_async_buffers.size();
673 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 677 bool any_none_dma = false;
678 for (PendingDownload& download_info : download_ids) {
679 if (download_info.is_swizzle) {
680 total_size_bytes +=
681 Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64);
682 any_none_dma = true;
683 download_info.async_buffer_id = last_async_buffer_id;
684 }
674 } 685 }
675 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); 686 if (any_none_dma) {
676 for (const ImageId image_id : download_ids) { 687 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
677 Image& image = slot_images[image_id]; 688 for (const PendingDownload& download_info : download_ids) {
678 const auto copies = FullDownloadCopies(image.info); 689 if (download_info.is_swizzle) {
679 image.DownloadMemory(download_map, copies); 690 Image& image = slot_images[download_info.object_id];
680 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); 691 const auto copies = FullDownloadCopies(image.info);
692 image.DownloadMemory(download_map, copies);
693 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
694 }
695 }
696 uncommitted_async_buffers.emplace_back(download_map);
681 } 697 }
682 async_buffers.emplace_back(download_map); 698 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
699 uncommitted_async_buffers.clear();
683 } 700 }
684 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 701 committed_downloads.emplace_back(std::move(uncommitted_downloads));
685 uncommitted_downloads.clear(); 702 uncommitted_downloads.clear();
@@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() {
691 return; 708 return;
692 } 709 }
693 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 710 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
694 const std::span<const ImageId> download_ids = committed_downloads.front(); 711 const auto& download_ids = committed_downloads.front();
695 if (download_ids.empty()) { 712 if (download_ids.empty()) {
696 committed_downloads.pop_front(); 713 committed_downloads.pop_front();
697 async_buffers.pop_front(); 714 async_buffers.pop_front();
698 return; 715 return;
699 } 716 }
700 auto download_map = *async_buffers.front(); 717 auto download_map = std::move(async_buffers.front());
701 std::span<u8> download_span = download_map.mapped_span;
702 for (size_t i = download_ids.size(); i > 0; i--) { 718 for (size_t i = download_ids.size(); i > 0; i--) {
703 const ImageBase& image = slot_images[download_ids[i - 1]]; 719 auto& download_info = download_ids[i - 1];
704 const auto copies = FullDownloadCopies(image.info); 720 auto& download_buffer = download_map[download_info.async_buffer_id];
705 download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); 721 if (download_info.is_swizzle) {
706 std::span<u8> download_span_alt = download_span.subspan(download_map.offset); 722 const ImageBase& image = slot_images[download_info.object_id];
707 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, 723 const auto copies = FullDownloadCopies(image.info);
708 swizzle_data_buffer); 724 download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
725 std::span<u8> download_span =
726 download_buffer.mapped_span.subspan(download_buffer.offset);
727 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
728 swizzle_data_buffer);
729 } else {
730 const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
731 std::span<u8> download_span =
732 download_buffer.mapped_span.subspan(download_buffer.offset);
733 gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
734 buffer_info.size);
735 slot_buffer_downloads.erase(download_info.object_id);
736 }
737 }
738 for (auto& download_buffer : download_map) {
739 async_buffers_death_ring.emplace_back(download_buffer);
709 } 740 }
710 runtime.FreeDeferredStagingBuffer(download_map);
711 committed_downloads.pop_front(); 741 committed_downloads.pop_front();
712 async_buffers.pop_front(); 742 async_buffers.pop_front();
713 } else { 743 } else {
714 const std::span<const ImageId> download_ids = committed_downloads.front(); 744 const auto& download_ids = committed_downloads.front();
715 if (download_ids.empty()) { 745 if (download_ids.empty()) {
716 committed_downloads.pop_front(); 746 committed_downloads.pop_front();
717 return; 747 return;
718 } 748 }
719 size_t total_size_bytes = 0; 749 size_t total_size_bytes = 0;
720 for (const ImageId image_id : download_ids) { 750 for (const PendingDownload& download_info : download_ids) {
721 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 751 if (download_info.is_swizzle) {
752 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
753 }
722 } 754 }
723 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); 755 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
724 const size_t original_offset = download_map.offset; 756 const size_t original_offset = download_map.offset;
725 for (const ImageId image_id : download_ids) { 757 for (const PendingDownload& download_info : download_ids) {
726 Image& image = slot_images[image_id]; 758 if (!download_info.is_swizzle) {
759 continue;
760 }
761 Image& image = slot_images[download_info.object_id];
727 const auto copies = FullDownloadCopies(image.info); 762 const auto copies = FullDownloadCopies(image.info);
728 image.DownloadMemory(download_map, copies); 763 image.DownloadMemory(download_map, copies);
729 download_map.offset += image.unswizzled_size_bytes; 764 download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() {
732 runtime.Finish(); 767 runtime.Finish();
733 download_map.offset = original_offset; 768 download_map.offset = original_offset;
734 std::span<u8> download_span = download_map.mapped_span; 769 std::span<u8> download_span = download_map.mapped_span;
735 for (const ImageId image_id : download_ids) { 770 for (const PendingDownload& download_info : download_ids) {
736 const ImageBase& image = slot_images[image_id]; 771 if (!download_info.is_swizzle) {
772 continue;
773 }
774 const ImageBase& image = slot_images[download_info.object_id];
737 const auto copies = FullDownloadCopies(image.info); 775 const auto copies = FullDownloadCopies(image.info);
738 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, 776 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
739 swizzle_data_buffer); 777 swizzle_data_buffer);
@@ -834,6 +872,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
834} 872}
835 873
836template <class P> 874template <class P>
875void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image,
876 typename TextureCache<P>::BufferType buffer,
877 size_t buffer_offset,
878 std::span<const VideoCommon::BufferImageCopy> copies,
879 GPUVAddr address, size_t size) {
880 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
881 const BufferDownload new_buffer_download{address, size};
882 auto slot = slot_buffer_downloads.insert(new_buffer_download);
883 const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot};
884 uncommitted_downloads.emplace_back(new_download);
885 auto download_map = runtime.DownloadStagingBuffer(size, true);
886 uncommitted_async_buffers.emplace_back(download_map);
887 std::array buffers{
888 buffer,
889 download_map.buffer,
890 };
891 std::array<u64, 2> buffer_offsets{
892 buffer_offset,
893 download_map.offset,
894 };
895 image->DownloadMemory(buffers, buffer_offsets, copies);
896 } else {
897 image->DownloadMemory(buffer, buffer_offset, copies);
898 }
899}
900
901template <class P>
837void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { 902void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
838 if (False(image.flags & ImageFlagBits::CpuModified)) { 903 if (False(image.flags & ImageFlagBits::CpuModified)) {
839 // Only upload modified images 904 // Only upload modified images
@@ -1294,6 +1359,12 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1294 ScaleDown(new_image); 1359 ScaleDown(new_image);
1295 } 1360 }
1296 1361
1362 std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) {
1363 const ImageBase& lhs_image = slot_images[lhs];
1364 const ImageBase& rhs_image = slot_images[rhs];
1365 return lhs_image.modification_tick < rhs_image.modification_tick;
1366 });
1367
1297 for (const ImageId overlap_id : overlap_ids) { 1368 for (const ImageId overlap_id : overlap_ids) {
1298 Image& overlap = slot_images[overlap_id]; 1369 Image& overlap = slot_images[overlap_id];
1299 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1370 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
@@ -2209,7 +2280,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
2209 if (new_id) { 2280 if (new_id) {
2210 const ImageViewBase& old_view = slot_image_views[new_id]; 2281 const ImageViewBase& old_view = slot_image_views[new_id];
2211 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { 2282 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
2212 uncommitted_downloads.push_back(old_view.image_id); 2283 const PendingDownload new_download{true, 0, old_view.image_id};
2284 uncommitted_downloads.emplace_back(new_download);
2213 } 2285 }
2214 } 2286 }
2215 *old_id = new_id; 2287 *old_id = new_id;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 5a5b4179c..758b7e212 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,4 +1,4 @@
1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
@@ -40,14 +40,9 @@ struct ChannelState;
40 40
41namespace VideoCommon { 41namespace VideoCommon {
42 42
43using Tegra::Texture::SwizzleSource;
44using Tegra::Texture::TICEntry; 43using Tegra::Texture::TICEntry;
45using Tegra::Texture::TSCEntry; 44using Tegra::Texture::TSCEntry;
46using VideoCore::Surface::GetFormatType;
47using VideoCore::Surface::IsCopyCompatible;
48using VideoCore::Surface::PixelFormat; 45using VideoCore::Surface::PixelFormat;
49using VideoCore::Surface::PixelFormatFromDepthFormat;
50using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
51using namespace Common::Literals; 46using namespace Common::Literals;
52 47
53struct ImageViewInOut { 48struct ImageViewInOut {
@@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
119 using Sampler = typename P::Sampler; 114 using Sampler = typename P::Sampler;
120 using Framebuffer = typename P::Framebuffer; 115 using Framebuffer = typename P::Framebuffer;
121 using AsyncBuffer = typename P::AsyncBuffer; 116 using AsyncBuffer = typename P::AsyncBuffer;
117 using BufferType = typename P::BufferType;
122 118
123 struct BlitImages { 119 struct BlitImages {
124 ImageId dst_id; 120 ImageId dst_id;
@@ -215,6 +211,10 @@ public:
215 const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, 211 const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
216 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); 212 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
217 213
214 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
215 std::span<const VideoCommon::BufferImageCopy> copies,
216 GPUVAddr address = 0, size_t size = 0);
217
218 /// Return true when a CPU region is modified from the GPU 218 /// Return true when a CPU region is modified from the GPU
219 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 219 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
220 220
@@ -424,17 +424,32 @@ private:
424 u64 critical_memory; 424 u64 critical_memory;
425 size_t critical_gc; 425 size_t critical_gc;
426 426
427 struct BufferDownload {
428 GPUVAddr address;
429 size_t size;
430 };
431
432 struct PendingDownload {
433 bool is_swizzle;
434 size_t async_buffer_id;
435 SlotId object_id;
436 };
437
427 SlotVector<Image> slot_images; 438 SlotVector<Image> slot_images;
428 SlotVector<ImageMapView> slot_map_views; 439 SlotVector<ImageMapView> slot_map_views;
429 SlotVector<ImageView> slot_image_views; 440 SlotVector<ImageView> slot_image_views;
430 SlotVector<ImageAlloc> slot_image_allocs; 441 SlotVector<ImageAlloc> slot_image_allocs;
431 SlotVector<Sampler> slot_samplers; 442 SlotVector<Sampler> slot_samplers;
432 SlotVector<Framebuffer> slot_framebuffers; 443 SlotVector<Framebuffer> slot_framebuffers;
444 SlotVector<BufferDownload> slot_buffer_downloads;
433 445
434 // TODO: This data structure is not optimal and it should be reworked 446 // TODO: This data structure is not optimal and it should be reworked
435 std::vector<ImageId> uncommitted_downloads; 447
436 std::deque<std::vector<ImageId>> committed_downloads; 448 std::vector<PendingDownload> uncommitted_downloads;
437 std::deque<std::optional<AsyncBuffer>> async_buffers; 449 std::deque<std::vector<PendingDownload>> committed_downloads;
450 std::vector<AsyncBuffer> uncommitted_async_buffers;
451 std::deque<std::vector<AsyncBuffer>> async_buffers;
452 std::deque<AsyncBuffer> async_buffers_death_ring;
438 453
439 struct LRUItemParams { 454 struct LRUItemParams {
440 using ObjectType = ImageId; 455 using ObjectType = ImageId;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 6f288b3f8..6ffca2af2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -617,7 +617,9 @@ bool Device::ShouldBoostClocks() const {
617 617
618 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; 618 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
619 619
620 return validated_driver && !is_steam_deck; 620 const bool is_debugging = this->HasDebuggingToolAttached();
621
622 return validated_driver && !is_steam_deck && !is_debugging;
621} 623}
622 624
623bool Device::GetSuitability(bool requires_swapchain) { 625bool Device::GetSuitability(bool requires_swapchain) {
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 7d5018151..5f1c63ff9 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11 11
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/settings.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
14 15
15// Define all features which may be used by the implementation here. 16// Define all features which may be used by the implementation here.
@@ -510,7 +511,7 @@ public:
510 511
511 /// Returns true when a known debugging tool is attached. 512 /// Returns true when a known debugging tool is attached.
512 bool HasDebuggingToolAttached() const { 513 bool HasDebuggingToolAttached() const {
513 return has_renderdoc || has_nsight_graphics; 514 return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
514 } 515 }
515 516
516 /// Returns true when the device does not properly support cube compatibility. 517 /// Returns true when the device does not properly support cube compatibility.
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
index fa9bafa20..c34599365 100644
--- a/src/video_core/vulkan_common/vulkan_surface.cpp
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -23,10 +23,10 @@
23 23
24namespace Vulkan { 24namespace Vulkan {
25 25
26vk::SurfaceKHR CreateSurface(const vk::Instance& instance, 26vk::SurfaceKHR CreateSurface(
27 const Core::Frontend::EmuWindow& emu_window) { 27 const vk::Instance& instance,
28 [[maybe_unused]] const Core::Frontend::EmuWindow::WindowSystemInfo& window_info) {
28 [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch(); 29 [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch();
29 [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo();
30 VkSurfaceKHR unsafe_surface = nullptr; 30 VkSurfaceKHR unsafe_surface = nullptr;
31 31
32#ifdef _WIN32 32#ifdef _WIN32
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h
index 5725143e6..5e18c06c4 100644
--- a/src/video_core/vulkan_common/vulkan_surface.h
+++ b/src/video_core/vulkan_common/vulkan_surface.h
@@ -3,15 +3,12 @@
3 3
4#pragma once 4#pragma once
5 5
6#include "core/frontend/emu_window.h"
6#include "video_core/vulkan_common/vulkan_wrapper.h" 7#include "video_core/vulkan_common/vulkan_wrapper.h"
7 8
8namespace Core::Frontend {
9class EmuWindow;
10}
11
12namespace Vulkan { 9namespace Vulkan {
13 10
14[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance, 11[[nodiscard]] vk::SurfaceKHR CreateSurface(
15 const Core::Frontend::EmuWindow& emu_window); 12 const vk::Instance& instance, const Core::Frontend::EmuWindow::WindowSystemInfo& window_info);
16 13
17} // namespace Vulkan 14} // namespace Vulkan
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 0f8c1e6a6..2d7b9ab65 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -189,6 +189,8 @@ add_executable(yuzu
189 multiplayer/state.h 189 multiplayer/state.h
190 multiplayer/validation.h 190 multiplayer/validation.h
191 precompiled_headers.h 191 precompiled_headers.h
192 qt_common.cpp
193 qt_common.h
192 startup_checks.cpp 194 startup_checks.cpp
193 startup_checks.h 195 startup_checks.h
194 uisettings.cpp 196 uisettings.cpp
diff --git a/src/yuzu/applets/qt_profile_select.cpp b/src/yuzu/applets/qt_profile_select.cpp
index 2448e46b6..1f3f23038 100644
--- a/src/yuzu/applets/qt_profile_select.cpp
+++ b/src/yuzu/applets/qt_profile_select.cpp
@@ -95,6 +95,7 @@ QtProfileSelectionDialog::QtProfileSelectionDialog(
95 scroll_area->setLayout(layout); 95 scroll_area->setLayout(layout);
96 96
97 connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser); 97 connect(tree_view, &QTreeView::clicked, this, &QtProfileSelectionDialog::SelectUser);
98 connect(tree_view, &QTreeView::doubleClicked, this, &QtProfileSelectionDialog::accept);
98 connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent, 99 connect(controller_navigation, &ControllerNavigation::TriggerKeyboardEvent,
99 [this](Qt::Key key) { 100 [this](Qt::Key key) {
100 if (!this->isActiveWindow()) { 101 if (!this->isActiveWindow()) {
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 4c7bf28d8..59d226113 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -1,36 +1,48 @@
1// SPDX-FileCopyrightText: 2014 Citra Emulator Project 1// SPDX-FileCopyrightText: 2014 Citra Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <algorithm>
5#include <array>
6#include <cmath>
7#include <cstring>
8#include <string>
9#include <tuple>
10#include <type_traits>
4#include <glad/glad.h> 11#include <glad/glad.h>
5 12
6#include <QApplication> 13#include <QtCore/qglobal.h>
7#if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) && YUZU_USE_QT_MULTIMEDIA 14#if (QT_VERSION < QT_VERSION_CHECK(6, 0, 0)) && YUZU_USE_QT_MULTIMEDIA
15#include <QCamera>
8#include <QCameraImageCapture> 16#include <QCameraImageCapture>
9#include <QCameraInfo> 17#include <QCameraInfo>
10#endif 18#endif
19#include <QCursor>
20#include <QEvent>
21#include <QGuiApplication>
11#include <QHBoxLayout> 22#include <QHBoxLayout>
23#include <QKeyEvent>
24#include <QLayout>
25#include <QList>
12#include <QMessageBox> 26#include <QMessageBox>
13#include <QPainter>
14#include <QScreen> 27#include <QScreen>
15#include <QString> 28#include <QSize>
16#include <QStringList> 29#include <QStringLiteral>
30#include <QSurfaceFormat>
31#include <QTimer>
17#include <QWindow> 32#include <QWindow>
33#include <QtCore/qobjectdefs.h>
18 34
19#ifdef HAS_OPENGL 35#ifdef HAS_OPENGL
20#include <QOffscreenSurface> 36#include <QOffscreenSurface>
21#include <QOpenGLContext> 37#include <QOpenGLContext>
22#endif 38#endif
23 39
24#if !defined(WIN32)
25#include <qpa/qplatformnativeinterface.h>
26#endif
27
28#include <fmt/format.h>
29
30#include "common/assert.h"
31#include "common/microprofile.h" 40#include "common/microprofile.h"
41#include "common/polyfill_thread.h"
32#include "common/scm_rev.h" 42#include "common/scm_rev.h"
33#include "common/settings.h" 43#include "common/settings.h"
44#include "common/settings_input.h"
45#include "common/thread.h"
34#include "core/core.h" 46#include "core/core.h"
35#include "core/cpu_manager.h" 47#include "core/cpu_manager.h"
36#include "core/frontend/framebuffer_layout.h" 48#include "core/frontend/framebuffer_layout.h"
@@ -40,11 +52,16 @@
40#include "input_common/drivers/tas_input.h" 52#include "input_common/drivers/tas_input.h"
41#include "input_common/drivers/touch_screen.h" 53#include "input_common/drivers/touch_screen.h"
42#include "input_common/main.h" 54#include "input_common/main.h"
55#include "video_core/gpu.h"
56#include "video_core/rasterizer_interface.h"
43#include "video_core/renderer_base.h" 57#include "video_core/renderer_base.h"
44#include "yuzu/bootmanager.h" 58#include "yuzu/bootmanager.h"
45#include "yuzu/main.h" 59#include "yuzu/main.h"
60#include "yuzu/qt_common.h"
46 61
47static Core::Frontend::WindowSystemType GetWindowSystemType(); 62class QObject;
63class QPaintEngine;
64class QSurface;
48 65
49EmuThread::EmuThread(Core::System& system) : m_system{system} {} 66EmuThread::EmuThread(Core::System& system) : m_system{system} {}
50 67
@@ -154,7 +171,10 @@ public:
154 171
155 // disable vsync for any shared contexts 172 // disable vsync for any shared contexts
156 auto format = share_context->format(); 173 auto format = share_context->format();
157 format.setSwapInterval(main_surface ? Settings::values.use_vsync.GetValue() : 0); 174 const int swap_interval =
175 Settings::values.vsync_mode.GetValue() == Settings::VSyncMode::Immediate ? 0 : 1;
176
177 format.setSwapInterval(main_surface ? swap_interval : 0);
158 178
159 context = std::make_unique<QOpenGLContext>(); 179 context = std::make_unique<QOpenGLContext>();
160 context->setShareContext(share_context); 180 context->setShareContext(share_context);
@@ -221,7 +241,7 @@ public:
221 explicit RenderWidget(GRenderWindow* parent) : QWidget(parent), render_window(parent) { 241 explicit RenderWidget(GRenderWindow* parent) : QWidget(parent), render_window(parent) {
222 setAttribute(Qt::WA_NativeWindow); 242 setAttribute(Qt::WA_NativeWindow);
223 setAttribute(Qt::WA_PaintOnScreen); 243 setAttribute(Qt::WA_PaintOnScreen);
224 if (GetWindowSystemType() == Core::Frontend::WindowSystemType::Wayland) { 244 if (QtCommon::GetWindowSystemType() == Core::Frontend::WindowSystemType::Wayland) {
225 setAttribute(Qt::WA_DontCreateNativeAncestors); 245 setAttribute(Qt::WA_DontCreateNativeAncestors);
226 } 246 }
227 } 247 }
@@ -259,46 +279,6 @@ struct NullRenderWidget : public RenderWidget {
259 explicit NullRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {} 279 explicit NullRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {}
260}; 280};
261 281
262static Core::Frontend::WindowSystemType GetWindowSystemType() {
263 // Determine WSI type based on Qt platform.
264 QString platform_name = QGuiApplication::platformName();
265 if (platform_name == QStringLiteral("windows"))
266 return Core::Frontend::WindowSystemType::Windows;
267 else if (platform_name == QStringLiteral("xcb"))
268 return Core::Frontend::WindowSystemType::X11;
269 else if (platform_name == QStringLiteral("wayland"))
270 return Core::Frontend::WindowSystemType::Wayland;
271 else if (platform_name == QStringLiteral("wayland-egl"))
272 return Core::Frontend::WindowSystemType::Wayland;
273 else if (platform_name == QStringLiteral("cocoa"))
274 return Core::Frontend::WindowSystemType::Cocoa;
275 else if (platform_name == QStringLiteral("android"))
276 return Core::Frontend::WindowSystemType::Android;
277
278 LOG_CRITICAL(Frontend, "Unknown Qt platform {}!", platform_name.toStdString());
279 return Core::Frontend::WindowSystemType::Windows;
280}
281
282static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
283 Core::Frontend::EmuWindow::WindowSystemInfo wsi;
284 wsi.type = GetWindowSystemType();
285
286 // Our Win32 Qt external doesn't have the private API.
287#if defined(WIN32) || defined(__APPLE__)
288 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
289#else
290 QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
291 wsi.display_connection = pni->nativeResourceForWindow("display", window);
292 if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
293 wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
294 else
295 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
296#endif
297 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
298
299 return wsi;
300}
301
302GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread_, 282GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread_,
303 std::shared_ptr<InputCommon::InputSubsystem> input_subsystem_, 283 std::shared_ptr<InputCommon::InputSubsystem> input_subsystem_,
304 Core::System& system_) 284 Core::System& system_)
@@ -904,7 +884,7 @@ bool GRenderWindow::InitRenderTarget() {
904 } 884 }
905 885
906 // Update the Window System information with the new render target 886 // Update the Window System information with the new render target
907 window_info = GetWindowSystemInfo(child_widget->windowHandle()); 887 window_info = QtCommon::GetWindowSystemInfo(child_widget->windowHandle());
908 888
909 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); 889 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
910 layout()->addWidget(child_widget); 890 layout()->addWidget(child_widget);
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index bb4eca07f..4276be82b 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -5,27 +5,46 @@
5 5
6#include <atomic> 6#include <atomic>
7#include <condition_variable> 7#include <condition_variable>
8#include <cstddef>
8#include <memory> 9#include <memory>
9#include <mutex> 10#include <mutex>
11#include <stop_token>
12#include <utility>
13#include <vector>
10 14
15#include <QByteArray>
11#include <QImage> 16#include <QImage>
17#include <QObject>
18#include <QPoint>
19#include <QString>
12#include <QStringList> 20#include <QStringList>
13#include <QThread> 21#include <QThread>
14#include <QTouchEvent>
15#include <QWidget> 22#include <QWidget>
23#include <qglobal.h>
24#include <qnamespace.h>
25#include <qobjectdefs.h>
16 26
27#include "common/common_types.h"
28#include "common/logging/log.h"
17#include "common/polyfill_thread.h" 29#include "common/polyfill_thread.h"
18#include "common/thread.h" 30#include "common/thread.h"
19#include "core/frontend/emu_window.h" 31#include "core/frontend/emu_window.h"
20 32
21class GRenderWindow;
22class GMainWindow; 33class GMainWindow;
23class QCamera; 34class QCamera;
24class QCameraImageCapture; 35class QCameraImageCapture;
36class QCloseEvent;
37class QFocusEvent;
25class QKeyEvent; 38class QKeyEvent;
39class QMouseEvent;
40class QObject;
41class QResizeEvent;
42class QShowEvent;
43class QTimer;
44class QTouchEvent;
45class QWheelEvent;
26 46
27namespace Core { 47namespace Core {
28enum class SystemResultStatus : u32;
29class System; 48class System;
30} // namespace Core 49} // namespace Core
31 50
@@ -40,7 +59,6 @@ enum class TasState;
40 59
41namespace VideoCore { 60namespace VideoCore {
42enum class LoadCallbackStage; 61enum class LoadCallbackStage;
43class RendererBase;
44} // namespace VideoCore 62} // namespace VideoCore
45 63
46class EmuThread final : public QThread { 64class EmuThread final : public QThread {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index bb731276e..a85eb4687 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -6,6 +6,7 @@
6#include <QSettings> 6#include <QSettings>
7#include "common/fs/fs.h" 7#include "common/fs/fs.h"
8#include "common/fs/path_util.h" 8#include "common/fs/path_util.h"
9#include "common/settings.h"
9#include "core/core.h" 10#include "core/core.h"
10#include "core/hle/service/acc/profile_manager.h" 11#include "core/hle/service/acc/profile_manager.h"
11#include "core/hle/service/hid/controllers/npad.h" 12#include "core/hle/service/hid/controllers/npad.h"
@@ -497,7 +498,7 @@ void Config::ReadCoreValues() {
497 qt_config->beginGroup(QStringLiteral("Core")); 498 qt_config->beginGroup(QStringLiteral("Core"));
498 499
499 ReadGlobalSetting(Settings::values.use_multi_core); 500 ReadGlobalSetting(Settings::values.use_multi_core);
500 ReadGlobalSetting(Settings::values.use_extended_memory_layout); 501 ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
501 502
502 qt_config->endGroup(); 503 qt_config->endGroup();
503} 504}
@@ -692,6 +693,7 @@ void Config::ReadRendererValues() {
692 qt_config->beginGroup(QStringLiteral("Renderer")); 693 qt_config->beginGroup(QStringLiteral("Renderer"));
693 694
694 ReadGlobalSetting(Settings::values.renderer_backend); 695 ReadGlobalSetting(Settings::values.renderer_backend);
696 ReadGlobalSetting(Settings::values.async_presentation);
695 ReadGlobalSetting(Settings::values.renderer_force_max_clock); 697 ReadGlobalSetting(Settings::values.renderer_force_max_clock);
696 ReadGlobalSetting(Settings::values.vulkan_device); 698 ReadGlobalSetting(Settings::values.vulkan_device);
697 ReadGlobalSetting(Settings::values.fullscreen_mode); 699 ReadGlobalSetting(Settings::values.fullscreen_mode);
@@ -708,17 +710,19 @@ void Config::ReadRendererValues() {
708 ReadGlobalSetting(Settings::values.nvdec_emulation); 710 ReadGlobalSetting(Settings::values.nvdec_emulation);
709 ReadGlobalSetting(Settings::values.accelerate_astc); 711 ReadGlobalSetting(Settings::values.accelerate_astc);
710 ReadGlobalSetting(Settings::values.async_astc); 712 ReadGlobalSetting(Settings::values.async_astc);
711 ReadGlobalSetting(Settings::values.use_vsync);
712 ReadGlobalSetting(Settings::values.shader_backend); 713 ReadGlobalSetting(Settings::values.shader_backend);
713 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 714 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
714 ReadGlobalSetting(Settings::values.use_fast_gpu_time); 715 ReadGlobalSetting(Settings::values.use_fast_gpu_time);
715 ReadGlobalSetting(Settings::values.use_pessimistic_flushes);
716 ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); 716 ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
717 ReadGlobalSetting(Settings::values.bg_red); 717 ReadGlobalSetting(Settings::values.bg_red);
718 ReadGlobalSetting(Settings::values.bg_green); 718 ReadGlobalSetting(Settings::values.bg_green);
719 ReadGlobalSetting(Settings::values.bg_blue); 719 ReadGlobalSetting(Settings::values.bg_blue);
720 720
721 if (global) { 721 if (global) {
722 Settings::values.vsync_mode.SetValue(static_cast<Settings::VSyncMode>(
723 ReadSetting(QString::fromStdString(Settings::values.vsync_mode.GetLabel()),
724 static_cast<u32>(Settings::values.vsync_mode.GetDefault()))
725 .value<u32>()));
722 ReadBasicSetting(Settings::values.renderer_debug); 726 ReadBasicSetting(Settings::values.renderer_debug);
723 ReadBasicSetting(Settings::values.renderer_shader_feedback); 727 ReadBasicSetting(Settings::values.renderer_shader_feedback);
724 ReadBasicSetting(Settings::values.enable_nsight_aftermath); 728 ReadBasicSetting(Settings::values.enable_nsight_aftermath);
@@ -1161,7 +1165,7 @@ void Config::SaveCoreValues() {
1161 qt_config->beginGroup(QStringLiteral("Core")); 1165 qt_config->beginGroup(QStringLiteral("Core"));
1162 1166
1163 WriteGlobalSetting(Settings::values.use_multi_core); 1167 WriteGlobalSetting(Settings::values.use_multi_core);
1164 WriteGlobalSetting(Settings::values.use_extended_memory_layout); 1168 WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
1165 1169
1166 qt_config->endGroup(); 1170 qt_config->endGroup();
1167} 1171}
@@ -1313,6 +1317,7 @@ void Config::SaveRendererValues() {
1313 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), 1317 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
1314 static_cast<u32>(Settings::values.renderer_backend.GetDefault()), 1318 static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
1315 Settings::values.renderer_backend.UsingGlobal()); 1319 Settings::values.renderer_backend.UsingGlobal());
1320 WriteGlobalSetting(Settings::values.async_presentation);
1316 WriteGlobalSetting(Settings::values.renderer_force_max_clock); 1321 WriteGlobalSetting(Settings::values.renderer_force_max_clock);
1317 WriteGlobalSetting(Settings::values.vulkan_device); 1322 WriteGlobalSetting(Settings::values.vulkan_device);
1318 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), 1323 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
@@ -1350,20 +1355,21 @@ void Config::SaveRendererValues() {
1350 Settings::values.nvdec_emulation.UsingGlobal()); 1355 Settings::values.nvdec_emulation.UsingGlobal());
1351 WriteGlobalSetting(Settings::values.accelerate_astc); 1356 WriteGlobalSetting(Settings::values.accelerate_astc);
1352 WriteGlobalSetting(Settings::values.async_astc); 1357 WriteGlobalSetting(Settings::values.async_astc);
1353 WriteGlobalSetting(Settings::values.use_vsync);
1354 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), 1358 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
1355 static_cast<u32>(Settings::values.shader_backend.GetValue(global)), 1359 static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
1356 static_cast<u32>(Settings::values.shader_backend.GetDefault()), 1360 static_cast<u32>(Settings::values.shader_backend.GetDefault()),
1357 Settings::values.shader_backend.UsingGlobal()); 1361 Settings::values.shader_backend.UsingGlobal());
1358 WriteGlobalSetting(Settings::values.use_asynchronous_shaders); 1362 WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
1359 WriteGlobalSetting(Settings::values.use_fast_gpu_time); 1363 WriteGlobalSetting(Settings::values.use_fast_gpu_time);
1360 WriteGlobalSetting(Settings::values.use_pessimistic_flushes);
1361 WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); 1364 WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
1362 WriteGlobalSetting(Settings::values.bg_red); 1365 WriteGlobalSetting(Settings::values.bg_red);
1363 WriteGlobalSetting(Settings::values.bg_green); 1366 WriteGlobalSetting(Settings::values.bg_green);
1364 WriteGlobalSetting(Settings::values.bg_blue); 1367 WriteGlobalSetting(Settings::values.bg_blue);
1365 1368
1366 if (global) { 1369 if (global) {
1370 WriteSetting(QString::fromStdString(Settings::values.vsync_mode.GetLabel()),
1371 static_cast<u32>(Settings::values.vsync_mode.GetValue()),
1372 static_cast<u32>(Settings::values.vsync_mode.GetDefault()));
1367 WriteBasicSetting(Settings::values.renderer_debug); 1373 WriteBasicSetting(Settings::values.renderer_debug);
1368 WriteBasicSetting(Settings::values.renderer_shader_feedback); 1374 WriteBasicSetting(Settings::values.renderer_shader_feedback);
1369 WriteBasicSetting(Settings::values.enable_nsight_aftermath); 1375 WriteBasicSetting(Settings::values.enable_nsight_aftermath);
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 207bcdc4d..26258d744 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() {
35 35
36 ui->use_multi_core->setEnabled(runtime_lock); 36 ui->use_multi_core->setEnabled(runtime_lock);
37 ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); 37 ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue());
38 ui->use_extended_memory_layout->setEnabled(runtime_lock);
39 ui->use_extended_memory_layout->setChecked(
40 Settings::values.use_extended_memory_layout.GetValue());
41 38
42 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); 39 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue());
43 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); 40 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue());
@@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() {
79void ConfigureGeneral::ApplyConfiguration() { 76void ConfigureGeneral::ApplyConfiguration() {
80 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, 77 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core,
81 use_multi_core); 78 use_multi_core);
82 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout,
83 ui->use_extended_memory_layout,
84 use_extended_memory_layout);
85 79
86 if (Settings::IsConfiguringGlobal()) { 80 if (Settings::IsConfiguringGlobal()) {
87 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); 81 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
@@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() {
141 Settings::values.use_speed_limit, use_speed_limit); 135 Settings::values.use_speed_limit, use_speed_limit);
142 ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, 136 ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core,
143 use_multi_core); 137 use_multi_core);
144 ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout,
145 Settings::values.use_extended_memory_layout,
146 use_extended_memory_layout);
147 138
148 connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { 139 connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() {
149 ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && 140 ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() &&
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h
index a090c1a3f..7ff63f425 100644
--- a/src/yuzu/configuration/configure_general.h
+++ b/src/yuzu/configuration/configure_general.h
@@ -47,7 +47,6 @@ private:
47 47
48 ConfigurationShared::CheckState use_speed_limit; 48 ConfigurationShared::CheckState use_speed_limit;
49 ConfigurationShared::CheckState use_multi_core; 49 ConfigurationShared::CheckState use_multi_core;
50 ConfigurationShared::CheckState use_extended_memory_layout;
51 50
52 const Core::System& system; 51 const Core::System& system;
53}; 52};
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index add110bb0..986a1625b 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -62,13 +62,6 @@
62 </widget> 62 </widget>
63 </item> 63 </item>
64 <item> 64 <item>
65 <widget class="QCheckBox" name="use_extended_memory_layout">
66 <property name="text">
67 <string>Extended memory layout (8GB DRAM)</string>
68 </property>
69 </widget>
70 </item>
71 <item>
72 <widget class="QCheckBox" name="toggle_check_exit"> 65 <widget class="QCheckBox" name="toggle_check_exit">
73 <property name="text"> 66 <property name="text">
74 <string>Confirm exit while emulation is running</string> 67 <string>Confirm exit while emulation is running</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index e9388daad..76e5b7499 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -4,20 +4,76 @@
4// Include this early to include Vulkan headers how we want to 4// Include this early to include Vulkan headers how we want to
5#include "video_core/vulkan_common/vulkan_wrapper.h" 5#include "video_core/vulkan_common/vulkan_wrapper.h"
6 6
7#include <algorithm>
8#include <iosfwd>
9#include <iterator>
10#include <string>
11#include <tuple>
12#include <utility>
13#include <vector>
14#include <QBoxLayout>
15#include <QCheckBox>
7#include <QColorDialog> 16#include <QColorDialog>
8#include <QVulkanInstance> 17#include <QComboBox>
18#include <QIcon>
19#include <QLabel>
20#include <QPixmap>
21#include <QPushButton>
22#include <QSlider>
23#include <QStringLiteral>
24#include <QtCore/qobjectdefs.h>
25#include <qcoreevent.h>
26#include <qglobal.h>
27#include <vulkan/vulkan_core.h>
9 28
10#include "common/common_types.h" 29#include "common/common_types.h"
30#include "common/dynamic_library.h"
11#include "common/logging/log.h" 31#include "common/logging/log.h"
12#include "common/settings.h" 32#include "common/settings.h"
13#include "core/core.h" 33#include "core/core.h"
14#include "ui_configure_graphics.h" 34#include "ui_configure_graphics.h"
15#include "video_core/vulkan_common/vulkan_instance.h" 35#include "video_core/vulkan_common/vulkan_instance.h"
16#include "video_core/vulkan_common/vulkan_library.h" 36#include "video_core/vulkan_common/vulkan_library.h"
37#include "video_core/vulkan_common/vulkan_surface.h"
17#include "yuzu/configuration/configuration_shared.h" 38#include "yuzu/configuration/configuration_shared.h"
18#include "yuzu/configuration/configure_graphics.h" 39#include "yuzu/configuration/configure_graphics.h"
40#include "yuzu/qt_common.h"
19#include "yuzu/uisettings.h" 41#include "yuzu/uisettings.h"
20 42
43static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR,
44 VK_PRESENT_MODE_FIFO_KHR};
45
46// Converts a setting to a present mode (or vice versa)
47static constexpr VkPresentModeKHR VSyncSettingToMode(Settings::VSyncMode mode) {
48 switch (mode) {
49 case Settings::VSyncMode::Immediate:
50 return VK_PRESENT_MODE_IMMEDIATE_KHR;
51 case Settings::VSyncMode::Mailbox:
52 return VK_PRESENT_MODE_MAILBOX_KHR;
53 case Settings::VSyncMode::FIFO:
54 return VK_PRESENT_MODE_FIFO_KHR;
55 case Settings::VSyncMode::FIFORelaxed:
56 return VK_PRESENT_MODE_FIFO_RELAXED_KHR;
57 default:
58 return VK_PRESENT_MODE_FIFO_KHR;
59 }
60}
61
62static constexpr Settings::VSyncMode PresentModeToSetting(VkPresentModeKHR mode) {
63 switch (mode) {
64 case VK_PRESENT_MODE_IMMEDIATE_KHR:
65 return Settings::VSyncMode::Immediate;
66 case VK_PRESENT_MODE_MAILBOX_KHR:
67 return Settings::VSyncMode::Mailbox;
68 case VK_PRESENT_MODE_FIFO_KHR:
69 return Settings::VSyncMode::FIFO;
70 case VK_PRESENT_MODE_FIFO_RELAXED_KHR:
71 return Settings::VSyncMode::FIFORelaxed;
72 default:
73 return Settings::VSyncMode::FIFO;
74 }
75}
76
21ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* parent) 77ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* parent)
22 : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, system{system_} { 78 : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, system{system_} {
23 vulkan_device = Settings::values.vulkan_device.GetValue(); 79 vulkan_device = Settings::values.vulkan_device.GetValue();
@@ -39,13 +95,16 @@ ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* paren
39 95
40 connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] { 96 connect(ui->api, qOverload<int>(&QComboBox::currentIndexChanged), this, [this] {
41 UpdateAPILayout(); 97 UpdateAPILayout();
98 PopulateVSyncModeSelection();
42 if (!Settings::IsConfiguringGlobal()) { 99 if (!Settings::IsConfiguringGlobal()) {
43 ConfigurationShared::SetHighlight( 100 ConfigurationShared::SetHighlight(
44 ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); 101 ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX);
45 } 102 }
46 }); 103 });
47 connect(ui->device, qOverload<int>(&QComboBox::activated), this, 104 connect(ui->device, qOverload<int>(&QComboBox::activated), this, [this](int device) {
48 [this](int device) { UpdateDeviceSelection(device); }); 105 UpdateDeviceSelection(device);
106 PopulateVSyncModeSelection();
107 });
49 connect(ui->backend, qOverload<int>(&QComboBox::activated), this, 108 connect(ui->backend, qOverload<int>(&QComboBox::activated), this,
50 [this](int backend) { UpdateShaderBackendSelection(backend); }); 109 [this](int backend) { UpdateShaderBackendSelection(backend); });
51 110
@@ -70,6 +129,43 @@ ConfigureGraphics::ConfigureGraphics(const Core::System& system_, QWidget* paren
70 ui->fsr_sharpening_label->setVisible(Settings::IsConfiguringGlobal()); 129 ui->fsr_sharpening_label->setVisible(Settings::IsConfiguringGlobal());
71} 130}
72 131
132void ConfigureGraphics::PopulateVSyncModeSelection() {
133 const Settings::RendererBackend backend{GetCurrentGraphicsBackend()};
134 if (backend == Settings::RendererBackend::Null) {
135 ui->vsync_mode_combobox->setEnabled(false);
136 return;
137 }
138 ui->vsync_mode_combobox->setEnabled(true);
139
140 const int current_index = //< current selected vsync mode from combobox
141 ui->vsync_mode_combobox->currentIndex();
142 const auto current_mode = //< current selected vsync mode as a VkPresentModeKHR
143 current_index == -1 ? VSyncSettingToMode(Settings::values.vsync_mode.GetValue())
144 : vsync_mode_combobox_enum_map[current_index];
145 int index{};
146 const int device{ui->device->currentIndex()}; //< current selected Vulkan device
147 const auto& present_modes = //< relevant vector of present modes for the selected device or API
148 backend == Settings::RendererBackend::Vulkan ? device_present_modes[device]
149 : default_present_modes;
150
151 ui->vsync_mode_combobox->clear();
152 vsync_mode_combobox_enum_map.clear();
153 vsync_mode_combobox_enum_map.reserve(present_modes.size());
154 for (const auto present_mode : present_modes) {
155 const auto mode_name = TranslateVSyncMode(present_mode, backend);
156 if (mode_name.isEmpty()) {
157 continue;
158 }
159
160 ui->vsync_mode_combobox->insertItem(index, mode_name);
161 vsync_mode_combobox_enum_map.push_back(present_mode);
162 if (present_mode == current_mode) {
163 ui->vsync_mode_combobox->setCurrentIndex(index);
164 }
165 index++;
166 }
167}
168
73void ConfigureGraphics::UpdateDeviceSelection(int device) { 169void ConfigureGraphics::UpdateDeviceSelection(int device) {
74 if (device == -1) { 170 if (device == -1) {
75 return; 171 return;
@@ -99,6 +195,9 @@ void ConfigureGraphics::SetConfiguration() {
99 ui->nvdec_emulation_widget->setEnabled(runtime_lock); 195 ui->nvdec_emulation_widget->setEnabled(runtime_lock);
100 ui->resolution_combobox->setEnabled(runtime_lock); 196 ui->resolution_combobox->setEnabled(runtime_lock);
101 ui->accelerate_astc->setEnabled(runtime_lock); 197 ui->accelerate_astc->setEnabled(runtime_lock);
198 ui->vsync_mode_layout->setEnabled(runtime_lock ||
199 Settings::values.renderer_backend.GetValue() ==
200 Settings::RendererBackend::Vulkan);
102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); 201 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
103 ui->use_asynchronous_gpu_emulation->setChecked( 202 ui->use_asynchronous_gpu_emulation->setChecked(
104 Settings::values.use_asynchronous_gpu_emulation.GetValue()); 203 Settings::values.use_asynchronous_gpu_emulation.GetValue());
@@ -170,7 +269,24 @@ void ConfigureGraphics::SetConfiguration() {
170 Settings::values.bg_green.GetValue(), 269 Settings::values.bg_green.GetValue(),
171 Settings::values.bg_blue.GetValue())); 270 Settings::values.bg_blue.GetValue()));
172 UpdateAPILayout(); 271 UpdateAPILayout();
272 PopulateVSyncModeSelection(); //< must happen after UpdateAPILayout
173 SetFSRIndicatorText(ui->fsr_sharpening_slider->sliderPosition()); 273 SetFSRIndicatorText(ui->fsr_sharpening_slider->sliderPosition());
274
275 // VSync setting needs to be determined after populating the VSync combobox
276 if (Settings::IsConfiguringGlobal()) {
277 const auto vsync_mode_setting = Settings::values.vsync_mode.GetValue();
278 const auto vsync_mode = VSyncSettingToMode(vsync_mode_setting);
279 int index{};
280 for (const auto mode : vsync_mode_combobox_enum_map) {
281 if (mode == vsync_mode) {
282 break;
283 }
284 index++;
285 }
286 if (static_cast<unsigned long>(index) < vsync_mode_combobox_enum_map.size()) {
287 ui->vsync_mode_combobox->setCurrentIndex(index);
288 }
289 }
174} 290}
175 291
176void ConfigureGraphics::SetFSRIndicatorText(int percentage) { 292void ConfigureGraphics::SetFSRIndicatorText(int percentage) {
@@ -178,6 +294,27 @@ void ConfigureGraphics::SetFSRIndicatorText(int percentage) {
178 tr("%1%", "FSR sharpening percentage (e.g. 50%)").arg(100 - (percentage / 2))); 294 tr("%1%", "FSR sharpening percentage (e.g. 50%)").arg(100 - (percentage / 2)));
179} 295}
180 296
297const QString ConfigureGraphics::TranslateVSyncMode(VkPresentModeKHR mode,
298 Settings::RendererBackend backend) const {
299 switch (mode) {
300 case VK_PRESENT_MODE_IMMEDIATE_KHR:
301 return backend == Settings::RendererBackend::OpenGL
302 ? tr("Off")
303 : QStringLiteral("Immediate (%1)").arg(tr("VSync Off"));
304 case VK_PRESENT_MODE_MAILBOX_KHR:
305 return QStringLiteral("Mailbox (%1)").arg(tr("Recommended"));
306 case VK_PRESENT_MODE_FIFO_KHR:
307 return backend == Settings::RendererBackend::OpenGL
308 ? tr("On")
309 : QStringLiteral("FIFO (%1)").arg(tr("VSync On"));
310 case VK_PRESENT_MODE_FIFO_RELAXED_KHR:
311 return QStringLiteral("FIFO Relaxed");
312 default:
313 return {};
314 break;
315 }
316}
317
181void ConfigureGraphics::ApplyConfiguration() { 318void ConfigureGraphics::ApplyConfiguration() {
182 const auto resolution_setup = static_cast<Settings::ResolutionSetup>( 319 const auto resolution_setup = static_cast<Settings::ResolutionSetup>(
183 ui->resolution_combobox->currentIndex() - 320 ui->resolution_combobox->currentIndex() -
@@ -232,6 +369,10 @@ void ConfigureGraphics::ApplyConfiguration() {
232 Settings::values.anti_aliasing.SetValue(anti_aliasing); 369 Settings::values.anti_aliasing.SetValue(anti_aliasing);
233 } 370 }
234 Settings::values.fsr_sharpening_slider.SetValue(ui->fsr_sharpening_slider->value()); 371 Settings::values.fsr_sharpening_slider.SetValue(ui->fsr_sharpening_slider->value());
372
373 const auto mode = vsync_mode_combobox_enum_map[ui->vsync_mode_combobox->currentIndex()];
374 const auto vsync_mode = PresentModeToSetting(mode);
375 Settings::values.vsync_mode.SetValue(vsync_mode);
235 } else { 376 } else {
236 if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { 377 if (ui->resolution_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
237 Settings::values.resolution_setup.SetGlobal(true); 378 Settings::values.resolution_setup.SetGlobal(true);
@@ -345,7 +486,9 @@ void ConfigureGraphics::UpdateAPILayout() {
345 ui->backend_widget->setVisible(true); 486 ui->backend_widget->setVisible(true);
346 break; 487 break;
347 case Settings::RendererBackend::Vulkan: 488 case Settings::RendererBackend::Vulkan:
348 ui->device->setCurrentIndex(vulkan_device); 489 if (static_cast<int>(vulkan_device) < ui->device->count()) {
490 ui->device->setCurrentIndex(vulkan_device);
491 }
349 ui->device_widget->setVisible(true); 492 ui->device_widget->setVisible(true);
350 ui->backend_widget->setVisible(false); 493 ui->backend_widget->setVisible(false);
351 break; 494 break;
@@ -363,16 +506,27 @@ void ConfigureGraphics::RetrieveVulkanDevices() try {
363 506
364 using namespace Vulkan; 507 using namespace Vulkan;
365 508
509 auto* window = this->window()->windowHandle();
510 auto wsi = QtCommon::GetWindowSystemInfo(window);
511
366 vk::InstanceDispatch dld; 512 vk::InstanceDispatch dld;
367 const Common::DynamicLibrary library = OpenLibrary(); 513 const Common::DynamicLibrary library = OpenLibrary();
368 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_1); 514 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_1, wsi.type);
369 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); 515 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
516 vk::SurfaceKHR surface = //< needed to view present modes for a device
517 CreateSurface(instance, wsi);
370 518
371 vulkan_devices.clear(); 519 vulkan_devices.clear();
372 vulkan_devices.reserve(physical_devices.size()); 520 vulkan_devices.reserve(physical_devices.size());
521 device_present_modes.clear();
522 device_present_modes.reserve(physical_devices.size());
373 for (const VkPhysicalDevice device : physical_devices) { 523 for (const VkPhysicalDevice device : physical_devices) {
374 const std::string name = vk::PhysicalDevice(device, dld).GetProperties().deviceName; 524 const auto physical_device = vk::PhysicalDevice(device, dld);
525 const std::string name = physical_device.GetProperties().deviceName;
526 const std::vector<VkPresentModeKHR> present_modes =
527 physical_device.GetSurfacePresentModesKHR(*surface);
375 vulkan_devices.push_back(QString::fromStdString(name)); 528 vulkan_devices.push_back(QString::fromStdString(name));
529 device_present_modes.push_back(present_modes);
376 } 530 }
377} catch (const Vulkan::vk::Exception& exception) { 531} catch (const Vulkan::vk::Exception& exception) {
378 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); 532 LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what());
@@ -465,4 +619,6 @@ void ConfigureGraphics::SetupPerGameUI() {
465 ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); 619 ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
466 ConfigurationShared::InsertGlobalItem( 620 ConfigurationShared::InsertGlobalItem(
467 ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true))); 621 ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
622
623 ui->vsync_mode_layout->setVisible(false);
468} 624}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index d98d6624e..901f604a5 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -5,9 +5,21 @@
5 5
6#include <memory> 6#include <memory>
7#include <vector> 7#include <vector>
8#include <QColor>
8#include <QString> 9#include <QString>
9#include <QWidget> 10#include <QWidget>
10#include "common/settings.h" 11#include <qobjectdefs.h>
12#include <vulkan/vulkan_core.h>
13#include "common/common_types.h"
14
15class QEvent;
16class QObject;
17
18namespace Settings {
19enum class NvdecEmulation : u32;
20enum class RendererBackend : u32;
21enum class ShaderBackend : u32;
22} // namespace Settings
11 23
12namespace Core { 24namespace Core {
13class System; 25class System;
@@ -35,6 +47,7 @@ private:
35 void changeEvent(QEvent* event) override; 47 void changeEvent(QEvent* event) override;
36 void RetranslateUI(); 48 void RetranslateUI();
37 49
50 void PopulateVSyncModeSelection();
38 void UpdateBackgroundColorButton(QColor color); 51 void UpdateBackgroundColorButton(QColor color);
39 void UpdateAPILayout(); 52 void UpdateAPILayout();
40 void UpdateDeviceSelection(int device); 53 void UpdateDeviceSelection(int device);
@@ -43,6 +56,10 @@ private:
43 void RetrieveVulkanDevices(); 56 void RetrieveVulkanDevices();
44 57
45 void SetFSRIndicatorText(int percentage); 58 void SetFSRIndicatorText(int percentage);
59 /* Turns a Vulkan present mode into a textual string for a UI
60 * (and eventually for a human to read) */
61 const QString TranslateVSyncMode(VkPresentModeKHR mode,
62 Settings::RendererBackend backend) const;
46 63
47 void SetupPerGameUI(); 64 void SetupPerGameUI();
48 65
@@ -58,6 +75,10 @@ private:
58 ConfigurationShared::CheckState use_asynchronous_gpu_emulation; 75 ConfigurationShared::CheckState use_asynchronous_gpu_emulation;
59 76
60 std::vector<QString> vulkan_devices; 77 std::vector<QString> vulkan_devices;
78 std::vector<std::vector<VkPresentModeKHR>> device_present_modes;
79 std::vector<VkPresentModeKHR>
80 vsync_mode_combobox_enum_map; //< Keeps track of which present mode corresponds to which
81 // selection in the combobox
61 u32 vulkan_device{}; 82 u32 vulkan_device{};
62 Settings::ShaderBackend shader_backend{}; 83 Settings::ShaderBackend shader_backend{};
63 84
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index a45ec69ec..39f70e406 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -189,6 +189,44 @@
189 </widget> 189 </widget>
190 </item> 190 </item>
191 <item> 191 <item>
192 <widget class="QWidget" name="vsync_mode_layout" native="true">
193 <layout class="QHBoxLayout" name="horizontalLayout_4">
194 <property name="leftMargin">
195 <number>0</number>
196 </property>
197 <property name="topMargin">
198 <number>0</number>
199 </property>
200 <property name="rightMargin">
201 <number>0</number>
202 </property>
203 <property name="bottomMargin">
204 <number>0</number>
205 </property>
206 <item>
207 <widget class="QLabel" name="vsync_mode_label">
208 <property name="text">
209 <string>VSync Mode:</string>
210 </property>
211 </widget>
212 </item>
213 <item>
214 <widget class="QComboBox" name="vsync_mode_combobox">
215 <property name="toolTip">
216 <string>FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen refresh rate.
217FIFO Relaxed is similar to FIFO but allows tearing as it recovers from a slow down.
218Mailbox can have lower latency than FIFO and does not tear but may drop frames.
219Immediate (no synchronization) just presents whatever is available and can exhibit tearing.</string>
220 </property>
221 <property name="currentText">
222 <string/>
223 </property>
224 </widget>
225 </item>
226 </layout>
227 </widget>
228 </item>
229 <item>
192 <widget class="QWidget" name="nvdec_emulation_widget" native="true"> 230 <widget class="QWidget" name="nvdec_emulation_widget" native="true">
193 <layout class="QHBoxLayout" name="nvdec_emulation_layout"> 231 <layout class="QHBoxLayout" name="nvdec_emulation_layout">
194 <property name="leftMargin"> 232 <property name="leftMargin">
@@ -366,7 +404,7 @@
366 </item> 404 </item>
367 <item> 405 <item>
368 <property name="text"> 406 <property name="text">
369 <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string> 407 <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string>
370 </property> 408 </property>
371 </item> 409 </item>
372 <item> 410 <item>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 59fb1b334..005b022ca 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -21,18 +21,17 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
21 21
22void ConfigureGraphicsAdvanced::SetConfiguration() { 22void ConfigureGraphicsAdvanced::SetConfiguration() {
23 const bool runtime_lock = !system.IsPoweredOn(); 23 const bool runtime_lock = !system.IsPoweredOn();
24 ui->use_vsync->setEnabled(runtime_lock); 24 ui->async_present->setEnabled(runtime_lock);
25 ui->renderer_force_max_clock->setEnabled(runtime_lock); 25 ui->renderer_force_max_clock->setEnabled(runtime_lock);
26 ui->async_astc->setEnabled(runtime_lock); 26 ui->async_astc->setEnabled(runtime_lock);
27 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 27 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
28 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 28 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
29 29
30 ui->async_present->setChecked(Settings::values.async_presentation.GetValue());
30 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); 31 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
31 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
32 ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); 32 ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
33 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 33 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
34 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 34 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
35 ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
36 ui->use_vulkan_driver_pipeline_cache->setChecked( 35 ui->use_vulkan_driver_pipeline_cache->setChecked(
37 Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); 36 Settings::values.use_vulkan_driver_pipeline_cache.GetValue());
38 37
@@ -54,12 +53,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
54 53
55void ConfigureGraphicsAdvanced::ApplyConfiguration() { 54void ConfigureGraphicsAdvanced::ApplyConfiguration() {
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); 55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
57 ui->async_present, async_present);
57 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, 58 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
58 ui->renderer_force_max_clock, 59 ui->renderer_force_max_clock,
59 renderer_force_max_clock); 60 renderer_force_max_clock);
60 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, 61 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
61 ui->anisotropic_filtering_combobox); 62 ui->anisotropic_filtering_combobox);
62 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
63 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, 63 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
64 async_astc); 64 async_astc);
65 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 65 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
@@ -67,8 +67,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
67 use_asynchronous_shaders); 67 use_asynchronous_shaders);
68 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, 68 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
69 ui->use_fast_gpu_time, use_fast_gpu_time); 69 ui->use_fast_gpu_time, use_fast_gpu_time);
70 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes,
71 ui->use_pessimistic_flushes, use_pessimistic_flushes);
72 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, 70 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache,
73 ui->use_vulkan_driver_pipeline_cache, 71 ui->use_vulkan_driver_pipeline_cache,
74 use_vulkan_driver_pipeline_cache); 72 use_vulkan_driver_pipeline_cache);
@@ -90,15 +88,13 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
90 // Disable if not global (only happens during game) 88 // Disable if not global (only happens during game)
91 if (Settings::IsConfiguringGlobal()) { 89 if (Settings::IsConfiguringGlobal()) {
92 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); 90 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
91 ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal());
93 ui->renderer_force_max_clock->setEnabled( 92 ui->renderer_force_max_clock->setEnabled(
94 Settings::values.renderer_force_max_clock.UsingGlobal()); 93 Settings::values.renderer_force_max_clock.UsingGlobal());
95 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
96 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); 94 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
97 ui->use_asynchronous_shaders->setEnabled( 95 ui->use_asynchronous_shaders->setEnabled(
98 Settings::values.use_asynchronous_shaders.UsingGlobal()); 96 Settings::values.use_asynchronous_shaders.UsingGlobal());
99 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 97 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
100 ui->use_pessimistic_flushes->setEnabled(
101 Settings::values.use_pessimistic_flushes.UsingGlobal());
102 ui->use_vulkan_driver_pipeline_cache->setEnabled( 98 ui->use_vulkan_driver_pipeline_cache->setEnabled(
103 Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); 99 Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal());
104 ui->anisotropic_filtering_combobox->setEnabled( 100 ui->anisotropic_filtering_combobox->setEnabled(
@@ -107,10 +103,11 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
107 return; 103 return;
108 } 104 }
109 105
106 ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation,
107 async_present);
110 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, 108 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
111 Settings::values.renderer_force_max_clock, 109 Settings::values.renderer_force_max_clock,
112 renderer_force_max_clock); 110 renderer_force_max_clock);
113 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
114 ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc, 111 ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc,
115 async_astc); 112 async_astc);
116 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, 113 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
@@ -118,9 +115,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
118 use_asynchronous_shaders); 115 use_asynchronous_shaders);
119 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, 116 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
120 Settings::values.use_fast_gpu_time, use_fast_gpu_time); 117 Settings::values.use_fast_gpu_time, use_fast_gpu_time);
121 ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes,
122 Settings::values.use_pessimistic_flushes,
123 use_pessimistic_flushes);
124 ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, 118 ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache,
125 Settings::values.use_vulkan_driver_pipeline_cache, 119 Settings::values.use_vulkan_driver_pipeline_cache,
126 use_vulkan_driver_pipeline_cache); 120 use_vulkan_driver_pipeline_cache);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index bf1b04749..ff5060957 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -36,12 +36,12 @@ private:
36 36
37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; 37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
38 38
39 ConfigurationShared::CheckState async_present;
39 ConfigurationShared::CheckState renderer_force_max_clock; 40 ConfigurationShared::CheckState renderer_force_max_clock;
40 ConfigurationShared::CheckState use_vsync; 41 ConfigurationShared::CheckState use_vsync;
41 ConfigurationShared::CheckState async_astc; 42 ConfigurationShared::CheckState async_astc;
42 ConfigurationShared::CheckState use_asynchronous_shaders; 43 ConfigurationShared::CheckState use_asynchronous_shaders;
43 ConfigurationShared::CheckState use_fast_gpu_time; 44 ConfigurationShared::CheckState use_fast_gpu_time;
44 ConfigurationShared::CheckState use_pessimistic_flushes;
45 ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; 45 ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
46 46
47 const Core::System& system; 47 const Core::System& system;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index a7dbdc18c..d073fe9b1 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -7,7 +7,7 @@
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>404</width> 9 <width>404</width>
10 <height>321</height> 10 <height>376</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="windowTitle"> 13 <property name="windowTitle">
@@ -70,22 +70,19 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QCheckBox" name="renderer_force_max_clock"> 73 <widget class="QCheckBox" name="async_present">
74 <property name="toolTip">
75 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
76 </property>
77 <property name="text"> 74 <property name="text">
78 <string>Force maximum clocks (Vulkan only)</string> 75 <string>Enable asynchronous presentation (Vulkan only)</string>
79 </property> 76 </property>
80 </widget> 77 </widget>
81 </item> 78 </item>
82 <item> 79 <item>
83 <widget class="QCheckBox" name="use_vsync"> 80 <widget class="QCheckBox" name="renderer_force_max_clock">
84 <property name="toolTip"> 81 <property name="toolTip">
85 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> 82 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
86 </property> 83 </property>
87 <property name="text"> 84 <property name="text">
88 <string>Use VSync</string> 85 <string>Force maximum clocks (Vulkan only)</string>
89 </property> 86 </property>
90 </widget> 87 </widget>
91 </item> 88 </item>
@@ -112,7 +109,7 @@
112 <item> 109 <item>
113 <widget class="QCheckBox" name="use_fast_gpu_time"> 110 <widget class="QCheckBox" name="use_fast_gpu_time">
114 <property name="toolTip"> 111 <property name="toolTip">
115 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> 112 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
116 </property> 113 </property>
117 <property name="text"> 114 <property name="text">
118 <string>Use Fast GPU Time (Hack)</string> 115 <string>Use Fast GPU Time (Hack)</string>
@@ -120,19 +117,9 @@
120 </widget> 117 </widget>
121 </item> 118 </item>
122 <item> 119 <item>
123 <widget class="QCheckBox" name="use_pessimistic_flushes">
124 <property name="toolTip">
125 <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
126 </property>
127 <property name="text">
128 <string>Use pessimistic buffer flushes (Hack)</string>
129 </property>
130 </widget>
131 </item>
132 <item>
133 <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> 120 <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
134 <property name="toolTip"> 121 <property name="toolTip">
135 <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> 122 <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
136 </property> 123 </property>
137 <property name="text"> 124 <property name="text">
138 <string>Use Vulkan pipeline cache</string> 125 <string>Use Vulkan pipeline cache</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 50b62293e..561a08dc5 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -8,6 +8,7 @@
8#include <QInputDialog> 8#include <QInputDialog>
9#include <QMenu> 9#include <QMenu>
10#include <QMessageBox> 10#include <QMessageBox>
11#include <QMouseEvent>
11#include <QTimer> 12#include <QTimer>
12#include "common/assert.h" 13#include "common/assert.h"
13#include "common/param_package.h" 14#include "common/param_package.h"
@@ -206,7 +207,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) {
206 } 207 }
207 if (param.Has("axis")) { 208 if (param.Has("axis")) {
208 const QString axis = QString::fromStdString(param.Get("axis", "")); 209 const QString axis = QString::fromStdString(param.Get("axis", ""));
209 return QObject::tr("%1%2Axis %3").arg(toggle, invert, axis); 210 return QObject::tr("%1%2%3Axis %4").arg(toggle, inverted, invert, axis);
210 } 211 }
211 if (param.Has("axis_x") && param.Has("axis_y") && param.Has("axis_z")) { 212 if (param.Has("axis_x") && param.Has("axis_y") && param.Has("axis_z")) {
212 const QString axis_x = QString::fromStdString(param.Get("axis_x", "")); 213 const QString axis_x = QString::fromStdString(param.Get("axis_x", ""));
@@ -229,7 +230,7 @@ QString ConfigureInputPlayer::ButtonToText(const Common::ParamPackage& param) {
229 return QObject::tr("%1%2%3Hat %4").arg(turbo, toggle, inverted, button_name); 230 return QObject::tr("%1%2%3Hat %4").arg(turbo, toggle, inverted, button_name);
230 } 231 }
231 if (param.Has("axis")) { 232 if (param.Has("axis")) {
232 return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name); 233 return QObject::tr("%1%2%3Axis %4").arg(toggle, inverted, invert, button_name);
233 } 234 }
234 if (param.Has("motion")) { 235 if (param.Has("motion")) {
235 return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name); 236 return QObject::tr("%1%2Axis %3").arg(toggle, inverted, button_name);
@@ -410,6 +411,12 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
410 button_map[button_id]->setText(ButtonToText(param)); 411 button_map[button_id]->setText(ButtonToText(param));
411 emulated_controller->SetButtonParam(button_id, param); 412 emulated_controller->SetButtonParam(button_id, param);
412 }); 413 });
414 context_menu.addAction(tr("Invert button"), [&] {
415 const bool invert_value = !param.Get("inverted", false);
416 param.Set("inverted", invert_value);
417 button_map[button_id]->setText(ButtonToText(param));
418 emulated_controller->SetButtonParam(button_id, param);
419 });
413 context_menu.addAction(tr("Set threshold"), [&] { 420 context_menu.addAction(tr("Set threshold"), [&] {
414 const int button_threshold = 421 const int button_threshold =
415 static_cast<int>(param.Get("threshold", 0.5f) * 100.0f); 422 static_cast<int>(param.Get("threshold", 0.5f) * 100.0f);
diff --git a/src/yuzu/configuration/configure_input_player_widget.cpp b/src/yuzu/configuration/configure_input_player_widget.cpp
index c287220fc..fe1ee2289 100644
--- a/src/yuzu/configuration/configure_input_player_widget.cpp
+++ b/src/yuzu/configuration/configure_input_player_widget.cpp
@@ -180,6 +180,10 @@ void PlayerControlPreview::ControllerUpdate(Core::HID::ControllerTriggerType typ
180 battery_values = controller->GetBatteryValues(); 180 battery_values = controller->GetBatteryValues();
181 needs_redraw = true; 181 needs_redraw = true;
182 break; 182 break;
183 case Core::HID::ControllerTriggerType::Motion:
184 motion_values = controller->GetMotions();
185 needs_redraw = true;
186 break;
183 default: 187 default:
184 break; 188 break;
185 } 189 }
@@ -313,6 +317,15 @@ void PlayerControlPreview::DrawLeftController(QPainter& p, const QPointF center)
313 DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0)); 317 DrawRawJoystick(p, center + QPointF(-140, 90), QPointF(0, 0));
314 } 318 }
315 319
320 {
321 // Draw motion cubes
322 using namespace Settings::NativeMotion;
323 p.setPen(colors.outline);
324 p.setBrush(colors.transparent);
325 Draw3dCube(p, center + QPointF(-140, 90),
326 motion_values[Settings::NativeMotion::MotionLeft].euler, 20.0f);
327 }
328
316 using namespace Settings::NativeButton; 329 using namespace Settings::NativeButton;
317 330
318 // D-pad constants 331 // D-pad constants
@@ -435,6 +448,15 @@ void PlayerControlPreview::DrawRightController(QPainter& p, const QPointF center
435 DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90)); 448 DrawRawJoystick(p, QPointF(0, 0), center + QPointF(140, 90));
436 } 449 }
437 450
451 {
452 // Draw motion cubes
453 using namespace Settings::NativeMotion;
454 p.setPen(colors.outline);
455 p.setBrush(colors.transparent);
456 Draw3dCube(p, center + QPointF(140, 90),
457 motion_values[Settings::NativeMotion::MotionRight].euler, 20.0f);
458 }
459
438 using namespace Settings::NativeButton; 460 using namespace Settings::NativeButton;
439 461
440 // Face buttons constants 462 // Face buttons constants
@@ -555,6 +577,17 @@ void PlayerControlPreview::DrawDualController(QPainter& p, const QPointF center)
555 DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90)); 577 DrawRawJoystick(p, center + QPointF(-180, 90), center + QPointF(180, 90));
556 } 578 }
557 579
580 {
581 // Draw motion cubes
582 using namespace Settings::NativeMotion;
583 p.setPen(colors.outline);
584 p.setBrush(colors.transparent);
585 Draw3dCube(p, center + QPointF(-180, -5),
586 motion_values[Settings::NativeMotion::MotionLeft].euler, 20.0f);
587 Draw3dCube(p, center + QPointF(180, -5),
588 motion_values[Settings::NativeMotion::MotionRight].euler, 20.0f);
589 }
590
558 using namespace Settings::NativeButton; 591 using namespace Settings::NativeButton;
559 592
560 // Face buttons constants 593 // Face buttons constants
@@ -647,6 +680,15 @@ void PlayerControlPreview::DrawHandheldController(QPainter& p, const QPointF cen
647 DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0)); 680 DrawRawJoystick(p, center + QPointF(-50, 0), center + QPointF(50, 0));
648 } 681 }
649 682
683 {
684 // Draw motion cubes
685 using namespace Settings::NativeMotion;
686 p.setPen(colors.outline);
687 p.setBrush(colors.transparent);
688 Draw3dCube(p, center + QPointF(0, -115),
689 motion_values[Settings::NativeMotion::MotionLeft].euler, 15.0f);
690 }
691
650 using namespace Settings::NativeButton; 692 using namespace Settings::NativeButton;
651 693
652 // Face buttons constants 694 // Face buttons constants
@@ -750,6 +792,15 @@ void PlayerControlPreview::DrawProController(QPainter& p, const QPointF center)
750 DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105)); 792 DrawRawJoystick(p, center + QPointF(-50, 105), center + QPointF(50, 105));
751 } 793 }
752 794
795 {
796 // Draw motion cubes
797 using namespace Settings::NativeMotion;
798 p.setPen(colors.button);
799 p.setBrush(colors.transparent);
800 Draw3dCube(p, center + QPointF(0, -100),
801 motion_values[Settings::NativeMotion::MotionLeft].euler, 15.0f);
802 }
803
753 using namespace Settings::NativeButton; 804 using namespace Settings::NativeButton;
754 805
755 // Face buttons constants 806 // Face buttons constants
@@ -2871,6 +2922,46 @@ void PlayerControlPreview::DrawArrow(QPainter& p, const QPointF center, const Di
2871 DrawPolygon(p, arrow_symbol); 2922 DrawPolygon(p, arrow_symbol);
2872} 2923}
2873 2924
2925// Draw motion functions
2926void PlayerControlPreview::Draw3dCube(QPainter& p, QPointF center, const Common::Vec3f& euler,
2927 float size) {
2928 std::array<Common::Vec3f, 8> cube{
2929 Common::Vec3f{-1, -1, -1},
2930 {-1, 1, -1},
2931 {1, 1, -1},
2932 {1, -1, -1},
2933 {-1, -1, 1},
2934 {-1, 1, 1},
2935 {1, 1, 1},
2936 {1, -1, 1},
2937 };
2938
2939 for (Common::Vec3f& point : cube) {
2940 point.RotateFromOrigin(euler.x, euler.y, euler.z);
2941 point *= size;
2942 }
2943
2944 const std::array<QPointF, 4> front_face{
2945 center + QPointF{cube[0].x, cube[0].y},
2946 center + QPointF{cube[1].x, cube[1].y},
2947 center + QPointF{cube[2].x, cube[2].y},
2948 center + QPointF{cube[3].x, cube[3].y},
2949 };
2950 const std::array<QPointF, 4> back_face{
2951 center + QPointF{cube[4].x, cube[4].y},
2952 center + QPointF{cube[5].x, cube[5].y},
2953 center + QPointF{cube[6].x, cube[6].y},
2954 center + QPointF{cube[7].x, cube[7].y},
2955 };
2956
2957 DrawPolygon(p, front_face);
2958 DrawPolygon(p, back_face);
2959 p.drawLine(center + QPointF{cube[0].x, cube[0].y}, center + QPointF{cube[4].x, cube[4].y});
2960 p.drawLine(center + QPointF{cube[1].x, cube[1].y}, center + QPointF{cube[5].x, cube[5].y});
2961 p.drawLine(center + QPointF{cube[2].x, cube[2].y}, center + QPointF{cube[6].x, cube[6].y});
2962 p.drawLine(center + QPointF{cube[3].x, cube[3].y}, center + QPointF{cube[7].x, cube[7].y});
2963}
2964
2874template <size_t N> 2965template <size_t N>
2875void PlayerControlPreview::DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon) { 2966void PlayerControlPreview::DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon) {
2876 p.drawPolygon(polygon.data(), static_cast<int>(polygon.size())); 2967 p.drawPolygon(polygon.data(), static_cast<int>(polygon.size()));
diff --git a/src/yuzu/configuration/configure_input_player_widget.h b/src/yuzu/configuration/configure_input_player_widget.h
index 267d134de..a16943c3c 100644
--- a/src/yuzu/configuration/configure_input_player_widget.h
+++ b/src/yuzu/configuration/configure_input_player_widget.h
@@ -9,6 +9,7 @@
9 9
10#include "common/input.h" 10#include "common/input.h"
11#include "common/settings_input.h" 11#include "common/settings_input.h"
12#include "common/vector_math.h"
12#include "core/hid/emulated_controller.h" 13#include "core/hid/emulated_controller.h"
13#include "core/hid/hid_types.h" 14#include "core/hid/hid_types.h"
14 15
@@ -193,6 +194,9 @@ private:
193 void DrawSymbol(QPainter& p, QPointF center, Symbol symbol, float icon_size); 194 void DrawSymbol(QPainter& p, QPointF center, Symbol symbol, float icon_size);
194 void DrawArrow(QPainter& p, QPointF center, Direction direction, float size); 195 void DrawArrow(QPainter& p, QPointF center, Direction direction, float size);
195 196
197 // Draw motion functions
198 void Draw3dCube(QPainter& p, QPointF center, const Common::Vec3f& euler, float size);
199
196 // Draw primitive types 200 // Draw primitive types
197 template <size_t N> 201 template <size_t N>
198 void DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon); 202 void DrawPolygon(QPainter& p, const std::array<QPointF, N>& polygon);
@@ -222,4 +226,5 @@ private:
222 Core::HID::SticksValues stick_values{}; 226 Core::HID::SticksValues stick_values{};
223 Core::HID::TriggerValues trigger_values{}; 227 Core::HID::TriggerValues trigger_values{};
224 Core::HID::BatteryValues battery_values{}; 228 Core::HID::BatteryValues battery_values{};
229 Core::HID::MotionState motion_values{};
225}; 230};
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index 6af34f793..286ccc5cd 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() {
111 ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); 111 ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time));
112 ui->device_name_edit->setText( 112 ui->device_name_edit->setText(
113 QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); 113 QString::fromUtf8(Settings::values.device_name.GetValue().c_str()));
114 ui->use_unsafe_extended_memory_layout->setEnabled(enabled);
115 ui->use_unsafe_extended_memory_layout->setChecked(
116 Settings::values.use_unsafe_extended_memory_layout.GetValue());
114 117
115 if (Settings::IsConfiguringGlobal()) { 118 if (Settings::IsConfiguringGlobal()) {
116 ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); 119 ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue());
@@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() {
160 ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); 163 ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region);
161 ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, 164 ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index,
162 ui->combo_time_zone); 165 ui->combo_time_zone);
166 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout,
167 ui->use_unsafe_extended_memory_layout,
168 use_unsafe_extended_memory_layout);
163 169
164 if (Settings::IsConfiguringGlobal()) { 170 if (Settings::IsConfiguringGlobal()) {
165 // Guard if during game and set to game-specific value 171 // Guard if during game and set to game-specific value
@@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() {
215 Settings::values.rng_seed.GetValue().has_value(), 221 Settings::values.rng_seed.GetValue().has_value(),
216 Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); 222 Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed);
217 223
224 ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout,
225 Settings::values.use_unsafe_extended_memory_layout,
226 use_unsafe_extended_memory_layout);
227
218 ui->custom_rtc_checkbox->setVisible(false); 228 ui->custom_rtc_checkbox->setVisible(false);
219 ui->custom_rtc_edit->setVisible(false); 229 ui->custom_rtc_edit->setVisible(false);
220} 230}
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index ec28724a1..ce1a91601 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -41,6 +41,7 @@ private:
41 bool enabled = false; 41 bool enabled = false;
42 42
43 ConfigurationShared::CheckState use_rng_seed; 43 ConfigurationShared::CheckState use_rng_seed;
44 ConfigurationShared::CheckState use_unsafe_extended_memory_layout;
44 45
45 Core::System& system; 46 Core::System& system;
46}; 47};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 9e7bc3b93..e0caecd5e 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -478,6 +478,13 @@
478 </property> 478 </property>
479 </widget> 479 </widget>
480 </item> 480 </item>
481 <item row="7" column="0">
482 <widget class="QCheckBox" name="use_unsafe_extended_memory_layout">
483 <property name="text">
484 <string>Unsafe extended memory layout (8GB DRAM)</string>
485 </property>
486 </widget>
487 </item>
481 </layout> 488 </layout>
482 </item> 489 </item>
483 </layout> 490 </layout>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index e051e35df..d932e33a7 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -27,6 +27,7 @@
27#include "configuration/configure_input.h" 27#include "configuration/configure_input.h"
28#include "configuration/configure_per_game.h" 28#include "configuration/configure_per_game.h"
29#include "configuration/configure_tas.h" 29#include "configuration/configure_tas.h"
30#include "core/file_sys/romfs_factory.h"
30#include "core/file_sys/vfs.h" 31#include "core/file_sys/vfs.h"
31#include "core/file_sys/vfs_real.h" 32#include "core/file_sys/vfs_real.h"
32#include "core/frontend/applets/cabinet.h" 33#include "core/frontend/applets/cabinet.h"
@@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4171 } 4172 }
4172 4173
4173 Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); 4174 Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();
4175 bool all_keys_present{true};
4176
4174 if (keys.BaseDeriveNecessary()) { 4177 if (keys.BaseDeriveNecessary()) {
4175 Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; 4178 Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)};
4176 4179
@@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4195 errors += tr(" - Missing PRODINFO"); 4198 errors += tr(" - Missing PRODINFO");
4196 } 4199 }
4197 if (!errors.isEmpty()) { 4200 if (!errors.isEmpty()) {
4201 all_keys_present = false;
4198 QMessageBox::warning( 4202 QMessageBox::warning(
4199 this, tr("Derivation Components Missing"), 4203 this, tr("Derivation Components Missing"),
4200 tr("Encryption keys are missing. " 4204 tr("Encryption keys are missing. "
@@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4222 4226
4223 system->GetFileSystemController().CreateFactories(*vfs); 4227 system->GetFileSystemController().CreateFactories(*vfs);
4224 4228
4229 if (all_keys_present && !this->CheckSystemArchiveDecryption()) {
4230 LOG_WARNING(Frontend, "Mii model decryption failed");
4231 QMessageBox::warning(
4232 this, tr("System Archive Decryption Failed"),
4233 tr("Encryption keys failed to decrypt firmware. "
4234 "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu "
4235 "quickstart guide</a> to get all your keys, firmware and "
4236 "games."));
4237 }
4238
4225 if (behavior == ReinitializeKeyBehavior::Warning) { 4239 if (behavior == ReinitializeKeyBehavior::Warning) {
4226 game_list->PopulateAsync(UISettings::values.game_dirs); 4240 game_list->PopulateAsync(UISettings::values.game_dirs);
4227 } 4241 }
4228} 4242}
4229 4243
4244bool GMainWindow::CheckSystemArchiveDecryption() {
4245 constexpr u64 MiiModelId = 0x0100000000000802;
4246
4247 auto bis_system = system->GetFileSystemController().GetSystemNANDContents();
4248 if (!bis_system) {
4249 // Not having system BIS files is not an error.
4250 return true;
4251 }
4252
4253 auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data);
4254 if (!mii_nca) {
4255 // Not having the Mii model is not an error.
4256 return true;
4257 }
4258
4259 // Return whether we are able to decrypt the RomFS of the Mii model.
4260 return mii_nca->GetRomFS().get() != nullptr;
4261}
4262
4230std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, 4263std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
4231 u64 program_id) { 4264 u64 program_id) {
4232 const auto dlc_entries = 4265 const auto dlc_entries =
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ac90bd5ae..7b23f2a59 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -392,6 +392,7 @@ private:
392 void LoadTranslation(); 392 void LoadTranslation();
393 void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); 393 void OpenPerGameConfiguration(u64 title_id, const std::string& file_name);
394 bool CheckDarkMode(); 394 bool CheckDarkMode();
395 bool CheckSystemArchiveDecryption();
395 396
396 QString GetTasStateDescription() const; 397 QString GetTasStateDescription() const;
397 bool CreateShortcut(const std::string& shortcut_path, const std::string& title, 398 bool CreateShortcut(const std::string& shortcut_path, const std::string& title,
diff --git a/src/yuzu/qt_common.cpp b/src/yuzu/qt_common.cpp
new file mode 100644
index 000000000..5ac9fe310
--- /dev/null
+++ b/src/yuzu/qt_common.cpp
@@ -0,0 +1,55 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <QGuiApplication>
5#include <QStringLiteral>
6#include <QWindow>
7#include "common/logging/log.h"
8#include "core/frontend/emu_window.h"
9#include "yuzu/qt_common.h"
10
11#ifdef __linux__
12#include <qpa/qplatformnativeinterface.h>
13#endif
14
15namespace QtCommon {
16Core::Frontend::WindowSystemType GetWindowSystemType() {
17 // Determine WSI type based on Qt platform.
18 QString platform_name = QGuiApplication::platformName();
19 if (platform_name == QStringLiteral("windows"))
20 return Core::Frontend::WindowSystemType::Windows;
21 else if (platform_name == QStringLiteral("xcb"))
22 return Core::Frontend::WindowSystemType::X11;
23 else if (platform_name == QStringLiteral("wayland"))
24 return Core::Frontend::WindowSystemType::Wayland;
25 else if (platform_name == QStringLiteral("wayland-egl"))
26 return Core::Frontend::WindowSystemType::Wayland;
27 else if (platform_name == QStringLiteral("cocoa"))
28 return Core::Frontend::WindowSystemType::Cocoa;
29 else if (platform_name == QStringLiteral("android"))
30 return Core::Frontend::WindowSystemType::Android;
31
32 LOG_CRITICAL(Frontend, "Unknown Qt platform {}!", platform_name.toStdString());
33 return Core::Frontend::WindowSystemType::Windows;
34} // namespace Core::Frontend::WindowSystemType
35
36Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
37 Core::Frontend::EmuWindow::WindowSystemInfo wsi;
38 wsi.type = GetWindowSystemType();
39
40 // Our Win32 Qt external doesn't have the private API.
41#if defined(WIN32) || defined(__APPLE__)
42 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
43#else
44 QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
45 wsi.display_connection = pni->nativeResourceForWindow("display", window);
46 if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
47 wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
48 else
49 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
50#endif
51 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
52
53 return wsi;
54}
55} // namespace QtCommon
diff --git a/src/yuzu/qt_common.h b/src/yuzu/qt_common.h
new file mode 100644
index 000000000..9c63f08f3
--- /dev/null
+++ b/src/yuzu/qt_common.h
@@ -0,0 +1,15 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <QWindow>
7#include "core/frontend/emu_window.h"
8
9namespace QtCommon {
10
11Core::Frontend::WindowSystemType GetWindowSystemType();
12
13Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window);
14
15} // namespace QtCommon
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 464da3231..a6418e693 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -4,18 +4,8 @@
4#include <memory> 4#include <memory>
5#include <optional> 5#include <optional>
6#include <sstream> 6#include <sstream>
7
8// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
9#ifdef __clang__
10#pragma clang diagnostic push
11#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
12#endif
13#include <SDL.h>
14#ifdef __clang__
15#pragma clang diagnostic pop
16#endif
17
18#include <INIReader.h> 7#include <INIReader.h>
8#include <SDL.h>
19#include "common/fs/file.h" 9#include "common/fs/file.h"
20#include "common/fs/fs.h" 10#include "common/fs/fs.h"
21#include "common/fs/path_util.h" 11#include "common/fs/path_util.h"
@@ -274,7 +264,7 @@ void Config::ReadValues() {
274 264
275 // Core 265 // Core
276 ReadSetting("Core", Settings::values.use_multi_core); 266 ReadSetting("Core", Settings::values.use_multi_core);
277 ReadSetting("Core", Settings::values.use_extended_memory_layout); 267 ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout);
278 268
279 // Cpu 269 // Cpu
280 ReadSetting("Cpu", Settings::values.cpu_accuracy); 270 ReadSetting("Cpu", Settings::values.cpu_accuracy);
@@ -300,6 +290,7 @@ void Config::ReadValues() {
300 290
301 // Renderer 291 // Renderer
302 ReadSetting("Renderer", Settings::values.renderer_backend); 292 ReadSetting("Renderer", Settings::values.renderer_backend);
293 ReadSetting("Renderer", Settings::values.async_presentation);
303 ReadSetting("Renderer", Settings::values.renderer_force_max_clock); 294 ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
304 ReadSetting("Renderer", Settings::values.renderer_debug); 295 ReadSetting("Renderer", Settings::values.renderer_debug);
305 ReadSetting("Renderer", Settings::values.renderer_shader_feedback); 296 ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
@@ -319,14 +310,13 @@ void Config::ReadValues() {
319 ReadSetting("Renderer", Settings::values.use_disk_shader_cache); 310 ReadSetting("Renderer", Settings::values.use_disk_shader_cache);
320 ReadSetting("Renderer", Settings::values.gpu_accuracy); 311 ReadSetting("Renderer", Settings::values.gpu_accuracy);
321 ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); 312 ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation);
322 ReadSetting("Renderer", Settings::values.use_vsync); 313 ReadSetting("Renderer", Settings::values.vsync_mode);
323 ReadSetting("Renderer", Settings::values.shader_backend); 314 ReadSetting("Renderer", Settings::values.shader_backend);
324 ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); 315 ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
325 ReadSetting("Renderer", Settings::values.nvdec_emulation); 316 ReadSetting("Renderer", Settings::values.nvdec_emulation);
326 ReadSetting("Renderer", Settings::values.accelerate_astc); 317 ReadSetting("Renderer", Settings::values.accelerate_astc);
327 ReadSetting("Renderer", Settings::values.async_astc); 318 ReadSetting("Renderer", Settings::values.async_astc);
328 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 319 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
329 ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
330 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); 320 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
331 321
332 ReadSetting("Renderer", Settings::values.bg_red); 322 ReadSetting("Renderer", Settings::values.bg_red);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 209cfc28a..086ed4cfa 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -163,9 +163,9 @@ keyboard_enabled =
163# 0: Disabled, 1 (default): Enabled 163# 0: Disabled, 1 (default): Enabled
164use_multi_core = 164use_multi_core =
165 165
166# Enable extended guest system memory layout (8GB DRAM) 166# Enable unsafe extended guest system memory layout (8GB DRAM)
167# 0 (default): Disabled, 1: Enabled 167# 0 (default): Disabled, 1: Enabled
168use_extended_memory_layout = 168use_unsafe_extended_memory_layout =
169 169
170[Cpu] 170[Cpu]
171# Adjusts various optimizations. 171# Adjusts various optimizations.
@@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor =
264# 0: OpenGL, 1 (default): Vulkan 264# 0: OpenGL, 1 (default): Vulkan
265backend = 265backend =
266 266
267# Whether to enable asynchronous presentation (Vulkan only)
268# 0 (default): Off, 1: On
269async_presentation =
270
267# Enable graphics API debugging mode. 271# Enable graphics API debugging mode.
268# 0 (default): Disabled, 1: Enabled 272# 0 (default): Disabled, 1: Enabled
269debug = 273debug =
@@ -321,8 +325,14 @@ aspect_ratio =
321# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x 325# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
322max_anisotropy = 326max_anisotropy =
323 327
324# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 328# Whether to enable VSync or not.
325# 0 (default): Off, 1: On 329# OpenGL: Values other than 0 enable VSync
330# Vulkan: FIFO is selected if the requested mode is not supported by the driver.
331# FIFO (VSync) does not drop frames or exhibit tearing but is limited by the screen refresh rate.
332# FIFO Relaxed is similar to FIFO but allows tearing as it recovers from a slow down.
333# Mailbox can have lower latency than FIFO and does not tear but may drop frames.
334# Immediate (no synchronization) just presents whatever is available and can exhibit tearing.
335# 0: Immediate (Off), 1: Mailbox, 2 (Default): FIFO (On), 3: FIFO Relaxed
326use_vsync = 336use_vsync =
327 337
328# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is 338# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is
@@ -370,10 +380,6 @@ use_asynchronous_gpu_emulation =
370# 0: Off, 1 (default): On 380# 0: Off, 1 (default): On
371use_fast_gpu_time = 381use_fast_gpu_time =
372 382
373# Force unmodified buffers to be flushed, which can cost performance.
374# 0: Off (default), 1: On
375use_pessimistic_flushes =
376
377# Whether to use garbage collection or not for GPU caches. 383# Whether to use garbage collection or not for GPU caches.
378# 0 (default): Off, 1: On 384# 0 (default): Off, 1: On
379use_caches_gc = 385use_caches_gc =