Diffstat (limited to 'src')
-rw-r--r--  src/common/bit_util.h | 7
-rw-r--r--  src/common/host_memory.cpp | 4
-rw-r--r--  src/common/intrusive_red_black_tree.h | 391
-rw-r--r--  src/common/logging/backend.cpp | 18
-rw-r--r--  src/common/settings.cpp | 1
-rw-r--r--  src/common/settings.h | 3
-rw-r--r--  src/common/telemetry.cpp | 60
-rw-r--r--  src/common/telemetry.h | 7
-rw-r--r--  src/common/tree.h | 625
-rw-r--r--  src/common/x64/cpu_detect.cpp | 124
-rw-r--r--  src/common/x64/cpu_detect.h | 79
-rw-r--r--  src/core/CMakeLists.txt | 5
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_32.cpp | 16
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_64.cpp | 17
-rw-r--r--  src/core/arm/dynarmic/arm_exclusive_monitor.cpp | 4
-rw-r--r--  src/core/arm/dynarmic/arm_exclusive_monitor.h | 2
-rw-r--r--  src/core/arm/exclusive_monitor.h | 2
-rw-r--r--  src/core/core.cpp | 4
-rw-r--r--  src/core/frontend/emu_window.h | 11
-rw-r--r--  src/core/hle/ipc_helpers.h | 2
-rw-r--r--  src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h | 13
-rw-r--r--  src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp | 12
-rw-r--r--  src/core/hle/kernel/board/nintendo/nx/k_system_control.h | 1
-rw-r--r--  src/core/hle/kernel/hle_ipc.cpp | 5
-rw-r--r--  src/core/hle/kernel/hle_ipc.h | 9
-rw-r--r--  src/core/hle/kernel/init/init_slab_setup.cpp | 101
-rw-r--r--  src/core/hle/kernel/init/init_slab_setup.h | 5
-rw-r--r--  src/core/hle/kernel/initial_process.h | 23
-rw-r--r--  src/core/hle/kernel/k_address_arbiter.cpp | 10
-rw-r--r--  src/core/hle/kernel/k_condition_variable.cpp | 2
-rw-r--r--  src/core/hle/kernel/k_memory_layout.h | 10
-rw-r--r--  src/core/hle/kernel/k_memory_manager.cpp | 469
-rw-r--r--  src/core/hle/kernel/k_memory_manager.h | 167
-rw-r--r--  src/core/hle/kernel/k_memory_region_type.h | 10
-rw-r--r--  src/core/hle/kernel/k_page_buffer.h | 34
-rw-r--r--  src/core/hle/kernel/k_page_heap.cpp | 126
-rw-r--r--  src/core/hle/kernel/k_page_heap.h | 221
-rw-r--r--  src/core/hle/kernel/k_page_table.cpp | 335
-rw-r--r--  src/core/hle/kernel/k_page_table.h | 38
-rw-r--r--  src/core/hle/kernel/k_port.cpp | 7
-rw-r--r--  src/core/hle/kernel/k_process.cpp | 194
-rw-r--r--  src/core/hle/kernel/k_process.h | 18
-rw-r--r--  src/core/hle/kernel/k_server_port.h | 8
-rw-r--r--  src/core/hle/kernel/k_server_session.cpp | 15
-rw-r--r--  src/core/hle/kernel/k_slab_heap.h | 230
-rw-r--r--  src/core/hle/kernel/k_thread.cpp | 7
-rw-r--r--  src/core/hle/kernel/k_thread.h | 6
-rw-r--r--  src/core/hle/kernel/k_thread_local_page.cpp | 65
-rw-r--r--  src/core/hle/kernel/k_thread_local_page.h | 112
-rw-r--r--  src/core/hle/kernel/kernel.cpp | 191
-rw-r--r--  src/core/hle/kernel/kernel.h | 26
-rw-r--r--  src/core/hle/kernel/service_thread.cpp | 5
-rw-r--r--  src/core/hle/kernel/slab_helpers.h | 2
-rw-r--r--  src/core/hle/kernel/svc_types.h | 2
-rw-r--r--  src/core/hle/service/am/am.cpp | 4
-rw-r--r--  src/core/hle/service/kernel_helpers.cpp | 11
-rw-r--r--  src/core/hle/service/ldr/ldr.cpp | 78
-rw-r--r--  src/core/hle/service/sm/sm.cpp | 2
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.h | 2
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | 580
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py | 92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 3
-rw-r--r--  src/shader_recompiler/ir_opt/rescaling_pass.cpp | 29
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 109
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 50
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 20
-rw-r--r--  src/video_core/engines/maxwell_dma.h | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 46
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.h | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3
-rw-r--r--  src/video_core/video_core.cpp | 1
-rw-r--r--  src/yuzu/configuration/config.cpp | 6
-rw-r--r--  src/yuzu/configuration/configure_cpu.cpp | 9
-rw-r--r--  src/yuzu/configuration/configure_cpu.h | 1
-rw-r--r--  src/yuzu/configuration/configure_cpu.ui | 12
-rw-r--r--  src/yuzu/configuration/configure_cpu_debug.cpp | 8
-rw-r--r--  src/yuzu/configuration/configure_cpu_debug.ui | 29
-rw-r--r--  src/yuzu/main.cpp | 4
-rw-r--r--  src/yuzu_cmd/config.cpp | 3
-rw-r--r--  src/yuzu_cmd/default_ini.h | 19
83 files changed, 3415 insertions, 1586 deletions
diff --git a/src/common/bit_util.h b/src/common/bit_util.h
index f50d3308a..f37538e06 100644
--- a/src/common/bit_util.h
+++ b/src/common/bit_util.h
@@ -57,4 +57,11 @@ requires std::is_integral_v<T>
     return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U)));
 }
 
+template <size_t bit_index, typename T>
+requires std::is_integral_v<T>
+[[nodiscard]] constexpr bool Bit(const T value) {
+    static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T");
+    return ((value >> bit_index) & T(1)) == T(1);
+}
+
 } // namespace Common
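
The Common::Bit<> helper added above tests a single bit at a compile-time index. A minimal usage sketch follows; the status value and the static_asserts are illustrative assumptions, only Common::Bit itself comes from this change:

    #include <cstdint>

    #include "common/bit_util.h"

    // Hypothetical flags word, used purely for illustration.
    constexpr std::uint32_t status = 0b0000'0101u;

    static_assert(Common::Bit<0>(status));   // bit 0 is set
    static_assert(!Common::Bit<1>(status));  // bit 1 is clear
    static_assert(Common::Bit<2>(status));   // bit 2 is set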
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 28949fe5e..c465cfc14 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -327,8 +327,8 @@ private:
     bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
         const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
         if (it != placeholders.end() && it->lower() == virtual_offset + length) {
-            const bool is_root = it == placeholders.begin() && virtual_offset == 0;
-            return is_root || std::prev(it)->upper() == virtual_offset;
+            return it == placeholders.begin() ? virtual_offset == 0
+                                              : std::prev(it)->upper() == virtual_offset;
         }
         return false;
     }
diff --git a/src/common/intrusive_red_black_tree.h b/src/common/intrusive_red_black_tree.h
index 3173cc449..b296b639e 100644
--- a/src/common/intrusive_red_black_tree.h
+++ b/src/common/intrusive_red_black_tree.h
@@ -4,6 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/alignment.h"
8#include "common/common_funcs.h"
7#include "common/parent_of_member.h" 9#include "common/parent_of_member.h"
8#include "common/tree.h" 10#include "common/tree.h"
9 11
@@ -15,32 +17,33 @@ class IntrusiveRedBlackTreeImpl;
15 17
16} 18}
17 19
20#pragma pack(push, 4)
18struct IntrusiveRedBlackTreeNode { 21struct IntrusiveRedBlackTreeNode {
22 YUZU_NON_COPYABLE(IntrusiveRedBlackTreeNode);
23
19public: 24public:
20 using EntryType = RBEntry<IntrusiveRedBlackTreeNode>; 25 using RBEntry = freebsd::RBEntry<IntrusiveRedBlackTreeNode>;
21 26
22 constexpr IntrusiveRedBlackTreeNode() = default; 27private:
28 RBEntry m_entry;
23 29
24 void SetEntry(const EntryType& new_entry) { 30public:
25 entry = new_entry; 31 explicit IntrusiveRedBlackTreeNode() = default;
26 }
27 32
28 [[nodiscard]] EntryType& GetEntry() { 33 [[nodiscard]] constexpr RBEntry& GetRBEntry() {
29 return entry; 34 return m_entry;
30 } 35 }
31 36 [[nodiscard]] constexpr const RBEntry& GetRBEntry() const {
32 [[nodiscard]] const EntryType& GetEntry() const { 37 return m_entry;
33 return entry;
34 } 38 }
35 39
36private: 40 constexpr void SetRBEntry(const RBEntry& entry) {
37 EntryType entry{}; 41 m_entry = entry;
38 42 }
39 friend class impl::IntrusiveRedBlackTreeImpl;
40
41 template <class, class, class>
42 friend class IntrusiveRedBlackTree;
43}; 43};
44static_assert(sizeof(IntrusiveRedBlackTreeNode) ==
45 3 * sizeof(void*) + std::max<size_t>(sizeof(freebsd::RBColor), 4));
46#pragma pack(pop)
44 47
45template <class T, class Traits, class Comparator> 48template <class T, class Traits, class Comparator>
46class IntrusiveRedBlackTree; 49class IntrusiveRedBlackTree;
@@ -48,12 +51,17 @@ class IntrusiveRedBlackTree;
48namespace impl { 51namespace impl {
49 52
50class IntrusiveRedBlackTreeImpl { 53class IntrusiveRedBlackTreeImpl {
54 YUZU_NON_COPYABLE(IntrusiveRedBlackTreeImpl);
55
51private: 56private:
52 template <class, class, class> 57 template <class, class, class>
53 friend class ::Common::IntrusiveRedBlackTree; 58 friend class ::Common::IntrusiveRedBlackTree;
54 59
55 using RootType = RBHead<IntrusiveRedBlackTreeNode>; 60private:
56 RootType root; 61 using RootType = freebsd::RBHead<IntrusiveRedBlackTreeNode>;
62
63private:
64 RootType m_root;
57 65
58public: 66public:
59 template <bool Const> 67 template <bool Const>
@@ -81,149 +89,150 @@ public:
81 IntrusiveRedBlackTreeImpl::reference>; 89 IntrusiveRedBlackTreeImpl::reference>;
82 90
83 private: 91 private:
84 pointer node; 92 pointer m_node;
85 93
86 public: 94 public:
87 explicit Iterator(pointer n) : node(n) {} 95 constexpr explicit Iterator(pointer n) : m_node(n) {}
88 96
89 bool operator==(const Iterator& rhs) const { 97 constexpr bool operator==(const Iterator& rhs) const {
90 return this->node == rhs.node; 98 return m_node == rhs.m_node;
91 } 99 }
92 100
93 bool operator!=(const Iterator& rhs) const { 101 constexpr bool operator!=(const Iterator& rhs) const {
94 return !(*this == rhs); 102 return !(*this == rhs);
95 } 103 }
96 104
97 pointer operator->() const { 105 constexpr pointer operator->() const {
98 return this->node; 106 return m_node;
99 } 107 }
100 108
101 reference operator*() const { 109 constexpr reference operator*() const {
102 return *this->node; 110 return *m_node;
103 } 111 }
104 112
105 Iterator& operator++() { 113 constexpr Iterator& operator++() {
106 this->node = GetNext(this->node); 114 m_node = GetNext(m_node);
107 return *this; 115 return *this;
108 } 116 }
109 117
110 Iterator& operator--() { 118 constexpr Iterator& operator--() {
111 this->node = GetPrev(this->node); 119 m_node = GetPrev(m_node);
112 return *this; 120 return *this;
113 } 121 }
114 122
115 Iterator operator++(int) { 123 constexpr Iterator operator++(int) {
116 const Iterator it{*this}; 124 const Iterator it{*this};
117 ++(*this); 125 ++(*this);
118 return it; 126 return it;
119 } 127 }
120 128
121 Iterator operator--(int) { 129 constexpr Iterator operator--(int) {
122 const Iterator it{*this}; 130 const Iterator it{*this};
123 --(*this); 131 --(*this);
124 return it; 132 return it;
125 } 133 }
126 134
127 operator Iterator<true>() const { 135 constexpr operator Iterator<true>() const {
128 return Iterator<true>(this->node); 136 return Iterator<true>(m_node);
129 } 137 }
130 }; 138 };
131 139
132private: 140private:
133 // Define accessors using RB_* functions. 141 constexpr bool EmptyImpl() const {
134 bool EmptyImpl() const { 142 return m_root.IsEmpty();
135 return root.IsEmpty();
136 } 143 }
137 144
138 IntrusiveRedBlackTreeNode* GetMinImpl() const { 145 constexpr IntrusiveRedBlackTreeNode* GetMinImpl() const {
139 return RB_MIN(const_cast<RootType*>(&root)); 146 return freebsd::RB_MIN(const_cast<RootType&>(m_root));
140 } 147 }
141 148
142 IntrusiveRedBlackTreeNode* GetMaxImpl() const { 149 constexpr IntrusiveRedBlackTreeNode* GetMaxImpl() const {
143 return RB_MAX(const_cast<RootType*>(&root)); 150 return freebsd::RB_MAX(const_cast<RootType&>(m_root));
144 } 151 }
145 152
146 IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) { 153 constexpr IntrusiveRedBlackTreeNode* RemoveImpl(IntrusiveRedBlackTreeNode* node) {
147 return RB_REMOVE(&root, node); 154 return freebsd::RB_REMOVE(m_root, node);
148 } 155 }
149 156
150public: 157public:
151 static IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) { 158 static constexpr IntrusiveRedBlackTreeNode* GetNext(IntrusiveRedBlackTreeNode* node) {
152 return RB_NEXT(node); 159 return freebsd::RB_NEXT(node);
153 } 160 }
154 161
155 static IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) { 162 static constexpr IntrusiveRedBlackTreeNode* GetPrev(IntrusiveRedBlackTreeNode* node) {
156 return RB_PREV(node); 163 return freebsd::RB_PREV(node);
157 } 164 }
158 165
159 static const IntrusiveRedBlackTreeNode* GetNext(const IntrusiveRedBlackTreeNode* node) { 166 static constexpr IntrusiveRedBlackTreeNode const* GetNext(
167 IntrusiveRedBlackTreeNode const* node) {
160 return static_cast<const IntrusiveRedBlackTreeNode*>( 168 return static_cast<const IntrusiveRedBlackTreeNode*>(
161 GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node))); 169 GetNext(const_cast<IntrusiveRedBlackTreeNode*>(node)));
162 } 170 }
163 171
164 static const IntrusiveRedBlackTreeNode* GetPrev(const IntrusiveRedBlackTreeNode* node) { 172 static constexpr IntrusiveRedBlackTreeNode const* GetPrev(
173 IntrusiveRedBlackTreeNode const* node) {
165 return static_cast<const IntrusiveRedBlackTreeNode*>( 174 return static_cast<const IntrusiveRedBlackTreeNode*>(
166 GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node))); 175 GetPrev(const_cast<IntrusiveRedBlackTreeNode*>(node)));
167 } 176 }
168 177
169public: 178public:
170 constexpr IntrusiveRedBlackTreeImpl() {} 179 constexpr IntrusiveRedBlackTreeImpl() = default;
171 180
172 // Iterator accessors. 181 // Iterator accessors.
173 iterator begin() { 182 constexpr iterator begin() {
174 return iterator(this->GetMinImpl()); 183 return iterator(this->GetMinImpl());
175 } 184 }
176 185
177 const_iterator begin() const { 186 constexpr const_iterator begin() const {
178 return const_iterator(this->GetMinImpl()); 187 return const_iterator(this->GetMinImpl());
179 } 188 }
180 189
181 iterator end() { 190 constexpr iterator end() {
182 return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr)); 191 return iterator(static_cast<IntrusiveRedBlackTreeNode*>(nullptr));
183 } 192 }
184 193
185 const_iterator end() const { 194 constexpr const_iterator end() const {
186 return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr)); 195 return const_iterator(static_cast<const IntrusiveRedBlackTreeNode*>(nullptr));
187 } 196 }
188 197
189 const_iterator cbegin() const { 198 constexpr const_iterator cbegin() const {
190 return this->begin(); 199 return this->begin();
191 } 200 }
192 201
193 const_iterator cend() const { 202 constexpr const_iterator cend() const {
194 return this->end(); 203 return this->end();
195 } 204 }
196 205
197 iterator iterator_to(reference ref) { 206 constexpr iterator iterator_to(reference ref) {
198 return iterator(&ref); 207 return iterator(std::addressof(ref));
199 } 208 }
200 209
201 const_iterator iterator_to(const_reference ref) const { 210 constexpr const_iterator iterator_to(const_reference ref) const {
202 return const_iterator(&ref); 211 return const_iterator(std::addressof(ref));
203 } 212 }
204 213
205 // Content management. 214 // Content management.
206 bool empty() const { 215 constexpr bool empty() const {
207 return this->EmptyImpl(); 216 return this->EmptyImpl();
208 } 217 }
209 218
210 reference back() { 219 constexpr reference back() {
211 return *this->GetMaxImpl(); 220 return *this->GetMaxImpl();
212 } 221 }
213 222
214 const_reference back() const { 223 constexpr const_reference back() const {
215 return *this->GetMaxImpl(); 224 return *this->GetMaxImpl();
216 } 225 }
217 226
218 reference front() { 227 constexpr reference front() {
219 return *this->GetMinImpl(); 228 return *this->GetMinImpl();
220 } 229 }
221 230
222 const_reference front() const { 231 constexpr const_reference front() const {
223 return *this->GetMinImpl(); 232 return *this->GetMinImpl();
224 } 233 }
225 234
226 iterator erase(iterator it) { 235 constexpr iterator erase(iterator it) {
227 auto cur = std::addressof(*it); 236 auto cur = std::addressof(*it);
228 auto next = GetNext(cur); 237 auto next = GetNext(cur);
229 this->RemoveImpl(cur); 238 this->RemoveImpl(cur);
@@ -234,16 +243,16 @@ public:
234} // namespace impl 243} // namespace impl
235 244
236template <typename T> 245template <typename T>
237concept HasLightCompareType = requires { 246concept HasRedBlackKeyType = requires {
238 { std::is_same<typename T::LightCompareType, void>::value } -> std::convertible_to<bool>; 247 { std::is_same<typename T::RedBlackKeyType, void>::value } -> std::convertible_to<bool>;
239}; 248};
240 249
241namespace impl { 250namespace impl {
242 251
243 template <typename T, typename Default> 252 template <typename T, typename Default>
244 consteval auto* GetLightCompareType() { 253 consteval auto* GetRedBlackKeyType() {
245 if constexpr (HasLightCompareType<T>) { 254 if constexpr (HasRedBlackKeyType<T>) {
246 return static_cast<typename T::LightCompareType*>(nullptr); 255 return static_cast<typename T::RedBlackKeyType*>(nullptr);
247 } else { 256 } else {
248 return static_cast<Default*>(nullptr); 257 return static_cast<Default*>(nullptr);
249 } 258 }
@@ -252,16 +261,17 @@ namespace impl {
252} // namespace impl 261} // namespace impl
253 262
254template <typename T, typename Default> 263template <typename T, typename Default>
255using LightCompareType = std::remove_pointer_t<decltype(impl::GetLightCompareType<T, Default>())>; 264using RedBlackKeyType = std::remove_pointer_t<decltype(impl::GetRedBlackKeyType<T, Default>())>;
256 265
257template <class T, class Traits, class Comparator> 266template <class T, class Traits, class Comparator>
258class IntrusiveRedBlackTree { 267class IntrusiveRedBlackTree {
268 YUZU_NON_COPYABLE(IntrusiveRedBlackTree);
259 269
260public: 270public:
261 using ImplType = impl::IntrusiveRedBlackTreeImpl; 271 using ImplType = impl::IntrusiveRedBlackTreeImpl;
262 272
263private: 273private:
264 ImplType impl{}; 274 ImplType m_impl;
265 275
266public: 276public:
267 template <bool Const> 277 template <bool Const>
@@ -277,9 +287,9 @@ public:
277 using iterator = Iterator<false>; 287 using iterator = Iterator<false>;
278 using const_iterator = Iterator<true>; 288 using const_iterator = Iterator<true>;
279 289
280 using light_value_type = LightCompareType<Comparator, value_type>; 290 using key_type = RedBlackKeyType<Comparator, value_type>;
281 using const_light_pointer = const light_value_type*; 291 using const_key_pointer = const key_type*;
282 using const_light_reference = const light_value_type&; 292 using const_key_reference = const key_type&;
283 293
284 template <bool Const> 294 template <bool Const>
285 class Iterator { 295 class Iterator {
@@ -298,183 +308,201 @@ public:
298 IntrusiveRedBlackTree::reference>; 308 IntrusiveRedBlackTree::reference>;
299 309
300 private: 310 private:
301 ImplIterator iterator; 311 ImplIterator m_impl;
302 312
303 private: 313 private:
304 explicit Iterator(ImplIterator it) : iterator(it) {} 314 constexpr explicit Iterator(ImplIterator it) : m_impl(it) {}
305 315
306 explicit Iterator(typename std::conditional<Const, ImplType::const_iterator, 316 constexpr explicit Iterator(typename ImplIterator::pointer p) : m_impl(p) {}
307 ImplType::iterator>::type::pointer ptr)
308 : iterator(ptr) {}
309 317
310 ImplIterator GetImplIterator() const { 318 constexpr ImplIterator GetImplIterator() const {
311 return this->iterator; 319 return m_impl;
312 } 320 }
313 321
314 public: 322 public:
315 bool operator==(const Iterator& rhs) const { 323 constexpr bool operator==(const Iterator& rhs) const {
316 return this->iterator == rhs.iterator; 324 return m_impl == rhs.m_impl;
317 } 325 }
318 326
319 bool operator!=(const Iterator& rhs) const { 327 constexpr bool operator!=(const Iterator& rhs) const {
320 return !(*this == rhs); 328 return !(*this == rhs);
321 } 329 }
322 330
323 pointer operator->() const { 331 constexpr pointer operator->() const {
324 return Traits::GetParent(std::addressof(*this->iterator)); 332 return Traits::GetParent(std::addressof(*m_impl));
325 } 333 }
326 334
327 reference operator*() const { 335 constexpr reference operator*() const {
328 return *Traits::GetParent(std::addressof(*this->iterator)); 336 return *Traits::GetParent(std::addressof(*m_impl));
329 } 337 }
330 338
331 Iterator& operator++() { 339 constexpr Iterator& operator++() {
332 ++this->iterator; 340 ++m_impl;
333 return *this; 341 return *this;
334 } 342 }
335 343
336 Iterator& operator--() { 344 constexpr Iterator& operator--() {
337 --this->iterator; 345 --m_impl;
338 return *this; 346 return *this;
339 } 347 }
340 348
341 Iterator operator++(int) { 349 constexpr Iterator operator++(int) {
342 const Iterator it{*this}; 350 const Iterator it{*this};
343 ++this->iterator; 351 ++m_impl;
344 return it; 352 return it;
345 } 353 }
346 354
347 Iterator operator--(int) { 355 constexpr Iterator operator--(int) {
348 const Iterator it{*this}; 356 const Iterator it{*this};
349 --this->iterator; 357 --m_impl;
350 return it; 358 return it;
351 } 359 }
352 360
353 operator Iterator<true>() const { 361 constexpr operator Iterator<true>() const {
354 return Iterator<true>(this->iterator); 362 return Iterator<true>(m_impl);
355 } 363 }
356 }; 364 };
357 365
358private: 366private:
359 static int CompareImpl(const IntrusiveRedBlackTreeNode* lhs, 367 static constexpr int CompareImpl(const IntrusiveRedBlackTreeNode* lhs,
360 const IntrusiveRedBlackTreeNode* rhs) { 368 const IntrusiveRedBlackTreeNode* rhs) {
361 return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs)); 369 return Comparator::Compare(*Traits::GetParent(lhs), *Traits::GetParent(rhs));
362 } 370 }
363 371
364 static int LightCompareImpl(const void* elm, const IntrusiveRedBlackTreeNode* rhs) { 372 static constexpr int CompareKeyImpl(const_key_reference key,
365 return Comparator::Compare(*static_cast<const_light_pointer>(elm), *Traits::GetParent(rhs)); 373 const IntrusiveRedBlackTreeNode* rhs) {
374 return Comparator::Compare(key, *Traits::GetParent(rhs));
366 } 375 }
367 376
368 // Define accessors using RB_* functions. 377 // Define accessors using RB_* functions.
369 IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) { 378 constexpr IntrusiveRedBlackTreeNode* InsertImpl(IntrusiveRedBlackTreeNode* node) {
370 return RB_INSERT(&impl.root, node, CompareImpl); 379 return freebsd::RB_INSERT(m_impl.m_root, node, CompareImpl);
371 } 380 }
372 381
373 IntrusiveRedBlackTreeNode* FindImpl(const IntrusiveRedBlackTreeNode* node) const { 382 constexpr IntrusiveRedBlackTreeNode* FindImpl(IntrusiveRedBlackTreeNode const* node) const {
374 return RB_FIND(const_cast<ImplType::RootType*>(&impl.root), 383 return freebsd::RB_FIND(const_cast<ImplType::RootType&>(m_impl.m_root),
375 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); 384 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
376 } 385 }
377 386
378 IntrusiveRedBlackTreeNode* NFindImpl(const IntrusiveRedBlackTreeNode* node) const { 387 constexpr IntrusiveRedBlackTreeNode* NFindImpl(IntrusiveRedBlackTreeNode const* node) const {
379 return RB_NFIND(const_cast<ImplType::RootType*>(&impl.root), 388 return freebsd::RB_NFIND(const_cast<ImplType::RootType&>(m_impl.m_root),
380 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl); 389 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
381 } 390 }
382 391
383 IntrusiveRedBlackTreeNode* FindLightImpl(const_light_pointer lelm) const { 392 constexpr IntrusiveRedBlackTreeNode* FindKeyImpl(const_key_reference key) const {
384 return RB_FIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root), 393 return freebsd::RB_FIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
385 static_cast<const void*>(lelm), LightCompareImpl); 394 CompareKeyImpl);
386 } 395 }
387 396
388 IntrusiveRedBlackTreeNode* NFindLightImpl(const_light_pointer lelm) const { 397 constexpr IntrusiveRedBlackTreeNode* NFindKeyImpl(const_key_reference key) const {
389 return RB_NFIND_LIGHT(const_cast<ImplType::RootType*>(&impl.root), 398 return freebsd::RB_NFIND_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
390 static_cast<const void*>(lelm), LightCompareImpl); 399 CompareKeyImpl);
400 }
401
402 constexpr IntrusiveRedBlackTreeNode* FindExistingImpl(
403 IntrusiveRedBlackTreeNode const* node) const {
404 return freebsd::RB_FIND_EXISTING(const_cast<ImplType::RootType&>(m_impl.m_root),
405 const_cast<IntrusiveRedBlackTreeNode*>(node), CompareImpl);
406 }
407
408 constexpr IntrusiveRedBlackTreeNode* FindExistingKeyImpl(const_key_reference key) const {
409 return freebsd::RB_FIND_EXISTING_KEY(const_cast<ImplType::RootType&>(m_impl.m_root), key,
410 CompareKeyImpl);
391 } 411 }
392 412
393public: 413public:
394 constexpr IntrusiveRedBlackTree() = default; 414 constexpr IntrusiveRedBlackTree() = default;
395 415
396 // Iterator accessors. 416 // Iterator accessors.
397 iterator begin() { 417 constexpr iterator begin() {
398 return iterator(this->impl.begin()); 418 return iterator(m_impl.begin());
399 } 419 }
400 420
401 const_iterator begin() const { 421 constexpr const_iterator begin() const {
402 return const_iterator(this->impl.begin()); 422 return const_iterator(m_impl.begin());
403 } 423 }
404 424
405 iterator end() { 425 constexpr iterator end() {
406 return iterator(this->impl.end()); 426 return iterator(m_impl.end());
407 } 427 }
408 428
409 const_iterator end() const { 429 constexpr const_iterator end() const {
410 return const_iterator(this->impl.end()); 430 return const_iterator(m_impl.end());
411 } 431 }
412 432
413 const_iterator cbegin() const { 433 constexpr const_iterator cbegin() const {
414 return this->begin(); 434 return this->begin();
415 } 435 }
416 436
417 const_iterator cend() const { 437 constexpr const_iterator cend() const {
418 return this->end(); 438 return this->end();
419 } 439 }
420 440
421 iterator iterator_to(reference ref) { 441 constexpr iterator iterator_to(reference ref) {
422 return iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); 442 return iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
423 } 443 }
424 444
425 const_iterator iterator_to(const_reference ref) const { 445 constexpr const_iterator iterator_to(const_reference ref) const {
426 return const_iterator(this->impl.iterator_to(*Traits::GetNode(std::addressof(ref)))); 446 return const_iterator(m_impl.iterator_to(*Traits::GetNode(std::addressof(ref))));
427 } 447 }
428 448
429 // Content management. 449 // Content management.
430 bool empty() const { 450 constexpr bool empty() const {
431 return this->impl.empty(); 451 return m_impl.empty();
432 } 452 }
433 453
434 reference back() { 454 constexpr reference back() {
435 return *Traits::GetParent(std::addressof(this->impl.back())); 455 return *Traits::GetParent(std::addressof(m_impl.back()));
436 } 456 }
437 457
438 const_reference back() const { 458 constexpr const_reference back() const {
439 return *Traits::GetParent(std::addressof(this->impl.back())); 459 return *Traits::GetParent(std::addressof(m_impl.back()));
440 } 460 }
441 461
442 reference front() { 462 constexpr reference front() {
443 return *Traits::GetParent(std::addressof(this->impl.front())); 463 return *Traits::GetParent(std::addressof(m_impl.front()));
444 } 464 }
445 465
446 const_reference front() const { 466 constexpr const_reference front() const {
447 return *Traits::GetParent(std::addressof(this->impl.front())); 467 return *Traits::GetParent(std::addressof(m_impl.front()));
448 } 468 }
449 469
450 iterator erase(iterator it) { 470 constexpr iterator erase(iterator it) {
451 return iterator(this->impl.erase(it.GetImplIterator())); 471 return iterator(m_impl.erase(it.GetImplIterator()));
452 } 472 }
453 473
454 iterator insert(reference ref) { 474 constexpr iterator insert(reference ref) {
455 ImplType::pointer node = Traits::GetNode(std::addressof(ref)); 475 ImplType::pointer node = Traits::GetNode(std::addressof(ref));
456 this->InsertImpl(node); 476 this->InsertImpl(node);
457 return iterator(node); 477 return iterator(node);
458 } 478 }
459 479
460 iterator find(const_reference ref) const { 480 constexpr iterator find(const_reference ref) const {
461 return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref)))); 481 return iterator(this->FindImpl(Traits::GetNode(std::addressof(ref))));
462 } 482 }
463 483
464 iterator nfind(const_reference ref) const { 484 constexpr iterator nfind(const_reference ref) const {
465 return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref)))); 485 return iterator(this->NFindImpl(Traits::GetNode(std::addressof(ref))));
466 } 486 }
467 487
468 iterator find_light(const_light_reference ref) const { 488 constexpr iterator find_key(const_key_reference ref) const {
469 return iterator(this->FindLightImpl(std::addressof(ref))); 489 return iterator(this->FindKeyImpl(ref));
490 }
491
492 constexpr iterator nfind_key(const_key_reference ref) const {
493 return iterator(this->NFindKeyImpl(ref));
494 }
495
496 constexpr iterator find_existing(const_reference ref) const {
497 return iterator(this->FindExistingImpl(Traits::GetNode(std::addressof(ref))));
470 } 498 }
471 499
472 iterator nfind_light(const_light_reference ref) const { 500 constexpr iterator find_existing_key(const_key_reference ref) const {
473 return iterator(this->NFindLightImpl(std::addressof(ref))); 501 return iterator(this->FindExistingKeyImpl(ref));
474 } 502 }
475}; 503};
476 504
477template <auto T, class Derived = impl::GetParentType<T>> 505template <auto T, class Derived = Common::impl::GetParentType<T>>
478class IntrusiveRedBlackTreeMemberTraits; 506class IntrusiveRedBlackTreeMemberTraits;
479 507
480template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> 508template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -498,19 +526,16 @@ private:
498 return std::addressof(parent->*Member); 526 return std::addressof(parent->*Member);
499 } 527 }
500 528
501 static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { 529 static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
502 return GetParentPointer<Member, Derived>(node); 530 return Common::GetParentPointer<Member, Derived>(node);
503 } 531 }
504 532
505 static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { 533 static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
506 return GetParentPointer<Member, Derived>(node); 534 return Common::GetParentPointer<Member, Derived>(node);
507 } 535 }
508
509private:
510 static constexpr TypedStorage<Derived> DerivedStorage = {};
511}; 536};
512 537
513template <auto T, class Derived = impl::GetParentType<T>> 538template <auto T, class Derived = Common::impl::GetParentType<T>>
514class IntrusiveRedBlackTreeMemberTraitsDeferredAssert; 539class IntrusiveRedBlackTreeMemberTraitsDeferredAssert;
515 540
516template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived> 541template <class Parent, IntrusiveRedBlackTreeNode Parent::*Member, class Derived>
@@ -521,11 +546,6 @@ public:
521 IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>; 546 IntrusiveRedBlackTree<Derived, IntrusiveRedBlackTreeMemberTraitsDeferredAssert, Comparator>;
522 using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl; 547 using TreeTypeImpl = impl::IntrusiveRedBlackTreeImpl;
523 548
524 static constexpr bool IsValid() {
525 TypedStorage<Derived> DerivedStorage = {};
526 return GetParent(GetNode(GetPointer(DerivedStorage))) == GetPointer(DerivedStorage);
527 }
528
529private: 549private:
530 template <class, class, class> 550 template <class, class, class>
531 friend class IntrusiveRedBlackTree; 551 friend class IntrusiveRedBlackTree;
@@ -540,30 +560,36 @@ private:
540 return std::addressof(parent->*Member); 560 return std::addressof(parent->*Member);
541 } 561 }
542 562
543 static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { 563 static Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
544 return GetParentPointer<Member, Derived>(node); 564 return Common::GetParentPointer<Member, Derived>(node);
545 } 565 }
546 566
547 static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { 567 static Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
548 return GetParentPointer<Member, Derived>(node); 568 return Common::GetParentPointer<Member, Derived>(node);
549 } 569 }
550}; 570};
551 571
552template <class Derived> 572template <class Derived>
553class IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode { 573class alignas(void*) IntrusiveRedBlackTreeBaseNode : public IntrusiveRedBlackTreeNode {
554public: 574public:
575 using IntrusiveRedBlackTreeNode::IntrusiveRedBlackTreeNode;
576
555 constexpr Derived* GetPrev() { 577 constexpr Derived* GetPrev() {
556 return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); 578 return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
579 impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
557 } 580 }
558 constexpr const Derived* GetPrev() const { 581 constexpr const Derived* GetPrev() const {
559 return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetPrev(this)); 582 return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
583 impl::IntrusiveRedBlackTreeImpl::GetPrev(this)));
560 } 584 }
561 585
562 constexpr Derived* GetNext() { 586 constexpr Derived* GetNext() {
563 return static_cast<Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); 587 return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode*>(
588 impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
564 } 589 }
565 constexpr const Derived* GetNext() const { 590 constexpr const Derived* GetNext() const {
566 return static_cast<const Derived*>(impl::IntrusiveRedBlackTreeImpl::GetNext(this)); 591 return static_cast<const Derived*>(static_cast<const IntrusiveRedBlackTreeBaseNode*>(
592 impl::IntrusiveRedBlackTreeImpl::GetNext(this)));
567 } 593 }
568}; 594};
569 595
@@ -581,19 +607,22 @@ private:
581 friend class impl::IntrusiveRedBlackTreeImpl; 607 friend class impl::IntrusiveRedBlackTreeImpl;
582 608
583 static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) { 609 static constexpr IntrusiveRedBlackTreeNode* GetNode(Derived* parent) {
584 return static_cast<IntrusiveRedBlackTreeNode*>(parent); 610 return static_cast<IntrusiveRedBlackTreeNode*>(
611 static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
585 } 612 }
586 613
587 static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) { 614 static constexpr IntrusiveRedBlackTreeNode const* GetNode(Derived const* parent) {
588 return static_cast<const IntrusiveRedBlackTreeNode*>(parent); 615 return static_cast<const IntrusiveRedBlackTreeNode*>(
616 static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(parent));
589 } 617 }
590 618
591 static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) { 619 static constexpr Derived* GetParent(IntrusiveRedBlackTreeNode* node) {
592 return static_cast<Derived*>(node); 620 return static_cast<Derived*>(static_cast<IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
593 } 621 }
594 622
595 static constexpr Derived const* GetParent(const IntrusiveRedBlackTreeNode* node) { 623 static constexpr Derived const* GetParent(IntrusiveRedBlackTreeNode const* node) {
596 return static_cast<const Derived*>(node); 624 return static_cast<const Derived*>(
625 static_cast<const IntrusiveRedBlackTreeBaseNode<Derived>*>(node));
597 } 626 }
598}; 627};
599 628
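
To make the LightCompareType -> RedBlackKeyType rename concrete, here is a hedged usage sketch of the member-traits flavour of the tree. Object, ObjectComparator and the TreeType alias usage are illustrative assumptions, not code from this change:

    struct Object {
        Common::IntrusiveRedBlackTreeNode node;  // hypothetical member name
        int key{};
    };

    struct ObjectComparator {
        // Opting into key-based lookups (find_key/nfind_key/find_existing_key).
        using RedBlackKeyType = int;

        static constexpr int Compare(const Object& lhs, const Object& rhs) {
            return lhs.key < rhs.key ? -1 : (lhs.key > rhs.key ? 1 : 0);
        }
        static constexpr int Compare(int key, const Object& rhs) {
            return key < rhs.key ? -1 : (key > rhs.key ? 1 : 0);
        }
    };

    using ObjectTree =
        Common::IntrusiveRedBlackTreeMemberTraits<&Object::node>::TreeType<ObjectComparator>;

    void Example() {
        ObjectTree tree;
        Object a, b;
        a.key = 1;
        b.key = 2;
        tree.insert(a);
        tree.insert(b);

        // find_key() compares against a bare key instead of a full Object.
        if (const auto it = tree.find_key(2); it != tree.end()) {
            // found the element whose key is 2
        }
    }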
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index c51c05b28..4a2462ec4 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -218,19 +218,17 @@ private:
     Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_)
         : filter{filter_}, file_backend{file_backend_filename} {}
 
-    ~Impl() {
-        StopBackendThread();
-    }
+    ~Impl() = default;
 
     void StartBackendThread() {
-        backend_thread = std::thread([this] {
+        backend_thread = std::jthread([this](std::stop_token stop_token) {
             Common::SetCurrentThreadName("yuzu:Log");
             Entry entry;
             const auto write_logs = [this, &entry]() {
                 ForEachBackend([&entry](Backend& backend) { backend.Write(entry); });
             };
-            while (!stop.stop_requested()) {
-                entry = message_queue.PopWait(stop.get_token());
+            while (!stop_token.stop_requested()) {
+                entry = message_queue.PopWait(stop_token);
                 if (entry.filename != nullptr) {
                     write_logs();
                 }
@@ -244,11 +242,6 @@ private:
         });
     }
 
-    void StopBackendThread() {
-        stop.request_stop();
-        backend_thread.join();
-    }
-
     Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
                       const char* function, std::string&& message) const {
         using std::chrono::duration_cast;
@@ -283,10 +276,9 @@ private:
     ColorConsoleBackend color_console_backend{};
     FileBackend file_backend;
 
-    std::stop_source stop;
-    std::thread backend_thread;
     MPSCQueue<Entry, true> message_queue{};
     std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
+    std::jthread backend_thread;
 };
 } // namespace
 
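
The logging change above leans on std::jthread's built-in cooperative cancellation, which is why the separate std::stop_source and StopBackendThread() could be dropped. A minimal standalone sketch of that pattern (not yuzu code; the real backend blocks on the message queue instead of spinning):

    #include <stop_token>
    #include <thread>

    void Sketch() {
        // std::jthread passes a std::stop_token to the thread function and, on
        // destruction, calls request_stop() followed by join() automatically.
        std::jthread worker([](std::stop_token stop_token) {
            while (!stop_token.stop_requested()) {
                // pop one queue entry and write it to each backend...
            }
        });
    } // ~jthread: request_stop() + join()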
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 2810cec15..877e0faa4 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
     values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
     values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
     values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
+    values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
 
     // Renderer
     values.renderer_backend.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index d06b23a14..a37d83fb3 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -484,12 +484,15 @@ struct Values {
     BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
     BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
     BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
+    BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
+    BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
 
     Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
     Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
     Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
     Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
     Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
+    Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
 
     // Renderer
     RangedSetting<RendererBackend> renderer_backend{
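
For orientation, a hedged example of reading the new toggles at a call site; the surrounding code is assumed, only the setting names come from this change, and GetValue() is the usual accessor on yuzu's BasicSetting/Setting:

    // Hypothetical call site, e.g. while building the CPU/JIT configuration.
    const bool fastmem_exclusives = Settings::values.cpuopt_fastmem_exclusives.GetValue();
    const bool ignore_global_monitor =
        Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue();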
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 6241d08b3..98c82cd17 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -55,22 +55,50 @@ void AppendBuildInfo(FieldCollection& fc) {
 
 void AppendCPUInfo(FieldCollection& fc) {
 #ifdef ARCHITECTURE_x86_64
-    fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
-    fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA4", Common::GetCPUCaps().fma4);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE", Common::GetCPUCaps().sse);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE2", Common::GetCPUCaps().sse2);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE3", Common::GetCPUCaps().sse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSSE3", Common::GetCPUCaps().ssse3);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE41", Common::GetCPUCaps().sse4_1);
-    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_SSE42", Common::GetCPUCaps().sse4_2);
+
+    const auto& caps = Common::GetCPUCaps();
+    const auto add_field = [&fc](std::string_view field_name, const auto& field_value) {
+        fc.AddField(FieldType::UserSystem, field_name, field_value);
+    };
+    add_field("CPU_Model", caps.cpu_string);
+    add_field("CPU_BrandString", caps.brand_string);
+
+    add_field("CPU_Extension_x64_SSE", caps.sse);
+    add_field("CPU_Extension_x64_SSE2", caps.sse2);
+    add_field("CPU_Extension_x64_SSE3", caps.sse3);
+    add_field("CPU_Extension_x64_SSSE3", caps.ssse3);
+    add_field("CPU_Extension_x64_SSE41", caps.sse4_1);
+    add_field("CPU_Extension_x64_SSE42", caps.sse4_2);
+
+    add_field("CPU_Extension_x64_AVX", caps.avx);
+    add_field("CPU_Extension_x64_AVX_VNNI", caps.avx_vnni);
+    add_field("CPU_Extension_x64_AVX2", caps.avx2);
+
+    // Skylake-X/SP level AVX512, for compatibility with the previous telemetry field
+    add_field("CPU_Extension_x64_AVX512",
+              caps.avx512f && caps.avx512cd && caps.avx512vl && caps.avx512dq && caps.avx512bw);
+
+    add_field("CPU_Extension_x64_AVX512F", caps.avx512f);
+    add_field("CPU_Extension_x64_AVX512CD", caps.avx512cd);
+    add_field("CPU_Extension_x64_AVX512VL", caps.avx512vl);
+    add_field("CPU_Extension_x64_AVX512DQ", caps.avx512dq);
+    add_field("CPU_Extension_x64_AVX512BW", caps.avx512bw);
+    add_field("CPU_Extension_x64_AVX512BITALG", caps.avx512bitalg);
+    add_field("CPU_Extension_x64_AVX512VBMI", caps.avx512vbmi);
+
+    add_field("CPU_Extension_x64_AES", caps.aes);
+    add_field("CPU_Extension_x64_BMI1", caps.bmi1);
+    add_field("CPU_Extension_x64_BMI2", caps.bmi2);
+    add_field("CPU_Extension_x64_F16C", caps.f16c);
+    add_field("CPU_Extension_x64_FMA", caps.fma);
+    add_field("CPU_Extension_x64_FMA4", caps.fma4);
+    add_field("CPU_Extension_x64_GFNI", caps.gfni);
+    add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
+    add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
+    add_field("CPU_Extension_x64_MOVBE", caps.movbe);
+    add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
+    add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
+    add_field("CPU_Extension_x64_SHA", caps.sha);
 #else
     fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
 #endif
diff --git a/src/common/telemetry.h b/src/common/telemetry.h
index 4d632f7eb..3524c857e 100644
--- a/src/common/telemetry.h
+++ b/src/common/telemetry.h
@@ -8,6 +8,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <string_view>
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 
@@ -55,8 +56,8 @@ class Field : public FieldInterface {
 public:
     YUZU_NON_COPYABLE(Field);
 
-    Field(FieldType type_, std::string name_, T value_)
-        : name(std::move(name_)), type(type_), value(std::move(value_)) {}
+    Field(FieldType type_, std::string_view name_, T value_)
+        : name(name_), type(type_), value(std::move(value_)) {}
 
     ~Field() override = default;
 
@@ -123,7 +124,7 @@ public:
      * @param value Value for the field to add.
      */
     template <typename T>
-    void AddField(FieldType type, const char* name, T value) {
+    void AddField(FieldType type, std::string_view name, T value) {
         return AddField(std::make_unique<Field<T>>(type, name, std::move(value)));
     }
 
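
A short hedged illustration of the relaxed AddField()/Field signatures; the namespace qualification and the sample values are assumptions, while FieldCollection, FieldType and AddField come from this header:

    // Both a string literal and a std::string_view now bind to the same overload.
    Common::Telemetry::FieldCollection fc;
    fc.AddField(Common::Telemetry::FieldType::UserSystem, "CPU_Model", std::string{"example"});

    constexpr std::string_view field_name = "CPU_Extension_x64_AVX2";
    fc.AddField(Common::Telemetry::FieldType::UserSystem, field_name, true);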
diff --git a/src/common/tree.h b/src/common/tree.h
index 18faa4a48..28370e343 100644
--- a/src/common/tree.h
+++ b/src/common/tree.h
@@ -43,294 +43,265 @@
43 * The maximum height of a red-black tree is 2lg (n+1). 43 * The maximum height of a red-black tree is 2lg (n+1).
44 */ 44 */
45 45
46#include "common/assert.h" 46namespace Common::freebsd {
47 47
48namespace Common { 48enum class RBColor {
49 RB_BLACK = 0,
50 RB_RED = 1,
51};
52
53#pragma pack(push, 4)
49template <typename T> 54template <typename T>
50class RBHead { 55class RBEntry {
51public: 56public:
52 [[nodiscard]] T* Root() { 57 constexpr RBEntry() = default;
53 return rbh_root;
54 }
55 58
56 [[nodiscard]] const T* Root() const { 59 [[nodiscard]] constexpr T* Left() {
57 return rbh_root; 60 return m_rbe_left;
58 } 61 }
59 62 [[nodiscard]] constexpr const T* Left() const {
60 void SetRoot(T* root) { 63 return m_rbe_left;
61 rbh_root = root;
62 } 64 }
63 65
64 [[nodiscard]] bool IsEmpty() const { 66 constexpr void SetLeft(T* e) {
65 return Root() == nullptr; 67 m_rbe_left = e;
66 } 68 }
67 69
68private: 70 [[nodiscard]] constexpr T* Right() {
69 T* rbh_root = nullptr; 71 return m_rbe_right;
70};
71
72enum class EntryColor {
73 Black,
74 Red,
75};
76
77template <typename T>
78class RBEntry {
79public:
80 [[nodiscard]] T* Left() {
81 return rbe_left;
82 } 72 }
83 73 [[nodiscard]] constexpr const T* Right() const {
84 [[nodiscard]] const T* Left() const { 74 return m_rbe_right;
85 return rbe_left;
86 } 75 }
87 76
88 void SetLeft(T* left) { 77 constexpr void SetRight(T* e) {
89 rbe_left = left; 78 m_rbe_right = e;
90 } 79 }
91 80
92 [[nodiscard]] T* Right() { 81 [[nodiscard]] constexpr T* Parent() {
93 return rbe_right; 82 return m_rbe_parent;
94 } 83 }
95 84 [[nodiscard]] constexpr const T* Parent() const {
96 [[nodiscard]] const T* Right() const { 85 return m_rbe_parent;
97 return rbe_right;
98 } 86 }
99 87
100 void SetRight(T* right) { 88 constexpr void SetParent(T* e) {
101 rbe_right = right; 89 m_rbe_parent = e;
102 } 90 }
103 91
104 [[nodiscard]] T* Parent() { 92 [[nodiscard]] constexpr bool IsBlack() const {
105 return rbe_parent; 93 return m_rbe_color == RBColor::RB_BLACK;
106 } 94 }
107 95 [[nodiscard]] constexpr bool IsRed() const {
108 [[nodiscard]] const T* Parent() const { 96 return m_rbe_color == RBColor::RB_RED;
109 return rbe_parent;
110 } 97 }
111 98 [[nodiscard]] constexpr RBColor Color() const {
112 void SetParent(T* parent) { 99 return m_rbe_color;
113 rbe_parent = parent;
114 } 100 }
115 101
116 [[nodiscard]] bool IsBlack() const { 102 constexpr void SetColor(RBColor c) {
117 return rbe_color == EntryColor::Black; 103 m_rbe_color = c;
118 } 104 }
119 105
120 [[nodiscard]] bool IsRed() const { 106private:
121 return rbe_color == EntryColor::Red; 107 T* m_rbe_left{};
122 } 108 T* m_rbe_right{};
109 T* m_rbe_parent{};
110 RBColor m_rbe_color{RBColor::RB_BLACK};
111};
112#pragma pack(pop)
123 113
124 [[nodiscard]] EntryColor Color() const { 114template <typename T>
125 return rbe_color; 115struct CheckRBEntry {
126 } 116 static constexpr bool value = false;
117};
118template <typename T>
119struct CheckRBEntry<RBEntry<T>> {
120 static constexpr bool value = true;
121};
127 122
128 void SetColor(EntryColor color) { 123template <typename T>
129 rbe_color = color; 124concept IsRBEntry = CheckRBEntry<T>::value;
130 }
131 125
126template <typename T>
127concept HasRBEntry = requires(T& t, const T& ct) {
128 { t.GetRBEntry() } -> std::same_as<RBEntry<T>&>;
129 { ct.GetRBEntry() } -> std::same_as<const RBEntry<T>&>;
130};
131
132template <typename T>
133requires HasRBEntry<T>
134class RBHead {
132private: 135private:
133 T* rbe_left = nullptr; 136 T* m_rbh_root = nullptr;
134 T* rbe_right = nullptr; 137
135 T* rbe_parent = nullptr; 138public:
136 EntryColor rbe_color{}; 139 [[nodiscard]] constexpr T* Root() {
140 return m_rbh_root;
141 }
142 [[nodiscard]] constexpr const T* Root() const {
143 return m_rbh_root;
144 }
145 constexpr void SetRoot(T* root) {
146 m_rbh_root = root;
147 }
148
149 [[nodiscard]] constexpr bool IsEmpty() const {
150 return this->Root() == nullptr;
151 }
137}; 152};
138 153
139template <typename Node> 154template <typename T>
140[[nodiscard]] RBEntry<Node>& RB_ENTRY(Node* node) { 155requires HasRBEntry<T>
141 return node->GetEntry(); 156[[nodiscard]] constexpr RBEntry<T>& RB_ENTRY(T* t) {
157 return t->GetRBEntry();
142} 158}
143 159template <typename T>
144template <typename Node> 160requires HasRBEntry<T>
145[[nodiscard]] const RBEntry<Node>& RB_ENTRY(const Node* node) { 161[[nodiscard]] constexpr const RBEntry<T>& RB_ENTRY(const T* t) {
146 return node->GetEntry(); 162 return t->GetRBEntry();
147} 163}
148 164
149template <typename Node> 165template <typename T>
150[[nodiscard]] Node* RB_PARENT(Node* node) { 166requires HasRBEntry<T>
151 return RB_ENTRY(node).Parent(); 167[[nodiscard]] constexpr T* RB_LEFT(T* t) {
168 return RB_ENTRY(t).Left();
152} 169}
153 170template <typename T>
154template <typename Node> 171requires HasRBEntry<T>
155[[nodiscard]] const Node* RB_PARENT(const Node* node) { 172[[nodiscard]] constexpr const T* RB_LEFT(const T* t) {
156 return RB_ENTRY(node).Parent(); 173 return RB_ENTRY(t).Left();
157} 174}
158 175
159template <typename Node> 176template <typename T>
160void RB_SET_PARENT(Node* node, Node* parent) { 177requires HasRBEntry<T>
161 return RB_ENTRY(node).SetParent(parent); 178[[nodiscard]] constexpr T* RB_RIGHT(T* t) {
179 return RB_ENTRY(t).Right();
162} 180}
163 181template <typename T>
164template <typename Node> 182requires HasRBEntry<T>
165[[nodiscard]] Node* RB_LEFT(Node* node) { 183[[nodiscard]] constexpr const T* RB_RIGHT(const T* t) {
166 return RB_ENTRY(node).Left(); 184 return RB_ENTRY(t).Right();
167} 185}
168 186
169template <typename Node> 187template <typename T>
170[[nodiscard]] const Node* RB_LEFT(const Node* node) { 188requires HasRBEntry<T>
171 return RB_ENTRY(node).Left(); 189[[nodiscard]] constexpr T* RB_PARENT(T* t) {
190 return RB_ENTRY(t).Parent();
172} 191}
173 192template <typename T>
174template <typename Node> 193requires HasRBEntry<T>
175void RB_SET_LEFT(Node* node, Node* left) { 194[[nodiscard]] constexpr const T* RB_PARENT(const T* t) {
176 return RB_ENTRY(node).SetLeft(left); 195 return RB_ENTRY(t).Parent();
177} 196}
178 197
179template <typename Node> 198template <typename T>
180[[nodiscard]] Node* RB_RIGHT(Node* node) { 199requires HasRBEntry<T>
181 return RB_ENTRY(node).Right(); 200constexpr void RB_SET_LEFT(T* t, T* e) {
201 RB_ENTRY(t).SetLeft(e);
182} 202}
183 203template <typename T>
184template <typename Node> 204requires HasRBEntry<T>
185[[nodiscard]] const Node* RB_RIGHT(const Node* node) { 205constexpr void RB_SET_RIGHT(T* t, T* e) {
186 return RB_ENTRY(node).Right(); 206 RB_ENTRY(t).SetRight(e);
187} 207}
188 208template <typename T>
189template <typename Node> 209requires HasRBEntry<T>
190void RB_SET_RIGHT(Node* node, Node* right) { 210constexpr void RB_SET_PARENT(T* t, T* e) {
191 return RB_ENTRY(node).SetRight(right); 211 RB_ENTRY(t).SetParent(e);
192} 212}
193 213
194template <typename Node> 214template <typename T>
195[[nodiscard]] bool RB_IS_BLACK(const Node* node) { 215requires HasRBEntry<T>
196 return RB_ENTRY(node).IsBlack(); 216[[nodiscard]] constexpr bool RB_IS_BLACK(const T* t) {
217 return RB_ENTRY(t).IsBlack();
197} 218}
198 219template <typename T>
199template <typename Node> 220requires HasRBEntry<T>
200[[nodiscard]] bool RB_IS_RED(const Node* node) { 221[[nodiscard]] constexpr bool RB_IS_RED(const T* t) {
201 return RB_ENTRY(node).IsRed(); 222 return RB_ENTRY(t).IsRed();
202} 223}
203 224
204template <typename Node> 225template <typename T>
205[[nodiscard]] EntryColor RB_COLOR(const Node* node) { 226requires HasRBEntry<T>
206 return RB_ENTRY(node).Color(); 227[[nodiscard]] constexpr RBColor RB_COLOR(const T* t) {
228 return RB_ENTRY(t).Color();
207} 229}
208 230
209template <typename Node> 231template <typename T>
210void RB_SET_COLOR(Node* node, EntryColor color) { 232requires HasRBEntry<T>
211 return RB_ENTRY(node).SetColor(color); 233constexpr void RB_SET_COLOR(T* t, RBColor c) {
234 RB_ENTRY(t).SetColor(c);
212} 235}
213 236
214template <typename Node> 237template <typename T>
215void RB_SET(Node* node, Node* parent) { 238requires HasRBEntry<T>
216 auto& entry = RB_ENTRY(node); 239constexpr void RB_SET(T* elm, T* parent) {
217 entry.SetParent(parent); 240 auto& rb_entry = RB_ENTRY(elm);
218 entry.SetLeft(nullptr); 241 rb_entry.SetParent(parent);
219 entry.SetRight(nullptr); 242 rb_entry.SetLeft(nullptr);
220 entry.SetColor(EntryColor::Red); 243 rb_entry.SetRight(nullptr);
244 rb_entry.SetColor(RBColor::RB_RED);
221} 245}
222 246
223template <typename Node> 247template <typename T>
224void RB_SET_BLACKRED(Node* black, Node* red) { 248requires HasRBEntry<T>
225 RB_SET_COLOR(black, EntryColor::Black); 249constexpr void RB_SET_BLACKRED(T* black, T* red) {
226 RB_SET_COLOR(red, EntryColor::Red); 250 RB_SET_COLOR(black, RBColor::RB_BLACK);
251 RB_SET_COLOR(red, RBColor::RB_RED);
227} 252}
228 253
229template <typename Node> 254template <typename T>
230void RB_ROTATE_LEFT(RBHead<Node>* head, Node* elm, Node*& tmp) { 255requires HasRBEntry<T>
256constexpr void RB_ROTATE_LEFT(RBHead<T>& head, T* elm, T*& tmp) {
231 tmp = RB_RIGHT(elm); 257 tmp = RB_RIGHT(elm);
232 RB_SET_RIGHT(elm, RB_LEFT(tmp)); 258 if (RB_SET_RIGHT(elm, RB_LEFT(tmp)); RB_RIGHT(elm) != nullptr) {
233 if (RB_RIGHT(elm) != nullptr) {
234 RB_SET_PARENT(RB_LEFT(tmp), elm); 259 RB_SET_PARENT(RB_LEFT(tmp), elm);
235 } 260 }
236 261
237 RB_SET_PARENT(tmp, RB_PARENT(elm)); 262 if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
238 if (RB_PARENT(tmp) != nullptr) {
239 if (elm == RB_LEFT(RB_PARENT(elm))) { 263 if (elm == RB_LEFT(RB_PARENT(elm))) {
240 RB_SET_LEFT(RB_PARENT(elm), tmp); 264 RB_SET_LEFT(RB_PARENT(elm), tmp);
241 } else { 265 } else {
242 RB_SET_RIGHT(RB_PARENT(elm), tmp); 266 RB_SET_RIGHT(RB_PARENT(elm), tmp);
243 } 267 }
244 } else { 268 } else {
245 head->SetRoot(tmp); 269 head.SetRoot(tmp);
246 } 270 }
247 271
248 RB_SET_LEFT(tmp, elm); 272 RB_SET_LEFT(tmp, elm);
249 RB_SET_PARENT(elm, tmp); 273 RB_SET_PARENT(elm, tmp);
250} 274}
251 275
252template <typename Node> 276template <typename T>
253void RB_ROTATE_RIGHT(RBHead<Node>* head, Node* elm, Node*& tmp) { 277requires HasRBEntry<T>
278constexpr void RB_ROTATE_RIGHT(RBHead<T>& head, T* elm, T*& tmp) {
254 tmp = RB_LEFT(elm); 279 tmp = RB_LEFT(elm);
255 RB_SET_LEFT(elm, RB_RIGHT(tmp)); 280 if (RB_SET_LEFT(elm, RB_RIGHT(tmp)); RB_LEFT(elm) != nullptr) {
256 if (RB_LEFT(elm) != nullptr) {
257 RB_SET_PARENT(RB_RIGHT(tmp), elm); 281 RB_SET_PARENT(RB_RIGHT(tmp), elm);
258 } 282 }
259 283
260 RB_SET_PARENT(tmp, RB_PARENT(elm)); 284 if (RB_SET_PARENT(tmp, RB_PARENT(elm)); RB_PARENT(tmp) != nullptr) {
261 if (RB_PARENT(tmp) != nullptr) {
262 if (elm == RB_LEFT(RB_PARENT(elm))) { 285 if (elm == RB_LEFT(RB_PARENT(elm))) {
263 RB_SET_LEFT(RB_PARENT(elm), tmp); 286 RB_SET_LEFT(RB_PARENT(elm), tmp);
264 } else { 287 } else {
265 RB_SET_RIGHT(RB_PARENT(elm), tmp); 288 RB_SET_RIGHT(RB_PARENT(elm), tmp);
266 } 289 }
267 } else { 290 } else {
268 head->SetRoot(tmp); 291 head.SetRoot(tmp);
269 } 292 }
270 293
271 RB_SET_RIGHT(tmp, elm); 294 RB_SET_RIGHT(tmp, elm);
272 RB_SET_PARENT(elm, tmp); 295 RB_SET_PARENT(elm, tmp);
273} 296}
274 297
275template <typename Node> 298template <typename T>
276void RB_INSERT_COLOR(RBHead<Node>* head, Node* elm) { 299requires HasRBEntry<T>
277 Node* parent = nullptr; 300constexpr void RB_REMOVE_COLOR(RBHead<T>& head, T* parent, T* elm) {
278 Node* tmp = nullptr; 301 T* tmp;
279 302 while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head.Root()) {
280 while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
281 Node* gparent = RB_PARENT(parent);
282 if (parent == RB_LEFT(gparent)) {
283 tmp = RB_RIGHT(gparent);
284 if (tmp && RB_IS_RED(tmp)) {
285 RB_SET_COLOR(tmp, EntryColor::Black);
286 RB_SET_BLACKRED(parent, gparent);
287 elm = gparent;
288 continue;
289 }
290
291 if (RB_RIGHT(parent) == elm) {
292 RB_ROTATE_LEFT(head, parent, tmp);
293 tmp = parent;
294 parent = elm;
295 elm = tmp;
296 }
297
298 RB_SET_BLACKRED(parent, gparent);
299 RB_ROTATE_RIGHT(head, gparent, tmp);
300 } else {
301 tmp = RB_LEFT(gparent);
302 if (tmp && RB_IS_RED(tmp)) {
303 RB_SET_COLOR(tmp, EntryColor::Black);
304 RB_SET_BLACKRED(parent, gparent);
305 elm = gparent;
306 continue;
307 }
308
309 if (RB_LEFT(parent) == elm) {
310 RB_ROTATE_RIGHT(head, parent, tmp);
311 tmp = parent;
312 parent = elm;
313 elm = tmp;
314 }
315
316 RB_SET_BLACKRED(parent, gparent);
317 RB_ROTATE_LEFT(head, gparent, tmp);
318 }
319 }
320
321 RB_SET_COLOR(head->Root(), EntryColor::Black);
322}
323
324template <typename Node>
325void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
326 Node* tmp;
327 while ((elm == nullptr || RB_IS_BLACK(elm)) && elm != head->Root() && parent != nullptr) {
328 if (RB_LEFT(parent) == elm) { 303 if (RB_LEFT(parent) == elm) {
329 tmp = RB_RIGHT(parent); 304 tmp = RB_RIGHT(parent);
330 if (!tmp) {
331 ASSERT_MSG(false, "tmp is invalid!");
332 break;
333 }
334 if (RB_IS_RED(tmp)) { 305 if (RB_IS_RED(tmp)) {
335 RB_SET_BLACKRED(tmp, parent); 306 RB_SET_BLACKRED(tmp, parent);
336 RB_ROTATE_LEFT(head, parent, tmp); 307 RB_ROTATE_LEFT(head, parent, tmp);
@@ -339,29 +310,29 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
339 310
340 if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) && 311 if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
341 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) { 312 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
342 RB_SET_COLOR(tmp, EntryColor::Red); 313 RB_SET_COLOR(tmp, RBColor::RB_RED);
343 elm = parent; 314 elm = parent;
344 parent = RB_PARENT(elm); 315 parent = RB_PARENT(elm);
345 } else { 316 } else {
346 if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) { 317 if (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp))) {
347 Node* oleft; 318 T* oleft;
348 if ((oleft = RB_LEFT(tmp)) != nullptr) { 319 if ((oleft = RB_LEFT(tmp)) != nullptr) {
349 RB_SET_COLOR(oleft, EntryColor::Black); 320 RB_SET_COLOR(oleft, RBColor::RB_BLACK);
350 } 321 }
351 322
352 RB_SET_COLOR(tmp, EntryColor::Red); 323 RB_SET_COLOR(tmp, RBColor::RB_RED);
353 RB_ROTATE_RIGHT(head, tmp, oleft); 324 RB_ROTATE_RIGHT(head, tmp, oleft);
354 tmp = RB_RIGHT(parent); 325 tmp = RB_RIGHT(parent);
355 } 326 }
356 327
357 RB_SET_COLOR(tmp, RB_COLOR(parent)); 328 RB_SET_COLOR(tmp, RB_COLOR(parent));
358 RB_SET_COLOR(parent, EntryColor::Black); 329 RB_SET_COLOR(parent, RBColor::RB_BLACK);
359 if (RB_RIGHT(tmp)) { 330 if (RB_RIGHT(tmp)) {
360 RB_SET_COLOR(RB_RIGHT(tmp), EntryColor::Black); 331 RB_SET_COLOR(RB_RIGHT(tmp), RBColor::RB_BLACK);
361 } 332 }
362 333
363 RB_ROTATE_LEFT(head, parent, tmp); 334 RB_ROTATE_LEFT(head, parent, tmp);
364 elm = head->Root(); 335 elm = head.Root();
365 break; 336 break;
366 } 337 }
367 } else { 338 } else {
@@ -372,68 +343,56 @@ void RB_REMOVE_COLOR(RBHead<Node>* head, Node* parent, Node* elm) {
372 tmp = RB_LEFT(parent); 343 tmp = RB_LEFT(parent);
373 } 344 }
374 345
375 if (!tmp) {
376 ASSERT_MSG(false, "tmp is invalid!");
377 break;
378 }
379
380 if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) && 346 if ((RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) &&
381 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) { 347 (RB_RIGHT(tmp) == nullptr || RB_IS_BLACK(RB_RIGHT(tmp)))) {
382 RB_SET_COLOR(tmp, EntryColor::Red); 348 RB_SET_COLOR(tmp, RBColor::RB_RED);
383 elm = parent; 349 elm = parent;
384 parent = RB_PARENT(elm); 350 parent = RB_PARENT(elm);
385 } else { 351 } else {
386 if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) { 352 if (RB_LEFT(tmp) == nullptr || RB_IS_BLACK(RB_LEFT(tmp))) {
387 Node* oright; 353 T* oright;
388 if ((oright = RB_RIGHT(tmp)) != nullptr) { 354 if ((oright = RB_RIGHT(tmp)) != nullptr) {
389 RB_SET_COLOR(oright, EntryColor::Black); 355 RB_SET_COLOR(oright, RBColor::RB_BLACK);
390 } 356 }
391 357
392 RB_SET_COLOR(tmp, EntryColor::Red); 358 RB_SET_COLOR(tmp, RBColor::RB_RED);
393 RB_ROTATE_LEFT(head, tmp, oright); 359 RB_ROTATE_LEFT(head, tmp, oright);
394 tmp = RB_LEFT(parent); 360 tmp = RB_LEFT(parent);
395 } 361 }
396 362
397 RB_SET_COLOR(tmp, RB_COLOR(parent)); 363 RB_SET_COLOR(tmp, RB_COLOR(parent));
398 RB_SET_COLOR(parent, EntryColor::Black); 364 RB_SET_COLOR(parent, RBColor::RB_BLACK);
399 365
400 if (RB_LEFT(tmp)) { 366 if (RB_LEFT(tmp)) {
401 RB_SET_COLOR(RB_LEFT(tmp), EntryColor::Black); 367 RB_SET_COLOR(RB_LEFT(tmp), RBColor::RB_BLACK);
402 } 368 }
403 369
404 RB_ROTATE_RIGHT(head, parent, tmp); 370 RB_ROTATE_RIGHT(head, parent, tmp);
405 elm = head->Root(); 371 elm = head.Root();
406 break; 372 break;
407 } 373 }
408 } 374 }
409 } 375 }
410 376
411 if (elm) { 377 if (elm) {
412 RB_SET_COLOR(elm, EntryColor::Black); 378 RB_SET_COLOR(elm, RBColor::RB_BLACK);
413 } 379 }
414} 380}
415 381
416template <typename Node> 382template <typename T>
417Node* RB_REMOVE(RBHead<Node>* head, Node* elm) { 383requires HasRBEntry<T>
418 Node* child = nullptr; 384constexpr T* RB_REMOVE(RBHead<T>& head, T* elm) {
419 Node* parent = nullptr; 385 T* child = nullptr;
420 Node* old = elm; 386 T* parent = nullptr;
421 EntryColor color{}; 387 T* old = elm;
422 388 RBColor color = RBColor::RB_BLACK;
423 const auto finalize = [&] {
424 if (color == EntryColor::Black) {
425 RB_REMOVE_COLOR(head, parent, child);
426 }
427
428 return old;
429 };
430 389
431 if (RB_LEFT(elm) == nullptr) { 390 if (RB_LEFT(elm) == nullptr) {
432 child = RB_RIGHT(elm); 391 child = RB_RIGHT(elm);
433 } else if (RB_RIGHT(elm) == nullptr) { 392 } else if (RB_RIGHT(elm) == nullptr) {
434 child = RB_LEFT(elm); 393 child = RB_LEFT(elm);
435 } else { 394 } else {
436 Node* left; 395 T* left;
437 elm = RB_RIGHT(elm); 396 elm = RB_RIGHT(elm);
438 while ((left = RB_LEFT(elm)) != nullptr) { 397 while ((left = RB_LEFT(elm)) != nullptr) {
439 elm = left; 398 elm = left;
@@ -446,6 +405,7 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
446 if (child) { 405 if (child) {
447 RB_SET_PARENT(child, parent); 406 RB_SET_PARENT(child, parent);
448 } 407 }
408
449 if (parent) { 409 if (parent) {
450 if (RB_LEFT(parent) == elm) { 410 if (RB_LEFT(parent) == elm) {
451 RB_SET_LEFT(parent, child); 411 RB_SET_LEFT(parent, child);
@@ -453,14 +413,14 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
453 RB_SET_RIGHT(parent, child); 413 RB_SET_RIGHT(parent, child);
454 } 414 }
455 } else { 415 } else {
456 head->SetRoot(child); 416 head.SetRoot(child);
457 } 417 }
458 418
459 if (RB_PARENT(elm) == old) { 419 if (RB_PARENT(elm) == old) {
460 parent = elm; 420 parent = elm;
461 } 421 }
462 422
463 elm->SetEntry(old->GetEntry()); 423 elm->SetRBEntry(old->GetRBEntry());
464 424
465 if (RB_PARENT(old)) { 425 if (RB_PARENT(old)) {
466 if (RB_LEFT(RB_PARENT(old)) == old) { 426 if (RB_LEFT(RB_PARENT(old)) == old) {
@@ -469,17 +429,24 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
469 RB_SET_RIGHT(RB_PARENT(old), elm); 429 RB_SET_RIGHT(RB_PARENT(old), elm);
470 } 430 }
471 } else { 431 } else {
472 head->SetRoot(elm); 432 head.SetRoot(elm);
473 } 433 }
434
474 RB_SET_PARENT(RB_LEFT(old), elm); 435 RB_SET_PARENT(RB_LEFT(old), elm);
436
475 if (RB_RIGHT(old)) { 437 if (RB_RIGHT(old)) {
476 RB_SET_PARENT(RB_RIGHT(old), elm); 438 RB_SET_PARENT(RB_RIGHT(old), elm);
477 } 439 }
440
478 if (parent) { 441 if (parent) {
479 left = parent; 442 left = parent;
480 } 443 }
481 444
482 return finalize(); 445 if (color == RBColor::RB_BLACK) {
446 RB_REMOVE_COLOR(head, parent, child);
447 }
448
449 return old;
483 } 450 }
484 451
485 parent = RB_PARENT(elm); 452 parent = RB_PARENT(elm);
@@ -495,17 +462,69 @@ Node* RB_REMOVE(RBHead<Node>* head, Node* elm) {
495 RB_SET_RIGHT(parent, child); 462 RB_SET_RIGHT(parent, child);
496 } 463 }
497 } else { 464 } else {
498 head->SetRoot(child); 465 head.SetRoot(child);
466 }
467
468 if (color == RBColor::RB_BLACK) {
469 RB_REMOVE_COLOR(head, parent, child);
470 }
471
472 return old;
473}
474
475template <typename T>
476requires HasRBEntry<T>
477constexpr void RB_INSERT_COLOR(RBHead<T>& head, T* elm) {
478 T *parent = nullptr, *tmp = nullptr;
479 while ((parent = RB_PARENT(elm)) != nullptr && RB_IS_RED(parent)) {
480 T* gparent = RB_PARENT(parent);
481 if (parent == RB_LEFT(gparent)) {
482 tmp = RB_RIGHT(gparent);
483 if (tmp && RB_IS_RED(tmp)) {
484 RB_SET_COLOR(tmp, RBColor::RB_BLACK);
485 RB_SET_BLACKRED(parent, gparent);
486 elm = gparent;
487 continue;
488 }
489
490 if (RB_RIGHT(parent) == elm) {
491 RB_ROTATE_LEFT(head, parent, tmp);
492 tmp = parent;
493 parent = elm;
494 elm = tmp;
495 }
496
497 RB_SET_BLACKRED(parent, gparent);
498 RB_ROTATE_RIGHT(head, gparent, tmp);
499 } else {
500 tmp = RB_LEFT(gparent);
501 if (tmp && RB_IS_RED(tmp)) {
502 RB_SET_COLOR(tmp, RBColor::RB_BLACK);
503 RB_SET_BLACKRED(parent, gparent);
504 elm = gparent;
505 continue;
506 }
507
508 if (RB_LEFT(parent) == elm) {
509 RB_ROTATE_RIGHT(head, parent, tmp);
510 tmp = parent;
511 parent = elm;
512 elm = tmp;
513 }
514
515 RB_SET_BLACKRED(parent, gparent);
516 RB_ROTATE_LEFT(head, gparent, tmp);
517 }
499 } 518 }
500 519
501 return finalize(); 520 RB_SET_COLOR(head.Root(), RBColor::RB_BLACK);
502} 521}
503 522
504// Inserts a node into the RB tree 523template <typename T, typename Compare>
505template <typename Node, typename CompareFunction> 524requires HasRBEntry<T>
506Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) { 525constexpr T* RB_INSERT(RBHead<T>& head, T* elm, Compare cmp) {
507 Node* parent = nullptr; 526 T* parent = nullptr;
508 Node* tmp = head->Root(); 527 T* tmp = head.Root();
509 int comp = 0; 528 int comp = 0;
510 529
511 while (tmp) { 530 while (tmp) {
@@ -529,17 +548,17 @@ Node* RB_INSERT(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
529 RB_SET_RIGHT(parent, elm); 548 RB_SET_RIGHT(parent, elm);
530 } 549 }
531 } else { 550 } else {
532 head->SetRoot(elm); 551 head.SetRoot(elm);
533 } 552 }
534 553
535 RB_INSERT_COLOR(head, elm); 554 RB_INSERT_COLOR(head, elm);
536 return nullptr; 555 return nullptr;
537} 556}
538 557
539// Finds the node with the same key as elm 558template <typename T, typename Compare>
540template <typename Node, typename CompareFunction> 559requires HasRBEntry<T>
541Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { 560constexpr T* RB_FIND(RBHead<T>& head, T* elm, Compare cmp) {
542 Node* tmp = head->Root(); 561 T* tmp = head.Root();
543 562
544 while (tmp) { 563 while (tmp) {
545 const int comp = cmp(elm, tmp); 564 const int comp = cmp(elm, tmp);
@@ -555,11 +574,11 @@ Node* RB_FIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
555 return nullptr; 574 return nullptr;
556} 575}
557 576
558// Finds the first node greater than or equal to the search key 577template <typename T, typename Compare>
559template <typename Node, typename CompareFunction> 578requires HasRBEntry<T>
560Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) { 579constexpr T* RB_NFIND(RBHead<T>& head, T* elm, Compare cmp) {
561 Node* tmp = head->Root(); 580 T* tmp = head.Root();
562 Node* res = nullptr; 581 T* res = nullptr;
563 582
564 while (tmp) { 583 while (tmp) {
565 const int comp = cmp(elm, tmp); 584 const int comp = cmp(elm, tmp);
@@ -576,13 +595,13 @@ Node* RB_NFIND(RBHead<Node>* head, Node* elm, CompareFunction cmp) {
576 return res; 595 return res;
577} 596}
578 597
579// Finds the node with the same key as lelm 598template <typename T, typename U, typename Compare>
580template <typename Node, typename CompareFunction> 599requires HasRBEntry<T>
581Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) { 600constexpr T* RB_FIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
582 Node* tmp = head->Root(); 601 T* tmp = head.Root();
583 602
584 while (tmp) { 603 while (tmp) {
585 const int comp = lcmp(lelm, tmp); 604 const int comp = cmp(key, tmp);
586 if (comp < 0) { 605 if (comp < 0) {
587 tmp = RB_LEFT(tmp); 606 tmp = RB_LEFT(tmp);
588 } else if (comp > 0) { 607 } else if (comp > 0) {
@@ -595,14 +614,14 @@ Node* RB_FIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
595 return nullptr; 614 return nullptr;
596} 615}
597 616
598// Finds the first node greater than or equal to the search key 617template <typename T, typename U, typename Compare>
599template <typename Node, typename CompareFunction> 618requires HasRBEntry<T>
600Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp) { 619constexpr T* RB_NFIND_KEY(RBHead<T>& head, const U& key, Compare cmp) {
601 Node* tmp = head->Root(); 620 T* tmp = head.Root();
602 Node* res = nullptr; 621 T* res = nullptr;
603 622
604 while (tmp) { 623 while (tmp) {
605 const int comp = lcmp(lelm, tmp); 624 const int comp = cmp(key, tmp);
606 if (comp < 0) { 625 if (comp < 0) {
607 res = tmp; 626 res = tmp;
608 tmp = RB_LEFT(tmp); 627 tmp = RB_LEFT(tmp);
@@ -616,8 +635,43 @@ Node* RB_NFIND_LIGHT(RBHead<Node>* head, const void* lelm, CompareFunction lcmp)
616 return res; 635 return res;
617} 636}
618 637
619template <typename Node> 638template <typename T, typename Compare>
620Node* RB_NEXT(Node* elm) { 639requires HasRBEntry<T>
640constexpr T* RB_FIND_EXISTING(RBHead<T>& head, T* elm, Compare cmp) {
641 T* tmp = head.Root();
642
643 while (true) {
644 const int comp = cmp(elm, tmp);
645 if (comp < 0) {
646 tmp = RB_LEFT(tmp);
647 } else if (comp > 0) {
648 tmp = RB_RIGHT(tmp);
649 } else {
650 return tmp;
651 }
652 }
653}
654
655template <typename T, typename U, typename Compare>
656requires HasRBEntry<T>
657constexpr T* RB_FIND_EXISTING_KEY(RBHead<T>& head, const U& key, Compare cmp) {
658 T* tmp = head.Root();
659
660 while (true) {
661 const int comp = cmp(key, tmp);
662 if (comp < 0) {
663 tmp = RB_LEFT(tmp);
664 } else if (comp > 0) {
665 tmp = RB_RIGHT(tmp);
666 } else {
667 return tmp;
668 }
669 }
670}
671
672template <typename T>
673requires HasRBEntry<T>
674constexpr T* RB_NEXT(T* elm) {
621 if (RB_RIGHT(elm)) { 675 if (RB_RIGHT(elm)) {
622 elm = RB_RIGHT(elm); 676 elm = RB_RIGHT(elm);
623 while (RB_LEFT(elm)) { 677 while (RB_LEFT(elm)) {
@@ -636,8 +690,9 @@ Node* RB_NEXT(Node* elm) {
636 return elm; 690 return elm;
637} 691}
638 692
639template <typename Node> 693template <typename T>
640Node* RB_PREV(Node* elm) { 694requires HasRBEntry<T>
695constexpr T* RB_PREV(T* elm) {
641 if (RB_LEFT(elm)) { 696 if (RB_LEFT(elm)) {
642 elm = RB_LEFT(elm); 697 elm = RB_LEFT(elm);
643 while (RB_RIGHT(elm)) { 698 while (RB_RIGHT(elm)) {
@@ -656,30 +711,32 @@ Node* RB_PREV(Node* elm) {
656 return elm; 711 return elm;
657} 712}
658 713
659template <typename Node> 714template <typename T>
660Node* RB_MINMAX(RBHead<Node>* head, bool is_min) { 715requires HasRBEntry<T>
661 Node* tmp = head->Root(); 716constexpr T* RB_MIN(RBHead<T>& head) {
662 Node* parent = nullptr; 717 T* tmp = head.Root();
718 T* parent = nullptr;
663 719
664 while (tmp) { 720 while (tmp) {
665 parent = tmp; 721 parent = tmp;
666 if (is_min) { 722 tmp = RB_LEFT(tmp);
667 tmp = RB_LEFT(tmp);
668 } else {
669 tmp = RB_RIGHT(tmp);
670 }
671 } 723 }
672 724
673 return parent; 725 return parent;
674} 726}
675 727
676template <typename Node> 728template <typename T>
677Node* RB_MIN(RBHead<Node>* head) { 729requires HasRBEntry<T>
678 return RB_MINMAX(head, true); 730constexpr T* RB_MAX(RBHead<T>& head) {
679} 731 T* tmp = head.Root();
732 T* parent = nullptr;
680 733
681template <typename Node> 734 while (tmp) {
682Node* RB_MAX(RBHead<Node>* head) { 735 parent = tmp;
683 return RB_MINMAX(head, false); 736 tmp = RB_RIGHT(tmp);
737 }
738
739 return parent;
684} 740}
685} // namespace Common 741
742} // namespace Common::freebsd
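
The rewritten tree.h above exposes the BSD rb-tree operations as constexpr function templates constrained by HasRBEntry, keyed off a node's embedded entry. A minimal usage sketch under stated assumptions: MyNode, its key field and CompareNodes are hypothetical, the entry member type is assumed to be RBEntry<MyNode> with the GetRBEntry()/SetRBEntry() accessors the functions above rely on, and only RBHead, RB_INSERT, RB_MIN and RB_NEXT are taken from the header.

#include <array>
#include "common/tree.h"

// Hypothetical intrusive node; the RBEntry member type and accessor names are
// assumptions about what the HasRBEntry concept requires.
struct MyNode {
    Common::freebsd::RBEntry<MyNode> entry;
    int key{};

    constexpr Common::freebsd::RBEntry<MyNode>& GetRBEntry() { return entry; }
    constexpr const Common::freebsd::RBEntry<MyNode>& GetRBEntry() const { return entry; }
    constexpr void SetRBEntry(const Common::freebsd::RBEntry<MyNode>& e) { entry = e; }
};

// Three-way comparator returning <0 / 0 / >0, as RB_INSERT and RB_FIND expect.
constexpr int CompareNodes(const MyNode* lhs, const MyNode* rhs) {
    return (lhs->key < rhs->key) ? -1 : (lhs->key > rhs->key) ? 1 : 0;
}

void Example() {
    Common::freebsd::RBHead<MyNode> head{};
    std::array<MyNode, 3> nodes{};
    nodes[0].key = 2;
    nodes[1].key = 1;
    nodes[2].key = 3;

    // RB_INSERT returns nullptr on success, or the already-present node when
    // the key collides.
    for (auto& node : nodes) {
        Common::freebsd::RB_INSERT(head, &node, CompareNodes);
    }

    // In-order traversal: RB_MIN yields the smallest key, RB_NEXT its successor.
    for (MyNode* it = Common::freebsd::RB_MIN(head); it != nullptr;
         it = Common::freebsd::RB_NEXT(it)) {
        // it->key visits 1, 2, 3.
    }
}
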
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index fbeacc7e2..d81edb140 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -1,8 +1,12 @@
1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project 1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
2// Licensed under GPLv2 or any later version 2// Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
3// Refer to the license.txt file included.
4 3
4#include <array>
5#include <cstring> 5#include <cstring>
6#include <iterator>
7#include <span>
8#include <string_view>
9#include "common/bit_util.h"
6#include "common/common_types.h" 10#include "common/common_types.h"
7#include "common/x64/cpu_detect.h" 11#include "common/x64/cpu_detect.h"
8 12
@@ -17,7 +21,7 @@
17// clang-format on 21// clang-format on
18#endif 22#endif
19 23
20static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { 24static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) {
21#if defined(__DragonFly__) || defined(__FreeBSD__) 25#if defined(__DragonFly__) || defined(__FreeBSD__)
22 // Despite the name, this is just do_cpuid() with ECX as second input. 26 // Despite the name, this is just do_cpuid() with ECX as second input.
23 cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); 27 cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
@@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
30#endif 34#endif
31} 35}
32 36
33static inline void __cpuid(int info[4], int function_id) { 37static inline void __cpuid(int info[4], u32 function_id) {
34 return __cpuidex(info, function_id, 0); 38 return __cpuidex(info, function_id, 0);
35} 39}
36 40
@@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) {
45 49
46namespace Common { 50namespace Common {
47 51
52CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) {
53 if (brand_string == "GenuineIntel") {
54 return Manufacturer::Intel;
55 } else if (brand_string == "AuthenticAMD") {
56 return Manufacturer::AMD;
57 } else if (brand_string == "HygonGenuine") {
58 return Manufacturer::Hygon;
59 }
60 return Manufacturer::Unknown;
61}
62
48// Detects the various CPU features 63// Detects the various CPU features
49static CPUCaps Detect() { 64static CPUCaps Detect() {
50 CPUCaps caps = {}; 65 CPUCaps caps = {};
@@ -53,75 +68,74 @@ static CPUCaps Detect() {
53 // yuzu at all anyway 68 // yuzu at all anyway
54 69
55 int cpu_id[4]; 70 int cpu_id[4];
56 memset(caps.brand_string, 0, sizeof(caps.brand_string));
57 71
58 // Detect CPU's CPUID capabilities and grab CPU string 72 // Detect CPU's CPUID capabilities and grab manufacturer string
59 __cpuid(cpu_id, 0x00000000); 73 __cpuid(cpu_id, 0x00000000);
60 u32 max_std_fn = cpu_id[0]; // EAX 74 const u32 max_std_fn = cpu_id[0]; // EAX
61
62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
65 if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
66 caps.manufacturer = Manufacturer::Intel;
67 else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
68 caps.manufacturer = Manufacturer::AMD;
69 else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
70 caps.manufacturer = Manufacturer::Hygon;
71 else
72 caps.manufacturer = Manufacturer::Unknown;
73 75
74 __cpuid(cpu_id, 0x80000000); 76 std::memset(caps.brand_string, 0, std::size(caps.brand_string));
77 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32));
78 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32));
79 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32));
80
81 caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string);
75 82
76 u32 max_ex_fn = cpu_id[0]; 83 // Set reasonable default cpu string even if brand string not available
84 std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string));
77 85
78 // Set reasonable default brand string even if brand string not available 86 __cpuid(cpu_id, 0x80000000);
79 strcpy(caps.cpu_string, caps.brand_string); 87
88 const u32 max_ex_fn = cpu_id[0];
80 89
81 // Detect family and other miscellaneous features 90 // Detect family and other miscellaneous features
82 if (max_std_fn >= 1) { 91 if (max_std_fn >= 1) {
83 __cpuid(cpu_id, 0x00000001); 92 __cpuid(cpu_id, 0x00000001);
84 if ((cpu_id[3] >> 25) & 1) 93 caps.sse = Common::Bit<25>(cpu_id[3]);
85 caps.sse = true; 94 caps.sse2 = Common::Bit<26>(cpu_id[3]);
86 if ((cpu_id[3] >> 26) & 1) 95 caps.sse3 = Common::Bit<0>(cpu_id[2]);
87 caps.sse2 = true; 96 caps.pclmulqdq = Common::Bit<1>(cpu_id[2]);
88 if ((cpu_id[2]) & 1) 97 caps.ssse3 = Common::Bit<9>(cpu_id[2]);
89 caps.sse3 = true; 98 caps.sse4_1 = Common::Bit<19>(cpu_id[2]);
90 if ((cpu_id[2] >> 9) & 1) 99 caps.sse4_2 = Common::Bit<20>(cpu_id[2]);
91 caps.ssse3 = true; 100 caps.movbe = Common::Bit<22>(cpu_id[2]);
92 if ((cpu_id[2] >> 19) & 1) 101 caps.popcnt = Common::Bit<23>(cpu_id[2]);
93 caps.sse4_1 = true; 102 caps.aes = Common::Bit<25>(cpu_id[2]);
94 if ((cpu_id[2] >> 20) & 1) 103 caps.f16c = Common::Bit<29>(cpu_id[2]);
95 caps.sse4_2 = true;
96 if ((cpu_id[2] >> 25) & 1)
97 caps.aes = true;
98 104
99 // AVX support requires 3 separate checks: 105 // AVX support requires 3 separate checks:
100 // - Is the AVX bit set in CPUID? 106 // - Is the AVX bit set in CPUID?
101 // - Is the XSAVE bit set in CPUID? 107 // - Is the XSAVE bit set in CPUID?
102 // - XGETBV result has the XCR bit set. 108 // - XGETBV result has the XCR bit set.
103 if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { 109 if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) {
104 if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { 110 if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
105 caps.avx = true; 111 caps.avx = true;
106 if ((cpu_id[2] >> 12) & 1) 112 if (Common::Bit<12>(cpu_id[2]))
107 caps.fma = true; 113 caps.fma = true;
108 } 114 }
109 } 115 }
110 116
111 if (max_std_fn >= 7) { 117 if (max_std_fn >= 7) {
112 __cpuidex(cpu_id, 0x00000007, 0x00000000); 118 __cpuidex(cpu_id, 0x00000007, 0x00000000);
113 // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed 119 // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed
114 if ((cpu_id[1] >> 5) & 1) 120 if (caps.avx) {
115 caps.avx2 = caps.avx; 121 caps.avx2 = Common::Bit<5>(cpu_id[1]);
116 if ((cpu_id[1] >> 3) & 1) 122 caps.avx512f = Common::Bit<16>(cpu_id[1]);
117 caps.bmi1 = true; 123 caps.avx512dq = Common::Bit<17>(cpu_id[1]);
118 if ((cpu_id[1] >> 8) & 1) 124 caps.avx512cd = Common::Bit<28>(cpu_id[1]);
119 caps.bmi2 = true; 125 caps.avx512bw = Common::Bit<30>(cpu_id[1]);
120 // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) 126 caps.avx512vl = Common::Bit<31>(cpu_id[1]);
121 if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && 127 caps.avx512vbmi = Common::Bit<1>(cpu_id[2]);
122 (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { 128 caps.avx512bitalg = Common::Bit<12>(cpu_id[2]);
123 caps.avx512 = caps.avx2;
124 } 129 }
130
131 caps.bmi1 = Common::Bit<3>(cpu_id[1]);
132 caps.bmi2 = Common::Bit<8>(cpu_id[1]);
133 caps.sha = Common::Bit<29>(cpu_id[1]);
134
135 caps.gfni = Common::Bit<8>(cpu_id[2]);
136
137 __cpuidex(cpu_id, 0x00000007, 0x00000001);
138 caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]);
125 } 139 }
126 } 140 }
127 141
@@ -138,15 +152,13 @@ static CPUCaps Detect() {
138 if (max_ex_fn >= 0x80000001) { 152 if (max_ex_fn >= 0x80000001) {
139 // Check for more features 153 // Check for more features
140 __cpuid(cpu_id, 0x80000001); 154 __cpuid(cpu_id, 0x80000001);
141 if ((cpu_id[2] >> 16) & 1) 155 caps.lzcnt = Common::Bit<5>(cpu_id[2]);
142 caps.fma4 = true; 156 caps.fma4 = Common::Bit<16>(cpu_id[2]);
143 } 157 }
144 158
145 if (max_ex_fn >= 0x80000007) { 159 if (max_ex_fn >= 0x80000007) {
146 __cpuid(cpu_id, 0x80000007); 160 __cpuid(cpu_id, 0x80000007);
147 if (cpu_id[3] & (1 << 8)) { 161 caps.invariant_tsc = Common::Bit<8>(cpu_id[3]);
148 caps.invariant_tsc = true;
149 }
150 } 162 }
151 163
152 if (max_std_fn >= 0x16) { 164 if (max_std_fn >= 0x16) {
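
cpu_detect.cpp above replaces the hand-written shift-and-mask feature checks with Common::Bit<N> from common/bit_util.h. A standalone sketch of the same decoding pattern for a few leaf-1 bits; the local Bit helper is an illustrative stand-in, and only a handful of flags are shown.

#include <cstddef>
#include <cstdint>

// Illustrative stand-in for Common::Bit<N>: true if bit N of value is set.
template <std::size_t N>
constexpr bool Bit(std::uint32_t value) {
    return ((value >> N) & 1u) != 0;
}

struct Leaf1Features {
    bool sse2{};
    bool sse4_2{};
    bool aes{};
    bool avx_precheck{};
};

// ecx/edx correspond to cpu_id[2]/cpu_id[3] after __cpuid(cpu_id, 0x00000001).
constexpr Leaf1Features DecodeLeaf1(std::uint32_t ecx, std::uint32_t edx) {
    Leaf1Features f{};
    f.sse2 = Bit<26>(edx);
    f.sse4_2 = Bit<20>(ecx);
    f.aes = Bit<25>(ecx);
    // AVX bit plus OSXSAVE; the real Detect() additionally verifies via XGETBV
    // that the OS has enabled the YMM state before setting caps.avx.
    f.avx_precheck = Bit<28>(ecx) && Bit<27>(ecx);
    return f;
}
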
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index e3b63302e..40c48b132 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -1,42 +1,65 @@
1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project 1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator
2// Licensed under GPLv2 or any later version 2// Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
3// Refer to the license.txt file included.
4 3
5#pragma once 4#pragma once
6 5
7namespace Common { 6#include <string_view>
7#include "common/common_types.h"
8 8
9enum class Manufacturer : u32 { 9namespace Common {
10 Intel = 0,
11 AMD = 1,
12 Hygon = 2,
13 Unknown = 3,
14};
15 10
16/// x86/x64 CPU capabilities that may be detected by this module 11/// x86/x64 CPU capabilities that may be detected by this module
17struct CPUCaps { 12struct CPUCaps {
13
14 enum class Manufacturer : u8 {
15 Unknown = 0,
16 Intel = 1,
17 AMD = 2,
18 Hygon = 3,
19 };
20
21 static Manufacturer ParseManufacturer(std::string_view brand_string);
22
18 Manufacturer manufacturer; 23 Manufacturer manufacturer;
19 char cpu_string[0x21]; 24 char brand_string[13];
20 char brand_string[0x41]; 25
21 bool sse; 26 char cpu_string[48];
22 bool sse2; 27
23 bool sse3;
24 bool ssse3;
25 bool sse4_1;
26 bool sse4_2;
27 bool lzcnt;
28 bool avx;
29 bool avx2;
30 bool avx512;
31 bool bmi1;
32 bool bmi2;
33 bool fma;
34 bool fma4;
35 bool aes;
36 bool invariant_tsc;
37 u32 base_frequency; 28 u32 base_frequency;
38 u32 max_frequency; 29 u32 max_frequency;
39 u32 bus_frequency; 30 u32 bus_frequency;
31
32 bool sse : 1;
33 bool sse2 : 1;
34 bool sse3 : 1;
35 bool ssse3 : 1;
36 bool sse4_1 : 1;
37 bool sse4_2 : 1;
38
39 bool avx : 1;
40 bool avx_vnni : 1;
41 bool avx2 : 1;
42 bool avx512f : 1;
43 bool avx512dq : 1;
44 bool avx512cd : 1;
45 bool avx512bw : 1;
46 bool avx512vl : 1;
47 bool avx512vbmi : 1;
48 bool avx512bitalg : 1;
49
50 bool aes : 1;
51 bool bmi1 : 1;
52 bool bmi2 : 1;
53 bool f16c : 1;
54 bool fma : 1;
55 bool fma4 : 1;
56 bool gfni : 1;
57 bool invariant_tsc : 1;
58 bool lzcnt : 1;
59 bool movbe : 1;
60 bool pclmulqdq : 1;
61 bool popcnt : 1;
62 bool sha : 1;
40}; 63};
41 64
42/** 65/**
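
cpu_detect.h now stores every feature flag as a one-bit bitfield instead of a plain bool, which keeps CPUCaps compact as the flag list grows, and resizes brand_string/cpu_string to 13 and 48 bytes to match what CPUID actually returns. A tiny standalone illustration of the packing effect; the structs below are illustrative, not the real CPUCaps.

#include <cstdio>

// Plain bools: typically one byte each.
struct FlagsPlain {
    bool a, b, c, d, e, f, g, h;
};

// One-bit bitfields: typically packed into a single byte.
struct FlagsPacked {
    bool a : 1, b : 1, c : 1, d : 1, e : 1, f : 1, g : 1, h : 1;
};

int main() {
    std::printf("plain: %zu bytes, packed: %zu bytes\n",
                sizeof(FlagsPlain), sizeof(FlagsPacked));
    // Typically prints 8 and 1; exact packing is implementation-defined.
    return 0;
}
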
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 7ed43bfb1..1f234c822 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -154,6 +154,7 @@ add_library(core STATIC
154 hle/api_version.h 154 hle/api_version.h
155 hle/ipc.h 155 hle/ipc.h
156 hle/ipc_helpers.h 156 hle/ipc_helpers.h
157 hle/kernel/board/nintendo/nx/k_memory_layout.h
157 hle/kernel/board/nintendo/nx/k_system_control.cpp 158 hle/kernel/board/nintendo/nx/k_system_control.cpp
158 hle/kernel/board/nintendo/nx/k_system_control.h 159 hle/kernel/board/nintendo/nx/k_system_control.h
159 hle/kernel/board/nintendo/nx/secure_monitor.h 160 hle/kernel/board/nintendo/nx/secure_monitor.h
@@ -166,6 +167,7 @@ add_library(core STATIC
166 hle/kernel/hle_ipc.h 167 hle/kernel/hle_ipc.h
167 hle/kernel/init/init_slab_setup.cpp 168 hle/kernel/init/init_slab_setup.cpp
168 hle/kernel/init/init_slab_setup.h 169 hle/kernel/init/init_slab_setup.h
170 hle/kernel/initial_process.h
169 hle/kernel/k_address_arbiter.cpp 171 hle/kernel/k_address_arbiter.cpp
170 hle/kernel/k_address_arbiter.h 172 hle/kernel/k_address_arbiter.h
171 hle/kernel/k_address_space_info.cpp 173 hle/kernel/k_address_space_info.cpp
@@ -207,6 +209,7 @@ add_library(core STATIC
207 hle/kernel/k_memory_region.h 209 hle/kernel/k_memory_region.h
208 hle/kernel/k_memory_region_type.h 210 hle/kernel/k_memory_region_type.h
209 hle/kernel/k_page_bitmap.h 211 hle/kernel/k_page_bitmap.h
212 hle/kernel/k_page_buffer.h
210 hle/kernel/k_page_heap.cpp 213 hle/kernel/k_page_heap.cpp
211 hle/kernel/k_page_heap.h 214 hle/kernel/k_page_heap.h
212 hle/kernel/k_page_linked_list.h 215 hle/kernel/k_page_linked_list.h
@@ -244,6 +247,8 @@ add_library(core STATIC
244 hle/kernel/k_system_control.h 247 hle/kernel/k_system_control.h
245 hle/kernel/k_thread.cpp 248 hle/kernel/k_thread.cpp
246 hle/kernel/k_thread.h 249 hle/kernel/k_thread.h
250 hle/kernel/k_thread_local_page.cpp
251 hle/kernel/k_thread_local_page.h
247 hle/kernel/k_thread_queue.cpp 252 hle/kernel/k_thread_queue.cpp
248 hle/kernel/k_thread_queue.h 253 hle/kernel/k_thread_queue.h
249 hle/kernel/k_trace.h 254 hle/kernel/k_trace.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index b0d89c539..c1c843b8f 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
137 config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; 137 config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
138 config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; 138 config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
139 config.only_detect_misalignment_via_page_table_on_page_boundary = true; 139 config.only_detect_misalignment_via_page_table_on_page_boundary = true;
140 config.fastmem_exclusive_access = true;
141 config.recompile_on_exclusive_fastmem_failure = true;
140 142
141 // Multi-process state 143 // Multi-process state
142 config.processor_id = core_index; 144 config.processor_id = core_index;
@@ -146,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
146 config.wall_clock_cntpct = uses_wall_clock; 148 config.wall_clock_cntpct = uses_wall_clock;
147 149
148 // Code cache size 150 // Code cache size
149 config.code_cache_size = 512_MiB; 151 config.code_cache_size = 128_MiB;
150 config.far_code_offset = 400_MiB; 152 config.far_code_offset = 100_MiB;
151 153
152 // Safe optimizations 154 // Safe optimizations
153 if (Settings::values.cpu_debug_mode) { 155 if (Settings::values.cpu_debug_mode) {
@@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
178 if (!Settings::values.cpuopt_fastmem) { 180 if (!Settings::values.cpuopt_fastmem) {
179 config.fastmem_pointer = nullptr; 181 config.fastmem_pointer = nullptr;
180 } 182 }
183 if (!Settings::values.cpuopt_fastmem_exclusives) {
184 config.fastmem_exclusive_access = false;
185 }
186 if (!Settings::values.cpuopt_recompile_exclusives) {
187 config.recompile_on_exclusive_fastmem_failure = false;
188 }
181 } 189 }
182 190
183 // Unsafe optimizations 191 // Unsafe optimizations
@@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
195 if (Settings::values.cpuopt_unsafe_inaccurate_nan) { 203 if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
196 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; 204 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
197 } 205 }
206 if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
207 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
208 }
198 } 209 }
199 210
200 // Curated optimizations 211 // Curated optimizations
@@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
203 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; 214 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
204 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; 215 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
205 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; 216 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
217 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
206 } 218 }
207 219
208 return std::make_unique<Dynarmic::A32::Jit>(config); 220 return std::make_unique<Dynarmic::A32::Jit>(config);
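
Both dynarmic backends follow the same gating pattern here: the exclusive-access fastmem options are enabled by default, switched off again when the corresponding cpuopt_* settings are cleared in CPU debug mode, and Unsafe_IgnoreGlobalMonitor is only added in the unsafe/curated accuracy blocks. A condensed, self-contained sketch of the exclusives part; the struct and field names below are stand-ins for the Dynarmic config and Settings::values.

struct JitConfig {
    bool fastmem_exclusive_access = false;
    bool recompile_on_exclusive_fastmem_failure = false;
};

struct CpuSettings {
    bool cpu_debug_mode = false;
    bool cpuopt_fastmem_exclusives = true;
    bool cpuopt_recompile_exclusives = true;
};

JitConfig MakeConfigSketch(const CpuSettings& s) {
    JitConfig config{};
    // Defaults mirror the diff: exclusive fastmem on, with recompilation on
    // repeated exclusive fastmem failures.
    config.fastmem_exclusive_access = true;
    config.recompile_on_exclusive_fastmem_failure = true;

    // The per-option opt-outs only take effect in CPU debug mode, matching the
    // "Safe optimizations" block above.
    if (s.cpu_debug_mode) {
        if (!s.cpuopt_fastmem_exclusives) {
            config.fastmem_exclusive_access = false;
        }
        if (!s.cpuopt_recompile_exclusives) {
            config.recompile_on_exclusive_fastmem_failure = false;
        }
    }
    return config;
}
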
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 56836bd05..aa74fce4d 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
185 config.fastmem_pointer = page_table->fastmem_arena; 185 config.fastmem_pointer = page_table->fastmem_arena;
186 config.fastmem_address_space_bits = address_space_bits; 186 config.fastmem_address_space_bits = address_space_bits;
187 config.silently_mirror_fastmem = false; 187 config.silently_mirror_fastmem = false;
188
189 config.fastmem_exclusive_access = true;
190 config.recompile_on_exclusive_fastmem_failure = true;
188 } 191 }
189 192
190 // Multi-process state 193 // Multi-process state
@@ -205,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
205 config.wall_clock_cntpct = uses_wall_clock; 208 config.wall_clock_cntpct = uses_wall_clock;
206 209
207 // Code cache size 210 // Code cache size
208 config.code_cache_size = 512_MiB; 211 config.code_cache_size = 128_MiB;
209 config.far_code_offset = 400_MiB; 212 config.far_code_offset = 100_MiB;
210 213
211 // Safe optimizations 214 // Safe optimizations
212 if (Settings::values.cpu_debug_mode) { 215 if (Settings::values.cpu_debug_mode) {
@@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
237 if (!Settings::values.cpuopt_fastmem) { 240 if (!Settings::values.cpuopt_fastmem) {
238 config.fastmem_pointer = nullptr; 241 config.fastmem_pointer = nullptr;
239 } 242 }
243 if (!Settings::values.cpuopt_fastmem_exclusives) {
244 config.fastmem_exclusive_access = false;
245 }
246 if (!Settings::values.cpuopt_recompile_exclusives) {
247 config.recompile_on_exclusive_fastmem_failure = false;
248 }
240 } 249 }
241 250
242 // Unsafe optimizations 251 // Unsafe optimizations
@@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
254 if (Settings::values.cpuopt_unsafe_fastmem_check) { 263 if (Settings::values.cpuopt_unsafe_fastmem_check) {
255 config.fastmem_address_space_bits = 64; 264 config.fastmem_address_space_bits = 64;
256 } 265 }
266 if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
267 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
268 }
257 } 269 }
258 270
259 // Curated optimizations 271 // Curated optimizations
@@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
262 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA; 274 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
263 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; 275 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
264 config.fastmem_address_space_bits = 64; 276 config.fastmem_address_space_bits = 64;
277 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
265 } 278 }
266 279
267 return std::make_shared<Dynarmic::A64::Jit>(config); 280 return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
index 397d054a8..ea6b224e0 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
37 }); 37 });
38} 38}
39 39
40void DynarmicExclusiveMonitor::ClearExclusive() { 40void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
41 monitor.Clear(); 41 monitor.ClearProcessor(core_index);
42} 42}
43 43
44bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { 44bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
index 265c4ecef..5a15b43ef 100644
--- a/src/core/arm/dynarmic/arm_exclusive_monitor.h
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -29,7 +29,7 @@ public:
29 u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; 29 u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
30 u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; 30 u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
31 u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; 31 u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
32 void ClearExclusive() override; 32 void ClearExclusive(std::size_t core_index) override;
33 33
34 bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; 34 bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
35 bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; 35 bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index 62f6e6023..9914ca3da 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -23,7 +23,7 @@ public:
23 virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; 23 virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
24 virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; 24 virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
25 virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0; 25 virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
26 virtual void ClearExclusive() = 0; 26 virtual void ClearExclusive(std::size_t core_index) = 0;
27 27
28 virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; 28 virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
29 virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0; 29 virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
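
ClearExclusive() now takes the core index, and the dynarmic backend forwards it to monitor.ClearProcessor(core_index), so only the calling core's reservation is dropped rather than every core's. A standalone sketch of that per-core semantics; ToyExclusiveMonitor is illustrative, not dynarmic's monitor.

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Illustrative per-core monitor: each core tracks at most one reserved address.
class ToyExclusiveMonitor {
public:
    explicit ToyExclusiveMonitor(std::size_t core_count) : reservations(core_count) {}

    void Mark(std::size_t core_index, std::uint64_t vaddr) {
        reservations[core_index] = vaddr;
    }

    // Old interface: ClearExclusive() wiped every core's reservation.
    // New interface: only the given core's reservation is cleared.
    void ClearExclusive(std::size_t core_index) {
        reservations[core_index].reset();
    }

    bool HasReservation(std::size_t core_index) const {
        return reservations[core_index].has_value();
    }

private:
    std::vector<std::optional<std::uint64_t>> reservations;
};
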
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b0cfee3ee..c60a784c3 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -326,7 +326,9 @@ struct System::Impl {
326 is_powered_on = false; 326 is_powered_on = false;
327 exit_lock = false; 327 exit_lock = false;
328 328
329 gpu_core->NotifyShutdown(); 329 if (gpu_core != nullptr) {
330 gpu_core->NotifyShutdown();
331 }
330 332
331 services.reset(); 333 services.reset();
332 service_manager.reset(); 334 service_manager.reset();
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index e413a520a..b3bffecb2 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -42,11 +42,20 @@ public:
42 context.MakeCurrent(); 42 context.MakeCurrent();
43 } 43 }
44 ~Scoped() { 44 ~Scoped() {
45 context.DoneCurrent(); 45 if (active) {
46 context.DoneCurrent();
47 }
48 }
49
50 /// In the event that the context was destroyed before the Scoped is destroyed, this provides a
51 /// mechanism to prevent calling a destroyed object's method during the destructor
52 void Cancel() {
53 active = false;
46 } 54 }
47 55
48 private: 56 private:
49 GraphicsContext& context; 57 GraphicsContext& context;
58 bool active{true};
50 }; 59 };
51 60
52 /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value 61 /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value
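
EmuWindow's Scoped helper above gains a Cancel() escape hatch so its destructor can skip DoneCurrent() when the GraphicsContext has already been torn down. The same RAII-with-cancel pattern in a standalone sketch; Resource stands in for GraphicsContext.

// Generic sketch of the Cancel() pattern used by Scoped above.
class Resource {
public:
    void Acquire() {}
    void Release() {}
};

class ScopedAcquire {
public:
    explicit ScopedAcquire(Resource& resource_) : resource(resource_) {
        resource.Acquire();
    }
    ~ScopedAcquire() {
        if (active) {
            resource.Release();
        }
    }

    // Call when the underlying resource has been torn down first, so the
    // destructor does not touch a dead object.
    void Cancel() {
        active = false;
    }

private:
    Resource& resource;
    bool active{true};
};
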
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 026257115..3c4e45fcd 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -385,7 +385,7 @@ public:
385 T PopRaw(); 385 T PopRaw();
386 386
387 template <class T> 387 template <class T>
388 std::shared_ptr<T> PopIpcInterface() { 388 std::weak_ptr<T> PopIpcInterface() {
389 ASSERT(context->Session()->IsDomain()); 389 ASSERT(context->Session()->IsDomain());
390 ASSERT(context->GetDomainMessageHeader().input_object_count > 0); 390 ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
391 return context->GetDomainHandler<T>(Pop<u32>() - 1); 391 return context->GetDomainHandler<T>(Pop<u32>() - 1);
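
Because PopIpcInterface() now returns std::weak_ptr<T> instead of std::shared_ptr<T>, call sites must lock the pointer and handle expiry, mirroring the DomainHandler(...).lock() != nullptr check added in hle_ipc.cpp below. A minimal sketch of that calling pattern; Handler and DoThing() are placeholders.

#include <memory>

struct Handler {
    void DoThing() {}
};

void UseHandler(const std::weak_ptr<Handler>& weak_handler) {
    // Lock and null-check instead of assuming the object is still alive.
    if (const auto handler = weak_handler.lock()) {
        handler->DoThing();
    }
}
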
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
new file mode 100644
index 000000000..01e225088
--- /dev/null
+++ b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.h
@@ -0,0 +1,13 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Kernel {
10
11constexpr inline PAddr MainMemoryAddress = 0x80000000;
12
13} // namespace Kernel
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 702cacffc..8027bec00 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -39,6 +39,10 @@ Smc::MemoryArrangement GetMemoryArrangeForInit() {
39} 39}
40} // namespace 40} // namespace
41 41
42size_t KSystemControl::Init::GetRealMemorySize() {
43 return GetIntendedMemorySize();
44}
45
42// Initialization. 46// Initialization.
43size_t KSystemControl::Init::GetIntendedMemorySize() { 47size_t KSystemControl::Init::GetIntendedMemorySize() {
44 switch (GetMemorySizeForInit()) { 48 switch (GetMemorySizeForInit()) {
@@ -53,7 +57,13 @@ size_t KSystemControl::Init::GetIntendedMemorySize() {
53} 57}
54 58
55PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) { 59PAddr KSystemControl::Init::GetKernelPhysicalBaseAddress(u64 base_address) {
56 return base_address; 60 const size_t real_dram_size = KSystemControl::Init::GetRealMemorySize();
61 const size_t intended_dram_size = KSystemControl::Init::GetIntendedMemorySize();
62 if (intended_dram_size * 2 < real_dram_size) {
63 return base_address;
64 } else {
65 return base_address + ((real_dram_size - intended_dram_size) / 2);
66 }
57} 67}
58 68
59bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() { 69bool KSystemControl::Init::ShouldIncreaseThreadResourceLimit() {
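
GetKernelPhysicalBaseAddress() now centres the intended DRAM window inside the real DRAM whenever the real size is less than twice the intended size, and leaves the base alone otherwise. A small worked example under assumed sizes (6 GiB real, 4 GiB intended); on current yuzu GetRealMemorySize() simply returns GetIntendedMemorySize(), so the offset is zero and the base is unchanged.

#include <cstdint>

constexpr std::uint64_t Base = 0x80000000;     // MainMemoryAddress
constexpr std::uint64_t Real = 6ULL << 30;     // assumed 6 GiB physical DRAM
constexpr std::uint64_t Intended = 4ULL << 30; // assumed 4 GiB arrangement
// Intended * 2 >= Real, so the window is centred: offset = (6 - 4) / 2 = 1 GiB.
constexpr std::uint64_t KernelBase =
    (Intended * 2 < Real) ? Base : Base + (Real - Intended) / 2;
static_assert(KernelBase == Base + (1ULL << 30));
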
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
index 52f230ced..df2a17f2a 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.h
@@ -13,6 +13,7 @@ public:
13 class Init { 13 class Init {
14 public: 14 public:
15 // Initialization. 15 // Initialization.
16 static std::size_t GetRealMemorySize();
16 static std::size_t GetIntendedMemorySize(); 17 static std::size_t GetIntendedMemorySize();
17 static PAddr GetKernelPhysicalBaseAddress(u64 base_address); 18 static PAddr GetKernelPhysicalBaseAddress(u64 base_address);
18 static bool ShouldIncreaseThreadResourceLimit(); 19 static bool ShouldIncreaseThreadResourceLimit();
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index e19544c54..9f2175f82 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -45,7 +45,7 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
45 LOG_CRITICAL(IPC, "object_id {} is too big!", object_id); 45 LOG_CRITICAL(IPC, "object_id {} is too big!", object_id);
46 return false; 46 return false;
47 } 47 }
48 return DomainHandler(object_id - 1) != nullptr; 48 return DomainHandler(object_id - 1).lock() != nullptr;
49 } else { 49 } else {
50 return session_handler != nullptr; 50 return session_handler != nullptr;
51 } 51 }
@@ -53,9 +53,6 @@ bool SessionRequestManager::HasSessionRequestHandler(const HLERequestContext& co
53 53
54void SessionRequestHandler::ClientConnected(KServerSession* session) { 54void SessionRequestHandler::ClientConnected(KServerSession* session) {
55 session->ClientConnected(shared_from_this()); 55 session->ClientConnected(shared_from_this());
56
57 // Ensure our server session is tracked globally.
58 kernel.RegisterServerSession(session);
59} 56}
60 57
61void SessionRequestHandler::ClientDisconnected(KServerSession* session) { 58void SessionRequestHandler::ClientDisconnected(KServerSession* session) {
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index 754b41ff6..670cc741c 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -94,6 +94,7 @@ protected:
94 std::weak_ptr<ServiceThread> service_thread; 94 std::weak_ptr<ServiceThread> service_thread;
95}; 95};
96 96
97using SessionRequestHandlerWeakPtr = std::weak_ptr<SessionRequestHandler>;
97using SessionRequestHandlerPtr = std::shared_ptr<SessionRequestHandler>; 98using SessionRequestHandlerPtr = std::shared_ptr<SessionRequestHandler>;
98 99
99/** 100/**
@@ -139,7 +140,7 @@ public:
139 } 140 }
140 } 141 }
141 142
142 SessionRequestHandlerPtr DomainHandler(std::size_t index) const { 143 SessionRequestHandlerWeakPtr DomainHandler(std::size_t index) const {
143 ASSERT_MSG(index < DomainHandlerCount(), "Unexpected handler index {}", index); 144 ASSERT_MSG(index < DomainHandlerCount(), "Unexpected handler index {}", index);
144 return domain_handlers.at(index); 145 return domain_handlers.at(index);
145 } 146 }
@@ -328,10 +329,10 @@ public:
328 329
329 template <typename T> 330 template <typename T>
330 std::shared_ptr<T> GetDomainHandler(std::size_t index) const { 331 std::shared_ptr<T> GetDomainHandler(std::size_t index) const {
331 return std::static_pointer_cast<T>(manager->DomainHandler(index)); 332 return std::static_pointer_cast<T>(manager.lock()->DomainHandler(index).lock());
332 } 333 }
333 334
334 void SetSessionRequestManager(std::shared_ptr<SessionRequestManager> manager_) { 335 void SetSessionRequestManager(std::weak_ptr<SessionRequestManager> manager_) {
335 manager = std::move(manager_); 336 manager = std::move(manager_);
336 } 337 }
337 338
@@ -374,7 +375,7 @@ private:
374 u32 handles_offset{}; 375 u32 handles_offset{};
375 u32 domain_offset{}; 376 u32 domain_offset{};
376 377
377 std::shared_ptr<SessionRequestManager> manager; 378 std::weak_ptr<SessionRequestManager> manager;
378 379
379 KernelCore& kernel; 380 KernelCore& kernel;
380 Core::Memory::Memory& memory; 381 Core::Memory::Memory& memory;
diff --git a/src/core/hle/kernel/init/init_slab_setup.cpp b/src/core/hle/kernel/init/init_slab_setup.cpp
index 36fc0944a..b0f773ee0 100644
--- a/src/core/hle/kernel/init/init_slab_setup.cpp
+++ b/src/core/hle/kernel/init/init_slab_setup.cpp
@@ -7,19 +7,23 @@
7#include "common/common_funcs.h" 7#include "common/common_funcs.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/device_memory.h"
10#include "core/hardware_properties.h" 11#include "core/hardware_properties.h"
11#include "core/hle/kernel/init/init_slab_setup.h" 12#include "core/hle/kernel/init/init_slab_setup.h"
12#include "core/hle/kernel/k_code_memory.h" 13#include "core/hle/kernel/k_code_memory.h"
13#include "core/hle/kernel/k_event.h" 14#include "core/hle/kernel/k_event.h"
14#include "core/hle/kernel/k_memory_layout.h" 15#include "core/hle/kernel/k_memory_layout.h"
15#include "core/hle/kernel/k_memory_manager.h" 16#include "core/hle/kernel/k_memory_manager.h"
17#include "core/hle/kernel/k_page_buffer.h"
16#include "core/hle/kernel/k_port.h" 18#include "core/hle/kernel/k_port.h"
17#include "core/hle/kernel/k_process.h" 19#include "core/hle/kernel/k_process.h"
18#include "core/hle/kernel/k_resource_limit.h" 20#include "core/hle/kernel/k_resource_limit.h"
19#include "core/hle/kernel/k_session.h" 21#include "core/hle/kernel/k_session.h"
20#include "core/hle/kernel/k_shared_memory.h" 22#include "core/hle/kernel/k_shared_memory.h"
23#include "core/hle/kernel/k_shared_memory_info.h"
21#include "core/hle/kernel/k_system_control.h" 24#include "core/hle/kernel/k_system_control.h"
22#include "core/hle/kernel/k_thread.h" 25#include "core/hle/kernel/k_thread.h"
26#include "core/hle/kernel/k_thread_local_page.h"
23#include "core/hle/kernel/k_transfer_memory.h" 27#include "core/hle/kernel/k_transfer_memory.h"
24 28
25namespace Kernel::Init { 29namespace Kernel::Init {
@@ -32,9 +36,13 @@ namespace Kernel::Init {
32 HANDLER(KEvent, (SLAB_COUNT(KEvent)), ##__VA_ARGS__) \ 36 HANDLER(KEvent, (SLAB_COUNT(KEvent)), ##__VA_ARGS__) \
33 HANDLER(KPort, (SLAB_COUNT(KPort)), ##__VA_ARGS__) \ 37 HANDLER(KPort, (SLAB_COUNT(KPort)), ##__VA_ARGS__) \
34 HANDLER(KSharedMemory, (SLAB_COUNT(KSharedMemory)), ##__VA_ARGS__) \ 38 HANDLER(KSharedMemory, (SLAB_COUNT(KSharedMemory)), ##__VA_ARGS__) \
39 HANDLER(KSharedMemoryInfo, (SLAB_COUNT(KSharedMemory) * 8), ##__VA_ARGS__) \
35 HANDLER(KTransferMemory, (SLAB_COUNT(KTransferMemory)), ##__VA_ARGS__) \ 40 HANDLER(KTransferMemory, (SLAB_COUNT(KTransferMemory)), ##__VA_ARGS__) \
36 HANDLER(KCodeMemory, (SLAB_COUNT(KCodeMemory)), ##__VA_ARGS__) \ 41 HANDLER(KCodeMemory, (SLAB_COUNT(KCodeMemory)), ##__VA_ARGS__) \
37 HANDLER(KSession, (SLAB_COUNT(KSession)), ##__VA_ARGS__) \ 42 HANDLER(KSession, (SLAB_COUNT(KSession)), ##__VA_ARGS__) \
43 HANDLER(KThreadLocalPage, \
44 (SLAB_COUNT(KProcess) + (SLAB_COUNT(KProcess) + SLAB_COUNT(KThread)) / 8), \
45 ##__VA_ARGS__) \
38 HANDLER(KResourceLimit, (SLAB_COUNT(KResourceLimit)), ##__VA_ARGS__) 46 HANDLER(KResourceLimit, (SLAB_COUNT(KResourceLimit)), ##__VA_ARGS__)
39 47
40namespace { 48namespace {
@@ -50,38 +58,46 @@ enum KSlabType : u32 {
50// Constexpr counts. 58// Constexpr counts.
51constexpr size_t SlabCountKProcess = 80; 59constexpr size_t SlabCountKProcess = 80;
52constexpr size_t SlabCountKThread = 800; 60constexpr size_t SlabCountKThread = 800;
53constexpr size_t SlabCountKEvent = 700; 61constexpr size_t SlabCountKEvent = 900;
54constexpr size_t SlabCountKInterruptEvent = 100; 62constexpr size_t SlabCountKInterruptEvent = 100;
55constexpr size_t SlabCountKPort = 256 + 0x20; // Extra 0x20 ports over Nintendo for homebrew. 63constexpr size_t SlabCountKPort = 384;
56constexpr size_t SlabCountKSharedMemory = 80; 64constexpr size_t SlabCountKSharedMemory = 80;
57constexpr size_t SlabCountKTransferMemory = 200; 65constexpr size_t SlabCountKTransferMemory = 200;
58constexpr size_t SlabCountKCodeMemory = 10; 66constexpr size_t SlabCountKCodeMemory = 10;
59constexpr size_t SlabCountKDeviceAddressSpace = 300; 67constexpr size_t SlabCountKDeviceAddressSpace = 300;
60constexpr size_t SlabCountKSession = 933; 68constexpr size_t SlabCountKSession = 1133;
61constexpr size_t SlabCountKLightSession = 100; 69constexpr size_t SlabCountKLightSession = 100;
62constexpr size_t SlabCountKObjectName = 7; 70constexpr size_t SlabCountKObjectName = 7;
63constexpr size_t SlabCountKResourceLimit = 5; 71constexpr size_t SlabCountKResourceLimit = 5;
64constexpr size_t SlabCountKDebug = Core::Hardware::NUM_CPU_CORES; 72constexpr size_t SlabCountKDebug = Core::Hardware::NUM_CPU_CORES;
65constexpr size_t SlabCountKAlpha = 1; 73constexpr size_t SlabCountKIoPool = 1;
66constexpr size_t SlabCountKBeta = 6; 74constexpr size_t SlabCountKIoRegion = 6;
67 75
68constexpr size_t SlabCountExtraKThread = 160; 76constexpr size_t SlabCountExtraKThread = 160;
69 77
78/// Helper function to translate from the slab virtual address to the reserved location in physical
79/// memory.
80static PAddr TranslateSlabAddrToPhysical(KMemoryLayout& memory_layout, VAddr slab_addr) {
81 slab_addr -= memory_layout.GetSlabRegionAddress();
82 return slab_addr + Core::DramMemoryMap::SlabHeapBase;
83}
84
70template <typename T> 85template <typename T>
71VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAddr address, 86VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAddr address,
72 size_t num_objects) { 87 size_t num_objects) {
73 // TODO(bunnei): This is just a place holder. We should initialize the appropriate KSlabHeap for
74 // kernel object type T with the backing kernel memory pointer once we emulate kernel memory.
75 88
76 const size_t size = Common::AlignUp(sizeof(T) * num_objects, alignof(void*)); 89 const size_t size = Common::AlignUp(sizeof(T) * num_objects, alignof(void*));
77 VAddr start = Common::AlignUp(address, alignof(T)); 90 VAddr start = Common::AlignUp(address, alignof(T));
78 91
79 // This is intentionally empty. Once KSlabHeap is fully implemented, we can replace this with 92 // This should use the virtual memory address passed in, but currently, we do not set up the
80 // the pointer to emulated memory to pass along. Until then, KSlabHeap will just allocate/free 93 // kernel virtual memory layout. Instead, we simply map these at a region of physical memory
81 // host memory. 94 // that we reserve for the slab heaps.
82 void* backing_kernel_memory{}; 95 // TODO(bunnei): Fix this once we support the kernel virtual memory layout.
83 96
84 if (size > 0) { 97 if (size > 0) {
98 void* backing_kernel_memory{
99 system.DeviceMemory().GetPointer(TranslateSlabAddrToPhysical(memory_layout, start))};
100
85 const KMemoryRegion* region = memory_layout.FindVirtual(start + size - 1); 101 const KMemoryRegion* region = memory_layout.FindVirtual(start + size - 1);
86 ASSERT(region != nullptr); 102 ASSERT(region != nullptr);
87 ASSERT(region->IsDerivedFrom(KMemoryRegionType_KernelSlab)); 103 ASSERT(region->IsDerivedFrom(KMemoryRegionType_KernelSlab));
@@ -91,6 +107,12 @@ VAddr InitializeSlabHeap(Core::System& system, KMemoryLayout& memory_layout, VAd
91 return start + size; 107 return start + size;
92} 108}
93 109
110size_t CalculateSlabHeapGapSize() {
111 constexpr size_t KernelSlabHeapGapSize = 2_MiB - 296_KiB;
112 static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);
113 return KernelSlabHeapGapSize;
114}
115
94} // namespace 116} // namespace
95 117
96KSlabResourceCounts KSlabResourceCounts::CreateDefault() { 118KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
@@ -109,8 +131,8 @@ KSlabResourceCounts KSlabResourceCounts::CreateDefault() {
109 .num_KObjectName = SlabCountKObjectName, 131 .num_KObjectName = SlabCountKObjectName,
110 .num_KResourceLimit = SlabCountKResourceLimit, 132 .num_KResourceLimit = SlabCountKResourceLimit,
111 .num_KDebug = SlabCountKDebug, 133 .num_KDebug = SlabCountKDebug,
112 .num_KAlpha = SlabCountKAlpha, 134 .num_KIoPool = SlabCountKIoPool,
113 .num_KBeta = SlabCountKBeta, 135 .num_KIoRegion = SlabCountKIoRegion,
114 }; 136 };
115} 137}
116 138
@@ -136,11 +158,34 @@ size_t CalculateTotalSlabHeapSize(const KernelCore& kernel) {
136#undef ADD_SLAB_SIZE 158#undef ADD_SLAB_SIZE
137 159
138 // Add the reserved size. 160 // Add the reserved size.
139 size += KernelSlabHeapGapsSize; 161 size += CalculateSlabHeapGapSize();
140 162
141 return size; 163 return size;
142} 164}
143 165
166void InitializeKPageBufferSlabHeap(Core::System& system) {
167 auto& kernel = system.Kernel();
168
169 const auto& counts = kernel.SlabResourceCounts();
170 const size_t num_pages =
171 counts.num_KProcess + counts.num_KThread + (counts.num_KProcess + counts.num_KThread) / 8;
172 const size_t slab_size = num_pages * PageSize;
173
174 // Reserve memory from the system resource limit.
175 ASSERT(kernel.GetSystemResourceLimit()->Reserve(LimitableResource::PhysicalMemory, slab_size));
176
177 // Allocate memory for the slab.
178 constexpr auto AllocateOption = KMemoryManager::EncodeOption(
179 KMemoryManager::Pool::System, KMemoryManager::Direction::FromFront);
180 const PAddr slab_address =
181 kernel.MemoryManager().AllocateAndOpenContinuous(num_pages, 1, AllocateOption);
182 ASSERT(slab_address != 0);
183
184 // Initialize the slabheap.
185 KPageBuffer::InitializeSlabHeap(kernel, system.DeviceMemory().GetPointer(slab_address),
186 slab_size);
187}
188
144void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) { 189void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
145 auto& kernel = system.Kernel(); 190 auto& kernel = system.Kernel();
146 191
@@ -160,13 +205,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
160 } 205 }
161 206
162 // Create an array to represent the gaps between the slabs. 207 // Create an array to represent the gaps between the slabs.
163 const size_t total_gap_size = KernelSlabHeapGapsSize;
208 const size_t total_gap_size = CalculateSlabHeapGapSize();
164 std::array<size_t, slab_types.size()> slab_gaps; 209 std::array<size_t, slab_types.size()> slab_gaps;
165 for (size_t i = 0; i < slab_gaps.size(); i++) {
210 for (auto& slab_gap : slab_gaps) {
166 // Note: This is an off-by-one error from Nintendo's intention, because GenerateRandomRange 211 // Note: This is an off-by-one error from Nintendo's intention, because GenerateRandomRange
167 // is inclusive. However, Nintendo also has the off-by-one error, and it's "harmless", so we 212 // is inclusive. However, Nintendo also has the off-by-one error, and it's "harmless", so we
168 // will include it ourselves. 213 // will include it ourselves.
169 slab_gaps[i] = KSystemControl::GenerateRandomRange(0, total_gap_size);
214 slab_gap = KSystemControl::GenerateRandomRange(0, total_gap_size);
170 } 215 }
171 216
172 // Sort the array, so that we can treat differences between values as offsets to the starts of 217 // Sort the array, so that we can treat differences between values as offsets to the starts of
@@ -177,13 +222,21 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
177 } 222 }
178 } 223 }
179 224
180 for (size_t i = 0; i < slab_types.size(); i++) {
225 // Track the gaps, so that we can free them to the unused slab tree.
226 VAddr gap_start = address;
227 size_t gap_size = 0;
228
229 for (size_t i = 0; i < slab_gaps.size(); i++) {
181 // Add the random gap to the address. 230 // Add the random gap to the address.
182 address += (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
231 const auto cur_gap = (i == 0) ? slab_gaps[0] : slab_gaps[i] - slab_gaps[i - 1];
232 address += cur_gap;
233 gap_size += cur_gap;
183 234
184#define INITIALIZE_SLAB_HEAP(NAME, COUNT, ...) \ 235#define INITIALIZE_SLAB_HEAP(NAME, COUNT, ...) \
185 case KSlabType_##NAME: \ 236 case KSlabType_##NAME: \
186 address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \
237 if (COUNT > 0) { \
238 address = InitializeSlabHeap<NAME>(system, memory_layout, address, COUNT); \
239 } \
187 break; 240 break;
188 241
189 // Initialize the slabheap. 242 // Initialize the slabheap.
@@ -192,7 +245,13 @@ void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout) {
192 FOREACH_SLAB_TYPE(INITIALIZE_SLAB_HEAP) 245 FOREACH_SLAB_TYPE(INITIALIZE_SLAB_HEAP)
193 // If we somehow get an invalid type, abort. 246 // If we somehow get an invalid type, abort.
194 default: 247 default:
195 UNREACHABLE();
248 UNREACHABLE_MSG("Unknown slab type: {}", slab_types[i]);
249 }
250
251 // If we've hit the end of a gap, free it.
252 if (gap_start + gap_size != address) {
253 gap_start = address;
254 gap_size = 0;
196 } 255 }
197 } 256 }
198} 257}
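Note on the randomized placement above: InitializeSlabHeaps draws one random offset per slab inside the total gap budget, sorts the offsets, and uses adjacent differences as the gaps between consecutive slab regions, which is why the gaps can never sum to more than the reserved budget. A minimal standalone sketch of that scheme, with std::mt19937_64 and made-up slab sizes standing in for KSystemControl::GenerateRandomRange and the real FOREACH_SLAB_TYPE table (an illustration, not the emulator's code):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <random>

int main() {
    constexpr std::size_t total_gap_size = 2 * 1024 * 1024 - 296 * 1024; // CalculateSlabHeapGapSize()
    constexpr std::array<std::size_t, 4> slab_sizes{0x4000, 0x8000, 0x2000, 0x10000}; // made-up sizes

    // Draw one random offset per slab inside [0, total_gap_size], then sort.
    // The inclusive range mirrors the (harmless) off-by-one noted in the diff.
    std::mt19937_64 rng{0x1234};
    std::uniform_int_distribution<std::size_t> dist(0, total_gap_size);
    std::array<std::size_t, slab_sizes.size()> offsets{};
    for (auto& o : offsets) {
        o = dist(rng);
    }
    std::sort(offsets.begin(), offsets.end());

    // Differences between consecutive sorted offsets become the gap before each slab,
    // so the gaps sum to at most total_gap_size.
    std::uint64_t address = 0x80000000; // hypothetical slab region base
    for (std::size_t i = 0; i < slab_sizes.size(); ++i) {
        address += (i == 0) ? offsets[0] : offsets[i] - offsets[i - 1];
        // address is where slab i would be initialized; advance past it.
        address += slab_sizes[i];
    }
}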
diff --git a/src/core/hle/kernel/init/init_slab_setup.h b/src/core/hle/kernel/init/init_slab_setup.h
index a8f7e0918..f54b67d02 100644
--- a/src/core/hle/kernel/init/init_slab_setup.h
+++ b/src/core/hle/kernel/init/init_slab_setup.h
@@ -32,12 +32,13 @@ struct KSlabResourceCounts {
32 size_t num_KObjectName; 32 size_t num_KObjectName;
33 size_t num_KResourceLimit; 33 size_t num_KResourceLimit;
34 size_t num_KDebug; 34 size_t num_KDebug;
35 size_t num_KAlpha;
36 size_t num_KBeta;
35 size_t num_KIoPool;
36 size_t num_KIoRegion;
37}; 37};
38 38
39void InitializeSlabResourceCounts(KernelCore& kernel); 39void InitializeSlabResourceCounts(KernelCore& kernel);
40size_t CalculateTotalSlabHeapSize(const KernelCore& kernel); 40size_t CalculateTotalSlabHeapSize(const KernelCore& kernel);
41void InitializeKPageBufferSlabHeap(Core::System& system);
41void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout); 42void InitializeSlabHeaps(Core::System& system, KMemoryLayout& memory_layout);
42 43
43} // namespace Kernel::Init 44} // namespace Kernel::Init
diff --git a/src/core/hle/kernel/initial_process.h b/src/core/hle/kernel/initial_process.h
new file mode 100644
index 000000000..25b27909c
--- /dev/null
+++ b/src/core/hle/kernel/initial_process.h
@@ -0,0 +1,23 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "common/literals.h"
9#include "core/hle/kernel/board/nintendo/nx/k_memory_layout.h"
10#include "core/hle/kernel/board/nintendo/nx/k_system_control.h"
11
12namespace Kernel {
13
14using namespace Common::Literals;
15
16constexpr std::size_t InitialProcessBinarySizeMax = 12_MiB;
17
18static inline PAddr GetInitialProcessBinaryPhysicalAddress() {
19 return Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetKernelPhysicalBaseAddress(
20 MainMemoryAddress);
21}
22
23} // namespace Kernel
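For reference, the window this new header pins down: the memory manager below treats a fixed 12 MiB physical range, starting at the board-specific kernel base, as pre-reserved for the initial process binary. A tiny standalone sketch of the start/end/last arithmetic, with a hypothetical base address standing in for GetKernelPhysicalBaseAddress(MainMemoryAddress):

#include <cstddef>
#include <cstdint>

using PAddr = std::uint64_t;

constexpr std::size_t InitialProcessBinarySizeMax = 12ULL * 1024 * 1024; // 12_MiB

int main() {
    const PAddr ini_start = 0x80000000;                            // hypothetical kernel physical base
    const PAddr ini_end = ini_start + InitialProcessBinarySizeMax; // one-past-the-end
    const PAddr ini_last = ini_end - 1;                            // inclusive last byte, as used by the pool setup
    static_cast<void>(ini_last);
}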
diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index 783c69858..8cdd0490f 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
49 } 49 }
50 } else { 50 } else {
51 // Otherwise, clear our exclusive hold and finish 51 // Otherwise, clear our exclusive hold and finish
52 monitor.ClearExclusive(); 52 monitor.ClearExclusive(current_core);
53 } 53 }
54 54
55 // We're done. 55 // We're done.
@@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
78 } 78 }
79 } else { 79 } else {
80 // Otherwise, clear our exclusive hold and finish. 80 // Otherwise, clear our exclusive hold and finish.
81 monitor.ClearExclusive();
81 monitor.ClearExclusive(current_core);
82 } 82 }
83 83
84 // We're done. 84 // We're done.
@@ -115,7 +115,7 @@ ResultCode KAddressArbiter::Signal(VAddr addr, s32 count) {
115 { 115 {
116 KScopedSchedulerLock sl(kernel); 116 KScopedSchedulerLock sl(kernel);
117 117
118 auto it = thread_tree.nfind_light({addr, -1});
118 auto it = thread_tree.nfind_key({addr, -1});
119 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && 119 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
120 (it->GetAddressArbiterKey() == addr)) { 120 (it->GetAddressArbiterKey() == addr)) {
121 // End the thread's wait. 121 // End the thread's wait.
@@ -148,7 +148,7 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
148 return ResultInvalidState; 148 return ResultInvalidState;
149 } 149 }
150 150
151 auto it = thread_tree.nfind_light({addr, -1});
151 auto it = thread_tree.nfind_key({addr, -1});
152 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && 152 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
153 (it->GetAddressArbiterKey() == addr)) { 153 (it->GetAddressArbiterKey() == addr)) {
154 // End the thread's wait. 154 // End the thread's wait.
@@ -171,7 +171,7 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32
171 { 171 {
172 [[maybe_unused]] const KScopedSchedulerLock sl(kernel); 172 [[maybe_unused]] const KScopedSchedulerLock sl(kernel);
173 173
174 auto it = thread_tree.nfind_light({addr, -1});
174 auto it = thread_tree.nfind_key({addr, -1});
175 // Determine the updated value. 175 // Determine the updated value.
176 s32 new_value{}; 176 s32 new_value{};
177 if (count <= 0) { 177 if (count <= 0) {
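The nfind_light to nfind_key change keeps the same lookup pattern: seek to the first tree entry whose key is not less than the probe {addr, -1}, then walk forward while the arbiter key still matches. A standalone sketch of that pattern using std::multimap::lower_bound as a stand-in for the kernel's intrusive red-black tree (nfind_key is assumed here to have lower_bound-like semantics, as the {addr, -1} probe suggests):

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>

int main() {
    // Waiters keyed by (arbiter address, priority); -1 sorts before any real priority.
    std::multimap<std::pair<std::uint64_t, std::int64_t>, const char*> waiters{
        {{0x1000, 3}, "thread A"},
        {{0x1000, 7}, "thread B"},
        {{0x2000, 1}, "thread C"},
    };

    const std::uint64_t addr = 0x1000;
    // Equivalent of thread_tree.nfind_key({addr, -1}): first entry not less than the probe.
    auto it = waiters.lower_bound({addr, -1});
    while (it != waiters.end() && it->first.first == addr) {
        std::cout << "signaling " << it->second << '\n'; // end the waiter's wait in the real code
        ++it;
    }
}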
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index aadcc297a..8e2a9593c 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -244,7 +244,7 @@ void KConditionVariable::Signal(u64 cv_key, s32 count) {
244 { 244 {
245 KScopedSchedulerLock sl(kernel); 245 KScopedSchedulerLock sl(kernel);
246 246
247 auto it = thread_tree.nfind_light({cv_key, -1});
247 auto it = thread_tree.nfind_key({cv_key, -1});
248 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) && 248 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
249 (it->GetConditionVariableKey() == cv_key)) { 249 (it->GetConditionVariableKey() == cv_key)) {
250 KThread* target_thread = std::addressof(*it); 250 KThread* target_thread = std::addressof(*it);
diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h
index 57ff538cc..0858827b6 100644
--- a/src/core/hle/kernel/k_memory_layout.h
+++ b/src/core/hle/kernel/k_memory_layout.h
@@ -57,11 +57,11 @@ constexpr std::size_t KernelPageTableHeapSize = GetMaximumOverheadSize(MainMemor
57constexpr std::size_t KernelInitialPageHeapSize = 128_KiB; 57constexpr std::size_t KernelInitialPageHeapSize = 128_KiB;
58 58
59constexpr std::size_t KernelSlabHeapDataSize = 5_MiB; 59constexpr std::size_t KernelSlabHeapDataSize = 5_MiB;
60constexpr std::size_t KernelSlabHeapGapsSize = 2_MiB - 64_KiB;
61constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSize;
60constexpr std::size_t KernelSlabHeapGapsSizeMax = 2_MiB - 64_KiB;
61constexpr std::size_t KernelSlabHeapSize = KernelSlabHeapDataSize + KernelSlabHeapGapsSizeMax;
62 62
63// NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860. 63// NOTE: This is calculated from KThread slab counts, assuming KThread size <= 0x860.
64constexpr std::size_t KernelSlabHeapAdditionalSize = 416_KiB;
64constexpr std::size_t KernelSlabHeapAdditionalSize = 0x68000;
65 65
66constexpr std::size_t KernelResourceSize = 66constexpr std::size_t KernelResourceSize =
67 KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize; 67 KernelPageTableHeapSize + KernelInitialPageHeapSize + KernelSlabHeapSize;
@@ -173,6 +173,10 @@ public:
173 return Dereference(FindVirtualLinear(address)); 173 return Dereference(FindVirtualLinear(address));
174 } 174 }
175 175
176 const KMemoryRegion& GetPhysicalLinearRegion(PAddr address) const {
177 return Dereference(FindPhysicalLinear(address));
178 }
179
176 const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const { 180 const KMemoryRegion* GetPhysicalKernelTraceBufferRegion() const {
177 return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer); 181 return GetPhysicalMemoryRegionTree().FindFirstDerived(KMemoryRegionType_KernelTraceBuffer);
178 } 182 }
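To spell out the constant changes above: the gap constant becomes ...Max because the per-boot gap now comes from CalculateSlabHeapGapSize(), and 0x68000 is simply the hexadecimal spelling of the previous 416_KiB value. A small compile-time check of that arithmetic (plain C++ with the literals written out instead of the _KiB/_MiB user-defined literals):

#include <cstddef>

int main() {
    constexpr std::size_t KiB = 1024;
    constexpr std::size_t MiB = 1024 * KiB;

    // The renamed constant is an upper bound; the per-boot gap (2 MiB - 296 KiB) must stay below it.
    constexpr std::size_t KernelSlabHeapGapsSizeMax = 2 * MiB - 64 * KiB; // 0x1F0000
    constexpr std::size_t KernelSlabHeapGapSize = 2 * MiB - 296 * KiB;    // 0x1B6000
    static_assert(KernelSlabHeapGapSize <= KernelSlabHeapGapsSizeMax);

    // 0x68000 is the hex spelling of the old 416_KiB value; the size itself is unchanged.
    static_assert(0x68000 == 416 * KiB);
}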
diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp
index 1b44541b1..a2f18f643 100644
--- a/src/core/hle/kernel/k_memory_manager.cpp
+++ b/src/core/hle/kernel/k_memory_manager.cpp
@@ -10,189 +10,412 @@
10#include "common/scope_exit.h" 10#include "common/scope_exit.h"
11#include "core/core.h" 11#include "core/core.h"
12#include "core/device_memory.h" 12#include "core/device_memory.h"
13#include "core/hle/kernel/initial_process.h"
13#include "core/hle/kernel/k_memory_manager.h" 14#include "core/hle/kernel/k_memory_manager.h"
14#include "core/hle/kernel/k_page_linked_list.h" 15#include "core/hle/kernel/k_page_linked_list.h"
16#include "core/hle/kernel/kernel.h"
15#include "core/hle/kernel/svc_results.h" 17#include "core/hle/kernel/svc_results.h"
18#include "core/memory.h"
16 19
17namespace Kernel { 20namespace Kernel {
18 21
19KMemoryManager::KMemoryManager(Core::System& system_) : system{system_} {} 22namespace {
23
24constexpr KMemoryManager::Pool GetPoolFromMemoryRegionType(u32 type) {
25 if ((type | KMemoryRegionType_DramApplicationPool) == type) {
26 return KMemoryManager::Pool::Application;
27 } else if ((type | KMemoryRegionType_DramAppletPool) == type) {
28 return KMemoryManager::Pool::Applet;
29 } else if ((type | KMemoryRegionType_DramSystemPool) == type) {
30 return KMemoryManager::Pool::System;
31 } else if ((type | KMemoryRegionType_DramSystemNonSecurePool) == type) {
32 return KMemoryManager::Pool::SystemNonSecure;
33 } else {
34 UNREACHABLE_MSG("InvalidMemoryRegionType for conversion to Pool");
35 return {};
36 }
37}
20 38
21std::size_t KMemoryManager::Impl::Initialize(Pool new_pool, u64 start_address, u64 end_address) { 39} // namespace
22 const auto size{end_address - start_address}; 40
41KMemoryManager::KMemoryManager(Core::System& system_)
42 : system{system_}, pool_locks{
43 KLightLock{system_.Kernel()},
44 KLightLock{system_.Kernel()},
45 KLightLock{system_.Kernel()},
46 KLightLock{system_.Kernel()},
47 } {}
48
49void KMemoryManager::Initialize(VAddr management_region, size_t management_region_size) {
50
51 // Clear the management region to zero.
52 const VAddr management_region_end = management_region + management_region_size;
53
54 // Reset our manager count.
55 num_managers = 0;
56
57 // Traverse the virtual memory layout tree, initializing each manager as appropriate.
58 while (num_managers != MaxManagerCount) {
59 // Locate the region that should initialize the current manager.
60 PAddr region_address = 0;
61 size_t region_size = 0;
62 Pool region_pool = Pool::Count;
63 for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
64 // We only care about regions that we need to create managers for.
65 if (!it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
66 continue;
67 }
23 68
24 // Calculate metadata sizes 69 // We want to initialize the managers in order.
25 const auto ref_count_size{(size / PageSize) * sizeof(u16)}; 70 if (it.GetAttributes() != num_managers) {
26 const auto optimize_map_size{(Common::AlignUp((size / PageSize), 64) / 64) * sizeof(u64)}; 71 continue;
27 const auto manager_size{Common::AlignUp(optimize_map_size + ref_count_size, PageSize)}; 72 }
28 const auto page_heap_size{KPageHeap::CalculateManagementOverheadSize(size)};
29 const auto total_metadata_size{manager_size + page_heap_size};
30 ASSERT(manager_size <= total_metadata_size);
31 ASSERT(Common::IsAligned(total_metadata_size, PageSize));
32 73
33 // Setup region 74 const PAddr cur_start = it.GetAddress();
34 pool = new_pool; 75 const PAddr cur_end = it.GetEndAddress();
76
77 // Validate the region.
78 ASSERT(cur_end != 0);
79 ASSERT(cur_start != 0);
80 ASSERT(it.GetSize() > 0);
81
82 // Update the region's extents.
83 if (region_address == 0) {
84 region_address = cur_start;
85 region_size = it.GetSize();
86 region_pool = GetPoolFromMemoryRegionType(it.GetType());
87 } else {
88 ASSERT(cur_start == region_address + region_size);
89
90 // Update the size.
91 region_size = cur_end - region_address;
92 ASSERT(GetPoolFromMemoryRegionType(it.GetType()) == region_pool);
93 }
94 }
95
96 // If we didn't find a region, we're done.
97 if (region_size == 0) {
98 break;
99 }
35 100
36 // Initialize the manager's KPageHeap 101 // Initialize a new manager for the region.
37 heap.Initialize(start_address, size, page_heap_size); 102 Impl* manager = std::addressof(managers[num_managers++]);
103 ASSERT(num_managers <= managers.size());
104
105 const size_t cur_size = manager->Initialize(region_address, region_size, management_region,
106 management_region_end, region_pool);
107 management_region += cur_size;
108 ASSERT(management_region <= management_region_end);
109
110 // Insert the manager into the pool list.
111 const auto region_pool_index = static_cast<u32>(region_pool);
112 if (pool_managers_tail[region_pool_index] == nullptr) {
113 pool_managers_head[region_pool_index] = manager;
114 } else {
115 pool_managers_tail[region_pool_index]->SetNext(manager);
116 manager->SetPrev(pool_managers_tail[region_pool_index]);
117 }
118 pool_managers_tail[region_pool_index] = manager;
119 }
38 120
39 // Free the memory to the heap 121 // Free each region to its corresponding heap.
40 heap.Free(start_address, size / PageSize); 122 size_t reserved_sizes[MaxManagerCount] = {};
123 const PAddr ini_start = GetInitialProcessBinaryPhysicalAddress();
124 const PAddr ini_end = ini_start + InitialProcessBinarySizeMax;
125 const PAddr ini_last = ini_end - 1;
126 for (const auto& it : system.Kernel().MemoryLayout().GetPhysicalMemoryRegionTree()) {
127 if (it.IsDerivedFrom(KMemoryRegionType_DramUserPool)) {
128 // Get the manager for the region.
129 auto index = it.GetAttributes();
130 auto& manager = managers[index];
131
132 const PAddr cur_start = it.GetAddress();
133 const PAddr cur_last = it.GetLastAddress();
134 const PAddr cur_end = it.GetEndAddress();
135
136 if (cur_start <= ini_start && ini_last <= cur_last) {
137 // Free memory before the ini to the heap.
138 if (cur_start != ini_start) {
139 manager.Free(cur_start, (ini_start - cur_start) / PageSize);
140 }
41 141
42 // Update the heap's used size 142 // Open/reserve the ini memory.
43 heap.UpdateUsedSize(); 143 manager.OpenFirst(ini_start, InitialProcessBinarySizeMax / PageSize);
144 reserved_sizes[it.GetAttributes()] += InitialProcessBinarySizeMax;
44 145
45 return total_metadata_size; 146 // Free memory after the ini to the heap.
46} 147 if (ini_last != cur_last) {
148 ASSERT(cur_end != 0);
149 manager.Free(ini_end, cur_end - ini_end);
150 }
151 } else {
152 // Ensure there's no partial overlap with the ini image.
153 if (cur_start <= ini_last) {
154 ASSERT(cur_last < ini_start);
155 } else {
156 // Otherwise, check the region for general validity.
157 ASSERT(cur_end != 0);
158 }
47 159
48void KMemoryManager::InitializeManager(Pool pool, u64 start_address, u64 end_address) { 160 // Free the memory to the heap.
49 ASSERT(pool < Pool::Count); 161 manager.Free(cur_start, it.GetSize() / PageSize);
50 managers[static_cast<std::size_t>(pool)].Initialize(pool, start_address, end_address); 162 }
163 }
164 }
165
166 // Update the used size for all managers.
167 for (size_t i = 0; i < num_managers; ++i) {
168 managers[i].SetInitialUsedHeapSize(reserved_sizes[i]);
169 }
51} 170}
52 171
53VAddr KMemoryManager::AllocateAndOpenContinuous(std::size_t num_pages, std::size_t align_pages, 172PAddr KMemoryManager::AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option) {
54 u32 option) { 173 // Early return if we're allocating no pages.
55 // Early return if we're allocating no pages
56 if (num_pages == 0) { 174 if (num_pages == 0) {
57 return {}; 175 return 0;
58 } 176 }
59 177
60 // Lock the pool that we're allocating from 178 // Lock the pool that we're allocating from.
61 const auto [pool, dir] = DecodeOption(option); 179 const auto [pool, dir] = DecodeOption(option);
62 const auto pool_index{static_cast<std::size_t>(pool)}; 180 KScopedLightLock lk(pool_locks[static_cast<std::size_t>(pool)]);
63 std::lock_guard lock{pool_locks[pool_index]}; 181
64 182 // Choose a heap based on our page size request.
65 // Choose a heap based on our page size request 183 const s32 heap_index = KPageHeap::GetAlignedBlockIndex(num_pages, align_pages);
66 const s32 heap_index{KPageHeap::GetAlignedBlockIndex(num_pages, align_pages)}; 184
67 185 // Loop, trying to iterate from each block.
68 // Loop, trying to iterate from each block 186 Impl* chosen_manager = nullptr;
69 // TODO (bunnei): Support multiple managers 187 PAddr allocated_block = 0;
70 Impl& chosen_manager{managers[pool_index]}; 188 for (chosen_manager = this->GetFirstManager(pool, dir); chosen_manager != nullptr;
71 VAddr allocated_block{chosen_manager.AllocateBlock(heap_index, false)}; 189 chosen_manager = this->GetNextManager(chosen_manager, dir)) {
190 allocated_block = chosen_manager->AllocateBlock(heap_index, true);
191 if (allocated_block != 0) {
192 break;
193 }
194 }
72 195
73 // If we failed to allocate, quit now 196 // If we failed to allocate, quit now.
74 if (!allocated_block) { 197 if (allocated_block == 0) {
75 return {}; 198 return 0;
76 } 199 }
77 200
78 // If we allocated more than we need, free some 201 // If we allocated more than we need, free some.
79 const auto allocated_pages{KPageHeap::GetBlockNumPages(heap_index)}; 202 const size_t allocated_pages = KPageHeap::GetBlockNumPages(heap_index);
80 if (allocated_pages > num_pages) { 203 if (allocated_pages > num_pages) {
81 chosen_manager.Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages); 204 chosen_manager->Free(allocated_block + num_pages * PageSize, allocated_pages - num_pages);
82 } 205 }
83 206
207 // Open the first reference to the pages.
208 chosen_manager->OpenFirst(allocated_block, num_pages);
209
84 return allocated_block; 210 return allocated_block;
85} 211}
86 212
87ResultCode KMemoryManager::Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, 213ResultCode KMemoryManager::AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
88 Direction dir, u32 heap_fill_value) { 214 Direction dir, bool random) {
89 ASSERT(page_list.GetNumPages() == 0); 215 // Choose a heap based on our page size request.
216 const s32 heap_index = KPageHeap::GetBlockIndex(num_pages);
217 R_UNLESS(0 <= heap_index, ResultOutOfMemory);
218
219 // Ensure that we don't leave anything un-freed.
220 auto group_guard = SCOPE_GUARD({
221 for (const auto& it : out->Nodes()) {
222 auto& manager = this->GetManager(system.Kernel().MemoryLayout(), it.GetAddress());
223 const size_t num_pages_to_free =
224 std::min(it.GetNumPages(), (manager.GetEndAddress() - it.GetAddress()) / PageSize);
225 manager.Free(it.GetAddress(), num_pages_to_free);
226 }
227 });
90 228
91 // Early return if we're allocating no pages 229 // Keep allocating until we've allocated all our pages.
92 if (num_pages == 0) { 230 for (s32 index = heap_index; index >= 0 && num_pages > 0; index--) {
93 return ResultSuccess; 231 const size_t pages_per_alloc = KPageHeap::GetBlockNumPages(index);
94 } 232 for (Impl* cur_manager = this->GetFirstManager(pool, dir); cur_manager != nullptr;
233 cur_manager = this->GetNextManager(cur_manager, dir)) {
234 while (num_pages >= pages_per_alloc) {
235 // Allocate a block.
236 PAddr allocated_block = cur_manager->AllocateBlock(index, random);
237 if (allocated_block == 0) {
238 break;
239 }
95 240
96 // Lock the pool that we're allocating from 241 // Safely add it to our group.
97 const auto pool_index{static_cast<std::size_t>(pool)}; 242 {
98 std::lock_guard lock{pool_locks[pool_index]}; 243 auto block_guard =
244 SCOPE_GUARD({ cur_manager->Free(allocated_block, pages_per_alloc); });
245 R_TRY(out->AddBlock(allocated_block, pages_per_alloc));
246 block_guard.Cancel();
247 }
99 248
100 // Choose a heap based on our page size request 249 num_pages -= pages_per_alloc;
101 const s32 heap_index{KPageHeap::GetBlockIndex(num_pages)}; 250 }
102 if (heap_index < 0) { 251 }
103 return ResultOutOfMemory;
104 } 252 }
105 253
106 // TODO (bunnei): Support multiple managers 254 // Only succeed if we allocated as many pages as we wanted.
107 Impl& chosen_manager{managers[pool_index]}; 255 R_UNLESS(num_pages == 0, ResultOutOfMemory);
108 256
109 // Ensure that we don't leave anything un-freed 257 // We succeeded!
110 auto group_guard = detail::ScopeExit([&] { 258 group_guard.Cancel();
111 for (const auto& it : page_list.Nodes()) { 259 return ResultSuccess;
112 const auto min_num_pages{std::min<size_t>( 260}
113 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
114 chosen_manager.Free(it.GetAddress(), min_num_pages);
115 }
116 });
117 261
118 // Keep allocating until we've allocated all our pages 262ResultCode KMemoryManager::AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option) {
119 for (s32 index{heap_index}; index >= 0 && num_pages > 0; index--) { 263 ASSERT(out != nullptr);
120 const auto pages_per_alloc{KPageHeap::GetBlockNumPages(index)}; 264 ASSERT(out->GetNumPages() == 0);
121 265
122 while (num_pages >= pages_per_alloc) { 266 // Early return if we're allocating no pages.
123 // Allocate a block 267 R_SUCCEED_IF(num_pages == 0);
124 VAddr allocated_block{chosen_manager.AllocateBlock(index, false)};
125 if (!allocated_block) {
126 break;
127 }
128 268
129 // Safely add it to our group 269 // Lock the pool that we're allocating from.
130 { 270 const auto [pool, dir] = DecodeOption(option);
131 auto block_guard = detail::ScopeExit( 271 KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
132 [&] { chosen_manager.Free(allocated_block, pages_per_alloc); }); 272
273 // Allocate the page group.
274 R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
275
276 // Open the first reference to the pages.
277 for (const auto& block : out->Nodes()) {
278 PAddr cur_address = block.GetAddress();
279 size_t remaining_pages = block.GetNumPages();
280 while (remaining_pages > 0) {
281 // Get the manager for the current address.
282 auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
283
284 // Process part or all of the block.
285 const size_t cur_pages =
286 std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
287 manager.OpenFirst(cur_address, cur_pages);
288
289 // Advance.
290 cur_address += cur_pages * PageSize;
291 remaining_pages -= cur_pages;
292 }
293 }
133 294
134 if (const ResultCode result{page_list.AddBlock(allocated_block, pages_per_alloc)}; 295 return ResultSuccess;
135 result.IsError()) { 296}
136 return result;
137 }
138 297
139 block_guard.Cancel(); 298ResultCode KMemoryManager::AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages,
140 } 299 u32 option, u64 process_id, u8 fill_pattern) {
300 ASSERT(out != nullptr);
301 ASSERT(out->GetNumPages() == 0);
141 302
142 num_pages -= pages_per_alloc; 303 // Decode the option.
143 } 304 const auto [pool, dir] = DecodeOption(option);
144 }
145 305
146 // Clear allocated memory. 306 // Allocate the memory.
147 for (const auto& it : page_list.Nodes()) { 307 {
148 std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value, 308 // Lock the pool that we're allocating from.
149 it.GetSize()); 309 KScopedLightLock lk(pool_locks[static_cast<size_t>(pool)]);
310
311 // Allocate the page group.
312 R_TRY(this->AllocatePageGroupImpl(out, num_pages, pool, dir, false));
313
314 // Open the first reference to the pages.
315 for (const auto& block : out->Nodes()) {
316 PAddr cur_address = block.GetAddress();
317 size_t remaining_pages = block.GetNumPages();
318 while (remaining_pages > 0) {
319 // Get the manager for the current address.
320 auto& manager = this->GetManager(system.Kernel().MemoryLayout(), cur_address);
321
322 // Process part or all of the block.
323 const size_t cur_pages =
324 std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address));
325 manager.OpenFirst(cur_address, cur_pages);
326
327 // Advance.
328 cur_address += cur_pages * PageSize;
329 remaining_pages -= cur_pages;
330 }
331 }
150 } 332 }
151 333
152 // Only succeed if we allocated as many pages as we wanted 334 // Set all the allocated memory.
153 if (num_pages) { 335 for (const auto& block : out->Nodes()) {
154 return ResultOutOfMemory; 336 std::memset(system.DeviceMemory().GetPointer(block.GetAddress()), fill_pattern,
337 block.GetSize());
155 } 338 }
156 339
157 // We succeeded!
158 group_guard.Cancel();
159
160 return ResultSuccess; 340 return ResultSuccess;
161} 341}
162 342
163ResultCode KMemoryManager::Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, 343void KMemoryManager::Open(PAddr address, size_t num_pages) {
164 Direction dir, u32 heap_fill_value) { 344 // Repeatedly open references until we've done so for all pages.
165 // Early return if we're freeing no pages 345 while (num_pages) {
166 if (!num_pages) { 346 auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
167 return ResultSuccess; 347 const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
348
349 {
350 KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
351 manager.Open(address, cur_pages);
352 }
353
354 num_pages -= cur_pages;
355 address += cur_pages * PageSize;
168 } 356 }
357}
169 358
170 // Lock the pool that we're freeing from 359void KMemoryManager::Close(PAddr address, size_t num_pages) {
171 const auto pool_index{static_cast<std::size_t>(pool)}; 360 // Repeatedly close references until we've done so for all pages.
172 std::lock_guard lock{pool_locks[pool_index]}; 361 while (num_pages) {
362 auto& manager = this->GetManager(system.Kernel().MemoryLayout(), address);
363 const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address));
173 364
174 // TODO (bunnei): Support multiple managers 365 {
175 Impl& chosen_manager{managers[pool_index]}; 366 KScopedLightLock lk(pool_locks[static_cast<size_t>(manager.GetPool())]);
367 manager.Close(address, cur_pages);
368 }
176 369
177 // Free all of the pages 370 num_pages -= cur_pages;
178 for (const auto& it : page_list.Nodes()) { 371 address += cur_pages * PageSize;
179 const auto min_num_pages{std::min<size_t>(
180 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
181 chosen_manager.Free(it.GetAddress(), min_num_pages);
182 } 372 }
373}
183 374
184 return ResultSuccess; 375void KMemoryManager::Close(const KPageLinkedList& pg) {
376 for (const auto& node : pg.Nodes()) {
377 Close(node.GetAddress(), node.GetNumPages());
378 }
379}
380void KMemoryManager::Open(const KPageLinkedList& pg) {
381 for (const auto& node : pg.Nodes()) {
382 Open(node.GetAddress(), node.GetNumPages());
383 }
384}
385
386size_t KMemoryManager::Impl::Initialize(PAddr address, size_t size, VAddr management,
387 VAddr management_end, Pool p) {
388 // Calculate management sizes.
389 const size_t ref_count_size = (size / PageSize) * sizeof(u16);
390 const size_t optimize_map_size = CalculateOptimizedProcessOverheadSize(size);
391 const size_t manager_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
392 const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(size);
393 const size_t total_management_size = manager_size + page_heap_size;
394 ASSERT(manager_size <= total_management_size);
395 ASSERT(management + total_management_size <= management_end);
396 ASSERT(Common::IsAligned(total_management_size, PageSize));
397
398 // Setup region.
399 pool = p;
400 management_region = management;
401 page_reference_counts.resize(
402 Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize() / PageSize);
403 ASSERT(Common::IsAligned(management_region, PageSize));
404
405 // Initialize the manager's KPageHeap.
406 heap.Initialize(address, size, management + manager_size, page_heap_size);
407
408 return total_management_size;
185} 409}
186 410
187std::size_t KMemoryManager::Impl::CalculateManagementOverheadSize(std::size_t region_size) { 411size_t KMemoryManager::Impl::CalculateManagementOverheadSize(size_t region_size) {
188 const std::size_t ref_count_size = (region_size / PageSize) * sizeof(u16); 412 const size_t ref_count_size = (region_size / PageSize) * sizeof(u16);
189 const std::size_t optimize_map_size = 413 const size_t optimize_map_size =
190 (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / 414 (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
191 Common::BitSize<u64>()) * 415 Common::BitSize<u64>()) *
192 sizeof(u64); 416 sizeof(u64);
193 const std::size_t manager_meta_size = 417 const size_t manager_meta_size = Common::AlignUp(optimize_map_size + ref_count_size, PageSize);
194 Common::AlignUp(optimize_map_size + ref_count_size, PageSize); 418 const size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
195 const std::size_t page_heap_size = KPageHeap::CalculateManagementOverheadSize(region_size);
196 return manager_meta_size + page_heap_size; 419 return manager_meta_size + page_heap_size;
197} 420}
198 421
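AllocatePageGroupImpl satisfies a request greedily: start at the largest block index whose block is not bigger than the request, take as many blocks of that size as possible, then fall through to smaller sizes until no pages remain. A toy, single-manager model of that loop (the real code iterates the managers of the chosen pool, can fail and roll back via scope guards, and optionally randomizes block selection):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Block sizes in pages (powers of two, largest last), mirroring KPageHeap's table.
    const std::vector<std::size_t> block_num_pages{1, 4, 16, 64, 256};
    std::size_t num_pages = 371; // arbitrary request

    // Start from the largest block not bigger than the request (GetBlockIndex analogue).
    int index = static_cast<int>(block_num_pages.size()) - 1;
    while (index >= 0 && block_num_pages[index] > num_pages) {
        --index;
    }

    std::vector<std::size_t> allocation; // pages per allocated block
    for (; index >= 0 && num_pages > 0; --index) {
        const std::size_t pages_per_alloc = block_num_pages[index];
        while (num_pages >= pages_per_alloc) {
            // In the kernel this is cur_manager->AllocateBlock(index, random); here it always succeeds.
            allocation.push_back(pages_per_alloc);
            num_pages -= pages_per_alloc;
        }
    }

    for (const auto pages : allocation) {
        std::printf("block of %zu pages\n", pages);
    }
    // 371 = 256 + 64 + 16 + 16 + 16 + 1 + 1 + 1, the greedy decomposition printed above.
}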
diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h
index 17c7690f1..18775b262 100644
--- a/src/core/hle/kernel/k_memory_manager.h
+++ b/src/core/hle/kernel/k_memory_manager.h
@@ -5,11 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <mutex>
9#include <tuple> 8#include <tuple>
10 9
11#include "common/common_funcs.h" 10#include "common/common_funcs.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/k_light_lock.h"
13#include "core/hle/kernel/k_memory_layout.h"
13#include "core/hle/kernel/k_page_heap.h" 14#include "core/hle/kernel/k_page_heap.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15 16
@@ -52,22 +53,33 @@ public:
52 53
53 explicit KMemoryManager(Core::System& system_); 54 explicit KMemoryManager(Core::System& system_);
54 55
55 constexpr std::size_t GetSize(Pool pool) const { 56 void Initialize(VAddr management_region, size_t management_region_size);
56 return managers[static_cast<std::size_t>(pool)].GetSize(); 57
58 constexpr size_t GetSize(Pool pool) const {
59 constexpr Direction GetSizeDirection = Direction::FromFront;
60 size_t total = 0;
61 for (auto* manager = this->GetFirstManager(pool, GetSizeDirection); manager != nullptr;
62 manager = this->GetNextManager(manager, GetSizeDirection)) {
63 total += manager->GetSize();
64 }
65 return total;
57 } 66 }
58 67
59 void InitializeManager(Pool pool, u64 start_address, u64 end_address); 68 PAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option);
69 ResultCode AllocateAndOpen(KPageLinkedList* out, size_t num_pages, u32 option);
70 ResultCode AllocateAndOpenForProcess(KPageLinkedList* out, size_t num_pages, u32 option,
71 u64 process_id, u8 fill_pattern);
72
73 static constexpr size_t MaxManagerCount = 10;
60 74
61 VAddr AllocateAndOpenContinuous(size_t num_pages, size_t align_pages, u32 option); 75 void Close(PAddr address, size_t num_pages);
62 ResultCode Allocate(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir, 76 void Close(const KPageLinkedList& pg);
63 u32 heap_fill_value = 0);
64 ResultCode Free(KPageLinkedList& page_list, std::size_t num_pages, Pool pool, Direction dir,
65 u32 heap_fill_value = 0);
66 77
67 static constexpr std::size_t MaxManagerCount = 10; 78 void Open(PAddr address, size_t num_pages);
79 void Open(const KPageLinkedList& pg);
68 80
69public: 81public:
70 static std::size_t CalculateManagementOverheadSize(std::size_t region_size) { 82 static size_t CalculateManagementOverheadSize(size_t region_size) {
71 return Impl::CalculateManagementOverheadSize(region_size); 83 return Impl::CalculateManagementOverheadSize(region_size);
72 } 84 }
73 85
@@ -100,17 +112,26 @@ private:
100 Impl() = default; 112 Impl() = default;
101 ~Impl() = default; 113 ~Impl() = default;
102 114
103 std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address); 115 size_t Initialize(PAddr address, size_t size, VAddr management, VAddr management_end,
116 Pool p);
104 117
105 VAddr AllocateBlock(s32 index, bool random) { 118 VAddr AllocateBlock(s32 index, bool random) {
106 return heap.AllocateBlock(index, random); 119 return heap.AllocateBlock(index, random);
107 } 120 }
108 121
109 void Free(VAddr addr, std::size_t num_pages) { 122 void Free(VAddr addr, size_t num_pages) {
110 heap.Free(addr, num_pages); 123 heap.Free(addr, num_pages);
111 } 124 }
112 125
113 constexpr std::size_t GetSize() const { 126 void SetInitialUsedHeapSize(size_t reserved_size) {
127 heap.SetInitialUsedSize(reserved_size);
128 }
129
130 constexpr Pool GetPool() const {
131 return pool;
132 }
133
134 constexpr size_t GetSize() const {
114 return heap.GetSize(); 135 return heap.GetSize();
115 } 136 }
116 137
@@ -122,10 +143,88 @@ private:
122 return heap.GetEndAddress(); 143 return heap.GetEndAddress();
123 } 144 }
124 145
125 static std::size_t CalculateManagementOverheadSize(std::size_t region_size); 146 constexpr size_t GetPageOffset(PAddr address) const {
147 return heap.GetPageOffset(address);
148 }
149
150 constexpr size_t GetPageOffsetToEnd(PAddr address) const {
151 return heap.GetPageOffsetToEnd(address);
152 }
153
154 constexpr void SetNext(Impl* n) {
155 next = n;
156 }
157
158 constexpr void SetPrev(Impl* n) {
159 prev = n;
160 }
161
162 constexpr Impl* GetNext() const {
163 return next;
164 }
165
166 constexpr Impl* GetPrev() const {
167 return prev;
168 }
169
170 void OpenFirst(PAddr address, size_t num_pages) {
171 size_t index = this->GetPageOffset(address);
172 const size_t end = index + num_pages;
173 while (index < end) {
174 const RefCount ref_count = (++page_reference_counts[index]);
175 ASSERT(ref_count == 1);
126 176
127 static constexpr std::size_t CalculateOptimizedProcessOverheadSize( 177 index++;
128 std::size_t region_size) { 178 }
179 }
180
181 void Open(PAddr address, size_t num_pages) {
182 size_t index = this->GetPageOffset(address);
183 const size_t end = index + num_pages;
184 while (index < end) {
185 const RefCount ref_count = (++page_reference_counts[index]);
186 ASSERT(ref_count > 1);
187
188 index++;
189 }
190 }
191
192 void Close(PAddr address, size_t num_pages) {
193 size_t index = this->GetPageOffset(address);
194 const size_t end = index + num_pages;
195
196 size_t free_start = 0;
197 size_t free_count = 0;
198 while (index < end) {
199 ASSERT(page_reference_counts[index] > 0);
200 const RefCount ref_count = (--page_reference_counts[index]);
201
202 // Keep track of how many zero refcounts we see in a row, to minimize calls to free.
203 if (ref_count == 0) {
204 if (free_count > 0) {
205 free_count++;
206 } else {
207 free_start = index;
208 free_count = 1;
209 }
210 } else {
211 if (free_count > 0) {
212 this->Free(heap.GetAddress() + free_start * PageSize, free_count);
213 free_count = 0;
214 }
215 }
216
217 index++;
218 }
219
220 if (free_count > 0) {
221 this->Free(heap.GetAddress() + free_start * PageSize, free_count);
222 }
223 }
224
225 static size_t CalculateManagementOverheadSize(size_t region_size);
226
227 static constexpr size_t CalculateOptimizedProcessOverheadSize(size_t region_size) {
129 return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / 228 return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) /
130 Common::BitSize<u64>()) * 229 Common::BitSize<u64>()) *
131 sizeof(u64); 230 sizeof(u64);
@@ -135,13 +234,45 @@ private:
135 using RefCount = u16; 234 using RefCount = u16;
136 235
137 KPageHeap heap; 236 KPageHeap heap;
237 std::vector<RefCount> page_reference_counts;
238 VAddr management_region{};
138 Pool pool{}; 239 Pool pool{};
240 Impl* next{};
241 Impl* prev{};
139 }; 242 };
140 243
141private: 244private:
245 Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) {
246 return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
247 }
248
249 const Impl& GetManager(const KMemoryLayout& memory_layout, PAddr address) const {
250 return managers[memory_layout.GetPhysicalLinearRegion(address).GetAttributes()];
251 }
252
253 constexpr Impl* GetFirstManager(Pool pool, Direction dir) const {
254 return dir == Direction::FromBack ? pool_managers_tail[static_cast<size_t>(pool)]
255 : pool_managers_head[static_cast<size_t>(pool)];
256 }
257
258 constexpr Impl* GetNextManager(Impl* cur, Direction dir) const {
259 if (dir == Direction::FromBack) {
260 return cur->GetPrev();
261 } else {
262 return cur->GetNext();
263 }
264 }
265
266 ResultCode AllocatePageGroupImpl(KPageLinkedList* out, size_t num_pages, Pool pool,
267 Direction dir, bool random);
268
269private:
142 Core::System& system; 270 Core::System& system;
143 std::array<std::mutex, static_cast<std::size_t>(Pool::Count)> pool_locks; 271 std::array<KLightLock, static_cast<size_t>(Pool::Count)> pool_locks;
272 std::array<Impl*, MaxManagerCount> pool_managers_head{};
273 std::array<Impl*, MaxManagerCount> pool_managers_tail{};
144 std::array<Impl, MaxManagerCount> managers; 274 std::array<Impl, MaxManagerCount> managers;
275 size_t num_managers{};
145}; 276};
146 277
147} // namespace Kernel 278} // namespace Kernel
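The new per-page reference counting closes pages in batches: Impl::Close decrements the refcounts and folds consecutive pages that reach zero into a single Free call, instead of freeing page by page. A standalone sketch of that run-length batching (plain ints and a print statement stand in for RefCount and heap.Free):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> ref_counts{1, 1, 2, 1, 1, 3}; // hypothetical per-page reference counts

    auto free_run = [](std::size_t start, std::size_t count) {
        std::printf("Free(pages [%zu, %zu))\n", start, start + count);
    };

    std::size_t free_start = 0;
    std::size_t free_count = 0;
    for (std::size_t index = 0; index < ref_counts.size(); ++index) {
        if (--ref_counts[index] == 0) {
            if (free_count > 0) {
                ++free_count;       // extend the current run of freed pages
            } else {
                free_start = index; // start a new run
                free_count = 1;
            }
        } else if (free_count > 0) {
            free_run(free_start, free_count); // run broken by a still-referenced page
            free_count = 0;
        }
    }
    if (free_count > 0) {
        free_run(free_start, free_count); // flush the trailing run
    }
    // Prints: Free(pages [0, 2)) and Free(pages [3, 5)) for the counts above.
}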
diff --git a/src/core/hle/kernel/k_memory_region_type.h b/src/core/hle/kernel/k_memory_region_type.h
index a05e66677..0baeddf51 100644
--- a/src/core/hle/kernel/k_memory_region_type.h
+++ b/src/core/hle/kernel/k_memory_region_type.h
@@ -14,7 +14,8 @@
14namespace Kernel { 14namespace Kernel {
15 15
16enum KMemoryRegionType : u32 { 16enum KMemoryRegionType : u32 {
17 KMemoryRegionAttr_CarveoutProtected = 0x04000000,
17 KMemoryRegionAttr_CarveoutProtected = 0x02000000,
18 KMemoryRegionAttr_Uncached = 0x04000000,
18 KMemoryRegionAttr_DidKernelMap = 0x08000000, 19 KMemoryRegionAttr_DidKernelMap = 0x08000000,
19 KMemoryRegionAttr_ShouldKernelMap = 0x10000000, 20 KMemoryRegionAttr_ShouldKernelMap = 0x10000000,
20 KMemoryRegionAttr_UserReadOnly = 0x20000000, 21 KMemoryRegionAttr_UserReadOnly = 0x20000000,
@@ -239,6 +240,11 @@ static_assert(KMemoryRegionType_VirtualDramHeapBase.GetValue() == 0x1A);
239static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A); 240static_assert(KMemoryRegionType_VirtualDramKernelPtHeap.GetValue() == 0x2A);
240static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A); 241static_assert(KMemoryRegionType_VirtualDramKernelTraceBuffer.GetValue() == 0x4A);
241 242
243// UNUSED: .DeriveSparse(2, 2, 0);
244constexpr auto KMemoryRegionType_VirtualDramUnknownDebug =
245 KMemoryRegionType_Dram.DeriveSparse(2, 2, 1);
246static_assert(KMemoryRegionType_VirtualDramUnknownDebug.GetValue() == (0x52));
247
242constexpr auto KMemoryRegionType_VirtualDramKernelInitPt = 248constexpr auto KMemoryRegionType_VirtualDramKernelInitPt =
243 KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0); 249 KMemoryRegionType_VirtualDramHeapBase.Derive(3, 0);
244constexpr auto KMemoryRegionType_VirtualDramPoolManagement = 250constexpr auto KMemoryRegionType_VirtualDramPoolManagement =
@@ -330,6 +336,8 @@ constexpr KMemoryRegionType GetTypeForVirtualLinearMapping(u32 type_id) {
330 return KMemoryRegionType_VirtualDramKernelTraceBuffer; 336 return KMemoryRegionType_VirtualDramKernelTraceBuffer;
331 } else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) { 337 } else if (KMemoryRegionType_DramKernelPtHeap.IsAncestorOf(type_id)) {
332 return KMemoryRegionType_VirtualDramKernelPtHeap; 338 return KMemoryRegionType_VirtualDramKernelPtHeap;
339 } else if ((type_id | KMemoryRegionAttr_ShouldKernelMap) == type_id) {
340 return KMemoryRegionType_VirtualDramUnknownDebug;
333 } else { 341 } else {
334 return KMemoryRegionType_Dram; 342 return KMemoryRegionType_Dram;
335 } 343 }
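The pool and mapping checks introduced in this commit use the idiom (type | Flag) == type, which tests whether type already contains every bit of Flag: ORing in bits that are already set leaves the value unchanged. A two-line demonstration with hypothetical bit patterns:

#include <cstdint>

// Equivalent to (type & flags) == flags.
constexpr bool HasAllBits(std::uint32_t type, std::uint32_t flags) {
    return (type | flags) == type;
}

static_assert(HasAllBits(0b1011, 0b0011));  // both low bits are present
static_assert(!HasAllBits(0b1011, 0b0100)); // bit 2 is missing

int main() {}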
diff --git a/src/core/hle/kernel/k_page_buffer.h b/src/core/hle/kernel/k_page_buffer.h
new file mode 100644
index 000000000..0a9451228
--- /dev/null
+++ b/src/core/hle/kernel/k_page_buffer.h
@@ -0,0 +1,34 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "core/core.h"
13#include "core/device_memory.h"
14#include "core/hle/kernel/memory_types.h"
15
16namespace Kernel {
17
18class KPageBuffer final : public KSlabAllocated<KPageBuffer> {
19public:
20 KPageBuffer() = default;
21
22 static KPageBuffer* FromPhysicalAddress(Core::System& system, PAddr phys_addr) {
23 ASSERT(Common::IsAligned(phys_addr, PageSize));
24 return reinterpret_cast<KPageBuffer*>(system.DeviceMemory().GetPointer(phys_addr));
25 }
26
27private:
28 [[maybe_unused]] alignas(PageSize) std::array<u8, PageSize> m_buffer{};
29};
30
31static_assert(sizeof(KPageBuffer) == PageSize);
32static_assert(alignof(KPageBuffer) == PageSize);
33
34} // namespace Kernel
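KPageBuffer's only structural requirements are that the object is exactly one page large and page aligned, so a page of backing device memory can be reinterpreted as a slab object in place. A toy analogue showing the same static guarantees (PageSize assumed to be 0x1000 and a static buffer standing in for device memory):

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr std::size_t PageSize = 0x1000;

// Exactly one page large and page aligned, like KPageBuffer.
struct alignas(PageSize) PageBuffer {
    std::array<std::uint8_t, PageSize> buffer{};
};
static_assert(sizeof(PageBuffer) == PageSize);
static_assert(alignof(PageBuffer) == PageSize);

int main() {
    alignas(PageSize) static std::uint8_t backing[PageSize]{}; // stand-in for device memory
    auto* page = reinterpret_cast<PageBuffer*>(backing);       // FromPhysicalAddress analogue
    assert(reinterpret_cast<std::uintptr_t>(page) % PageSize == 0);
    static_cast<void>(page);
}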
diff --git a/src/core/hle/kernel/k_page_heap.cpp b/src/core/hle/kernel/k_page_heap.cpp
index 29d996d62..97a5890a0 100644
--- a/src/core/hle/kernel/k_page_heap.cpp
+++ b/src/core/hle/kernel/k_page_heap.cpp
@@ -7,35 +7,51 @@
7 7
8namespace Kernel { 8namespace Kernel {
9 9
10void KPageHeap::Initialize(VAddr address, std::size_t size, std::size_t metadata_size) { 10void KPageHeap::Initialize(PAddr address, size_t size, VAddr management_address,
11 // Check our assumptions 11 size_t management_size, const size_t* block_shifts,
12 ASSERT(Common::IsAligned((address), PageSize)); 12 size_t num_block_shifts) {
13 // Check our assumptions.
14 ASSERT(Common::IsAligned(address, PageSize));
13 ASSERT(Common::IsAligned(size, PageSize)); 15 ASSERT(Common::IsAligned(size, PageSize));
16 ASSERT(0 < num_block_shifts && num_block_shifts <= NumMemoryBlockPageShifts);
17 const VAddr management_end = management_address + management_size;
14 18
15 // Set our members 19 // Set our members.
16 heap_address = address; 20 m_heap_address = address;
17 heap_size = size; 21 m_heap_size = size;
18 22 m_num_blocks = num_block_shifts;
19 // Setup bitmaps 23
20 metadata.resize(metadata_size / sizeof(u64)); 24 // Setup bitmaps.
21 u64* cur_bitmap_storage{metadata.data()}; 25 m_management_data.resize(management_size / sizeof(u64));
22 for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { 26 u64* cur_bitmap_storage{m_management_data.data()};
23 const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; 27 for (size_t i = 0; i < num_block_shifts; i++) {
24 const std::size_t next_block_shift{ 28 const size_t cur_block_shift = block_shifts[i];
25 (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; 29 const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
26 cur_bitmap_storage = blocks[i].Initialize(heap_address, heap_size, cur_block_shift, 30 cur_bitmap_storage = m_blocks[i].Initialize(m_heap_address, m_heap_size, cur_block_shift,
27 next_block_shift, cur_bitmap_storage); 31 next_block_shift, cur_bitmap_storage);
28 } 32 }
33
34 // Ensure we didn't overextend our bounds.
35 ASSERT(VAddr(cur_bitmap_storage) <= management_end);
36}
37
38size_t KPageHeap::GetNumFreePages() const {
39 size_t num_free = 0;
40
41 for (size_t i = 0; i < m_num_blocks; i++) {
42 num_free += m_blocks[i].GetNumFreePages();
43 }
44
45 return num_free;
29} 46}
30 47
31VAddr KPageHeap::AllocateBlock(s32 index, bool random) { 48PAddr KPageHeap::AllocateBlock(s32 index, bool random) {
32 const std::size_t needed_size{blocks[index].GetSize()}; 49 const size_t needed_size = m_blocks[index].GetSize();
33 50
34 for (s32 i{index}; i < static_cast<s32>(MemoryBlockPageShifts.size()); i++) { 51 for (s32 i = index; i < static_cast<s32>(m_num_blocks); i++) {
35 if (const VAddr addr{blocks[i].PopBlock(random)}; addr) { 52 if (const PAddr addr = m_blocks[i].PopBlock(random); addr != 0) {
36 if (const std::size_t allocated_size{blocks[i].GetSize()}; 53 if (const size_t allocated_size = m_blocks[i].GetSize(); allocated_size > needed_size) {
37 allocated_size > needed_size) { 54 this->Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
38 Free(addr + needed_size, (allocated_size - needed_size) / PageSize);
39 } 55 }
40 return addr; 56 return addr;
41 } 57 }
@@ -44,34 +60,34 @@ VAddr KPageHeap::AllocateBlock(s32 index, bool random) {
44 return 0; 60 return 0;
45} 61}
46 62
47void KPageHeap::FreeBlock(VAddr block, s32 index) { 63void KPageHeap::FreeBlock(PAddr block, s32 index) {
48 do { 64 do {
49 block = blocks[index++].PushBlock(block); 65 block = m_blocks[index++].PushBlock(block);
50 } while (block != 0); 66 } while (block != 0);
51} 67}
52 68
53void KPageHeap::Free(VAddr addr, std::size_t num_pages) { 69void KPageHeap::Free(PAddr addr, size_t num_pages) {
54 // Freeing no pages is a no-op 70 // Freeing no pages is a no-op.
55 if (num_pages == 0) { 71 if (num_pages == 0) {
56 return; 72 return;
57 } 73 }
58 74
59 // Find the largest block size that we can free, and free as many as possible 75 // Find the largest block size that we can free, and free as many as possible.
60 s32 big_index{static_cast<s32>(MemoryBlockPageShifts.size()) - 1}; 76 s32 big_index = static_cast<s32>(m_num_blocks) - 1;
61 const VAddr start{addr}; 77 const PAddr start = addr;
62 const VAddr end{(num_pages * PageSize) + addr}; 78 const PAddr end = addr + num_pages * PageSize;
63 VAddr before_start{start}; 79 PAddr before_start = start;
64 VAddr before_end{start}; 80 PAddr before_end = start;
65 VAddr after_start{end}; 81 PAddr after_start = end;
66 VAddr after_end{end}; 82 PAddr after_end = end;
67 while (big_index >= 0) { 83 while (big_index >= 0) {
68 const std::size_t block_size{blocks[big_index].GetSize()}; 84 const size_t block_size = m_blocks[big_index].GetSize();
69 const VAddr big_start{Common::AlignUp((start), block_size)}; 85 const PAddr big_start = Common::AlignUp(start, block_size);
70 const VAddr big_end{Common::AlignDown((end), block_size)}; 86 const PAddr big_end = Common::AlignDown(end, block_size);
71 if (big_start < big_end) { 87 if (big_start < big_end) {
72 // Free as many big blocks as we can 88 // Free as many big blocks as we can.
73 for (auto block{big_start}; block < big_end; block += block_size) { 89 for (auto block = big_start; block < big_end; block += block_size) {
74 FreeBlock(block, big_index); 90 this->FreeBlock(block, big_index);
75 } 91 }
76 before_end = big_start; 92 before_end = big_start;
77 after_start = big_end; 93 after_start = big_end;
@@ -81,31 +97,31 @@ void KPageHeap::Free(VAddr addr, std::size_t num_pages) {
81 } 97 }
82 ASSERT(big_index >= 0); 98 ASSERT(big_index >= 0);
83 99
84 // Free space before the big blocks 100 // Free space before the big blocks.
85 for (s32 i{big_index - 1}; i >= 0; i--) { 101 for (s32 i = big_index - 1; i >= 0; i--) {
86 const std::size_t block_size{blocks[i].GetSize()}; 102 const size_t block_size = m_blocks[i].GetSize();
87 while (before_start + block_size <= before_end) { 103 while (before_start + block_size <= before_end) {
88 before_end -= block_size; 104 before_end -= block_size;
89 FreeBlock(before_end, i); 105 this->FreeBlock(before_end, i);
90 } 106 }
91 } 107 }
92 108
93 // Free space after the big blocks 109 // Free space after the big blocks.
94 for (s32 i{big_index - 1}; i >= 0; i--) { 110 for (s32 i = big_index - 1; i >= 0; i--) {
95 const std::size_t block_size{blocks[i].GetSize()}; 111 const size_t block_size = m_blocks[i].GetSize();
96 while (after_start + block_size <= after_end) { 112 while (after_start + block_size <= after_end) {
97 FreeBlock(after_start, i); 113 this->FreeBlock(after_start, i);
98 after_start += block_size; 114 after_start += block_size;
99 } 115 }
100 } 116 }
101} 117}
102 118
103std::size_t KPageHeap::CalculateManagementOverheadSize(std::size_t region_size) { 119size_t KPageHeap::CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
104 std::size_t overhead_size = 0; 120 size_t num_block_shifts) {
105 for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); i++) { 121 size_t overhead_size = 0;
106 const std::size_t cur_block_shift{MemoryBlockPageShifts[i]}; 122 for (size_t i = 0; i < num_block_shifts; i++) {
107 const std::size_t next_block_shift{ 123 const size_t cur_block_shift = block_shifts[i];
108 (i != MemoryBlockPageShifts.size() - 1) ? MemoryBlockPageShifts[i + 1] : 0}; 124 const size_t next_block_shift = (i != num_block_shifts - 1) ? block_shifts[i + 1] : 0;
109 overhead_size += KPageHeap::Block::CalculateManagementOverheadSize( 125 overhead_size += KPageHeap::Block::CalculateManagementOverheadSize(
110 region_size, cur_block_shift, next_block_shift); 126 region_size, cur_block_shift, next_block_shift);
111 } 127 }
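KPageHeap::Free decomposes the freed range by carving out the largest aligned blocks first, then covering the unaligned head and tail with progressively smaller blocks. A standalone model of that decomposition with a made-up three-entry block-size table (the real sizes come from MemoryBlockPageShifts; the head and tail passes are merged into one loop here for brevity):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<std::uint64_t> block_sizes{0x1000, 0x4000, 0x10000}; // 4 KiB, 16 KiB, 64 KiB
    const std::uint64_t start = 0x3000;
    const std::uint64_t end = 0x26000; // arbitrary page-aligned range to free

    auto align_up = [](std::uint64_t v, std::uint64_t a) { return (v + a - 1) & ~(a - 1); };
    auto align_down = [](std::uint64_t v, std::uint64_t a) { return v & ~(a - 1); };
    auto free_block = [](std::uint64_t addr, std::uint64_t size) {
        std::printf("free %#llx (size %#llx)\n", (unsigned long long)addr, (unsigned long long)size);
    };

    // Find the largest block size that fits at least once, aligned, inside the range.
    int big = static_cast<int>(block_sizes.size()) - 1;
    std::uint64_t big_start = 0, big_end = 0;
    while (big >= 0) {
        big_start = align_up(start, block_sizes[big]);
        big_end = align_down(end, block_sizes[big]);
        if (big_start < big_end) {
            break;
        }
        --big;
    }

    // Free the aligned middle with the biggest blocks.
    for (std::uint64_t block = big_start; block < big_end; block += block_sizes[big]) {
        free_block(block, block_sizes[big]);
    }

    // Cover [start, big_start) and [big_end, end) with progressively smaller blocks.
    std::uint64_t before_end = big_start;
    std::uint64_t after_start = big_end;
    for (int i = big - 1; i >= 0; --i) {
        const std::uint64_t bs = block_sizes[i];
        while (start + bs <= before_end) {
            before_end -= bs;
            free_block(before_end, bs);
        }
        while (after_start + bs <= end) {
            free_block(after_start, bs);
            after_start += bs;
        }
    }
}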
diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h
index a65aa28a0..60fff766b 100644
--- a/src/core/hle/kernel/k_page_heap.h
+++ b/src/core/hle/kernel/k_page_heap.h
@@ -23,54 +23,73 @@ public:
23 KPageHeap() = default; 23 KPageHeap() = default;
24 ~KPageHeap() = default; 24 ~KPageHeap() = default;
25 25
26 constexpr VAddr GetAddress() const { 26 constexpr PAddr GetAddress() const {
27 return heap_address; 27 return m_heap_address;
28 } 28 }
29 constexpr std::size_t GetSize() const { 29 constexpr size_t GetSize() const {
30 return heap_size; 30 return m_heap_size;
31 } 31 }
32 constexpr VAddr GetEndAddress() const { 32 constexpr PAddr GetEndAddress() const {
33 return GetAddress() + GetSize(); 33 return this->GetAddress() + this->GetSize();
34 } 34 }
35 constexpr std::size_t GetPageOffset(VAddr block) const { 35 constexpr size_t GetPageOffset(PAddr block) const {
36 return (block - GetAddress()) / PageSize; 36 return (block - this->GetAddress()) / PageSize;
37 }
38 constexpr size_t GetPageOffsetToEnd(PAddr block) const {
39 return (this->GetEndAddress() - block) / PageSize;
40 }
41
42 void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
43 size_t management_size) {
44 return this->Initialize(heap_address, heap_size, management_address, management_size,
45 MemoryBlockPageShifts.data(), NumMemoryBlockPageShifts);
46 }
47
48 size_t GetFreeSize() const {
49 return this->GetNumFreePages() * PageSize;
37 } 50 }
38 51
39 void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); 52 void SetInitialUsedSize(size_t reserved_size) {
40 VAddr AllocateBlock(s32 index, bool random); 53 // Check that the reserved size is valid.
41 void Free(VAddr addr, std::size_t num_pages); 54 const size_t free_size = this->GetNumFreePages() * PageSize;
55 ASSERT(m_heap_size >= free_size + reserved_size);
42 56
43 void UpdateUsedSize() { 57 // Set the initial used size.
44 used_size = heap_size - (GetNumFreePages() * PageSize); 58 m_initial_used_size = m_heap_size - free_size - reserved_size;
45 } 59 }
46 60
47 static std::size_t CalculateManagementOverheadSize(std::size_t region_size); 61 PAddr AllocateBlock(s32 index, bool random);
62 void Free(PAddr addr, size_t num_pages);
63
64 static size_t CalculateManagementOverheadSize(size_t region_size) {
65 return CalculateManagementOverheadSize(region_size, MemoryBlockPageShifts.data(),
66 NumMemoryBlockPageShifts);
67 }
48 68
49 static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) { 69 static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) {
50 const auto target_pages{std::max(num_pages, align_pages)}; 70 const size_t target_pages = std::max(num_pages, align_pages);
51 for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) { 71 for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) {
52 if (target_pages <= 72 if (target_pages <= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
53 (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) {
54 return static_cast<s32>(i); 73 return static_cast<s32>(i);
55 } 74 }
56 } 75 }
57 return -1; 76 return -1;
58 } 77 }
59 78
60 static constexpr s32 GetBlockIndex(std::size_t num_pages) { 79 static constexpr s32 GetBlockIndex(size_t num_pages) {
61 for (s32 i{static_cast<s32>(NumMemoryBlockPageShifts) - 1}; i >= 0; i--) { 80 for (s32 i = static_cast<s32>(NumMemoryBlockPageShifts) - 1; i >= 0; i--) {
62 if (num_pages >= (static_cast<std::size_t>(1) << MemoryBlockPageShifts[i]) / PageSize) { 81 if (num_pages >= (size_t(1) << MemoryBlockPageShifts[i]) / PageSize) {
63 return i; 82 return i;
64 } 83 }
65 } 84 }
66 return -1; 85 return -1;
67 } 86 }
68 87
69 static constexpr std::size_t GetBlockSize(std::size_t index) { 88 static constexpr size_t GetBlockSize(size_t index) {
70 return static_cast<std::size_t>(1) << MemoryBlockPageShifts[index]; 89 return size_t(1) << MemoryBlockPageShifts[index];
71 } 90 }
72 91
73 static constexpr std::size_t GetBlockNumPages(std::size_t index) { 92 static constexpr size_t GetBlockNumPages(size_t index) {
74 return GetBlockSize(index) / PageSize; 93 return GetBlockSize(index) / PageSize;
75 } 94 }
76 95
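The two index helpers above answer opposite questions: GetAlignedBlockIndex returns the smallest block that can hold max(num_pages, align_pages) pages (used for contiguous allocations), while GetBlockIndex returns the largest block that is not bigger than the request (used by the page-group allocator). A standalone sketch of both with a made-up shift table (the real values live in MemoryBlockPageShifts):

#include <algorithm>
#include <array>
#include <cstddef>

constexpr std::size_t PageSize = 0x1000;
constexpr std::array<std::size_t, 4> Shifts{12, 16, 21, 22}; // 4 KiB, 64 KiB, 2 MiB, 4 MiB blocks

// Smallest block that can hold max(num_pages, align_pages) pages, or -1 if nothing fits.
constexpr int GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
    const std::size_t target = std::max(num_pages, align_pages);
    for (std::size_t i = 0; i < Shifts.size(); ++i) {
        if (target <= (std::size_t{1} << Shifts[i]) / PageSize) {
            return static_cast<int>(i);
        }
    }
    return -1;
}

// Largest block not bigger than num_pages, or -1.
constexpr int GetBlockIndex(std::size_t num_pages) {
    for (int i = static_cast<int>(Shifts.size()) - 1; i >= 0; --i) {
        if (num_pages >= (std::size_t{1} << Shifts[i]) / PageSize) {
            return i;
        }
    }
    return -1;
}

static_assert(GetAlignedBlockIndex(20, 1) == 2); // 20 pages needs a 2 MiB block (512 pages)
static_assert(GetBlockIndex(20) == 1);           // largest block <= 20 pages is 64 KiB (16 pages)

int main() {}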
@@ -83,114 +102,116 @@ private:
83 Block() = default; 102 Block() = default;
84 ~Block() = default; 103 ~Block() = default;
85 104
86 constexpr std::size_t GetShift() const { 105 constexpr size_t GetShift() const {
87 return block_shift; 106 return m_block_shift;
88 } 107 }
89 constexpr std::size_t GetNextShift() const { 108 constexpr size_t GetNextShift() const {
90 return next_block_shift; 109 return m_next_block_shift;
91 } 110 }
92 constexpr std::size_t GetSize() const { 111 constexpr size_t GetSize() const {
93 return static_cast<std::size_t>(1) << GetShift(); 112 return u64(1) << this->GetShift();
94 } 113 }
95 constexpr std::size_t GetNumPages() const { 114 constexpr size_t GetNumPages() const {
96 return GetSize() / PageSize; 115 return this->GetSize() / PageSize;
97 } 116 }
98 constexpr std::size_t GetNumFreeBlocks() const { 117 constexpr size_t GetNumFreeBlocks() const {
99 return bitmap.GetNumBits(); 118 return m_bitmap.GetNumBits();
100 } 119 }
101 constexpr std::size_t GetNumFreePages() const { 120 constexpr size_t GetNumFreePages() const {
102 return GetNumFreeBlocks() * GetNumPages(); 121 return this->GetNumFreeBlocks() * this->GetNumPages();
103 } 122 }
104 123
105 u64* Initialize(VAddr addr, std::size_t size, std::size_t bs, std::size_t nbs, 124 u64* Initialize(PAddr addr, size_t size, size_t bs, size_t nbs, u64* bit_storage) {
106 u64* bit_storage) { 125 // Set shifts.
107 // Set shifts 126 m_block_shift = bs;
108 block_shift = bs; 127 m_next_block_shift = nbs;
109 next_block_shift = nbs; 128
110 129 // Align up the address.
111 // Align up the address 130 PAddr end = addr + size;
112 VAddr end{addr + size}; 131 const size_t align = (m_next_block_shift != 0) ? (u64(1) << m_next_block_shift)
113 const auto align{(next_block_shift != 0) ? (1ULL << next_block_shift) 132 : (u64(1) << m_block_shift);
114 : (1ULL << block_shift)}; 133 addr = Common::AlignDown(addr, align);
115 addr = Common::AlignDown((addr), align); 134 end = Common::AlignUp(end, align);
116 end = Common::AlignUp((end), align); 135
117 136 m_heap_address = addr;
118 heap_address = addr; 137 m_end_offset = (end - addr) / (u64(1) << m_block_shift);
119 end_offset = (end - addr) / (1ULL << block_shift); 138 return m_bitmap.Initialize(bit_storage, m_end_offset);
120 return bitmap.Initialize(bit_storage, end_offset);
121 } 139 }
122 140
123 VAddr PushBlock(VAddr address) { 141 PAddr PushBlock(PAddr address) {
124 // Set the bit for the free block 142 // Set the bit for the free block.
125 std::size_t offset{(address - heap_address) >> GetShift()}; 143 size_t offset = (address - m_heap_address) >> this->GetShift();
126 bitmap.SetBit(offset); 144 m_bitmap.SetBit(offset);
127 145
128 // If we have a next shift, try to clear the blocks below and return the address 146 // If we have a next shift, try to clear the blocks below this one and return the new
129 if (GetNextShift()) { 147 // address.
130 const auto diff{1ULL << (GetNextShift() - GetShift())}; 148 if (this->GetNextShift()) {
149 const size_t diff = u64(1) << (this->GetNextShift() - this->GetShift());
131 offset = Common::AlignDown(offset, diff); 150 offset = Common::AlignDown(offset, diff);
132 if (bitmap.ClearRange(offset, diff)) { 151 if (m_bitmap.ClearRange(offset, diff)) {
133 return heap_address + (offset << GetShift()); 152 return m_heap_address + (offset << this->GetShift());
134 } 153 }
135 } 154 }
136 155
137 // We couldn't coalesce, or we're already as big as possible 156 // We couldn't coalesce, or we're already as big as possible.
138 return 0; 157 return {};
139 } 158 }
140 159
141 VAddr PopBlock(bool random) { 160 PAddr PopBlock(bool random) {
142 // Find a free block 161 // Find a free block.
143 const s64 soffset{bitmap.FindFreeBlock(random)}; 162 s64 soffset = m_bitmap.FindFreeBlock(random);
144 if (soffset < 0) { 163 if (soffset < 0) {
145 return 0; 164 return {};
146 } 165 }
147 const auto offset{static_cast<std::size_t>(soffset)}; 166 const size_t offset = static_cast<size_t>(soffset);
148 167
149 // Update our tracking and return it 168 // Update our tracking and return it.
150 bitmap.ClearBit(offset); 169 m_bitmap.ClearBit(offset);
151 return heap_address + (offset << GetShift()); 170 return m_heap_address + (offset << this->GetShift());
152 } 171 }
153 172
154 static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size, 173 public:
155 std::size_t cur_block_shift, 174 static constexpr size_t CalculateManagementOverheadSize(size_t region_size,
156 std::size_t next_block_shift) { 175 size_t cur_block_shift,
157 const auto cur_block_size{(1ULL << cur_block_shift)}; 176 size_t next_block_shift) {
158 const auto next_block_size{(1ULL << next_block_shift)}; 177 const size_t cur_block_size = (u64(1) << cur_block_shift);
159 const auto align{(next_block_shift != 0) ? next_block_size : cur_block_size}; 178 const size_t next_block_size = (u64(1) << next_block_shift);
179 const size_t align = (next_block_shift != 0) ? next_block_size : cur_block_size;
160 return KPageBitmap::CalculateManagementOverheadSize( 180 return KPageBitmap::CalculateManagementOverheadSize(
161 (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); 181 (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size);
162 } 182 }
163 183
164 private: 184 private:
165 KPageBitmap bitmap; 185 KPageBitmap m_bitmap;
166 VAddr heap_address{}; 186 PAddr m_heap_address{};
167 uintptr_t end_offset{}; 187 uintptr_t m_end_offset{};
168 std::size_t block_shift{}; 188 size_t m_block_shift{};
169 std::size_t next_block_shift{}; 189 size_t m_next_block_shift{};
170 }; 190 };
171 191
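(Aside: the coalescing in Block::PushBlock above can be modelled without KPageBitmap or PAddr handling. The sketch below is a simplified, single-threaded illustration only; names and the std::vector<bool> bitmap are stand-ins, and the real ClearRange does the check-and-clear in one step. The owning heap would push the returned address into the next larger order.)

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct Order {
    Order(std::uint64_t base, std::size_t shift, std::size_t next_shift, std::size_t count)
        : heap_address(base), block_shift(shift), next_block_shift(next_shift),
          free_bits(count, false) {}

    std::uint64_t heap_address;
    std::size_t block_shift;
    std::size_t next_block_shift; // 0 for the largest order
    std::vector<bool> free_bits;  // one bit per block of this order

    // Marks a block free; if every buddy in its group is now free, the group is
    // consumed and the promoted (coarser-aligned) address is returned.
    std::optional<std::uint64_t> PushBlock(std::uint64_t address) {
        std::size_t offset = (address - heap_address) >> block_shift;
        free_bits[offset] = true;

        if (next_block_shift != 0) {
            const std::size_t group = std::size_t{1} << (next_block_shift - block_shift);
            offset -= offset % group; // align down to the start of the buddy group
            bool all_free = true;
            for (std::size_t i = 0; i < group; ++i) {
                all_free = all_free && free_bits[offset + i];
            }
            if (all_free) {
                for (std::size_t i = 0; i < group; ++i) {
                    free_bits[offset + i] = false;
                }
                return heap_address + (offset << block_shift);
            }
        }
        return std::nullopt; // could not coalesce, or already the largest order
    }
};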
172 constexpr std::size_t GetNumFreePages() const { 192private:
173 std::size_t num_free{}; 193 void Initialize(PAddr heap_address, size_t heap_size, VAddr management_address,
174 194 size_t management_size, const size_t* block_shifts, size_t num_block_shifts);
175 for (const auto& block : blocks) { 195 size_t GetNumFreePages() const;
176 num_free += block.GetNumFreePages();
177 }
178
179 return num_free;
180 }
181 196
182 void FreeBlock(VAddr block, s32 index); 197 void FreeBlock(PAddr block, s32 index);
183 198
184 static constexpr std::size_t NumMemoryBlockPageShifts{7}; 199 static constexpr size_t NumMemoryBlockPageShifts{7};
185 static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ 200 static constexpr std::array<size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{
186 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, 201 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E,
187 }; 202 };
188 203
189 VAddr heap_address{}; 204private:
190 std::size_t heap_size{}; 205 static size_t CalculateManagementOverheadSize(size_t region_size, const size_t* block_shifts,
191 std::size_t used_size{}; 206 size_t num_block_shifts);
192 std::array<Block, NumMemoryBlockPageShifts> blocks{}; 207
193 std::vector<u64> metadata; 208private:
209 PAddr m_heap_address{};
210 size_t m_heap_size{};
211 size_t m_initial_used_size{};
212 size_t m_num_blocks{};
213 std::array<Block, NumMemoryBlockPageShifts> m_blocks{};
214 std::vector<u64> m_management_data;
194}; 215};
195 216
196} // namespace Kernel 217} // namespace Kernel
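(Aside: the order selection above can be exercised on its own. The sketch below copies the same MemoryBlockPageShifts table and assumes a 4 KiB page size; GetBlockIndex picks the largest order that does not exceed the request, while GetAlignedBlockIndex picks the smallest order that can hold it.)

#include <algorithm>
#include <array>
#include <cstddef>

constexpr std::size_t PageSize = 0x1000;
constexpr std::array<std::size_t, 7> MemoryBlockPageShifts{0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E};

// Smallest block order that can hold num_pages at the requested alignment.
constexpr int GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) {
    const std::size_t target = std::max(num_pages, align_pages);
    for (std::size_t i = 0; i < MemoryBlockPageShifts.size(); ++i) {
        if (target <= (std::size_t{1} << MemoryBlockPageShifts[i]) / PageSize) {
            return static_cast<int>(i);
        }
    }
    return -1;
}

// Largest block order that is not bigger than num_pages.
constexpr int GetBlockIndex(std::size_t num_pages) {
    for (int i = static_cast<int>(MemoryBlockPageShifts.size()) - 1; i >= 0; --i) {
        if (num_pages >= (std::size_t{1} << MemoryBlockPageShifts[i]) / PageSize) {
            return i;
        }
    }
    return -1;
}

// The orders are 4 KiB, 64 KiB, 2 MiB, 4 MiB, 32 MiB, 512 MiB and 1 GiB:
static_assert(GetBlockIndex(100) == 1);           // 100 pages sit best among 64 KiB blocks
static_assert(GetAlignedBlockIndex(100, 1) == 2); // but a single 2 MiB block is needed to hold them
static_assert(GetBlockIndex(0) == -1);            // zero pages matches no order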
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 88aa2a152..02d93b12e 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -273,87 +273,219 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
273 R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free, 273 R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
274 KMemoryPermission::None, KMemoryPermission::None, 274 KMemoryPermission::None, KMemoryPermission::None,
275 KMemoryAttribute::None, KMemoryAttribute::None)); 275 KMemoryAttribute::None, KMemoryAttribute::None));
276 KPageLinkedList pg;
277 R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
278 &pg, num_pages,
279 KMemoryManager::EncodeOption(KMemoryManager::Pool::Application, allocation_option)));
276 280
277 KPageLinkedList page_linked_list; 281 R_TRY(Operate(addr, num_pages, pg, OperationType::MapGroup));
278 R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
279 allocation_option));
280 R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
281 282
282 block_manager->Update(addr, num_pages, state, perm); 283 block_manager->Update(addr, num_pages, state, perm);
283 284
284 return ResultSuccess; 285 return ResultSuccess;
285} 286}
286 287
287ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { 288ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
289 // Validate the mapping request.
290 R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
291 ResultInvalidMemoryRegion);
292
293 // Lock the table.
288 KScopedLightLock lk(general_lock); 294 KScopedLightLock lk(general_lock);
289 295
290 const std::size_t num_pages{size / PageSize}; 296 // Verify that the source memory is normal heap.
297 KMemoryState src_state{};
298 KMemoryPermission src_perm{};
299 std::size_t num_src_allocator_blocks{};
300 R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
301 src_address, size, KMemoryState::All, KMemoryState::Normal,
302 KMemoryPermission::All, KMemoryPermission::UserReadWrite,
303 KMemoryAttribute::All, KMemoryAttribute::None));
291 304
292 KMemoryState state{}; 305 // Verify that the destination memory is unmapped.
293 KMemoryPermission perm{}; 306 std::size_t num_dst_allocator_blocks{};
294 CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size, 307 R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
295 KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All, 308 KMemoryState::Free, KMemoryPermission::None,
296 KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, 309 KMemoryPermission::None, KMemoryAttribute::None,
297 KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); 310 KMemoryAttribute::None));
298 311
299 if (IsRegionMapped(dst_addr, size)) { 312 // Map the code memory.
300 return ResultInvalidCurrentMemory; 313 {
301 } 314 // Determine the number of pages being operated on.
315 const std::size_t num_pages = size / PageSize;
302 316
303 KPageLinkedList page_linked_list; 317 // Create page groups for the memory being mapped.
304 AddRegionToPages(src_addr, num_pages, page_linked_list); 318 KPageLinkedList pg;
319 AddRegionToPages(src_address, num_pages, pg);
305 320
306 { 321 // Reprotect the source as kernel-read/not mapped.
307 auto block_guard = detail::ScopeExit( 322 const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
308 [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); }); 323 KMemoryPermission::NotMapped);
324 R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
309 325
310 CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None, 326 // Ensure that we unprotect the source pages on failure.
311 OperationType::ChangePermissions)); 327 auto unprot_guard = SCOPE_GUARD({
312 CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None)); 328 ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
329 .IsSuccess());
330 });
313 331
314 block_guard.Cancel(); 332 // Map the alias pages.
315 } 333 R_TRY(MapPages(dst_address, pg, new_perm));
316 334
317 block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None, 335 // We successfully mapped the alias pages, so we don't need to unprotect the src pages on
318 KMemoryAttribute::Locked); 336 // failure.
319 block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode); 337 unprot_guard.Cancel();
338
339 // Apply the memory block updates.
340 block_manager->Update(src_address, num_pages, src_state, new_perm,
341 KMemoryAttribute::Locked);
342 block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
343 KMemoryAttribute::None);
344 }
320 345
321 return ResultSuccess; 346 return ResultSuccess;
322} 347}
323 348
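(Aside: the MapCodeMemory rewrite above leans on a cancelable scope guard to undo the source reprotection if mapping the alias fails. The sketch below is a minimal stand-in for that idiom, not the project's actual SCOPE_GUARD macro; the usage comment uses hypothetical helper names purely for illustration.)

#include <utility>

template <typename F>
class ScopeGuard {
public:
    explicit ScopeGuard(F&& f) : func(std::forward<F>(f)) {}
    ~ScopeGuard() {
        if (active) {
            func(); // rollback runs unless the guard was canceled
        }
    }
    void Cancel() {
        active = false;
    }
    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    F func;
    bool active = true;
};

// Usage mirrors the flow above: reprotect the source, arm a guard that undoes it,
// and cancel the guard only once the alias mapping has succeeded.
//   ScopeGuard guard{[&] { RestoreSourcePermissions(); }};  // hypothetical helpers
//   if (!MapAliasPages()) { return Error; }                 // guard fires on this path
//   guard.Cancel();                                         // success: keep the new protection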
324ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { 349ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
350 // Validate the mapping request.
351 R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
352 ResultInvalidMemoryRegion);
353
354 // Lock the table.
325 KScopedLightLock lk(general_lock); 355 KScopedLightLock lk(general_lock);
326 356
327 if (!size) { 357 // Verify that the source memory is locked normal heap.
328 return ResultSuccess; 358 std::size_t num_src_allocator_blocks{};
359 R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
360 KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
361 KMemoryPermission::None, KMemoryAttribute::All,
362 KMemoryAttribute::Locked));
363
364 // Verify that the destination memory is aliasable code.
365 std::size_t num_dst_allocator_blocks{};
366 R_TRY(this->CheckMemoryStateContiguous(
367 std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
368 KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
369 KMemoryAttribute::All, KMemoryAttribute::None));
370
371 // Determine whether any pages being unmapped are code.
372 bool any_code_pages = false;
373 {
374 KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
375 while (true) {
376 // Get the memory info.
377 const KMemoryInfo info = it->GetMemoryInfo();
378
379 // Check if the memory has code flag.
380 if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
381 any_code_pages = true;
382 break;
383 }
384
385 // Check if we're done.
386 if (dst_address + size - 1 <= info.GetLastAddress()) {
387 break;
388 }
389
390 // Advance.
391 ++it;
392 }
329 } 393 }
330 394
331 const std::size_t num_pages{size / PageSize}; 395 // Ensure that we maintain the instruction cache.
396 bool reprotected_pages = false;
397 SCOPE_EXIT({
398 if (reprotected_pages && any_code_pages) {
399 system.InvalidateCpuInstructionCacheRange(dst_address, size);
400 }
401 });
332 402
333 CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size, 403 // Unmap.
334 KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None, 404 {
335 KMemoryPermission::None, KMemoryAttribute::Mask, 405 // Determine the number of pages being operated on.
336 KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); 406 const std::size_t num_pages = size / PageSize;
337 407
338 KMemoryState state{}; 408 // Unmap the aliased copy of the pages.
339 CASCADE_CODE(CheckMemoryState( 409 R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
340 &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
341 KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
342 KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
343 CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
344 KMemoryPermission::None, KMemoryAttribute::Mask,
345 KMemoryAttribute::None));
346 CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
347 410
348 block_manager->Update(dst_addr, num_pages, KMemoryState::Free); 411 // Try to set the permissions for the source pages back to what they should be.
349 block_manager->Update(src_addr, num_pages, KMemoryState::Normal, 412 R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
350 KMemoryPermission::UserReadWrite); 413 OperationType::ChangePermissions));
351 414
352 system.InvalidateCpuInstructionCacheRange(dst_addr, size); 415 // Apply the memory block updates.
416 block_manager->Update(dst_address, num_pages, KMemoryState::None);
417 block_manager->Update(src_address, num_pages, KMemoryState::Normal,
418 KMemoryPermission::UserReadWrite);
419
420 // Note that we reprotected pages.
421 reprotected_pages = true;
422 }
353 423
354 return ResultSuccess; 424 return ResultSuccess;
355} 425}
356 426
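(Aside: the "does this range contain any code pages?" walk inside UnmapCodeMemory above can be modelled with a plain sorted map. This is a simplified sketch, not KMemoryBlockManager; it assumes, as the kernel does, that the block list fully covers the queried range.)

#include <cstdint>
#include <map>

struct BlockInfo {
    std::uint64_t last_address; // inclusive last byte of the block
    bool is_code;
};

bool AnyCodePages(const std::map<std::uint64_t, BlockInfo>& blocks, std::uint64_t address,
                  std::uint64_t size) {
    // Find the block containing `address`: the last block starting at or before it.
    auto it = blocks.upper_bound(address);
    --it; // assumes some block covers the address

    const std::uint64_t last = address + size - 1;
    while (true) {
        if (it->second.is_code) {
            return true;
        }
        if (last <= it->second.last_address) {
            return false; // the queried range is fully covered without hitting code
        }
        ++it; // advance to the next block
    }
}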
427VAddr KPageTable::FindFreeArea(VAddr region_start, std::size_t region_num_pages,
428 std::size_t num_pages, std::size_t alignment, std::size_t offset,
429 std::size_t guard_pages) {
430 VAddr address = 0;
431
432 if (num_pages <= region_num_pages) {
433 if (this->IsAslrEnabled()) {
434 // Try to directly find a free area up to 8 times.
435 for (std::size_t i = 0; i < 8; i++) {
436 const std::size_t random_offset =
437 KSystemControl::GenerateRandomRange(
438 0, (region_num_pages - num_pages - guard_pages) * PageSize / alignment) *
439 alignment;
440 const VAddr candidate =
441 Common::AlignDown((region_start + random_offset), alignment) + offset;
442
443 KMemoryInfo info = this->QueryInfoImpl(candidate);
444
445 if (info.state != KMemoryState::Free) {
446 continue;
447 }
448 if (region_start > candidate) {
449 continue;
450 }
451 if (info.GetAddress() + guard_pages * PageSize > candidate) {
452 continue;
453 }
454
455 const VAddr candidate_end = candidate + (num_pages + guard_pages) * PageSize - 1;
456 if (candidate_end > info.GetLastAddress()) {
457 continue;
458 }
459 if (candidate_end > region_start + region_num_pages * PageSize - 1) {
460 continue;
461 }
462
463 address = candidate;
464 break;
465 }
466 // Fall back to finding the first free area with a random offset.
467 if (address == 0) {
468 // NOTE: Nintendo does not account for guard pages here.
469 // This may theoretically cause an offset to be chosen that cannot be mapped. We
470 // will account for guard pages.
471 const std::size_t offset_pages = KSystemControl::GenerateRandomRange(
472 0, region_num_pages - num_pages - guard_pages);
473 address = block_manager->FindFreeArea(region_start + offset_pages * PageSize,
474 region_num_pages - offset_pages, num_pages,
475 alignment, offset, guard_pages);
476 }
477 }
478
479 // Find the first free area.
480 if (address == 0) {
481 address = block_manager->FindFreeArea(region_start, region_num_pages, num_pages,
482 alignment, offset, guard_pages);
483 }
484 }
485
486 return address;
487}
488
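(Aside: the candidate generation in FindFreeArea above boils down to picking a random multiple of the alignment inside the region, then re-applying the caller's offset. The sketch below illustrates just that step; std::mt19937_64 stands in for KSystemControl::GenerateRandomRange, and it assumes alignment is a nonzero multiple of the page size and that num_pages + guard_pages fits in the region.)

#include <cstdint>
#include <random>

std::uint64_t PickAslrCandidate(std::uint64_t region_start, std::uint64_t region_num_pages,
                                std::uint64_t num_pages, std::uint64_t alignment,
                                std::uint64_t offset, std::uint64_t guard_pages) {
    constexpr std::uint64_t PageSize = 0x1000;
    static std::mt19937_64 rng{std::random_device{}()};

    // Random multiple of the alignment that still leaves room for the mapping
    // and its trailing guard pages.
    const std::uint64_t max_units =
        (region_num_pages - num_pages - guard_pages) * PageSize / alignment;
    std::uniform_int_distribution<std::uint64_t> dist(0, max_units);
    const std::uint64_t random_offset = dist(rng) * alignment;

    // Align the candidate down and re-apply the sub-alignment offset, as above;
    // the real code then validates the candidate against QueryInfoImpl.
    return (region_start + random_offset) / alignment * alignment + offset;
}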
357ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, 489ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
358 KPageTable& src_page_table, VAddr src_addr) { 490 KPageTable& src_page_table, VAddr src_addr) {
359 KScopedLightLock lk(general_lock); 491 KScopedLightLock lk(general_lock);
@@ -443,9 +575,10 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
443 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); 575 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
444 576
445 // Allocate pages for the new memory. 577 // Allocate pages for the new memory.
446 KPageLinkedList page_linked_list; 578 KPageLinkedList pg;
447 R_TRY(system.Kernel().MemoryManager().Allocate( 579 R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
448 page_linked_list, (size - mapped_size) / PageSize, memory_pool, allocation_option)); 580 &pg, (size - mapped_size) / PageSize,
581 KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
449 582
450 // Map the memory. 583 // Map the memory.
451 { 584 {
@@ -547,7 +680,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
547 }); 680 });
548 681
549 // Iterate over the memory. 682 // Iterate over the memory.
550 auto pg_it = page_linked_list.Nodes().begin(); 683 auto pg_it = pg.Nodes().begin();
551 PAddr pg_phys_addr = pg_it->GetAddress(); 684 PAddr pg_phys_addr = pg_it->GetAddress();
552 size_t pg_pages = pg_it->GetNumPages(); 685 size_t pg_pages = pg_it->GetNumPages();
553 686
@@ -571,7 +704,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr address, std::size_t size) {
571 // Check if we're at the end of the physical block. 704 // Check if we're at the end of the physical block.
572 if (pg_pages == 0) { 705 if (pg_pages == 0) {
573 // Ensure there are more pages to map. 706 // Ensure there are more pages to map.
574 ASSERT(pg_it != page_linked_list.Nodes().end()); 707 ASSERT(pg_it != pg.Nodes().end());
575 708
576 // Advance our physical block. 709 // Advance our physical block.
577 ++pg_it; 710 ++pg_it;
@@ -841,10 +974,14 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr address, std::size_t size) {
841 process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size); 974 process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
842 975
843 // Update memory blocks. 976 // Update memory blocks.
844 system.Kernel().MemoryManager().Free(pg, size / PageSize, memory_pool, allocation_option);
845 block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None, 977 block_manager->Update(address, size / PageSize, KMemoryState::Free, KMemoryPermission::None,
846 KMemoryAttribute::None); 978 KMemoryAttribute::None);
847 979
980 // TODO(bunnei): This is a workaround until the next set of changes, where we add reference
981 // counting for mapped pages. Until then, we must manually close the reference to the page
982 // group.
983 system.Kernel().MemoryManager().Close(pg);
984
848 // We succeeded. 985 // We succeeded.
849 remap_guard.Cancel(); 986 remap_guard.Cancel();
850 987
@@ -980,6 +1117,46 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list
980 return ResultSuccess; 1117 return ResultSuccess;
981} 1118}
982 1119
1120ResultCode KPageTable::MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
1121 PAddr phys_addr, bool is_pa_valid, VAddr region_start,
1122 std::size_t region_num_pages, KMemoryState state,
1123 KMemoryPermission perm) {
1124 ASSERT(Common::IsAligned(alignment, PageSize) && alignment >= PageSize);
1125
1126 // Ensure this is a valid map request.
1127 R_UNLESS(this->CanContain(region_start, region_num_pages * PageSize, state),
1128 ResultInvalidCurrentMemory);
1129 R_UNLESS(num_pages < region_num_pages, ResultOutOfMemory);
1130
1131 // Lock the table.
1132 KScopedLightLock lk(general_lock);
1133
1134 // Find a random address to map at.
1135 VAddr addr = this->FindFreeArea(region_start, region_num_pages, num_pages, alignment, 0,
1136 this->GetNumGuardPages());
1137 R_UNLESS(addr != 0, ResultOutOfMemory);
1138 ASSERT(Common::IsAligned(addr, alignment));
1139 ASSERT(this->CanContain(addr, num_pages * PageSize, state));
1140 ASSERT(this->CheckMemoryState(addr, num_pages * PageSize, KMemoryState::All, KMemoryState::Free,
1141 KMemoryPermission::None, KMemoryPermission::None,
1142 KMemoryAttribute::None, KMemoryAttribute::None)
1143 .IsSuccess());
1144
1145 // Perform mapping operation.
1146 if (is_pa_valid) {
1147 R_TRY(this->Operate(addr, num_pages, perm, OperationType::Map, phys_addr));
1148 } else {
1149 UNIMPLEMENTED();
1150 }
1151
1152 // Update the blocks.
1153 block_manager->Update(addr, num_pages, state, perm);
1154
1155 // We successfully mapped the pages.
1156 *out_addr = addr;
1157 return ResultSuccess;
1158}
1159
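(Aside: the new MapPages overload above leans on Common::IsAligned/AlignUp/AlignDown. For power-of-two alignments these reduce to simple bit masks; the sketch below shows the idea, not the Common implementations themselves.)

#include <cstdint>

constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t align) {
    return value & ~(align - 1);
}
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return AlignDown(value + align - 1, align);
}
constexpr bool IsAligned(std::uint64_t value, std::uint64_t align) {
    return (value & (align - 1)) == 0;
}

static_assert(AlignDown(0x1234, 0x1000) == 0x1000);
static_assert(AlignUp(0x1234, 0x1000) == 0x2000);
static_assert(IsAligned(0x2000, 0x1000) && !IsAligned(0x1234, 0x1000));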
983ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) { 1160ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) {
984 ASSERT(this->IsLockedByCurrentThread()); 1161 ASSERT(this->IsLockedByCurrentThread());
985 1162
@@ -1022,6 +1199,30 @@ ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list,
1022 return ResultSuccess; 1199 return ResultSuccess;
1023} 1200}
1024 1201
1202ResultCode KPageTable::UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state) {
1203 // Check that the unmap is in range.
1204 const std::size_t size = num_pages * PageSize;
1205 R_UNLESS(this->Contains(address, size), ResultInvalidCurrentMemory);
1206
1207 // Lock the table.
1208 KScopedLightLock lk(general_lock);
1209
1210 // Check the memory state.
1211 std::size_t num_allocator_blocks{};
1212 R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), address, size,
1213 KMemoryState::All, state, KMemoryPermission::None,
1214 KMemoryPermission::None, KMemoryAttribute::All,
1215 KMemoryAttribute::None));
1216
1217 // Perform the unmap.
1218 R_TRY(Operate(address, num_pages, KMemoryPermission::None, OperationType::Unmap));
1219
1220 // Update the blocks.
1221 block_manager->Update(address, num_pages, KMemoryState::Free, KMemoryPermission::None);
1222
1223 return ResultSuccess;
1224}
1225
1025ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, 1226ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
1026 Svc::MemoryPermission svc_perm) { 1227 Svc::MemoryPermission svc_perm) {
1027 const size_t num_pages = size / PageSize; 1228 const size_t num_pages = size / PageSize;
@@ -1270,9 +1471,16 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
1270 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached); 1471 R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
1271 1472
1272 // Allocate pages for the heap extension. 1473 // Allocate pages for the heap extension.
1273 KPageLinkedList page_linked_list; 1474 KPageLinkedList pg;
1274 R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize, 1475 R_TRY(system.Kernel().MemoryManager().AllocateAndOpen(
1275 memory_pool, allocation_option)); 1476 &pg, allocation_size / PageSize,
1477 KMemoryManager::EncodeOption(memory_pool, allocation_option)));
1478
1479 // Clear all the newly allocated pages.
1480 for (const auto& it : pg.Nodes()) {
1481 std::memset(system.DeviceMemory().GetPointer(it.GetAddress()), heap_fill_value,
1482 it.GetSize());
1483 }
1276 1484
1277 // Map the pages. 1485 // Map the pages.
1278 { 1486 {
@@ -1291,7 +1499,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
1291 1499
1292 // Map the pages. 1500 // Map the pages.
1293 const auto num_pages = allocation_size / PageSize; 1501 const auto num_pages = allocation_size / PageSize;
1294 R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup)); 1502 R_TRY(Operate(current_heap_end, num_pages, pg, OperationType::MapGroup));
1295 1503
1296 // Clear all the newly allocated pages. 1504 // Clear all the newly allocated pages.
1297 for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) { 1505 for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
@@ -1339,8 +1547,9 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
1339 R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); 1547 R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
1340 } else { 1548 } else {
1341 KPageLinkedList page_group; 1549 KPageLinkedList page_group;
1342 R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool, 1550 R_TRY(system.Kernel().MemoryManager().AllocateAndOpenForProcess(
1343 allocation_option)); 1551 &page_group, needed_num_pages,
1552 KMemoryManager::EncodeOption(memory_pool, allocation_option), 0, 0));
1344 R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); 1553 R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
1345 } 1554 }
1346 1555
@@ -1547,7 +1756,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermiss
1547 return ResultSuccess; 1756 return ResultSuccess;
1548} 1757}
1549 1758
1550constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const { 1759VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
1551 switch (state) { 1760 switch (state) {
1552 case KMemoryState::Free: 1761 case KMemoryState::Free:
1553 case KMemoryState::Kernel: 1762 case KMemoryState::Kernel:
@@ -1583,7 +1792,7 @@ constexpr VAddr KPageTable::GetRegionAddress(KMemoryState state) const {
1583 } 1792 }
1584} 1793}
1585 1794
1586constexpr std::size_t KPageTable::GetRegionSize(KMemoryState state) const { 1795std::size_t KPageTable::GetRegionSize(KMemoryState state) const {
1587 switch (state) { 1796 switch (state) {
1588 case KMemoryState::Free: 1797 case KMemoryState::Free:
1589 case KMemoryState::Kernel: 1798 case KMemoryState::Kernel:
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index c98887d34..54c6adf8d 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -36,8 +36,8 @@ public:
36 KMemoryManager::Pool pool); 36 KMemoryManager::Pool pool);
37 ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state, 37 ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
38 KMemoryPermission perm); 38 KMemoryPermission perm);
39 ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); 39 ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
40 ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); 40 ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
41 ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, 41 ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
42 VAddr src_addr); 42 VAddr src_addr);
43 ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); 43 ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
@@ -46,7 +46,14 @@ public:
46 ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); 46 ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
47 ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, 47 ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
48 KMemoryPermission perm); 48 KMemoryPermission perm);
49 ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
50 PAddr phys_addr, KMemoryState state, KMemoryPermission perm) {
51 return this->MapPages(out_addr, num_pages, alignment, phys_addr, true,
52 this->GetRegionAddress(state), this->GetRegionSize(state) / PageSize,
53 state, perm);
54 }
49 ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state); 55 ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state);
56 ResultCode UnmapPages(VAddr address, std::size_t num_pages, KMemoryState state);
50 ResultCode SetProcessMemoryPermission(VAddr addr, std::size_t size, 57 ResultCode SetProcessMemoryPermission(VAddr addr, std::size_t size,
51 Svc::MemoryPermission svc_perm); 58 Svc::MemoryPermission svc_perm);
52 KMemoryInfo QueryInfo(VAddr addr); 59 KMemoryInfo QueryInfo(VAddr addr);
@@ -91,6 +98,9 @@ private:
91 ResultCode InitializeMemoryLayout(VAddr start, VAddr end); 98 ResultCode InitializeMemoryLayout(VAddr start, VAddr end);
92 ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, 99 ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
93 KMemoryPermission perm); 100 KMemoryPermission perm);
101 ResultCode MapPages(VAddr* out_addr, std::size_t num_pages, std::size_t alignment,
102 PAddr phys_addr, bool is_pa_valid, VAddr region_start,
103 std::size_t region_num_pages, KMemoryState state, KMemoryPermission perm);
94 ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list); 104 ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list);
95 bool IsRegionMapped(VAddr address, u64 size); 105 bool IsRegionMapped(VAddr address, u64 size);
96 bool IsRegionContiguous(VAddr addr, u64 size) const; 106 bool IsRegionContiguous(VAddr addr, u64 size) const;
@@ -102,8 +112,11 @@ private:
102 OperationType operation); 112 OperationType operation);
103 ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, 113 ResultCode Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
104 OperationType operation, PAddr map_addr = 0); 114 OperationType operation, PAddr map_addr = 0);
105 constexpr VAddr GetRegionAddress(KMemoryState state) const; 115 VAddr GetRegionAddress(KMemoryState state) const;
106 constexpr std::size_t GetRegionSize(KMemoryState state) const; 116 std::size_t GetRegionSize(KMemoryState state) const;
117
118 VAddr FindFreeArea(VAddr region_start, std::size_t region_num_pages, std::size_t num_pages,
119 std::size_t alignment, std::size_t offset, std::size_t guard_pages);
107 120
108 ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr, 121 ResultCode CheckMemoryStateContiguous(std::size_t* out_blocks_needed, VAddr addr,
109 std::size_t size, KMemoryState state_mask, 122 std::size_t size, KMemoryState state_mask,
@@ -137,7 +150,7 @@ private:
137 return CheckMemoryState(nullptr, nullptr, nullptr, out_blocks_needed, addr, size, 150 return CheckMemoryState(nullptr, nullptr, nullptr, out_blocks_needed, addr, size,
138 state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr); 151 state_mask, state, perm_mask, perm, attr_mask, attr, ignore_attr);
139 } 152 }
140 ResultCode CheckMemoryState(VAddr addr, size_t size, KMemoryState state_mask, 153 ResultCode CheckMemoryState(VAddr addr, std::size_t size, KMemoryState state_mask,
141 KMemoryState state, KMemoryPermission perm_mask, 154 KMemoryState state, KMemoryPermission perm_mask,
142 KMemoryPermission perm, KMemoryAttribute attr_mask, 155 KMemoryPermission perm, KMemoryAttribute attr_mask,
143 KMemoryAttribute attr, 156 KMemoryAttribute attr,
@@ -210,7 +223,7 @@ public:
210 constexpr VAddr GetAliasCodeRegionSize() const { 223 constexpr VAddr GetAliasCodeRegionSize() const {
211 return alias_code_region_end - alias_code_region_start; 224 return alias_code_region_end - alias_code_region_start;
212 } 225 }
213 size_t GetNormalMemorySize() { 226 std::size_t GetNormalMemorySize() {
214 KScopedLightLock lk(general_lock); 227 KScopedLightLock lk(general_lock);
215 return GetHeapSize() + mapped_physical_memory_size; 228 return GetHeapSize() + mapped_physical_memory_size;
216 } 229 }
@@ -253,9 +266,10 @@ public:
253 constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { 266 constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const {
254 return !IsOutsideASLRRegion(address, size); 267 return !IsOutsideASLRRegion(address, size);
255 } 268 }
256 269 constexpr std::size_t GetNumGuardPages() const {
257 PAddr GetPhysicalAddr(VAddr addr) { 270 return IsKernel() ? 1 : 4;
258 ASSERT(IsLockedByCurrentThread()); 271 }
272 PAddr GetPhysicalAddr(VAddr addr) const {
259 const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; 273 const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
260 ASSERT(backing_addr); 274 ASSERT(backing_addr);
261 return backing_addr + addr; 275 return backing_addr + addr;
@@ -276,10 +290,6 @@ private:
276 return is_aslr_enabled; 290 return is_aslr_enabled;
277 } 291 }
278 292
279 constexpr std::size_t GetNumGuardPages() const {
280 return IsKernel() ? 1 : 4;
281 }
282
283 constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const { 293 constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const {
284 return (address_space_start <= addr) && 294 return (address_space_start <= addr) &&
285 (num_pages <= (address_space_end - address_space_start) / PageSize) && 295 (num_pages <= (address_space_end - address_space_start) / PageSize) &&
@@ -311,6 +321,8 @@ private:
311 bool is_kernel{}; 321 bool is_kernel{};
312 bool is_aslr_enabled{}; 322 bool is_aslr_enabled{};
313 323
324 u32 heap_fill_value{};
325
314 KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application}; 326 KMemoryManager::Pool memory_pool{KMemoryManager::Pool::Application};
315 KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront}; 327 KMemoryManager::Direction allocation_option{KMemoryManager::Direction::FromFront};
316 328
diff --git a/src/core/hle/kernel/k_port.cpp b/src/core/hle/kernel/k_port.cpp
index a8ba09c4a..ceb98709f 100644
--- a/src/core/hle/kernel/k_port.cpp
+++ b/src/core/hle/kernel/k_port.cpp
@@ -57,7 +57,12 @@ ResultCode KPort::EnqueueSession(KServerSession* session) {
57 R_UNLESS(state == State::Normal, ResultPortClosed); 57 R_UNLESS(state == State::Normal, ResultPortClosed);
58 58
59 server.EnqueueSession(session); 59 server.EnqueueSession(session);
60 server.GetSessionRequestHandler()->ClientConnected(server.AcceptSession()); 60
61 if (auto session_ptr = server.GetSessionRequestHandler().lock()) {
62 session_ptr->ClientConnected(server.AcceptSession());
63 } else {
64 UNREACHABLE();
65 }
61 66
62 return ResultSuccess; 67 return ResultSuccess;
63} 68}
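(Aside: the change above replaces a direct handler call with the lock-before-use weak_ptr pattern. A generic sketch of that pattern, with illustrative names rather than the kernel's types:)

#include <memory>

struct SessionRequestHandler {
    void ClientConnected() { /* handle the new session */ }
};

class Port {
public:
    void SetHandler(std::weak_ptr<SessionRequestHandler> handler) {
        session_handler = std::move(handler);
    }

    bool NotifyClientConnected() {
        // lock() yields a shared_ptr only while the handler is still alive.
        if (auto handler = session_handler.lock()) {
            handler->ClientConnected();
            return true;
        }
        // Handler already destroyed; the real code treats this branch as unreachable.
        return false;
    }

private:
    std::weak_ptr<SessionRequestHandler> session_handler; // non-owning reference
};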
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 9233261cd..b39405496 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -70,58 +70,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority
70} 70}
71} // Anonymous namespace 71} // Anonymous namespace
72 72
73// Represents a page used for thread-local storage.
74//
75// Each TLS page contains slots that may be used by processes and threads.
76// Every process and thread is created with a slot in some arbitrary page
77// (whichever page happens to have an available slot).
78class TLSPage {
79public:
80 static constexpr std::size_t num_slot_entries =
81 Core::Memory::PAGE_SIZE / Core::Memory::TLS_ENTRY_SIZE;
82
83 explicit TLSPage(VAddr address) : base_address{address} {}
84
85 bool HasAvailableSlots() const {
86 return !is_slot_used.all();
87 }
88
89 VAddr GetBaseAddress() const {
90 return base_address;
91 }
92
93 std::optional<VAddr> ReserveSlot() {
94 for (std::size_t i = 0; i < is_slot_used.size(); i++) {
95 if (is_slot_used[i]) {
96 continue;
97 }
98
99 is_slot_used[i] = true;
100 return base_address + (i * Core::Memory::TLS_ENTRY_SIZE);
101 }
102
103 return std::nullopt;
104 }
105
106 void ReleaseSlot(VAddr address) {
107 // Ensure that all given addresses are consistent with how TLS pages
108 // are intended to be used when releasing slots.
109 ASSERT(IsWithinPage(address));
110 ASSERT((address % Core::Memory::TLS_ENTRY_SIZE) == 0);
111
112 const std::size_t index = (address - base_address) / Core::Memory::TLS_ENTRY_SIZE;
113 is_slot_used[index] = false;
114 }
115
116private:
117 bool IsWithinPage(VAddr address) const {
118 return base_address <= address && address < base_address + Core::Memory::PAGE_SIZE;
119 }
120
121 VAddr base_address;
122 std::bitset<num_slot_entries> is_slot_used;
123};
124
125ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name, 73ResultCode KProcess::Initialize(KProcess* process, Core::System& system, std::string process_name,
126 ProcessType type, KResourceLimit* res_limit) { 74 ProcessType type, KResourceLimit* res_limit) {
127 auto& kernel = system.Kernel(); 75 auto& kernel = system.Kernel();
@@ -404,7 +352,7 @@ ResultCode KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata,
404 } 352 }
405 353
406 // Create TLS region 354 // Create TLS region
407 tls_region_address = CreateTLSRegion(); 355 R_TRY(this->CreateThreadLocalRegion(std::addressof(tls_region_address)));
408 memory_reservation.Commit(); 356 memory_reservation.Commit();
409 357
410 return handle_table.Initialize(capabilities.GetHandleTableSize()); 358 return handle_table.Initialize(capabilities.GetHandleTableSize());
@@ -444,7 +392,7 @@ void KProcess::PrepareForTermination() {
444 392
445 stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList()); 393 stop_threads(kernel.System().GlobalSchedulerContext().GetThreadList());
446 394
447 FreeTLSRegion(tls_region_address); 395 this->DeleteThreadLocalRegion(tls_region_address);
448 tls_region_address = 0; 396 tls_region_address = 0;
449 397
450 if (resource_limit) { 398 if (resource_limit) {
@@ -456,9 +404,6 @@ void KProcess::PrepareForTermination() {
456} 404}
457 405
458void KProcess::Finalize() { 406void KProcess::Finalize() {
459 // Finalize the handle table and close any open handles.
460 handle_table.Finalize();
461
462 // Free all shared memory infos. 407 // Free all shared memory infos.
463 { 408 {
464 auto it = shared_memory_list.begin(); 409 auto it = shared_memory_list.begin();
@@ -483,67 +428,110 @@ void KProcess::Finalize() {
483 resource_limit = nullptr; 428 resource_limit = nullptr;
484 } 429 }
485 430
431 // Finalize the page table.
432 page_table.reset();
433
486 // Perform inherited finalization. 434 // Perform inherited finalization.
487 KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask>::Finalize(); 435 KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask>::Finalize();
488} 436}
489 437
490/** 438ResultCode KProcess::CreateThreadLocalRegion(VAddr* out) {
491 * Attempts to find a TLS page that contains a free slot for 439 KThreadLocalPage* tlp = nullptr;
492 * use by a thread. 440 VAddr tlr = 0;
493 *
494 * @returns If a page with an available slot is found, then an iterator
495 * pointing to the page is returned. Otherwise the end iterator
496 * is returned instead.
497 */
498static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
499 return std::find_if(tls_pages.begin(), tls_pages.end(),
500 [](const auto& page) { return page.HasAvailableSlots(); });
501}
502 441
503VAddr KProcess::CreateTLSRegion() { 442 // See if we can get a region from a partially used TLP.
504 KScopedSchedulerLock lock(kernel); 443 {
505 if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)}; 444 KScopedSchedulerLock sl{kernel};
506 tls_page_iter != tls_pages.cend()) {
507 return *tls_page_iter->ReserveSlot();
508 }
509 445
510 Page* const tls_page_ptr{kernel.GetUserSlabHeapPages().Allocate()}; 446 if (auto it = partially_used_tlp_tree.begin(); it != partially_used_tlp_tree.end()) {
511 ASSERT(tls_page_ptr); 447 tlr = it->Reserve();
448 ASSERT(tlr != 0);
512 449
513 const VAddr start{page_table->GetKernelMapRegionStart()}; 450 if (it->IsAllUsed()) {
514 const VAddr size{page_table->GetKernelMapRegionEnd() - start}; 451 tlp = std::addressof(*it);
515 const PAddr tls_map_addr{kernel.System().DeviceMemory().GetPhysicalAddr(tls_page_ptr)}; 452 partially_used_tlp_tree.erase(it);
516 const VAddr tls_page_addr{page_table 453 fully_used_tlp_tree.insert(*tlp);
517 ->AllocateAndMapMemory(1, PageSize, true, start, size / PageSize, 454 }
518 KMemoryState::ThreadLocal,
519 KMemoryPermission::UserReadWrite,
520 tls_map_addr)
521 .ValueOr(0)};
522 455
523 ASSERT(tls_page_addr); 456 *out = tlr;
457 return ResultSuccess;
458 }
459 }
524 460
525 std::memset(tls_page_ptr, 0, PageSize); 461 // Allocate a new page.
526 tls_pages.emplace_back(tls_page_addr); 462 tlp = KThreadLocalPage::Allocate(kernel);
463 R_UNLESS(tlp != nullptr, ResultOutOfMemory);
464 auto tlp_guard = SCOPE_GUARD({ KThreadLocalPage::Free(kernel, tlp); });
527 465
528 const auto reserve_result{tls_pages.back().ReserveSlot()}; 466 // Initialize the new page.
529 ASSERT(reserve_result.has_value()); 467 R_TRY(tlp->Initialize(kernel, this));
468
469 // Reserve a TLR.
470 tlr = tlp->Reserve();
471 ASSERT(tlr != 0);
472
473 // Insert into our tree.
474 {
475 KScopedSchedulerLock sl{kernel};
476 if (tlp->IsAllUsed()) {
477 fully_used_tlp_tree.insert(*tlp);
478 } else {
479 partially_used_tlp_tree.insert(*tlp);
480 }
481 }
530 482
531 return *reserve_result; 483 // We succeeded!
484 tlp_guard.Cancel();
485 *out = tlr;
486 return ResultSuccess;
532} 487}
533 488
534void KProcess::FreeTLSRegion(VAddr tls_address) { 489ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) {
535 KScopedSchedulerLock lock(kernel); 490 KThreadLocalPage* page_to_free = nullptr;
536 const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE); 491
537 auto iter = 492 // Release the region.
538 std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { 493 {
539 return page.GetBaseAddress() == aligned_address; 494 KScopedSchedulerLock sl{kernel};
540 }); 495
496 // Try to find the page in the partially used list.
497 auto it = partially_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
498 if (it == partially_used_tlp_tree.end()) {
499 // If we don't find it, it has to be in the fully used list.
500 it = fully_used_tlp_tree.find_key(Common::AlignDown(addr, PageSize));
501 R_UNLESS(it != fully_used_tlp_tree.end(), ResultInvalidAddress);
502
503 // Release the region.
504 it->Release(addr);
505
506 // Move the page out of the fully used list.
507 KThreadLocalPage* tlp = std::addressof(*it);
508 fully_used_tlp_tree.erase(it);
509 if (tlp->IsAllFree()) {
510 page_to_free = tlp;
511 } else {
512 partially_used_tlp_tree.insert(*tlp);
513 }
514 } else {
515 // Release the region.
516 it->Release(addr);
517
518 // Handle the all-free case.
519 KThreadLocalPage* tlp = std::addressof(*it);
520 if (tlp->IsAllFree()) {
521 partially_used_tlp_tree.erase(it);
522 page_to_free = tlp;
523 }
524 }
525 }
526
527 // If we should free the page it was in, do so.
528 if (page_to_free != nullptr) {
529 page_to_free->Finalize();
541 530
542 // Something has gone very wrong if we're freeing a region 531 KThreadLocalPage::Free(kernel, page_to_free);
543 // with no actual page available. 532 }
544 ASSERT(iter != tls_pages.cend());
545 533
546 iter->ReleaseSlot(tls_address); 534 return ResultSuccess;
547} 535}
548 536
549void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) { 537void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) {
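(Aside: the TLS rework above tracks thread-local pages in two trees, partially used and fully used, and frees a page once every slot in it is released. The sketch below is a host-side model of that bookkeeping only: std::map stands in for the intrusive trees, page addresses are hypothetical, and the real code allocates/finalizes KThreadLocalPage under the scheduler lock. The 0x1000-byte page with eight 0x200-byte slots matches the layout described in the removed comments.)

#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>

using VAddr = std::uint64_t;
constexpr VAddr PageSize = 0x1000;
constexpr VAddr TlsEntrySize = 0x200;
constexpr std::size_t SlotsPerPage = PageSize / TlsEntrySize; // 8 slots per page

struct TlsPage {
    std::bitset<SlotsPerPage> used;
    bool IsAllUsed() const { return used.all(); }
    bool IsAllFree() const { return used.none(); }
};

class TlsAllocator {
public:
    VAddr Reserve() {
        // Prefer a partially used page, mirroring partially_used_tlp_tree.
        if (!partially_used.empty()) {
            auto it = partially_used.begin();
            const VAddr addr = ReserveSlot(it->first, it->second);
            if (it->second.IsAllUsed()) {
                fully_used.insert(*it);
                partially_used.erase(it);
            }
            return addr;
        }
        // Otherwise "allocate" a fresh page (hypothetical address source).
        const VAddr base = next_page_base;
        next_page_base += PageSize;
        TlsPage page{};
        const VAddr addr = ReserveSlot(base, page);
        (page.IsAllUsed() ? fully_used : partially_used).emplace(base, page);
        return addr;
    }

    void Release(VAddr addr) {
        const VAddr base = addr & ~(PageSize - 1);
        const std::size_t slot = (addr - base) / TlsEntrySize;
        if (auto it = fully_used.find(base); it != fully_used.end()) {
            // Releasing one slot from a full page makes it partially used again.
            it->second.used.reset(slot);
            partially_used.insert(*it);
            fully_used.erase(it);
            return;
        }
        auto it = partially_used.find(base);
        assert(it != partially_used.end());
        it->second.used.reset(slot);
        if (it->second.IsAllFree()) {
            partially_used.erase(it); // the real code also frees the backing page here
        }
    }

private:
    static VAddr ReserveSlot(VAddr base, TlsPage& page) {
        for (std::size_t i = 0; i < SlotsPerPage; ++i) {
            if (!page.used.test(i)) {
                page.used.set(i);
                return base + i * TlsEntrySize;
            }
        }
        return 0; // only called on pages known to have a free slot
    }

    std::map<VAddr, TlsPage> partially_used;
    std::map<VAddr, TlsPage> fully_used;
    VAddr next_page_base = 0x1000'0000; // hypothetical TLS region start
};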
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index cf1b67428..5ed0f2d83 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -15,6 +15,7 @@
15#include "core/hle/kernel/k_condition_variable.h" 15#include "core/hle/kernel/k_condition_variable.h"
16#include "core/hle/kernel/k_handle_table.h" 16#include "core/hle/kernel/k_handle_table.h"
17#include "core/hle/kernel/k_synchronization_object.h" 17#include "core/hle/kernel/k_synchronization_object.h"
18#include "core/hle/kernel/k_thread_local_page.h"
18#include "core/hle/kernel/k_worker_task.h" 19#include "core/hle/kernel/k_worker_task.h"
19#include "core/hle/kernel/process_capability.h" 20#include "core/hle/kernel/process_capability.h"
20#include "core/hle/kernel/slab_helpers.h" 21#include "core/hle/kernel/slab_helpers.h"
@@ -362,10 +363,10 @@ public:
362 // Thread-local storage management 363 // Thread-local storage management
363 364
364 // Marks the next available region as used and returns the address of the slot. 365 // Marks the next available region as used and returns the address of the slot.
365 [[nodiscard]] VAddr CreateTLSRegion(); 366 [[nodiscard]] ResultCode CreateThreadLocalRegion(VAddr* out);
366 367
367 // Frees a used TLS slot identified by the given address 368 // Frees a used TLS slot identified by the given address
368 void FreeTLSRegion(VAddr tls_address); 369 ResultCode DeleteThreadLocalRegion(VAddr addr);
369 370
370private: 371private:
371 void PinThread(s32 core_id, KThread* thread) { 372 void PinThread(s32 core_id, KThread* thread) {
@@ -413,13 +414,6 @@ private:
413 /// The ideal CPU core for this process, threads are scheduled on this core by default. 414 /// The ideal CPU core for this process, threads are scheduled on this core by default.
414 u8 ideal_core = 0; 415 u8 ideal_core = 0;
415 416
416 /// The Thread Local Storage area is allocated as processes create threads,
417 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
418 /// holds the TLS for a specific thread. This vector contains which parts are in use for each
419 /// page as a bitmask.
420 /// This vector will grow as more pages are allocated for new threads.
421 std::vector<TLSPage> tls_pages;
422
423 /// Contains the parsed process capability descriptors. 417 /// Contains the parsed process capability descriptors.
424 ProcessCapabilities capabilities; 418 ProcessCapabilities capabilities;
425 419
@@ -482,6 +476,12 @@ private:
482 KThread* exception_thread{}; 476 KThread* exception_thread{};
483 477
484 KLightLock state_lock; 478 KLightLock state_lock;
479
480 using TLPTree =
481 Common::IntrusiveRedBlackTreeBaseTraits<KThreadLocalPage>::TreeType<KThreadLocalPage>;
482 using TLPIterator = TLPTree::iterator;
483 TLPTree fully_used_tlp_tree;
484 TLPTree partially_used_tlp_tree;
485}; 485};
486 486
487} // namespace Kernel 487} // namespace Kernel
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 6302d5e61..2185736be 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -30,11 +30,11 @@ public:
30 30
31 /// Whether or not this server port has an HLE handler available. 31 /// Whether or not this server port has an HLE handler available.
32 bool HasSessionRequestHandler() const { 32 bool HasSessionRequestHandler() const {
33 return session_handler != nullptr; 33 return !session_handler.expired();
34 } 34 }
35 35
36 /// Gets the HLE handler for this port. 36 /// Gets the HLE handler for this port.
37 SessionRequestHandlerPtr GetSessionRequestHandler() const { 37 SessionRequestHandlerWeakPtr GetSessionRequestHandler() const {
38 return session_handler; 38 return session_handler;
39 } 39 }
40 40
@@ -42,7 +42,7 @@ public:
42 * Sets the HLE handler template for the port. ServerSessions created by connecting to this port 42 * Sets the HLE handler template for the port. ServerSessions created by connecting to this port
43 * will inherit a reference to this handler. 43 * will inherit a reference to this handler.
44 */ 44 */
45 void SetSessionHandler(SessionRequestHandlerPtr&& handler) { 45 void SetSessionHandler(SessionRequestHandlerWeakPtr&& handler) {
46 session_handler = std::move(handler); 46 session_handler = std::move(handler);
47 } 47 }
48 48
@@ -66,7 +66,7 @@ private:
66 void CleanupSessions(); 66 void CleanupSessions();
67 67
68 SessionList session_list; 68 SessionList session_list;
69 SessionRequestHandlerPtr session_handler; 69 SessionRequestHandlerWeakPtr session_handler;
70 KPort* parent{}; 70 KPort* parent{};
71}; 71};
72 72
diff --git a/src/core/hle/kernel/k_server_session.cpp b/src/core/hle/kernel/k_server_session.cpp
index 4d94eb9cf..30c56ff29 100644
--- a/src/core/hle/kernel/k_server_session.cpp
+++ b/src/core/hle/kernel/k_server_session.cpp
@@ -27,10 +27,7 @@ namespace Kernel {
27 27
28KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {} 28KServerSession::KServerSession(KernelCore& kernel_) : KSynchronizationObject{kernel_} {}
29 29
30KServerSession::~KServerSession() { 30KServerSession::~KServerSession() = default;
31 // Ensure that the global list tracking server sessions does not hold on to a reference.
32 kernel.UnregisterServerSession(this);
33}
34 31
35void KServerSession::Initialize(KSession* parent_session_, std::string&& name_, 32void KServerSession::Initialize(KSession* parent_session_, std::string&& name_,
36 std::shared_ptr<SessionRequestManager> manager_) { 33 std::shared_ptr<SessionRequestManager> manager_) {
@@ -49,6 +46,9 @@ void KServerSession::Destroy() {
49 parent->OnServerClosed(); 46 parent->OnServerClosed();
50 47
51 parent->Close(); 48 parent->Close();
49
50 // Release host emulation members.
51 manager.reset();
52} 52}
53 53
54void KServerSession::OnClientClosed() { 54void KServerSession::OnClientClosed() {
@@ -98,7 +98,12 @@ ResultCode KServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& co
98 UNREACHABLE(); 98 UNREACHABLE();
99 return ResultSuccess; // Ignore error if asserts are off 99 return ResultSuccess; // Ignore error if asserts are off
100 } 100 }
101 return manager->DomainHandler(object_id - 1)->HandleSyncRequest(*this, context); 101 if (auto strong_ptr = manager->DomainHandler(object_id - 1).lock()) {
102 return strong_ptr->HandleSyncRequest(*this, context);
103 } else {
104 UNREACHABLE();
105 return ResultSuccess;
106 }
102 107
103 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { 108 case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: {
104 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); 109 LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id);
diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h
index 05c0bec9c..5690cc757 100644
--- a/src/core/hle/kernel/k_slab_heap.h
+++ b/src/core/hle/kernel/k_slab_heap.h
@@ -16,39 +16,34 @@ class KernelCore;
16 16
17namespace impl { 17namespace impl {
18 18
19class KSlabHeapImpl final { 19class KSlabHeapImpl {
20public:
21 YUZU_NON_COPYABLE(KSlabHeapImpl); 20 YUZU_NON_COPYABLE(KSlabHeapImpl);
22 YUZU_NON_MOVEABLE(KSlabHeapImpl); 21 YUZU_NON_MOVEABLE(KSlabHeapImpl);
23 22
23public:
24 struct Node { 24 struct Node {
25 Node* next{}; 25 Node* next{};
26 }; 26 };
27 27
28public:
28 constexpr KSlabHeapImpl() = default; 29 constexpr KSlabHeapImpl() = default;
29 constexpr ~KSlabHeapImpl() = default;
30 30
31 void Initialize(std::size_t size) { 31 void Initialize() {
32 ASSERT(head == nullptr); 32 ASSERT(m_head == nullptr);
33 obj_size = size;
34 }
35
36 constexpr std::size_t GetObjectSize() const {
37 return obj_size;
38 } 33 }
39 34
40 Node* GetHead() const { 35 Node* GetHead() const {
41 return head; 36 return m_head;
42 } 37 }
43 38
44 void* Allocate() { 39 void* Allocate() {
45 Node* ret = head.load(); 40 Node* ret = m_head.load();
46 41
47 do { 42 do {
48 if (ret == nullptr) { 43 if (ret == nullptr) {
49 break; 44 break;
50 } 45 }
51 } while (!head.compare_exchange_weak(ret, ret->next)); 46 } while (!m_head.compare_exchange_weak(ret, ret->next));
52 47
53 return ret; 48 return ret;
54 } 49 }
@@ -56,170 +51,157 @@ public:
56 void Free(void* obj) { 51 void Free(void* obj) {
57 Node* node = static_cast<Node*>(obj); 52 Node* node = static_cast<Node*>(obj);
58 53
59 Node* cur_head = head.load(); 54 Node* cur_head = m_head.load();
60 do { 55 do {
61 node->next = cur_head; 56 node->next = cur_head;
62 } while (!head.compare_exchange_weak(cur_head, node)); 57 } while (!m_head.compare_exchange_weak(cur_head, node));
63 } 58 }
64 59
65private: 60private:
66 std::atomic<Node*> head{}; 61 std::atomic<Node*> m_head{};
67 std::size_t obj_size{};
68}; 62};
69 63
70} // namespace impl 64} // namespace impl
71 65
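(Aside: the Allocate/Free pair above is a lock-free intrusive free list, essentially a Treiber stack whose nodes live inside the freed objects themselves. A standalone sketch with illustrative names; a fully general lock-free stack would also need to consider the ABA problem, while this keeps the same simple CAS loops as the code above.)

#include <atomic>

struct FreeNode {
    FreeNode* next{};
};

class FreeList {
public:
    void Push(void* obj) {
        auto* node = static_cast<FreeNode*>(obj);
        FreeNode* cur = head.load(std::memory_order_relaxed);
        do {
            node->next = cur; // link the new head to the current top
        } while (!head.compare_exchange_weak(cur, node));
    }

    void* Pop() {
        FreeNode* cur = head.load();
        do {
            if (cur == nullptr) {
                break; // list exhausted
            }
        } while (!head.compare_exchange_weak(cur, cur->next));
        return cur;
    }

private:
    std::atomic<FreeNode*> head{};
};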
72class KSlabHeapBase { 66template <bool SupportDynamicExpansion>
73public: 67class KSlabHeapBase : protected impl::KSlabHeapImpl {
74 YUZU_NON_COPYABLE(KSlabHeapBase); 68 YUZU_NON_COPYABLE(KSlabHeapBase);
75 YUZU_NON_MOVEABLE(KSlabHeapBase); 69 YUZU_NON_MOVEABLE(KSlabHeapBase);
76 70
77 constexpr KSlabHeapBase() = default; 71private:
78 constexpr ~KSlabHeapBase() = default; 72 size_t m_obj_size{};
73 uintptr_t m_peak{};
74 uintptr_t m_start{};
75 uintptr_t m_end{};
79 76
80 constexpr bool Contains(uintptr_t addr) const { 77private:
81 return start <= addr && addr < end; 78 void UpdatePeakImpl(uintptr_t obj) {
82 } 79 static_assert(std::atomic_ref<uintptr_t>::is_always_lock_free);
80 std::atomic_ref<uintptr_t> peak_ref(m_peak);
83 81
84 constexpr std::size_t GetSlabHeapSize() const { 82 const uintptr_t alloc_peak = obj + this->GetObjectSize();
85 return (end - start) / GetObjectSize(); 83 uintptr_t cur_peak = m_peak;
84 do {
85 if (alloc_peak <= cur_peak) {
86 break;
87 }
88 } while (!peak_ref.compare_exchange_strong(cur_peak, alloc_peak));
86 } 89 }
87 90
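(Aside: UpdatePeakImpl above keeps a monotonic high-water mark on a plain member by wrapping it in a temporary std::atomic_ref (C++20). A compact sketch of that update loop, with illustrative names:)

#include <atomic>
#include <cstdint>

inline void UpdatePeak(std::uintptr_t& peak, std::uintptr_t alloc_end) {
    static_assert(std::atomic_ref<std::uintptr_t>::is_always_lock_free);
    std::atomic_ref<std::uintptr_t> peak_ref(peak);

    std::uintptr_t cur = peak_ref.load(std::memory_order_relaxed);
    while (alloc_end > cur) {
        // On failure, cur is reloaded and the new-peak condition is re-checked.
        if (peak_ref.compare_exchange_weak(cur, alloc_end)) {
            break;
        }
    }
}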
88 constexpr std::size_t GetObjectSize() const { 91public:
89 return impl.GetObjectSize(); 92 constexpr KSlabHeapBase() = default;
90 }
91 93
92 constexpr uintptr_t GetSlabHeapAddress() const { 94 bool Contains(uintptr_t address) const {
93 return start; 95 return m_start <= address && address < m_end;
94 } 96 }
95 97
96 std::size_t GetObjectIndexImpl(const void* obj) const { 98 void Initialize(size_t obj_size, void* memory, size_t memory_size) {
97 return (reinterpret_cast<uintptr_t>(obj) - start) / GetObjectSize(); 99 // Ensure we don't initialize a slab using null memory.
100 ASSERT(memory != nullptr);
101
102 // Set our object size.
103 m_obj_size = obj_size;
104
105 // Initialize the base allocator.
106 KSlabHeapImpl::Initialize();
107
108 // Set our tracking variables.
109 const size_t num_obj = (memory_size / obj_size);
110 m_start = reinterpret_cast<uintptr_t>(memory);
111 m_end = m_start + num_obj * obj_size;
112 m_peak = m_start;
113
114 // Free the objects.
115 u8* cur = reinterpret_cast<u8*>(m_end);
116
117 for (size_t i = 0; i < num_obj; i++) {
118 cur -= obj_size;
119 KSlabHeapImpl::Free(cur);
120 }
98 } 121 }
99 122
100 std::size_t GetPeakIndex() const { 123 size_t GetSlabHeapSize() const {
101 return GetObjectIndexImpl(reinterpret_cast<const void*>(peak)); 124 return (m_end - m_start) / this->GetObjectSize();
102 } 125 }
103 126
104 void* AllocateImpl() { 127 size_t GetObjectSize() const {
105 return impl.Allocate(); 128 return m_obj_size;
106 } 129 }
107 130
108 void FreeImpl(void* obj) { 131 void* Allocate() {
109 // Don't allow freeing an object that wasn't allocated from this heap 132 void* obj = KSlabHeapImpl::Allocate();
110 ASSERT(Contains(reinterpret_cast<uintptr_t>(obj)));
111 133
112 impl.Free(obj); 134 return obj;
113 } 135 }
114 136
115 void InitializeImpl(std::size_t obj_size, void* memory, std::size_t memory_size) { 137 void Free(void* obj) {
116 // Ensure we don't initialize a slab using null memory 138 // Don't allow freeing an object that wasn't allocated from this heap.
117 ASSERT(memory != nullptr); 139 const bool contained = this->Contains(reinterpret_cast<uintptr_t>(obj));
118 140 ASSERT(contained);
119 // Initialize the base allocator 141 KSlabHeapImpl::Free(obj);
120 impl.Initialize(obj_size); 142 }
121 143
122 // Set our tracking variables 144 size_t GetObjectIndex(const void* obj) const {
123 const std::size_t num_obj = (memory_size / obj_size); 145 if constexpr (SupportDynamicExpansion) {
124 start = reinterpret_cast<uintptr_t>(memory); 146 if (!this->Contains(reinterpret_cast<uintptr_t>(obj))) {
125 end = start + num_obj * obj_size; 147 return std::numeric_limits<size_t>::max();
126 peak = start; 148 }
149 }
127 150
128 // Free the objects 151 return (reinterpret_cast<uintptr_t>(obj) - m_start) / this->GetObjectSize();
129 u8* cur = reinterpret_cast<u8*>(end); 152 }
130 153
131 for (std::size_t i{}; i < num_obj; i++) { 154 size_t GetPeakIndex() const {
132 cur -= obj_size; 155 return this->GetObjectIndex(reinterpret_cast<const void*>(m_peak));
133 impl.Free(cur);
134 }
135 } 156 }
136 157
137private: 158 uintptr_t GetSlabHeapAddress() const {
138 using Impl = impl::KSlabHeapImpl; 159 return m_start;
160 }
139 161
140 Impl impl; 162 size_t GetNumRemaining() const {
141 uintptr_t peak{}; 163 // Only calculate the number of remaining objects under debug configuration.
142 uintptr_t start{}; 164 return 0;
143 uintptr_t end{}; 165 }
144}; 166};
145 167
146template <typename T> 168template <typename T>
147class KSlabHeap final : public KSlabHeapBase { 169class KSlabHeap final : public KSlabHeapBase<false> {
148public: 170private:
149 enum class AllocationType { 171 using BaseHeap = KSlabHeapBase<false>;
150 Host,
151 Guest,
152 };
153 172
154 explicit constexpr KSlabHeap(AllocationType allocation_type_ = AllocationType::Host) 173public:
155 : KSlabHeapBase(), allocation_type{allocation_type_} {} 174 constexpr KSlabHeap() = default;
156 175
157 void Initialize(void* memory, std::size_t memory_size) { 176 void Initialize(void* memory, size_t memory_size) {
158 if (allocation_type == AllocationType::Guest) { 177 BaseHeap::Initialize(sizeof(T), memory, memory_size);
159 InitializeImpl(sizeof(T), memory, memory_size);
160 }
161 } 178 }
162 179
163 T* Allocate() { 180 T* Allocate() {
164 switch (allocation_type) { 181 T* obj = static_cast<T*>(BaseHeap::Allocate());
165 case AllocationType::Host:
166 // Fallback for cases where we do not yet support allocating guest memory from the slab
167 // heap, such as for kernel memory regions.
168 return new T;
169
170 case AllocationType::Guest:
171 T* obj = static_cast<T*>(AllocateImpl());
172 if (obj != nullptr) {
173 new (obj) T();
174 }
175 return obj;
176 }
177 182
178 UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type); 183 if (obj != nullptr) [[likely]] {
179 return nullptr; 184 std::construct_at(obj);
185 }
186 return obj;
180 } 187 }
181 188
182 T* AllocateWithKernel(KernelCore& kernel) { 189 T* Allocate(KernelCore& kernel) {
183 switch (allocation_type) { 190 T* obj = static_cast<T*>(BaseHeap::Allocate());
184 case AllocationType::Host:
185 // Fallback for cases where we do not yet support allocating guest memory from the slab
186 // heap, such as for kernel memory regions.
187 return new T(kernel);
188 191
189 case AllocationType::Guest: 192 if (obj != nullptr) [[likely]] {
190 T* obj = static_cast<T*>(AllocateImpl()); 193 std::construct_at(obj, kernel);
191 if (obj != nullptr) {
192 new (obj) T(kernel);
193 }
194 return obj;
195 } 194 }
196 195 return obj;
197 UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
198 return nullptr;
199 } 196 }
200 197
201 void Free(T* obj) { 198 void Free(T* obj) {
202 switch (allocation_type) { 199 BaseHeap::Free(obj);
203 case AllocationType::Host:
204 // Fallback for cases where we do not yet support allocating guest memory from the slab
205 // heap, such as for kernel memory regions.
206 delete obj;
207 return;
208
209 case AllocationType::Guest:
210 FreeImpl(obj);
211 return;
212 }
213
214 UNREACHABLE_MSG("Invalid AllocationType {}", allocation_type);
215 } 200 }
216 201
217 constexpr std::size_t GetObjectIndex(const T* obj) const { 202 size_t GetObjectIndex(const T* obj) const {
218 return GetObjectIndexImpl(obj); 203 return BaseHeap::GetObjectIndex(obj);
219 } 204 }
220
221private:
222 const AllocationType allocation_type;
223}; 205};
224 206
225} // namespace Kernel 207} // namespace Kernel
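For context on the allocator above: KSlabHeapImpl hands out fixed-size objects from an intrusive free list whose head is a single atomic pointer, popping and pushing nodes with compare_exchange_weak retry loops. A minimal standalone sketch of that pattern (generic names, not the yuzu types) could look like this:

    #include <atomic>
    #include <cstddef>
    #include <vector>

    // Minimal lock-free intrusive free list, mirroring the Allocate/Free loops
    // in KSlabHeapImpl above. ABA hazards are ignored here, as in the original.
    class FreeList {
    public:
        struct Node {
            Node* next{};
        };

        // Pop the current head, retrying if another thread raced us.
        void* Allocate() {
            Node* ret = m_head.load();
            do {
                if (ret == nullptr) {
                    return nullptr; // No objects left.
                }
            } while (!m_head.compare_exchange_weak(ret, ret->next));
            return ret;
        }

        // Push the object back as the new head.
        void Free(void* obj) {
            Node* node = static_cast<Node*>(obj);
            Node* cur_head = m_head.load();
            do {
                node->next = cur_head;
            } while (!m_head.compare_exchange_weak(cur_head, node));
        }

    private:
        std::atomic<Node*> m_head{};
    };

    int main() {
        // Carve a buffer into fixed-size slots and seed the list with them,
        // the same way KSlabHeapBase::Initialize walks the slab from the end.
        constexpr std::size_t obj_size = 64;
        constexpr std::size_t num_obj = 16;
        std::vector<unsigned char> storage(obj_size * num_obj);

        FreeList list;
        for (std::size_t i = 0; i < num_obj; ++i) {
            list.Free(storage.data() + i * obj_size);
        }

        void* first = list.Allocate();
        list.Free(first);
        return first != nullptr ? 0 : 1;
    }
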
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index de3ffe0c7..ba7f72c6b 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -210,7 +210,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s
210 if (owner != nullptr) { 210 if (owner != nullptr) {
211 // Setup the TLS, if needed. 211 // Setup the TLS, if needed.
212 if (type == ThreadType::User) { 212 if (type == ThreadType::User) {
213 tls_address = owner->CreateTLSRegion(); 213 R_TRY(owner->CreateThreadLocalRegion(std::addressof(tls_address)));
214 } 214 }
215 215
216 parent = owner; 216 parent = owner;
@@ -305,7 +305,7 @@ void KThread::Finalize() {
305 305
306 // If the thread has a local region, delete it. 306 // If the thread has a local region, delete it.
307 if (tls_address != 0) { 307 if (tls_address != 0) {
308 parent->FreeTLSRegion(tls_address); 308 ASSERT(parent->DeleteThreadLocalRegion(tls_address).IsSuccess());
309 } 309 }
310 310
311 // Release any waiters. 311 // Release any waiters.
@@ -326,6 +326,9 @@ void KThread::Finalize() {
326 } 326 }
327 } 327 }
328 328
329 // Release host emulation members.
330 host_context.reset();
331
329 // Perform inherited finalization. 332 // Perform inherited finalization.
330 KSynchronizationObject::Finalize(); 333 KSynchronizationObject::Finalize();
331} 334}
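The R_TRY used for CreateThreadLocalRegion above is the kernel's early-return helper for ResultCode values; a simplified stand-in (not the actual yuzu macro, whose details may differ) behaves roughly like this:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Simplified stand-in for the ResultCode/R_TRY pattern: evaluate an
    // expression that yields a result and early-return from the caller on
    // failure.
    struct ResultCode {
        std::uint32_t raw{};
        bool IsSuccess() const { return raw == 0; }
        bool IsError() const { return raw != 0; }
    };
    constexpr ResultCode ResultSuccess{0};
    constexpr ResultCode ResultOutOfMemory{0xCE01};

    #define R_TRY(expr)                                 \
        do {                                            \
            const ResultCode _r_try_rc = (expr);        \
            if (_r_try_rc.IsError()) {                  \
                return _r_try_rc;                       \
            }                                           \
        } while (false)

    ResultCode AllocateRegion(bool succeed, std::uintptr_t* out) {
        if (!succeed) {
            return ResultOutOfMemory;
        }
        *out = 0x1000;
        return ResultSuccess;
    }

    ResultCode InitializeThread(bool succeed) {
        std::uintptr_t tls_address{};
        // Mirrors the diff: failure of the TLS allocation aborts initialization.
        R_TRY(AllocateRegion(succeed, &tls_address));
        std::printf("TLS at %#zx\n", static_cast<std::size_t>(tls_address));
        return ResultSuccess;
    }

    int main() {
        return InitializeThread(true).IsSuccess() ? 0 : 1;
    }
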
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index d058db62c..f46db7298 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -656,7 +656,7 @@ private:
656 static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles)); 656 static_assert(sizeof(SyncObjectBuffer::sync_objects) == sizeof(SyncObjectBuffer::handles));
657 657
658 struct ConditionVariableComparator { 658 struct ConditionVariableComparator {
659 struct LightCompareType { 659 struct RedBlackKeyType {
660 u64 cv_key{}; 660 u64 cv_key{};
661 s32 priority{}; 661 s32 priority{};
662 662
@@ -672,8 +672,8 @@ private:
672 template <typename T> 672 template <typename T>
673 requires( 673 requires(
674 std::same_as<T, KThread> || 674 std::same_as<T, KThread> ||
675 std::same_as<T, LightCompareType>) static constexpr int Compare(const T& lhs, 675 std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
676 const KThread& rhs) { 676 const KThread& rhs) {
677 const u64 l_key = lhs.GetConditionVariableKey(); 677 const u64 l_key = lhs.GetConditionVariableKey();
678 const u64 r_key = rhs.GetConditionVariableKey(); 678 const u64 r_key = rhs.GetConditionVariableKey();
679 679
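The comparator rename above keeps the same idea: Compare accepts either a full KThread or a lightweight RedBlackKeyType, so tree lookups can be driven by a bare key without materializing a node. A rough standard-library analogue of that heterogeneous-lookup pattern, using a transparent comparator with std::set rather than the kernel's intrusive tree, might look like:

    #include <cstdint>
    #include <set>
    #include <utility>

    // Placeholder node and key types standing in for KThread and its
    // RedBlackKeyType.
    struct Thread {
        std::uint64_t cv_key{};
        std::int32_t priority{};
    };

    struct Key {
        std::uint64_t cv_key{};
        std::int32_t priority{};
    };

    struct ThreadCompare {
        using is_transparent = void;

        static std::pair<std::uint64_t, std::int32_t> Project(const Thread& t) {
            return {t.cv_key, t.priority};
        }
        static std::pair<std::uint64_t, std::int32_t> Project(const Key& k) {
            return {k.cv_key, k.priority};
        }

        // Works for any mix of Thread and Key, like the templated Compare above.
        template <typename L, typename R>
        bool operator()(const L& lhs, const R& rhs) const {
            return Project(lhs) < Project(rhs);
        }
    };

    int main() {
        std::set<Thread, ThreadCompare> waiters{{100, 3}, {100, 7}, {200, 1}};
        // Look up by key only; no Thread object needs to be constructed.
        const bool found = waiters.find(Key{100, 7}) != waiters.end();
        return found ? 0 : 1;
    }
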
diff --git a/src/core/hle/kernel/k_thread_local_page.cpp b/src/core/hle/kernel/k_thread_local_page.cpp
new file mode 100644
index 000000000..4653c29f6
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.cpp
@@ -0,0 +1,65 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/scope_exit.h"
6#include "core/hle/kernel/k_memory_block.h"
7#include "core/hle/kernel/k_page_table.h"
8#include "core/hle/kernel/k_process.h"
9#include "core/hle/kernel/k_thread_local_page.h"
10#include "core/hle/kernel/kernel.h"
11
12namespace Kernel {
13
14ResultCode KThreadLocalPage::Initialize(KernelCore& kernel, KProcess* process) {
15 // Set that this process owns us.
16 m_owner = process;
17 m_kernel = &kernel;
18
19 // Allocate a new page.
20 KPageBuffer* page_buf = KPageBuffer::Allocate(kernel);
21 R_UNLESS(page_buf != nullptr, ResultOutOfMemory);
22 auto page_buf_guard = SCOPE_GUARD({ KPageBuffer::Free(kernel, page_buf); });
23
24 // Map the address in.
25 const auto phys_addr = kernel.System().DeviceMemory().GetPhysicalAddr(page_buf);
26 R_TRY(m_owner->PageTable().MapPages(std::addressof(m_virt_addr), 1, PageSize, phys_addr,
27 KMemoryState::ThreadLocal,
28 KMemoryPermission::UserReadWrite));
29
30 // We succeeded.
31 page_buf_guard.Cancel();
32
33 return ResultSuccess;
34}
35
36ResultCode KThreadLocalPage::Finalize() {
37 // Get the physical address of the page.
38 const PAddr phys_addr = m_owner->PageTable().GetPhysicalAddr(m_virt_addr);
39 ASSERT(phys_addr);
40
41 // Unmap the page.
42 R_TRY(m_owner->PageTable().UnmapPages(this->GetAddress(), 1, KMemoryState::ThreadLocal));
43
44 // Free the page.
45 KPageBuffer::Free(*m_kernel, KPageBuffer::FromPhysicalAddress(m_kernel->System(), phys_addr));
46
47 return ResultSuccess;
48}
49
50VAddr KThreadLocalPage::Reserve() {
51 for (size_t i = 0; i < m_is_region_free.size(); i++) {
52 if (m_is_region_free[i]) {
53 m_is_region_free[i] = false;
54 return this->GetRegionAddress(i);
55 }
56 }
57
58 return 0;
59}
60
61void KThreadLocalPage::Release(VAddr addr) {
62 m_is_region_free[this->GetRegionIndex(addr)] = true;
63}
64
65} // namespace Kernel
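Initialize above leans on a cancellable scope guard: the freshly allocated KPageBuffer is released automatically on any early error return, and the guard is cancelled once mapping succeeded. A generic sketch of that rollback idiom (not yuzu's actual SCOPE_GUARD macro) is:

    #include <cstdio>
    #include <utility>

    // Generic rollback guard: runs a cleanup callable on scope exit unless
    // Cancel() was called, mirroring the page_buf_guard usage above.
    template <typename F>
    class ScopeGuard {
    public:
        explicit ScopeGuard(F func) : m_func(std::move(func)) {}
        ~ScopeGuard() {
            if (m_active) {
                m_func();
            }
        }
        void Cancel() {
            m_active = false;
        }

        ScopeGuard(const ScopeGuard&) = delete;
        ScopeGuard& operator=(const ScopeGuard&) = delete;

    private:
        F m_func;
        bool m_active{true};
    };

    bool MapPage(bool succeed) {
        return succeed;
    }

    bool InitializePage(bool map_succeeds) {
        std::puts("allocate page");
        ScopeGuard guard{[] { std::puts("rollback: free page"); }};

        if (!MapPage(map_succeeds)) {
            return false; // Guard frees the page on this early-return path.
        }

        guard.Cancel(); // Success: keep the page mapped.
        return true;
    }

    int main() {
        InitializePage(false); // Prints the rollback message.
        InitializePage(true);  // No rollback.
        return 0;
    }
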
diff --git a/src/core/hle/kernel/k_thread_local_page.h b/src/core/hle/kernel/k_thread_local_page.h
new file mode 100644
index 000000000..658c67e94
--- /dev/null
+++ b/src/core/hle/kernel/k_thread_local_page.h
@@ -0,0 +1,112 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9
10#include "common/alignment.h"
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "common/intrusive_red_black_tree.h"
14#include "core/hle/kernel/k_page_buffer.h"
15#include "core/hle/kernel/memory_types.h"
16#include "core/hle/kernel/slab_helpers.h"
17#include "core/hle/result.h"
18
19namespace Kernel {
20
21class KernelCore;
22class KProcess;
23
24class KThreadLocalPage final : public Common::IntrusiveRedBlackTreeBaseNode<KThreadLocalPage>,
25 public KSlabAllocated<KThreadLocalPage> {
26public:
27 static constexpr size_t RegionsPerPage = PageSize / Svc::ThreadLocalRegionSize;
28 static_assert(RegionsPerPage > 0);
29
30public:
31 constexpr explicit KThreadLocalPage(VAddr addr = {}) : m_virt_addr(addr) {
32 m_is_region_free.fill(true);
33 }
34
35 constexpr VAddr GetAddress() const {
36 return m_virt_addr;
37 }
38
39 ResultCode Initialize(KernelCore& kernel, KProcess* process);
40 ResultCode Finalize();
41
42 VAddr Reserve();
43 void Release(VAddr addr);
44
45 bool IsAllUsed() const {
46 return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
47 [](bool is_free) { return !is_free; });
48 }
49
50 bool IsAllFree() const {
51 return std::ranges::all_of(m_is_region_free.begin(), m_is_region_free.end(),
52 [](bool is_free) { return is_free; });
53 }
54
55 bool IsAnyUsed() const {
56 return !this->IsAllFree();
57 }
58
59 bool IsAnyFree() const {
60 return !this->IsAllUsed();
61 }
62
63public:
64 using RedBlackKeyType = VAddr;
65
66 static constexpr RedBlackKeyType GetRedBlackKey(const RedBlackKeyType& v) {
67 return v;
68 }
69 static constexpr RedBlackKeyType GetRedBlackKey(const KThreadLocalPage& v) {
70 return v.GetAddress();
71 }
72
73 template <typename T>
74 requires(std::same_as<T, KThreadLocalPage> ||
75 std::same_as<T, RedBlackKeyType>) static constexpr int Compare(const T& lhs,
76 const KThreadLocalPage&
77 rhs) {
78 const VAddr lval = GetRedBlackKey(lhs);
79 const VAddr rval = GetRedBlackKey(rhs);
80
81 if (lval < rval) {
82 return -1;
83 } else if (lval == rval) {
84 return 0;
85 } else {
86 return 1;
87 }
88 }
89
90private:
91 constexpr VAddr GetRegionAddress(size_t i) const {
92 return this->GetAddress() + i * Svc::ThreadLocalRegionSize;
93 }
94
95 constexpr bool Contains(VAddr addr) const {
96 return this->GetAddress() <= addr && addr < this->GetAddress() + PageSize;
97 }
98
99 constexpr size_t GetRegionIndex(VAddr addr) const {
100 ASSERT(Common::IsAligned(addr, Svc::ThreadLocalRegionSize));
101 ASSERT(this->Contains(addr));
102 return (addr - this->GetAddress()) / Svc::ThreadLocalRegionSize;
103 }
104
105private:
106 VAddr m_virt_addr{};
107 KProcess* m_owner{};
108 KernelCore* m_kernel{};
109 std::array<bool, RegionsPerPage> m_is_region_free{};
110};
111
112} // namespace Kernel
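One way a process-side allocator could drive pages like the one above is to reserve from any page that still has a free region and only map a new page when all existing ones are exhausted. The sketch below assumes the 0x1000-byte page and 0x200-byte region sizes from this change and stands in for the real KProcess bookkeeping, which uses intrusive trees and guest mappings instead:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <vector>

    constexpr std::size_t PageSize = 0x1000;
    constexpr std::size_t ThreadLocalRegionSize = 0x200;
    constexpr std::size_t RegionsPerPage = PageSize / ThreadLocalRegionSize;

    // One page split into fixed-size regions, each marked free or in use.
    class ThreadLocalPage {
    public:
        explicit ThreadLocalPage(std::uintptr_t addr) : m_addr(addr) {
            m_is_region_free.fill(true);
        }

        bool IsAnyFree() const {
            for (bool is_free : m_is_region_free) {
                if (is_free) {
                    return true;
                }
            }
            return false;
        }

        // Hand out the first free region, or 0 if the page is exhausted.
        std::uintptr_t Reserve() {
            for (std::size_t i = 0; i < m_is_region_free.size(); ++i) {
                if (m_is_region_free[i]) {
                    m_is_region_free[i] = false;
                    return m_addr + i * ThreadLocalRegionSize;
                }
            }
            return 0;
        }

    private:
        std::uintptr_t m_addr;
        std::array<bool, RegionsPerPage> m_is_region_free{};
    };

    class TlsAllocator {
    public:
        std::uintptr_t ReserveRegion() {
            // Prefer any page that still has a free slot.
            for (auto& page : m_pages) {
                if (page->IsAnyFree()) {
                    return page->Reserve();
                }
            }
            // All pages are full: map a (pretend) new page and use it.
            const std::uintptr_t new_page_addr = m_next_page_addr;
            m_next_page_addr += PageSize;
            m_pages.push_back(std::make_unique<ThreadLocalPage>(new_page_addr));
            return m_pages.back()->Reserve();
        }

    private:
        std::vector<std::unique_ptr<ThreadLocalPage>> m_pages;
        std::uintptr_t m_next_page_addr{0x10000};
    };

    int main() {
        TlsAllocator tls;
        std::uintptr_t last = 0;
        // Nine reservations force a second page (8 regions per page).
        for (int i = 0; i < 9; ++i) {
            last = tls.ReserveRegion();
        }
        return last == 0x11000 ? 0 : 1;
    }
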
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 797f47021..f9828bc43 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -52,7 +52,7 @@ namespace Kernel {
52 52
53struct KernelCore::Impl { 53struct KernelCore::Impl {
54 explicit Impl(Core::System& system_, KernelCore& kernel_) 54 explicit Impl(Core::System& system_, KernelCore& kernel_)
55 : time_manager{system_}, object_list_container{kernel_}, 55 : time_manager{system_},
56 service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {} 56 service_threads_manager{1, "yuzu:ServiceThreadsManager"}, system{system_} {}
57 57
58 void SetMulticore(bool is_multi) { 58 void SetMulticore(bool is_multi) {
@@ -60,6 +60,7 @@ struct KernelCore::Impl {
60 } 60 }
61 61
62 void Initialize(KernelCore& kernel) { 62 void Initialize(KernelCore& kernel) {
63 global_object_list_container = std::make_unique<KAutoObjectWithListContainer>(kernel);
63 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); 64 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
64 global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel); 65 global_handle_table = std::make_unique<Kernel::KHandleTable>(kernel);
65 global_handle_table->Initialize(KHandleTable::MaxTableSize); 66 global_handle_table->Initialize(KHandleTable::MaxTableSize);
@@ -70,14 +71,13 @@ struct KernelCore::Impl {
70 71
71 // Derive the initial memory layout from the emulated board 72 // Derive the initial memory layout from the emulated board
72 Init::InitializeSlabResourceCounts(kernel); 73 Init::InitializeSlabResourceCounts(kernel);
73 KMemoryLayout memory_layout; 74 DeriveInitialMemoryLayout();
74 DeriveInitialMemoryLayout(memory_layout); 75 Init::InitializeSlabHeaps(system, *memory_layout);
75 Init::InitializeSlabHeaps(system, memory_layout);
76 76
77 // Initialize kernel memory and resources. 77 // Initialize kernel memory and resources.
78 InitializeSystemResourceLimit(kernel, system.CoreTiming(), memory_layout); 78 InitializeSystemResourceLimit(kernel, system.CoreTiming());
79 InitializeMemoryLayout(memory_layout); 79 InitializeMemoryLayout();
80 InitializePageSlab(); 80 Init::InitializeKPageBufferSlabHeap(system);
81 InitializeSchedulers(); 81 InitializeSchedulers();
82 InitializeSuspendThreads(); 82 InitializeSuspendThreads();
83 InitializePreemption(kernel); 83 InitializePreemption(kernel);
@@ -108,19 +108,6 @@ struct KernelCore::Impl {
108 for (auto* server_port : server_ports_) { 108 for (auto* server_port : server_ports_) {
109 server_port->Close(); 109 server_port->Close();
110 } 110 }
111 // Close all open server sessions.
112 std::unordered_set<KServerSession*> server_sessions_;
113 {
114 std::lock_guard lk(server_sessions_lock);
115 server_sessions_ = server_sessions;
116 server_sessions.clear();
117 }
118 for (auto* server_session : server_sessions_) {
119 server_session->Close();
120 }
121
122 // Ensure that the object list container is finalized and properly shutdown.
123 object_list_container.Finalize();
124 111
125 // Ensures all service threads gracefully shutdown. 112 // Ensures all service threads gracefully shutdown.
126 ClearServiceThreads(); 113 ClearServiceThreads();
@@ -195,11 +182,15 @@ struct KernelCore::Impl {
195 { 182 {
196 std::lock_guard lk(registered_objects_lock); 183 std::lock_guard lk(registered_objects_lock);
197 if (registered_objects.size()) { 184 if (registered_objects.size()) {
198 LOG_WARNING(Kernel, "{} kernel objects were dangling on shutdown!", 185 LOG_DEBUG(Kernel, "{} kernel objects were dangling on shutdown!",
199 registered_objects.size()); 186 registered_objects.size());
200 registered_objects.clear(); 187 registered_objects.clear();
201 } 188 }
202 } 189 }
190
191 // Ensure that the object list container is finalized and properly shutdown.
192 global_object_list_container->Finalize();
193 global_object_list_container.reset();
203 } 194 }
204 195
205 void InitializePhysicalCores() { 196 void InitializePhysicalCores() {
@@ -219,12 +210,11 @@ struct KernelCore::Impl {
219 210
220 // Creates the default system resource limit 211 // Creates the default system resource limit
221 void InitializeSystemResourceLimit(KernelCore& kernel, 212 void InitializeSystemResourceLimit(KernelCore& kernel,
222 const Core::Timing::CoreTiming& core_timing, 213 const Core::Timing::CoreTiming& core_timing) {
223 const KMemoryLayout& memory_layout) {
224 system_resource_limit = KResourceLimit::Create(system.Kernel()); 214 system_resource_limit = KResourceLimit::Create(system.Kernel());
225 system_resource_limit->Initialize(&core_timing); 215 system_resource_limit->Initialize(&core_timing);
226 216
227 const auto [total_size, kernel_size] = memory_layout.GetTotalAndKernelMemorySizes(); 217 const auto [total_size, kernel_size] = memory_layout->GetTotalAndKernelMemorySizes();
228 218
229 // If setting the default system values fails, then something seriously wrong has occurred. 219 // If setting the default system values fails, then something seriously wrong has occurred.
230 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size) 220 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::PhysicalMemory, total_size)
@@ -293,15 +283,16 @@ struct KernelCore::Impl {
293 283
294 // Gets the dummy KThread for the caller, allocating a new one if this is the first time 284 // Gets the dummy KThread for the caller, allocating a new one if this is the first time
295 KThread* GetHostDummyThread() { 285 KThread* GetHostDummyThread() {
296 auto make_thread = [this]() { 286 auto initialize = [this](KThread* thread) {
297 KThread* thread = KThread::Create(system.Kernel());
298 ASSERT(KThread::InitializeDummyThread(thread).IsSuccess()); 287 ASSERT(KThread::InitializeDummyThread(thread).IsSuccess());
299 thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId())); 288 thread->SetName(fmt::format("DummyThread:{}", GetHostThreadId()));
300 return thread; 289 return thread;
301 }; 290 };
302 291
303 thread_local KThread* saved_thread = make_thread(); 292 thread_local auto raw_thread = KThread(system.Kernel());
304 return saved_thread; 293 thread_local auto thread = initialize(&raw_thread);
294
295 return thread;
305 } 296 }
306 297
307 /// Registers a CPU core thread by allocating a host thread ID for it 298 /// Registers a CPU core thread by allocating a host thread ID for it
@@ -353,16 +344,18 @@ struct KernelCore::Impl {
353 return schedulers[thread_id]->GetCurrentThread(); 344 return schedulers[thread_id]->GetCurrentThread();
354 } 345 }
355 346
356 void DeriveInitialMemoryLayout(KMemoryLayout& memory_layout) { 347 void DeriveInitialMemoryLayout() {
348 memory_layout = std::make_unique<KMemoryLayout>();
349
357 // Insert the root region for the virtual memory tree, from which all other regions will 350 // Insert the root region for the virtual memory tree, from which all other regions will
358 // derive. 351 // derive.
359 memory_layout.GetVirtualMemoryRegionTree().InsertDirectly( 352 memory_layout->GetVirtualMemoryRegionTree().InsertDirectly(
360 KernelVirtualAddressSpaceBase, 353 KernelVirtualAddressSpaceBase,
361 KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1); 354 KernelVirtualAddressSpaceBase + KernelVirtualAddressSpaceSize - 1);
362 355
363 // Insert the root region for the physical memory tree, from which all other regions will 356 // Insert the root region for the physical memory tree, from which all other regions will
364 // derive. 357 // derive.
365 memory_layout.GetPhysicalMemoryRegionTree().InsertDirectly( 358 memory_layout->GetPhysicalMemoryRegionTree().InsertDirectly(
366 KernelPhysicalAddressSpaceBase, 359 KernelPhysicalAddressSpaceBase,
367 KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1); 360 KernelPhysicalAddressSpaceBase + KernelPhysicalAddressSpaceSize - 1);
368 361
@@ -379,7 +372,7 @@ struct KernelCore::Impl {
379 if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) { 372 if (!(kernel_region_start + KernelRegionSize - 1 <= KernelVirtualAddressSpaceLast)) {
380 kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start; 373 kernel_region_size = KernelVirtualAddressSpaceEnd - kernel_region_start;
381 } 374 }
382 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 375 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
383 kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel)); 376 kernel_region_start, kernel_region_size, KMemoryRegionType_Kernel));
384 377
385 // Setup the code region. 378 // Setup the code region.
@@ -388,11 +381,11 @@ struct KernelCore::Impl {
388 Common::AlignDown(code_start_virt_addr, CodeRegionAlign); 381 Common::AlignDown(code_start_virt_addr, CodeRegionAlign);
389 constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign); 382 constexpr VAddr code_region_end = Common::AlignUp(code_end_virt_addr, CodeRegionAlign);
390 constexpr size_t code_region_size = code_region_end - code_region_start; 383 constexpr size_t code_region_size = code_region_end - code_region_start;
391 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 384 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
392 code_region_start, code_region_size, KMemoryRegionType_KernelCode)); 385 code_region_start, code_region_size, KMemoryRegionType_KernelCode));
393 386
394 // Setup board-specific device physical regions. 387 // Setup board-specific device physical regions.
395 Init::SetupDevicePhysicalMemoryRegions(memory_layout); 388 Init::SetupDevicePhysicalMemoryRegions(*memory_layout);
396 389
397 // Determine the amount of space needed for the misc region. 390 // Determine the amount of space needed for the misc region.
398 size_t misc_region_needed_size; 391 size_t misc_region_needed_size;
@@ -401,7 +394,7 @@ struct KernelCore::Impl {
401 misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize)); 394 misc_region_needed_size = Core::Hardware::NUM_CPU_CORES * (3 * (PageSize + PageSize));
402 395
403 // Account for each auto-map device. 396 // Account for each auto-map device.
404 for (const auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { 397 for (const auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
405 if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) { 398 if (region.HasTypeAttribute(KMemoryRegionAttr_ShouldKernelMap)) {
406 // Check that the region is valid. 399 // Check that the region is valid.
407 ASSERT(region.GetEndAddress() != 0); 400 ASSERT(region.GetEndAddress() != 0);
@@ -426,22 +419,22 @@ struct KernelCore::Impl {
426 419
427 // Setup the misc region. 420 // Setup the misc region.
428 const VAddr misc_region_start = 421 const VAddr misc_region_start =
429 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( 422 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
430 misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel); 423 misc_region_size, MiscRegionAlign, KMemoryRegionType_Kernel);
431 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 424 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
432 misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc)); 425 misc_region_start, misc_region_size, KMemoryRegionType_KernelMisc));
433 426
434 // Setup the stack region. 427 // Setup the stack region.
435 constexpr size_t StackRegionSize = 14_MiB; 428 constexpr size_t StackRegionSize = 14_MiB;
436 constexpr size_t StackRegionAlign = KernelAslrAlignment; 429 constexpr size_t StackRegionAlign = KernelAslrAlignment;
437 const VAddr stack_region_start = 430 const VAddr stack_region_start =
438 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( 431 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
439 StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel); 432 StackRegionSize, StackRegionAlign, KMemoryRegionType_Kernel);
440 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 433 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
441 stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack)); 434 stack_region_start, StackRegionSize, KMemoryRegionType_KernelStack));
442 435
443 // Determine the size of the resource region. 436 // Determine the size of the resource region.
444 const size_t resource_region_size = memory_layout.GetResourceRegionSizeForInit(); 437 const size_t resource_region_size = memory_layout->GetResourceRegionSizeForInit();
445 438
446 // Determine the size of the slab region. 439 // Determine the size of the slab region.
447 const size_t slab_region_size = 440 const size_t slab_region_size =
@@ -458,23 +451,23 @@ struct KernelCore::Impl {
458 Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) - 451 Common::AlignUp(code_end_phys_addr + slab_region_size, SlabRegionAlign) -
459 Common::AlignDown(code_end_phys_addr, SlabRegionAlign); 452 Common::AlignDown(code_end_phys_addr, SlabRegionAlign);
460 const VAddr slab_region_start = 453 const VAddr slab_region_start =
461 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( 454 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
462 slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) + 455 slab_region_needed_size, SlabRegionAlign, KMemoryRegionType_Kernel) +
463 (code_end_phys_addr % SlabRegionAlign); 456 (code_end_phys_addr % SlabRegionAlign);
464 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 457 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
465 slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab)); 458 slab_region_start, slab_region_size, KMemoryRegionType_KernelSlab));
466 459
467 // Setup the temp region. 460 // Setup the temp region.
468 constexpr size_t TempRegionSize = 128_MiB; 461 constexpr size_t TempRegionSize = 128_MiB;
469 constexpr size_t TempRegionAlign = KernelAslrAlignment; 462 constexpr size_t TempRegionAlign = KernelAslrAlignment;
470 const VAddr temp_region_start = 463 const VAddr temp_region_start =
471 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegion( 464 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegion(
472 TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel); 465 TempRegionSize, TempRegionAlign, KMemoryRegionType_Kernel);
473 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize, 466 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(temp_region_start, TempRegionSize,
474 KMemoryRegionType_KernelTemp)); 467 KMemoryRegionType_KernelTemp));
475 468
476 // Automatically map in devices that have auto-map attributes. 469 // Automatically map in devices that have auto-map attributes.
477 for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { 470 for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
478 // We only care about kernel regions. 471 // We only care about kernel regions.
479 if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) { 472 if (!region.IsDerivedFrom(KMemoryRegionType_Kernel)) {
480 continue; 473 continue;
@@ -501,21 +494,21 @@ struct KernelCore::Impl {
501 const size_t map_size = 494 const size_t map_size =
502 Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr; 495 Common::AlignUp(region.GetEndAddress(), PageSize) - map_phys_addr;
503 const VAddr map_virt_addr = 496 const VAddr map_virt_addr =
504 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( 497 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
505 map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize); 498 map_size, PageSize, KMemoryRegionType_KernelMisc, PageSize);
506 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 499 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
507 map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice)); 500 map_virt_addr, map_size, KMemoryRegionType_KernelMiscMappedDevice));
508 region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr); 501 region.SetPairAddress(map_virt_addr + region.GetAddress() - map_phys_addr);
509 } 502 }
510 503
511 Init::SetupDramPhysicalMemoryRegions(memory_layout); 504 Init::SetupDramPhysicalMemoryRegions(*memory_layout);
512 505
513 // Insert a physical region for the kernel code region. 506 // Insert a physical region for the kernel code region.
514 ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( 507 ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
515 code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode)); 508 code_start_phys_addr, code_region_size, KMemoryRegionType_DramKernelCode));
516 509
517 // Insert a physical region for the kernel slab region. 510 // Insert a physical region for the kernel slab region.
518 ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( 511 ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
519 slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab)); 512 slab_start_phys_addr, slab_region_size, KMemoryRegionType_DramKernelSlab));
520 513
521 // Determine size available for kernel page table heaps, requiring > 8 MB. 514 // Determine size available for kernel page table heaps, requiring > 8 MB.
@@ -524,12 +517,12 @@ struct KernelCore::Impl {
524 ASSERT(page_table_heap_size / 4_MiB > 2); 517 ASSERT(page_table_heap_size / 4_MiB > 2);
525 518
526 // Insert a physical region for the kernel page table heap region 519 // Insert a physical region for the kernel page table heap region
527 ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( 520 ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
528 slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap)); 521 slab_end_phys_addr, page_table_heap_size, KMemoryRegionType_DramKernelPtHeap));
529 522
530 // All DRAM regions that we haven't tagged by this point will be mapped under the linear 523 // All DRAM regions that we haven't tagged by this point will be mapped under the linear
531 // mapping. Tag them. 524 // mapping. Tag them.
532 for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { 525 for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
533 if (region.GetType() == KMemoryRegionType_Dram) { 526 if (region.GetType() == KMemoryRegionType_Dram) {
534 // Check that the region is valid. 527 // Check that the region is valid.
535 ASSERT(region.GetEndAddress() != 0); 528 ASSERT(region.GetEndAddress() != 0);
@@ -541,7 +534,7 @@ struct KernelCore::Impl {
541 534
542 // Get the linear region extents. 535 // Get the linear region extents.
543 const auto linear_extents = 536 const auto linear_extents =
544 memory_layout.GetPhysicalMemoryRegionTree().GetDerivedRegionExtents( 537 memory_layout->GetPhysicalMemoryRegionTree().GetDerivedRegionExtents(
545 KMemoryRegionAttr_LinearMapped); 538 KMemoryRegionAttr_LinearMapped);
546 ASSERT(linear_extents.GetEndAddress() != 0); 539 ASSERT(linear_extents.GetEndAddress() != 0);
547 540
@@ -553,7 +546,7 @@ struct KernelCore::Impl {
553 Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) - 546 Common::AlignUp(linear_extents.GetEndAddress(), LinearRegionAlign) -
554 aligned_linear_phys_start; 547 aligned_linear_phys_start;
555 const VAddr linear_region_start = 548 const VAddr linear_region_start =
556 memory_layout.GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard( 549 memory_layout->GetVirtualMemoryRegionTree().GetRandomAlignedRegionWithGuard(
557 linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign); 550 linear_region_size, LinearRegionAlign, KMemoryRegionType_None, LinearRegionAlign);
558 551
559 const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start; 552 const u64 linear_region_phys_to_virt_diff = linear_region_start - aligned_linear_phys_start;
@@ -562,7 +555,7 @@ struct KernelCore::Impl {
562 { 555 {
563 PAddr cur_phys_addr = 0; 556 PAddr cur_phys_addr = 0;
564 u64 cur_size = 0; 557 u64 cur_size = 0;
565 for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { 558 for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
566 if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) { 559 if (!region.HasTypeAttribute(KMemoryRegionAttr_LinearMapped)) {
567 continue; 560 continue;
568 } 561 }
@@ -581,55 +574,49 @@ struct KernelCore::Impl {
581 574
582 const VAddr region_virt_addr = 575 const VAddr region_virt_addr =
583 region.GetAddress() + linear_region_phys_to_virt_diff; 576 region.GetAddress() + linear_region_phys_to_virt_diff;
584 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 577 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
585 region_virt_addr, region.GetSize(), 578 region_virt_addr, region.GetSize(),
586 GetTypeForVirtualLinearMapping(region.GetType()))); 579 GetTypeForVirtualLinearMapping(region.GetType())));
587 region.SetPairAddress(region_virt_addr); 580 region.SetPairAddress(region_virt_addr);
588 581
589 KMemoryRegion* virt_region = 582 KMemoryRegion* virt_region =
590 memory_layout.GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr); 583 memory_layout->GetVirtualMemoryRegionTree().FindModifiable(region_virt_addr);
591 ASSERT(virt_region != nullptr); 584 ASSERT(virt_region != nullptr);
592 virt_region->SetPairAddress(region.GetAddress()); 585 virt_region->SetPairAddress(region.GetAddress());
593 } 586 }
594 } 587 }
595 588
596 // Insert regions for the initial page table region. 589 // Insert regions for the initial page table region.
597 ASSERT(memory_layout.GetPhysicalMemoryRegionTree().Insert( 590 ASSERT(memory_layout->GetPhysicalMemoryRegionTree().Insert(
598 resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt)); 591 resource_end_phys_addr, KernelPageTableHeapSize, KMemoryRegionType_DramKernelInitPt));
599 ASSERT(memory_layout.GetVirtualMemoryRegionTree().Insert( 592 ASSERT(memory_layout->GetVirtualMemoryRegionTree().Insert(
600 resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize, 593 resource_end_phys_addr + linear_region_phys_to_virt_diff, KernelPageTableHeapSize,
601 KMemoryRegionType_VirtualDramKernelInitPt)); 594 KMemoryRegionType_VirtualDramKernelInitPt));
602 595
603 // All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to 596 // All linear-mapped DRAM regions that we haven't tagged by this point will be allocated to
604 // some pool partition. Tag them. 597 // some pool partition. Tag them.
605 for (auto& region : memory_layout.GetPhysicalMemoryRegionTree()) { 598 for (auto& region : memory_layout->GetPhysicalMemoryRegionTree()) {
606 if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) { 599 if (region.GetType() == (KMemoryRegionType_Dram | KMemoryRegionAttr_LinearMapped)) {
607 region.SetType(KMemoryRegionType_DramPoolPartition); 600 region.SetType(KMemoryRegionType_DramPoolPartition);
608 } 601 }
609 } 602 }
610 603
611 // Setup all other memory regions needed to arrange the pool partitions. 604 // Setup all other memory regions needed to arrange the pool partitions.
612 Init::SetupPoolPartitionMemoryRegions(memory_layout); 605 Init::SetupPoolPartitionMemoryRegions(*memory_layout);
613 606
614 // Cache all linear regions in their own trees for faster access, later. 607 // Cache all linear regions in their own trees for faster access, later.
615 memory_layout.InitializeLinearMemoryRegionTrees(aligned_linear_phys_start, 608 memory_layout->InitializeLinearMemoryRegionTrees(aligned_linear_phys_start,
616 linear_region_start); 609 linear_region_start);
617 } 610 }
618 611
619 void InitializeMemoryLayout(const KMemoryLayout& memory_layout) { 612 void InitializeMemoryLayout() {
620 const auto system_pool = memory_layout.GetKernelSystemPoolRegionPhysicalExtents(); 613 const auto system_pool = memory_layout->GetKernelSystemPoolRegionPhysicalExtents();
621 const auto applet_pool = memory_layout.GetKernelAppletPoolRegionPhysicalExtents();
622 const auto application_pool = memory_layout.GetKernelApplicationPoolRegionPhysicalExtents();
623 614
624 // Initialize memory managers 615 // Initialize the memory manager.
625 memory_manager = std::make_unique<KMemoryManager>(system); 616 memory_manager = std::make_unique<KMemoryManager>(system);
626 memory_manager->InitializeManager(KMemoryManager::Pool::Application, 617 const auto& management_region = memory_layout->GetPoolManagementRegion();
627 application_pool.GetAddress(), 618 ASSERT(management_region.GetEndAddress() != 0);
628 application_pool.GetEndAddress()); 619 memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
629 memory_manager->InitializeManager(KMemoryManager::Pool::Applet, applet_pool.GetAddress(),
630 applet_pool.GetEndAddress());
631 memory_manager->InitializeManager(KMemoryManager::Pool::System, system_pool.GetAddress(),
632 system_pool.GetEndAddress());
633 620
634 // Setup memory regions for emulated processes 621 // Setup memory regions for emulated processes
635 // TODO(bunnei): These should not be hardcoded regions initialized within the kernel 622 // TODO(bunnei): These should not be hardcoded regions initialized within the kernel
@@ -666,22 +653,6 @@ struct KernelCore::Impl {
666 time_phys_addr, time_size, "Time:SharedMemory"); 653 time_phys_addr, time_size, "Time:SharedMemory");
667 } 654 }
668 655
669 void InitializePageSlab() {
670 // Allocate slab heaps
671 user_slab_heap_pages =
672 std::make_unique<KSlabHeap<Page>>(KSlabHeap<Page>::AllocationType::Guest);
673
674 // TODO(ameerj): This should be derived, not hardcoded within the kernel
675 constexpr u64 user_slab_heap_size{0x3de000};
676 // Reserve slab heaps
677 ASSERT(
678 system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
679 // Initialize slab heap
680 user_slab_heap_pages->Initialize(
681 system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
682 user_slab_heap_size);
683 }
684
685 KClientPort* CreateNamedServicePort(std::string name) { 656 KClientPort* CreateNamedServicePort(std::string name) {
686 auto search = service_interface_factory.find(name); 657 auto search = service_interface_factory.find(name);
687 if (search == service_interface_factory.end()) { 658 if (search == service_interface_factory.end()) {
@@ -719,7 +690,6 @@ struct KernelCore::Impl {
719 } 690 }
720 691
721 std::mutex server_ports_lock; 692 std::mutex server_ports_lock;
722 std::mutex server_sessions_lock;
723 std::mutex registered_objects_lock; 693 std::mutex registered_objects_lock;
724 std::mutex registered_in_use_objects_lock; 694 std::mutex registered_in_use_objects_lock;
725 695
@@ -743,14 +713,13 @@ struct KernelCore::Impl {
743 // stores all the objects in place. 713 // stores all the objects in place.
744 std::unique_ptr<KHandleTable> global_handle_table; 714 std::unique_ptr<KHandleTable> global_handle_table;
745 715
746 KAutoObjectWithListContainer object_list_container; 716 std::unique_ptr<KAutoObjectWithListContainer> global_object_list_container;
747 717
748 /// Map of named ports managed by the kernel, which can be retrieved using 718 /// Map of named ports managed by the kernel, which can be retrieved using
749 /// the ConnectToPort SVC. 719 /// the ConnectToPort SVC.
750 std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory; 720 std::unordered_map<std::string, ServiceInterfaceFactory> service_interface_factory;
751 NamedPortTable named_ports; 721 NamedPortTable named_ports;
752 std::unordered_set<KServerPort*> server_ports; 722 std::unordered_set<KServerPort*> server_ports;
753 std::unordered_set<KServerSession*> server_sessions;
754 std::unordered_set<KAutoObject*> registered_objects; 723 std::unordered_set<KAutoObject*> registered_objects;
755 std::unordered_set<KAutoObject*> registered_in_use_objects; 724 std::unordered_set<KAutoObject*> registered_in_use_objects;
756 725
@@ -762,7 +731,6 @@ struct KernelCore::Impl {
762 731
763 // Kernel memory management 732 // Kernel memory management
764 std::unique_ptr<KMemoryManager> memory_manager; 733 std::unique_ptr<KMemoryManager> memory_manager;
765 std::unique_ptr<KSlabHeap<Page>> user_slab_heap_pages;
766 734
767 // Shared memory for services 735 // Shared memory for services
768 Kernel::KSharedMemory* hid_shared_mem{}; 736 Kernel::KSharedMemory* hid_shared_mem{};
@@ -770,6 +738,9 @@ struct KernelCore::Impl {
770 Kernel::KSharedMemory* irs_shared_mem{}; 738 Kernel::KSharedMemory* irs_shared_mem{};
771 Kernel::KSharedMemory* time_shared_mem{}; 739 Kernel::KSharedMemory* time_shared_mem{};
772 740
741 // Memory layout
742 std::unique_ptr<KMemoryLayout> memory_layout;
743
773 // Threads used for services 744 // Threads used for services
774 std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads; 745 std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
775 Common::ThreadWorker service_threads_manager; 746 Common::ThreadWorker service_threads_manager;
@@ -918,11 +889,11 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
918} 889}
919 890
920KAutoObjectWithListContainer& KernelCore::ObjectListContainer() { 891KAutoObjectWithListContainer& KernelCore::ObjectListContainer() {
921 return impl->object_list_container; 892 return *impl->global_object_list_container;
922} 893}
923 894
924const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const { 895const KAutoObjectWithListContainer& KernelCore::ObjectListContainer() const {
925 return impl->object_list_container; 896 return *impl->global_object_list_container;
926} 897}
927 898
928void KernelCore::InvalidateAllInstructionCaches() { 899void KernelCore::InvalidateAllInstructionCaches() {
@@ -952,16 +923,6 @@ KClientPort* KernelCore::CreateNamedServicePort(std::string name) {
952 return impl->CreateNamedServicePort(std::move(name)); 923 return impl->CreateNamedServicePort(std::move(name));
953} 924}
954 925
955void KernelCore::RegisterServerSession(KServerSession* server_session) {
956 std::lock_guard lk(impl->server_sessions_lock);
957 impl->server_sessions.insert(server_session);
958}
959
960void KernelCore::UnregisterServerSession(KServerSession* server_session) {
961 std::lock_guard lk(impl->server_sessions_lock);
962 impl->server_sessions.erase(server_session);
963}
964
965void KernelCore::RegisterKernelObject(KAutoObject* object) { 926void KernelCore::RegisterKernelObject(KAutoObject* object) {
966 std::lock_guard lk(impl->registered_objects_lock); 927 std::lock_guard lk(impl->registered_objects_lock);
967 impl->registered_objects.insert(object); 928 impl->registered_objects.insert(object);
@@ -1034,14 +995,6 @@ const KMemoryManager& KernelCore::MemoryManager() const {
1034 return *impl->memory_manager; 995 return *impl->memory_manager;
1035} 996}
1036 997
1037KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() {
1038 return *impl->user_slab_heap_pages;
1039}
1040
1041const KSlabHeap<Page>& KernelCore::GetUserSlabHeapPages() const {
1042 return *impl->user_slab_heap_pages;
1043}
1044
1045Kernel::KSharedMemory& KernelCore::GetHidSharedMem() { 998Kernel::KSharedMemory& KernelCore::GetHidSharedMem() {
1046 return *impl->hid_shared_mem; 999 return *impl->hid_shared_mem;
1047} 1000}
@@ -1135,6 +1088,10 @@ const KWorkerTaskManager& KernelCore::WorkerTaskManager() const {
1135 return impl->worker_task_manager; 1088 return impl->worker_task_manager;
1136} 1089}
1137 1090
1091const KMemoryLayout& KernelCore::MemoryLayout() const {
1092 return *impl->memory_layout;
1093}
1094
1138bool KernelCore::IsPhantomModeForSingleCore() const { 1095bool KernelCore::IsPhantomModeForSingleCore() const {
1139 return impl->IsPhantomModeForSingleCore(); 1096 return impl->IsPhantomModeForSingleCore();
1140} 1097}
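GetHostDummyThread above now builds its per-host-thread KThread as a function-local thread_local object that is initialized exactly once per thread via a lambda, instead of heap-allocating it. The general shape of that pattern, with placeholder types, is:

    #include <cstdio>
    #include <string>
    #include <thread>

    // Each host thread gets exactly one lazily created object; construction
    // and one-time setup run the first time the function executes on that
    // thread.
    struct DummyThread {
        std::string name;
    };

    DummyThread* GetHostDummyThread() {
        auto initialize = [](DummyThread* t) {
            t->name = "DummyThread"; // One-time setup (naming, registration, ...).
            return t;
        };

        thread_local DummyThread raw_thread{};
        thread_local DummyThread* thread = initialize(&raw_thread);
        return thread;
    }

    int main() {
        std::thread worker([] {
            std::printf("worker: %s\n", GetHostDummyThread()->name.c_str());
        });
        std::printf("main:   %s\n", GetHostDummyThread()->name.c_str());
        worker.join();
        return 0;
    }
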
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 0e04fc3bb..7087bbda6 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -41,7 +41,9 @@ class KClientSession;
41class KEvent; 41class KEvent;
42class KHandleTable; 42class KHandleTable;
43class KLinkedListNode; 43class KLinkedListNode;
44class KMemoryLayout;
44class KMemoryManager; 45class KMemoryManager;
46class KPageBuffer;
45class KPort; 47class KPort;
46class KProcess; 48class KProcess;
47class KResourceLimit; 49class KResourceLimit;
@@ -51,6 +53,7 @@ class KSession;
51class KSharedMemory; 53class KSharedMemory;
52class KSharedMemoryInfo; 54class KSharedMemoryInfo;
53class KThread; 55class KThread;
56class KThreadLocalPage;
54class KTransferMemory; 57class KTransferMemory;
55class KWorkerTaskManager; 58class KWorkerTaskManager;
56class KWritableEvent; 59class KWritableEvent;
@@ -193,14 +196,6 @@ public:
193 /// Opens a port to a service previously registered with RegisterNamedService. 196 /// Opens a port to a service previously registered with RegisterNamedService.
194 KClientPort* CreateNamedServicePort(std::string name); 197 KClientPort* CreateNamedServicePort(std::string name);
195 198
196 /// Registers a server session with the global emulation state, to be freed on shutdown. This is

197 /// necessary because we do not emulate processes for HLE sessions.
198 void RegisterServerSession(KServerSession* server_session);
199
200 /// Unregisters a server session previously registered with RegisterServerSession when it was
201 /// destroyed during the current emulation session.
202 void UnregisterServerSession(KServerSession* server_session);
203
204 /// Registers all kernel objects with the global emulation state, this is purely for tracking 199 /// Registers all kernel objects with the global emulation state, this is purely for tracking
205 /// leaks after emulation has been shutdown. 200 /// leaks after emulation has been shutdown.
206 void RegisterKernelObject(KAutoObject* object); 201 void RegisterKernelObject(KAutoObject* object);
@@ -238,12 +233,6 @@ public:
238 /// Gets the virtual memory manager for the kernel. 233 /// Gets the virtual memory manager for the kernel.
239 const KMemoryManager& MemoryManager() const; 234 const KMemoryManager& MemoryManager() const;
240 235
241 /// Gets the slab heap allocated for user space pages.
242 KSlabHeap<Page>& GetUserSlabHeapPages();
243
244 /// Gets the slab heap allocated for user space pages.
245 const KSlabHeap<Page>& GetUserSlabHeapPages() const;
246
247 /// Gets the shared memory object for HID services. 236 /// Gets the shared memory object for HID services.
248 Kernel::KSharedMemory& GetHidSharedMem(); 237 Kernel::KSharedMemory& GetHidSharedMem();
249 238
@@ -335,6 +324,10 @@ public:
335 return slab_heap_container->writeable_event; 324 return slab_heap_container->writeable_event;
336 } else if constexpr (std::is_same_v<T, KCodeMemory>) { 325 } else if constexpr (std::is_same_v<T, KCodeMemory>) {
337 return slab_heap_container->code_memory; 326 return slab_heap_container->code_memory;
327 } else if constexpr (std::is_same_v<T, KPageBuffer>) {
328 return slab_heap_container->page_buffer;
329 } else if constexpr (std::is_same_v<T, KThreadLocalPage>) {
330 return slab_heap_container->thread_local_page;
338 } 331 }
339 } 332 }
340 333
@@ -350,6 +343,9 @@ public:
350 /// Gets the current worker task manager, used for dispatching KThread/KProcess tasks. 343 /// Gets the current worker task manager, used for dispatching KThread/KProcess tasks.
351 const KWorkerTaskManager& WorkerTaskManager() const; 344 const KWorkerTaskManager& WorkerTaskManager() const;
352 345
346 /// Gets the memory layout.
347 const KMemoryLayout& MemoryLayout() const;
348
353private: 349private:
354 friend class KProcess; 350 friend class KProcess;
355 friend class KThread; 351 friend class KThread;
@@ -393,6 +389,8 @@ private:
393 KSlabHeap<KTransferMemory> transfer_memory; 389 KSlabHeap<KTransferMemory> transfer_memory;
394 KSlabHeap<KWritableEvent> writeable_event; 390 KSlabHeap<KWritableEvent> writeable_event;
395 KSlabHeap<KCodeMemory> code_memory; 391 KSlabHeap<KCodeMemory> code_memory;
392 KSlabHeap<KPageBuffer> page_buffer;
393 KSlabHeap<KThreadLocalPage> thread_local_page;
396 }; 394 };
397 395
398 std::unique_ptr<SlabHeapContainer> slab_heap_container; 396 std::unique_ptr<SlabHeapContainer> slab_heap_container;
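The SlabHeap<T>() accessor extended above resolves a type to its heap at compile time through a chain of if constexpr checks. A cut-down illustration of that dispatch, with placeholder types instead of the real kernel objects:

    #include <type_traits>
    #include <vector>

    // Placeholder object types standing in for KThread, KEvent, KPageBuffer, ...
    struct Thread {};
    struct Event {};
    struct PageBuffer {};

    // Per-type storage standing in for KSlabHeap<T>.
    template <typename T>
    using Heap = std::vector<T>;

    struct HeapContainer {
        Heap<Thread> thread;
        Heap<Event> event;
        Heap<PageBuffer> page_buffer;
    };

    // Compile-time dispatch: each instantiation resolves to exactly one member.
    template <typename T>
    Heap<T>& SlabHeap(HeapContainer& c) {
        if constexpr (std::is_same_v<T, Thread>) {
            return c.thread;
        } else if constexpr (std::is_same_v<T, Event>) {
            return c.event;
        } else if constexpr (std::is_same_v<T, PageBuffer>) {
            return c.page_buffer;
        }
    }

    int main() {
        HeapContainer container;
        SlabHeap<PageBuffer>(container).push_back(PageBuffer{});
        return SlabHeap<PageBuffer>(container).size() == 1 ? 0 : 1;
    }
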
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp
index 4eb3a5988..52d25b837 100644
--- a/src/core/hle/kernel/service_thread.cpp
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -49,12 +49,9 @@ ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std
49 return; 49 return;
50 } 50 }
51 51
52 // Allocate a dummy guest thread for this host thread.
52 kernel.RegisterHostThread(); 53 kernel.RegisterHostThread();
53 54
54 // Ensure the dummy thread allocated for this host thread is closed on exit.
55 auto* dummy_thread = kernel.GetCurrentEmuThread();
56 SCOPE_EXIT({ dummy_thread->Close(); });
57
58 while (true) { 55 while (true) {
59 std::function<void()> task; 56 std::function<void()> task;
60 57
diff --git a/src/core/hle/kernel/slab_helpers.h b/src/core/hle/kernel/slab_helpers.h
index f1c11256e..dc1e48fc9 100644
--- a/src/core/hle/kernel/slab_helpers.h
+++ b/src/core/hle/kernel/slab_helpers.h
@@ -59,7 +59,7 @@ class KAutoObjectWithSlabHeapAndContainer : public Base {
59 59
60private: 60private:
61 static Derived* Allocate(KernelCore& kernel) { 61 static Derived* Allocate(KernelCore& kernel) {
62 return kernel.SlabHeap<Derived>().AllocateWithKernel(kernel); 62 return kernel.SlabHeap<Derived>().Allocate(kernel);
63 } 63 }
64 64
65 static void Free(KernelCore& kernel, Derived* obj) { 65 static void Free(KernelCore& kernel, Derived* obj) {
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 365e22e4e..b2e9ec092 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -96,4 +96,6 @@ constexpr inline s32 IdealCoreNoUpdate = -3;
96constexpr inline s32 LowestThreadPriority = 63; 96constexpr inline s32 LowestThreadPriority = 63;
97constexpr inline s32 HighestThreadPriority = 0; 97constexpr inline s32 HighestThreadPriority = 0;
98 98
99constexpr inline size_t ThreadLocalRegionSize = 0x200;
100
99} // namespace Kernel::Svc 101} // namespace Kernel::Svc
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 2f8e21568..420de3c54 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -980,7 +980,7 @@ private:
980 LOG_DEBUG(Service_AM, "called"); 980 LOG_DEBUG(Service_AM, "called");
981 981
982 IPC::RequestParser rp{ctx}; 982 IPC::RequestParser rp{ctx};
983 applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>()); 983 applet->GetBroker().PushNormalDataFromGame(rp.PopIpcInterface<IStorage>().lock());
984 984
985 IPC::ResponseBuilder rb{ctx, 2}; 985 IPC::ResponseBuilder rb{ctx, 2};
986 rb.Push(ResultSuccess); 986 rb.Push(ResultSuccess);
@@ -1007,7 +1007,7 @@ private:
1007 LOG_DEBUG(Service_AM, "called"); 1007 LOG_DEBUG(Service_AM, "called");
1008 1008
1009 IPC::RequestParser rp{ctx}; 1009 IPC::RequestParser rp{ctx};
1010 applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>()); 1010 applet->GetBroker().PushInteractiveDataFromGame(rp.PopIpcInterface<IStorage>().lock());
1011 1011
1012 ASSERT(applet->IsInitialized()); 1012 ASSERT(applet->IsInitialized());
1013 applet->ExecuteInteractive(); 1013 applet->ExecuteInteractive();
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
index b8c2c6e51..ff0bbb788 100644
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -17,21 +17,12 @@ namespace Service::KernelHelpers {
17 17
18ServiceContext::ServiceContext(Core::System& system_, std::string name_) 18ServiceContext::ServiceContext(Core::System& system_, std::string name_)
19 : kernel(system_.Kernel()) { 19 : kernel(system_.Kernel()) {
20
21 // Create a resource limit for the process.
22 const auto physical_memory_size =
23 kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::System);
24 auto* resource_limit = Kernel::CreateResourceLimitForProcess(system_, physical_memory_size);
25
26 // Create the process. 20 // Create the process.
27 process = Kernel::KProcess::Create(kernel); 21 process = Kernel::KProcess::Create(kernel);
28 ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_), 22 ASSERT(Kernel::KProcess::Initialize(process, system_, std::move(name_),
29 Kernel::KProcess::ProcessType::KernelInternal, 23 Kernel::KProcess::ProcessType::KernelInternal,
30 resource_limit) 24 kernel.GetSystemResourceLimit())
31 .IsSuccess()); 25 .IsSuccess());
32
33 // Close reference to our resource limit, as the process opens one.
34 resource_limit->Close();
35} 26}
36 27
37ServiceContext::~ServiceContext() { 28ServiceContext::~ServiceContext() {
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 9fc7bb1b1..099276420 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -288,7 +288,7 @@ public:
288 } 288 }
289 289
290 bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const { 290 bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const {
291 constexpr std::size_t padding_size{4 * Kernel::PageSize}; 291 const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize};
292 const auto start_info{page_table.QueryInfo(start - 1)}; 292 const auto start_info{page_table.QueryInfo(start - 1)};
293 293
294 if (start_info.state != Kernel::KMemoryState::Free) { 294 if (start_info.state != Kernel::KMemoryState::Free) {
@@ -308,31 +308,69 @@ public:
308 return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); 308 return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize());
309 } 309 }
310 310
311 VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const { 311 ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) {
312 VAddr addr{}; 312 size = Common::AlignUp(size, Kernel::PageSize);
313 const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >> 313 size += page_table.GetNumGuardPages() * Kernel::PageSize * 4;
314 Kernel::PageBits}; 314
315 do { 315 const auto is_region_available = [&](VAddr addr) {
316 addr = page_table.GetAliasCodeRegionStart() + 316 const auto end_addr = addr + size;
317 (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits); 317 while (addr < end_addr) {
318 } while (!page_table.IsInsideAddressSpace(addr, size) || 318 if (system.Memory().IsValidVirtualAddress(addr)) {
319 page_table.IsInsideHeapRegion(addr, size) || 319 return false;
320 page_table.IsInsideAliasRegion(addr, size)); 320 }
321 return addr; 321
322 if (!page_table.IsInsideAddressSpace(out_addr, size)) {
323 return false;
324 }
325
326 if (page_table.IsInsideHeapRegion(out_addr, size)) {
327 return false;
328 }
329
330 if (page_table.IsInsideAliasRegion(out_addr, size)) {
331 return false;
332 }
333
334 addr += Kernel::PageSize;
335 }
336 return true;
337 };
338
339 bool succeeded = false;
340 const auto map_region_end =
341 page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize();
342 while (current_map_addr < map_region_end) {
343 if (is_region_available(current_map_addr)) {
344 succeeded = true;
345 break;
346 }
347 current_map_addr += 0x100000;
348 }
349
350 if (!succeeded) {
351 UNREACHABLE_MSG("Out of address space!");
352 return Kernel::ResultOutOfMemory;
353 }
354
355 out_addr = current_map_addr;
356 current_map_addr += size;
357
358 return ResultSuccess;
322 } 359 }
323 360
324 ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress, 361 ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) {
325 u64 size) const { 362 auto& page_table{process->PageTable()};
363 VAddr addr{};
364
326 for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { 365 for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
327 auto& page_table{process->PageTable()}; 366 R_TRY(GetAvailableMapRegion(page_table, size, addr));
328 const VAddr addr{GetRandomMapRegion(page_table, size)};
329 const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)};
330 367
368 const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)};
331 if (result == Kernel::ResultInvalidCurrentMemory) { 369 if (result == Kernel::ResultInvalidCurrentMemory) {
332 continue; 370 continue;
333 } 371 }
334 372
335 CASCADE_CODE(result); 373 R_TRY(result);
336 374
337 if (ValidateRegionForMap(page_table, addr, size)) { 375 if (ValidateRegionForMap(page_table, addr, size)) {
338 return addr; 376 return addr;
@@ -343,7 +381,7 @@ public:
343 } 381 }
344 382
345 ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size, 383 ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size,
346 VAddr bss_addr, std::size_t bss_size, std::size_t size) const { 384 VAddr bss_addr, std::size_t bss_size, std::size_t size) {
347 for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { 385 for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) {
348 auto& page_table{process->PageTable()}; 386 auto& page_table{process->PageTable()};
349 VAddr addr{}; 387 VAddr addr{};
@@ -597,6 +635,7 @@ public:
597 LOG_WARNING(Service_LDR, "(STUBBED) called"); 635 LOG_WARNING(Service_LDR, "(STUBBED) called");
598 636
599 initialized = true; 637 initialized = true;
638 current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart();
600 639
601 IPC::ResponseBuilder rb{ctx, 2}; 640 IPC::ResponseBuilder rb{ctx, 2};
602 rb.Push(ResultSuccess); 641 rb.Push(ResultSuccess);
@@ -607,6 +646,7 @@ private:
607 646
608 std::map<VAddr, NROInfo> nro; 647 std::map<VAddr, NROInfo> nro;
609 std::map<VAddr, std::vector<SHA256Hash>> nrr; 648 std::map<VAddr, std::vector<SHA256Hash>> nrr;
649 VAddr current_map_addr{};
610 650
611 bool IsValidNROHash(const SHA256Hash& hash) const { 651 bool IsValidNROHash(const SHA256Hash& hash) const {
612 return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) { 652 return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) {
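The ldr change above replaces random placement of NRO mappings with a deterministic forward scan through the alias-code region, resuming from a cursor that advances past each successful mapping. A minimal standalone sketch of that scan, assuming hypothetical Region/is_free helpers in place of yuzu's KPageTable queries (alignment and guard-page padding from the diff are omitted):

```cpp
#include <cstdint>
#include <optional>

// Hypothetical stand-ins for the page-table queries used in the diff.
struct Region {
    std::uint64_t start;
    std::uint64_t size;
};

std::optional<std::uint64_t> FindFreeRange(
    const Region& alias_code, std::uint64_t size, std::uint64_t& cursor,
    bool (*is_free)(std::uint64_t addr, std::uint64_t size)) {
    constexpr std::uint64_t kStep = 0x100000; // advance in 1 MiB increments, as in the diff
    const std::uint64_t end = alias_code.start + alias_code.size;
    while (cursor < end) {
        if (is_free(cursor, size)) {
            const std::uint64_t found = cursor;
            cursor += size; // the next search resumes after this mapping
            return found;
        }
        cursor += kStep;
    }
    return std::nullopt; // caller reports out-of-address-space
}
```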
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index eaa172595..695a1faa6 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -81,6 +81,8 @@ ResultVal<Kernel::KPort*> ServiceManager::GetServicePort(const std::string& name
81 } 81 }
82 82
83 auto* port = Kernel::KPort::Create(kernel); 83 auto* port = Kernel::KPort::Create(kernel);
84 SCOPE_EXIT({ port->Close(); });
85
84 port->Initialize(ServerSessionCountMax, false, name); 86 port->Initialize(ServerSessionCountMax, false, name);
85 auto handler = it->second; 87 auto handler = it->second;
86 port->GetServerPort().SetSessionHandler(std::move(handler)); 88 port->GetServerPort().SetSessionHandler(std::move(handler));
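The SCOPE_EXIT added above releases the reference taken by KPort::Create once the port has been registered elsewhere. A minimal scope-guard sketch of the same pattern, assuming a hypothetical RefCounted type rather than yuzu's SCOPE_EXIT macro:

```cpp
#include <utility>

// Minimal scope guard: the callable runs when the guard leaves scope, so the
// creation reference is always dropped, even on early return.
template <typename F>
class ScopeExit {
public:
    explicit ScopeExit(F&& func) : func_{std::forward<F>(func)} {}
    ~ScopeExit() { func_(); }
    ScopeExit(const ScopeExit&) = delete;
    ScopeExit& operator=(const ScopeExit&) = delete;

private:
    F func_;
};

// Usage sketch (RefCounted is hypothetical):
//   auto* port = RefCounted::Create();
//   ScopeExit guard{[&] { port->Close(); }};
//   ... hand the port to its long-lived owner; the extra reference is dropped here.
```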
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index b412957c7..2b360e073 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS =
22struct RescalingLayout { 22struct RescalingLayout {
23 alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; 23 alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures;
24 alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; 24 alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images;
25 alignas(16) u32 down_factor; 25 u32 down_factor;
26}; 26};
27constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); 27constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures);
28constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); 28constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor);
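Dropping alignas(16) on down_factor lets the member pack directly after the preceding array, so RESCALING_LAYOUT_DOWN_FACTOR_OFFSET lands immediately behind the scaling words instead of on the next 16-byte boundary. A small illustration with made-up array sizes (not the real NUM_*_SCALING_WORDS values):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

// With alignas(16) the trailing u32 is pushed to the next 16-byte boundary;
// without it, the member packs right after the array.
struct Padded {
    alignas(16) std::array<std::uint32_t, 3> words;
    alignas(16) std::uint32_t down_factor;
};
struct Packed {
    alignas(16) std::array<std::uint32_t, 3> words;
    std::uint32_t down_factor;
};
static_assert(offsetof(Padded, down_factor) == 16);
static_assert(offsetof(Packed, down_factor) == 12);
```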
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) 13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, 14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) { 15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)}; 16 switch (ttbl) {
17 const IR::U32 not_a{ir.BitwiseNot(a)}; 17 // generated code, do not edit manually
18 const IR::U32 not_b{ir.BitwiseNot(b)}; 18 case 0:
19 const IR::U32 not_c{ir.BitwiseNot(c)}; 19 return ir.Imm32(0);
20 if (ttbl & 0x01) { 20 case 1:
21 // r |= ~a & ~b & ~c; 21 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)}; 22 case 2:
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)}; 23 return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
24 r = ir.BitwiseOr(r, rhs); 24 case 3:
25 return ir.BitwiseNot(ir.BitwiseOr(a, b));
26 case 4:
27 return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
28 case 5:
29 return ir.BitwiseNot(ir.BitwiseOr(a, c));
30 case 6:
31 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
32 case 7:
33 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
34 case 8:
35 return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
36 case 9:
37 return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
38 case 10:
39 return ir.BitwiseAnd(c, ir.BitwiseNot(a));
40 case 11:
41 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
42 case 12:
43 return ir.BitwiseAnd(b, ir.BitwiseNot(a));
44 case 13:
45 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
46 case 14:
47 return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
48 case 15:
49 return ir.BitwiseNot(a);
50 case 16:
51 return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
52 case 17:
53 return ir.BitwiseNot(ir.BitwiseOr(b, c));
54 case 18:
55 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
56 case 19:
57 return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
58 case 20:
59 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
60 case 21:
61 return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
62 case 22:
63 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
64 case 23:
65 return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
66 ir.BitwiseNot(a));
67 case 24:
68 return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
69 case 25:
70 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
71 case 26:
72 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
73 case 27:
74 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
75 case 28:
76 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
77 case 29:
78 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
79 case 30:
80 return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
81 case 31:
82 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
83 case 32:
84 return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
85 case 33:
86 return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
87 case 34:
88 return ir.BitwiseAnd(c, ir.BitwiseNot(b));
89 case 35:
90 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
91 case 36:
92 return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
93 case 37:
94 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
95 case 38:
96 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
97 case 39:
98 return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
99 case 40:
100 return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
101 case 41:
102 return ir.BitwiseXor(ir.BitwiseOr(a, b),
103 ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
104 case 42:
105 return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
106 case 43:
107 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
108 ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
109 case 44:
110 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
111 case 45:
112 return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
113 case 46:
114 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
115 case 47:
116 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
117 case 48:
118 return ir.BitwiseAnd(a, ir.BitwiseNot(b));
119 case 49:
120 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
121 case 50:
122 return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
123 case 51:
124 return ir.BitwiseNot(b);
125 case 52:
126 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
127 case 53:
128 return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
129 case 54:
130 return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
131 case 55:
132 return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
133 case 56:
134 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
135 case 57:
136 return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
137 case 58:
138 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
139 case 59:
140 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
141 case 60:
142 return ir.BitwiseXor(a, b);
143 case 61:
144 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
145 case 62:
146 return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
147 case 63:
148 return ir.BitwiseNot(ir.BitwiseAnd(a, b));
149 case 64:
150 return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
151 case 65:
152 return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
153 case 66:
154 return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
155 case 67:
156 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
157 case 68:
158 return ir.BitwiseAnd(b, ir.BitwiseNot(c));
159 case 69:
160 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
161 case 70:
162 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
163 case 71:
164 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
165 case 72:
166 return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
167 case 73:
168 return ir.BitwiseXor(ir.BitwiseOr(a, c),
169 ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
170 case 74:
171 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
172 case 75:
173 return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
174 case 76:
175 return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
176 case 77:
177 return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
178 ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
179 case 78:
180 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
181 case 79:
182 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
183 case 80:
184 return ir.BitwiseAnd(a, ir.BitwiseNot(c));
185 case 81:
186 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
187 case 82:
188 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
189 case 83:
190 return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
191 case 84:
192 return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
193 case 85:
194 return ir.BitwiseNot(c);
195 case 86:
196 return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
197 case 87:
198 return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
199 case 88:
200 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
201 case 89:
202 return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
203 case 90:
204 return ir.BitwiseXor(a, c);
205 case 91:
206 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
207 case 92:
208 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
209 case 93:
210 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
211 case 94:
212 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
213 case 95:
214 return ir.BitwiseNot(ir.BitwiseAnd(a, c));
215 case 96:
216 return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
217 case 97:
218 return ir.BitwiseXor(ir.BitwiseOr(b, c),
219 ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
220 case 98:
221 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
222 case 99:
223 return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
224 case 100:
225 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
226 case 101:
227 return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
228 case 102:
229 return ir.BitwiseXor(b, c);
230 case 103:
231 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
232 case 104:
233 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
234 case 105:
235 return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
236 case 106:
237 return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
238 case 107:
239 return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
240 ir.BitwiseXor(a, ir.BitwiseNot(b)));
241 case 108:
242 return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
243 case 109:
244 return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
245 ir.BitwiseXor(a, ir.BitwiseNot(c)));
246 case 110:
247 return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
248 case 111:
249 return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
250 case 112:
251 return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
252 case 113:
253 return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
254 ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
255 case 114:
256 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
257 case 115:
258 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
259 case 116:
260 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
261 case 117:
262 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
263 case 118:
264 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
265 case 119:
266 return ir.BitwiseNot(ir.BitwiseAnd(b, c));
267 case 120:
268 return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
269 case 121:
270 return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
271 ir.BitwiseXor(b, ir.BitwiseNot(c)));
272 case 122:
273 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
274 case 123:
275 return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
276 case 124:
277 return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
278 case 125:
279 return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
280 case 126:
281 return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
282 case 127:
283 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
284 case 128:
285 return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
286 case 129:
287 return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
288 case 130:
289 return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
290 case 131:
291 return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
292 case 132:
293 return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
294 case 133:
295 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
296 case 134:
297 return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
298 case 135:
299 return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
300 case 136:
301 return ir.BitwiseAnd(b, c);
302 case 137:
303 return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
304 case 138:
305 return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
306 case 139:
307 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
308 case 140:
309 return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
310 case 141:
311 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
312 case 142:
313 return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
314 case 143:
315 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
316 case 144:
317 return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
318 case 145:
319 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
320 case 146:
321 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
322 case 147:
323 return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
324 case 148:
325 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
326 case 149:
327 return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
328 case 150:
329 return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
330 case 151:
331 return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
332 ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
333 case 152:
334 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
335 case 153:
336 return ir.BitwiseXor(b, ir.BitwiseNot(c));
337 case 154:
338 return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
339 case 155:
340 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
341 case 156:
342 return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
343 case 157:
344 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
345 case 158:
346 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
347 case 159:
348 return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
349 case 160:
350 return ir.BitwiseAnd(a, c);
351 case 161:
352 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
353 case 162:
354 return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
355 case 163:
356 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
357 case 164:
358 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
359 case 165:
360 return ir.BitwiseXor(a, ir.BitwiseNot(c));
361 case 166:
362 return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
363 case 167:
364 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
365 case 168:
366 return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
367 case 169:
368 return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
369 case 170:
370 return c;
371 case 171:
372 return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
373 case 172:
374 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
375 case 173:
376 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
377 case 174:
378 return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
379 case 175:
380 return ir.BitwiseOr(c, ir.BitwiseNot(a));
381 case 176:
382 return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
383 case 177:
384 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
385 case 178:
386 return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
387 case 179:
388 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
389 case 180:
390 return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
391 case 181:
392 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
393 case 182:
394 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
395 case 183:
396 return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
397 case 184:
398 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
399 case 185:
400 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
401 case 186:
402 return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
403 case 187:
404 return ir.BitwiseOr(c, ir.BitwiseNot(b));
405 case 188:
406 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
407 case 189:
408 return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
409 case 190:
410 return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
411 case 191:
412 return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
413 case 192:
414 return ir.BitwiseAnd(a, b);
415 case 193:
416 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
417 case 194:
418 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
419 case 195:
420 return ir.BitwiseXor(a, ir.BitwiseNot(b));
421 case 196:
422 return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
423 case 197:
424 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
425 case 198:
426 return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
427 case 199:
428 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
429 case 200:
430 return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
431 case 201:
432 return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
433 case 202:
434 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
435 case 203:
436 return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
437 case 204:
438 return b;
439 case 205:
440 return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
441 case 206:
442 return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
443 case 207:
444 return ir.BitwiseOr(b, ir.BitwiseNot(a));
445 case 208:
446 return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
447 case 209:
448 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
449 case 210:
450 return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
451 case 211:
452 return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
453 case 212:
454 return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
455 case 213:
456 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
457 case 214:
458 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
459 case 215:
460 return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
461 case 216:
462 return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
463 case 217:
464 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
465 case 218:
466 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
467 case 219:
468 return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
469 case 220:
470 return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
471 case 221:
472 return ir.BitwiseOr(b, ir.BitwiseNot(c));
473 case 222:
474 return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
475 case 223:
476 return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
477 case 224:
478 return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
479 case 225:
480 return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
481 case 226:
482 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
483 case 227:
484 return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
485 case 228:
486 return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
487 case 229:
488 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
489 case 230:
490 return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
491 case 231:
492 return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
493 case 232:
494 return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
495 case 233:
496 return ir.BitwiseOr(ir.BitwiseAnd(a, b),
497 ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
498 case 234:
499 return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
500 case 235:
501 return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
502 case 236:
503 return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
504 case 237:
505 return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
506 case 238:
507 return ir.BitwiseOr(b, c);
508 case 239:
509 return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
510 case 240:
511 return a;
512 case 241:
513 return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
514 case 242:
515 return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
516 case 243:
517 return ir.BitwiseOr(a, ir.BitwiseNot(b));
518 case 244:
519 return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
520 case 245:
521 return ir.BitwiseOr(a, ir.BitwiseNot(c));
522 case 246:
523 return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
524 case 247:
525 return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
526 case 248:
527 return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
528 case 249:
529 return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
530 case 250:
531 return ir.BitwiseOr(a, c);
532 case 251:
533 return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
534 case 252:
535 return ir.BitwiseOr(a, b);
536 case 253:
537 return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
538 case 254:
539 return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
540 case 255:
541 return ir.Imm32(0xFFFFFFFF);
542 // end of generated code
25 } 543 }
26 if (ttbl & 0x02) { 544 throw NotImplementedException("LOP3 with out of range ttbl");
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69} 545}
70 546
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { 547IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
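The switch above replaces the per-minterm emitter with one pre-optimized expression per 8-bit truth table. For reference, the semantics both versions implement can be written as a plain bitwise loop; this is a checking sketch with a hypothetical name, not emulator code:

```cpp
#include <cstdint>

// Bitwise reference for LOP3.LUT: for each bit position the output bit is
// ttbl[(a_bit << 2) | (b_bit << 1) | c_bit], matching the minterm order of the
// removed emitter (bit 0 = ~a & ~b & ~c, bit 7 = a & b & c).
std::uint32_t Lop3Reference(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                            std::uint8_t ttbl) {
    std::uint32_t r = 0;
    for (int bit = 0; bit < 32; ++bit) {
        const std::uint32_t index = (((a >> bit) & 1u) << 2) |
                                    (((b >> bit) & 1u) << 1) |
                                    ((c >> bit) & 1u);
        r |= ((static_cast<std::uint32_t>(ttbl) >> index) & 1u) << bit;
    }
    return r;
}
```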
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
1# Copyright © 2022 degasus <markus@selfnet.de>
2# This work is free. You can redistribute it and/or modify it under the
3# terms of the Do What The Fuck You Want To Public License, Version 2,
4# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
5
6from itertools import product
7
8# The primitive instructions
9OPS = {
10 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
11 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
12 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
 13 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often be inlined in other instructions
14}
15
16# Our database of combination of instructions
17optimized_calls = {}
18def cmp(lhs, rhs):
19 if lhs is None: # new entry
20 return True
21 if lhs[3] > rhs[3]: # costs
22 return True
23 if lhs[3] < rhs[3]: # costs
24 return False
25 if len(lhs[0]) > len(rhs[0]): # string len
26 return True
27 if len(lhs[0]) < len(rhs[0]): # string len
28 return False
29 if lhs[0] > rhs[0]: # string sorting
30 return True
31 if lhs[0] < rhs[0]: # string sorting
32 return False
33 assert lhs == rhs, "redundant instruction, bug in brute force"
34 return False
35def register(imm, instruction, count, latency):
36 # Use the sum of instruction count and latency as costs to evaluate which combination is best
37 costs = count + latency
38
39 old = optimized_calls.get(imm, None)
40 new = (instruction, count, latency, costs)
41
42 # Update if new or better
43 if cmp(old, new):
44 optimized_calls[imm] = new
45 return True
46
47 return False
48
49# Constants: 0, 1 (for free)
50register(0, 'ir.Imm32(0)', 0, 0)
51register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
52
53# Inputs: a, b, c (for free)
54ta = 0xF0
55tb = 0xCC
56tc = 0xAA
57inputs = {
58 ta : 'a',
59 tb : 'b',
60 tc : 'c',
61}
62for imm, instruction in inputs.items():
63 register(imm, instruction, 0, 0)
64 register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
65
66# Try to combine two values from the db with an instruction.
67# If it is better than the old method, update it.
68while True:
69 registered = 0
70 calls_copy = optimized_calls.copy()
71 for OP, (argc, cost, f) in OPS.items():
72 for args in product(calls_copy.items(), repeat=argc):
 73 # unpack (transpose) the arrays
74 imm = [arg[0] for arg in args]
75 value = [arg[1][0] for arg in args]
76 count = [arg[1][1] for arg in args]
77 latency = [arg[1][2] for arg in args]
78
79 registered += register(
80 f(*imm),
81 OP.format(*value),
82 sum(count) + cost,
83 max(latency) + cost)
84 if registered == 0:
85 # No update at all? So terminate
86 break
87
88# Hacky output. Please improve me to output valid C++ instead.
89s = """ case {imm}:
90 return {op};"""
91for imm in range(256):
92 print(s.format(imm=imm, op=optimized_calls[imm][0]))
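The generator encodes each input as its 8-bit truth-table mask (a = 0xF0, b = 0xCC, c = 0xAA), so any correct expression for table N evaluates to N when reduced to 8 bits. A throwaway C++ spot check of two generated cases:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // Canonical truth-table masks used by the generator.
    const std::uint8_t a = 0xF0, b = 0xCC, c = 0xAA;
    // Case 22: (a | b) ^ (c | (a & b))  ->  0xFC ^ 0xEA == 0x16 == 22
    assert(static_cast<std::uint8_t>((a | b) ^ (c | (a & b))) == 22);
    // Case 150: a ^ b ^ c  ->  0x96 == 150
    assert(static_cast<std::uint8_t>(a ^ b ^ c) == 150);
    return 0;
}
```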
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 248ad3ced..b22725584 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
212 } 212 }
213 Optimization::SsaRewritePass(program); 213 Optimization::SsaRewritePass(program);
214 214
215 Optimization::ConstantPropagationPass(program);
216
215 Optimization::GlobalMemoryToStorageBufferPass(program); 217 Optimization::GlobalMemoryToStorageBufferPass(program);
216 Optimization::TexturePass(env, program); 218 Optimization::TexturePass(env, program);
217 219
218 Optimization::ConstantPropagationPass(program);
219
220 if (Settings::values.resolution_info.active) { 220 if (Settings::values.resolution_info.active) {
221 Optimization::RescalingPass(program); 221 Optimization::RescalingPass(program);
222 } 222 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 38592afd0..ddf497e32 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
334/// Tries to track the storage buffer address used by a global memory instruction 334/// Tries to track the storage buffer address used by a global memory instruction
335std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { 335std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
336 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { 336 const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
337 if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { 337 if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
338 inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
338 return std::nullopt; 339 return std::nullopt;
339 } 340 }
340 const IR::Value index{inst->Arg(0)}; 341 const IR::Value index{inst->Arg(0)};
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
index c28500dd1..496d4667e 100644
--- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp
+++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp
@@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s
183 } 183 }
184} 184}
185 185
186void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
187 size_t index) {
188 const IR::Value composite{inst.Arg(index)};
189 if (composite.IsEmpty()) {
190 return;
191 }
192 const auto info{inst.Flags<IR::TextureInstInfo>()};
193 const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})};
194 const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})};
195 switch (info.type) {
196 case TextureType::ColorArray2D:
197 case TextureType::Color2D:
198 inst.SetArg(index, ir.CompositeConstruct(x, y));
199 break;
200 case TextureType::Color1D:
201 case TextureType::ColorArray1D:
202 case TextureType::Color3D:
203 case TextureType::ColorCube:
204 case TextureType::ColorArrayCube:
205 case TextureType::Buffer:
206 // Nothing to patch here
207 break;
208 }
209}
210
186void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { 211void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
187 const auto info{inst.Flags<IR::TextureInstInfo>()}; 212 const auto info{inst.Flags<IR::TextureInstInfo>()};
188 const IR::Value coord{inst.Arg(1)}; 213 const IR::Value coord{inst.Arg(1)};
@@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
220 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; 245 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
221 SubScaleCoord(ir, inst, is_scaled); 246 SubScaleCoord(ir, inst, is_scaled);
222 // Scale ImageFetch offset 247 // Scale ImageFetch offset
223 ScaleIntegerComposite(ir, inst, is_scaled, 2); 248 ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
224} 249}
225 250
226void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { 251void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
@@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
242 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; 267 const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))};
243 ScaleIntegerComposite(ir, inst, is_scaled, 1); 268 ScaleIntegerComposite(ir, inst, is_scaled, 1);
244 // Scale ImageFetch offset 269 // Scale ImageFetch offset
245 ScaleIntegerComposite(ir, inst, is_scaled, 2); 270 ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2);
246} 271}
247 272
248void PatchImageRead(IR::Block& block, IR::Inst& inst) { 273void PatchImageRead(IR::Block& block, IR::Inst& inst) {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..54a902f56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/dirty_flags.h"
10#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
11#include "video_core/gpu.h" 12#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
@@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
195 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: 196 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
196 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: 197 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
197 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: 198 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
198 return StartCBData(method); 199 return ProcessCBData(argument);
199 case MAXWELL3D_REG_INDEX(cb_bind[0]): 200 case MAXWELL3D_REG_INDEX(cb_bind[0]):
200 return ProcessCBBind(0); 201 return ProcessCBBind(0);
201 case MAXWELL3D_REG_INDEX(cb_bind[1]): 202 case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
208 return ProcessCBBind(4); 209 return ProcessCBBind(4);
209 case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): 210 case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
210 return DrawArrays(); 211 return DrawArrays();
212 case MAXWELL3D_REG_INDEX(small_index):
213 regs.index_array.count = regs.small_index.count;
214 regs.index_array.first = regs.small_index.first;
215 dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
216 return DrawArrays();
217 case MAXWELL3D_REG_INDEX(topology_override):
218 use_topology_override = true;
219 return;
211 case MAXWELL3D_REG_INDEX(clear_buffers): 220 case MAXWELL3D_REG_INDEX(clear_buffers):
212 return ProcessClearBuffers(); 221 return ProcessClearBuffers();
213 case MAXWELL3D_REG_INDEX(query.query_get): 222 case MAXWELL3D_REG_INDEX(query.query_get):
@@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
248} 257}
249 258
250void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 259void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
251 if (method == cb_data_state.current) {
252 regs.reg_array[method] = method_argument;
253 ProcessCBData(method_argument);
254 return;
255 } else if (cb_data_state.current != null_cb_data) {
256 FinishCBData();
257 }
258
259 // It is an error to write to a register other than the current macro's ARG register before it 260 // It is an error to write to a register other than the current macro's ARG register before it
260 // has finished execution. 261 // has finished execution.
261 if (executing_macro != 0) { 262 if (executing_macro != 0) {
@@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
302 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: 303 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
303 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: 304 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
304 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: 305 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
305 ProcessCBMultiData(method, base_start, amount); 306 ProcessCBMultiData(base_start, amount);
306 break; 307 break;
307 default: 308 default:
308 for (std::size_t i = 0; i < amount; i++) { 309 for (std::size_t i = 0; i < amount; i++) {
@@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
360 } 361 }
361} 362}
362 363
364void Maxwell3D::ProcessTopologyOverride() {
365 using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
366 using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
367
368 PrimitiveTopology topology{};
369
370 switch (regs.topology_override) {
371 case PrimitiveTopologyOverride::None:
372 topology = regs.draw.topology;
373 break;
374 case PrimitiveTopologyOverride::Points:
375 topology = PrimitiveTopology::Points;
376 break;
377 case PrimitiveTopologyOverride::Lines:
378 topology = PrimitiveTopology::Lines;
379 break;
380 case PrimitiveTopologyOverride::LineStrip:
381 topology = PrimitiveTopology::LineStrip;
382 break;
383 default:
384 topology = static_cast<PrimitiveTopology>(regs.topology_override);
385 break;
386 }
387
388 if (use_topology_override) {
389 regs.draw.topology.Assign(topology);
390 }
391}
392
363void Maxwell3D::FlushMMEInlineDraw() { 393void Maxwell3D::FlushMMEInlineDraw() {
364 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), 394 LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
365 regs.vertex_buffer.count); 395 regs.vertex_buffer.count);
@@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
370 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, 400 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
371 "Illegal combination of instancing parameters"); 401 "Illegal combination of instancing parameters");
372 402
403 ProcessTopologyOverride();
404
373 const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; 405 const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
374 if (ShouldExecute()) { 406 if (ShouldExecute()) {
375 rasterizer->Draw(is_indexed, true); 407 rasterizer->Draw(is_indexed, true);
@@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() {
529 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, 561 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
530 "Illegal combination of instancing parameters"); 562 "Illegal combination of instancing parameters");
531 563
564 ProcessTopologyOverride();
565
532 if (regs.draw.instance_next) { 566 if (regs.draw.instance_next) {
533 // Increment the current instance *before* drawing. 567 // Increment the current instance *before* drawing.
534 state.current_instance += 1; 568 state.current_instance += 1;
@@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
587 rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); 621 rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
588} 622}
589 623
590void Maxwell3D::ProcessCBData(u32 value) { 624void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
591 const u32 id = cb_data_state.id;
592 cb_data_state.buffer[id][cb_data_state.counter] = value;
593 // Increment the current buffer position.
594 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
595 cb_data_state.counter++;
596}
597
598void Maxwell3D::StartCBData(u32 method) {
599 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
600 cb_data_state.start_pos = regs.const_buffer.cb_pos;
601 cb_data_state.id = method - first_cb_data;
602 cb_data_state.current = method;
603 cb_data_state.counter = 0;
604 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
605}
606
607void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
608 if (cb_data_state.current != method) {
609 if (cb_data_state.current != null_cb_data) {
610 FinishCBData();
611 }
612 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
613 cb_data_state.start_pos = regs.const_buffer.cb_pos;
614 cb_data_state.id = method - first_cb_data;
615 cb_data_state.current = method;
616 cb_data_state.counter = 0;
617 }
618 const std::size_t id = cb_data_state.id;
619 const std::size_t size = amount;
620 std::size_t i = 0;
621 for (; i < size; i++) {
622 cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
623 cb_data_state.counter++;
624 }
625 // Increment the current buffer position.
626 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
627}
628
629void Maxwell3D::FinishCBData() {
630 // Write the input value to the current const buffer at the current position. 625 // Write the input value to the current const buffer at the current position.
631 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 626 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
632 ASSERT(buffer_address != 0); 627 ASSERT(buffer_address != 0);
@@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() {
634 // Don't allow writing past the end of the buffer. 629 // Don't allow writing past the end of the buffer.
635 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); 630 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
636 631
637 const GPUVAddr address{buffer_address + cb_data_state.start_pos}; 632 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
638 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; 633 const size_t copy_size = amount * sizeof(u32);
634 memory_manager.WriteBlock(address, start_base, copy_size);
639 635
640 const u32 id = cb_data_state.id; 636 // Increment the current buffer position.
641 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); 637 regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
638}
642 639
643 cb_data_state.id = null_cb_data; 640void Maxwell3D::ProcessCBData(u32 value) {
644 cb_data_state.current = null_cb_data; 641 ProcessCBMultiData(&value, 1);
645} 642}
646 643
647Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 644Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..357a74c70 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -367,6 +367,22 @@ public:
367 Patches = 0xe, 367 Patches = 0xe,
368 }; 368 };
369 369
370 // Constants as from NVC0_3D_UNK1970_D3D
371 // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
372 enum class PrimitiveTopologyOverride : u32 {
373 None = 0x0,
374 Points = 0x1,
375 Lines = 0x2,
376 LineStrip = 0x3,
377 Triangles = 0x4,
378 TriangleStrip = 0x5,
379 LinesAdjacency = 0xa,
380 LineStripAdjacency = 0xb,
381 TrianglesAdjacency = 0xc,
382 TriangleStripAdjacency = 0xd,
383 Patches = 0xe,
384 };
385
370 enum class IndexFormat : u32 { 386 enum class IndexFormat : u32 {
371 UnsignedByte = 0x0, 387 UnsignedByte = 0x0,
372 UnsignedShort = 0x1, 388 UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
1200 } 1216 }
1201 } index_array; 1217 } index_array;
1202 1218
1203 INSERT_PADDING_WORDS_NOINIT(0x7); 1219 union {
1220 BitField<0, 16, u32> first;
1221 BitField<16, 16, u32> count;
1222 } small_index;
1223
1224 INSERT_PADDING_WORDS_NOINIT(0x6);
1204 1225
1205 INSERT_PADDING_WORDS_NOINIT(0x1F); 1226 INSERT_PADDING_WORDS_NOINIT(0x1F);
1206 1227
@@ -1244,7 +1265,11 @@ public:
1244 BitField<11, 1, u32> depth_clamp_disabled; 1265 BitField<11, 1, u32> depth_clamp_disabled;
1245 } view_volume_clip_control; 1266 } view_volume_clip_control;
1246 1267
1247 INSERT_PADDING_WORDS_NOINIT(0x1F); 1268 INSERT_PADDING_WORDS_NOINIT(0xC);
1269
1270 PrimitiveTopologyOverride topology_override;
1271
1272 INSERT_PADDING_WORDS_NOINIT(0x12);
1248 1273
1249 u32 depth_bounds_enable; 1274 u32 depth_bounds_enable;
1250 1275
@@ -1520,10 +1545,8 @@ private:
1520 void ProcessSyncPoint(); 1545 void ProcessSyncPoint();
1521 1546
1522 /// Handles a write to the CB_DATA[i] register. 1547 /// Handles a write to the CB_DATA[i] register.
1523 void StartCBData(u32 method);
1524 void ProcessCBData(u32 value); 1548 void ProcessCBData(u32 value);
1525 void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount); 1549 void ProcessCBMultiData(const u32* start_base, u32 amount);
1526 void FinishCBData();
1527 1550
1528 /// Handles a write to the CB_BIND register. 1551 /// Handles a write to the CB_BIND register.
1529 void ProcessCBBind(size_t stage_index); 1552 void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1554,9 @@ private:
1531 /// Handles a write to the VERTEX_END_GL register, triggering a draw. 1554 /// Handles a write to the VERTEX_END_GL register, triggering a draw.
1532 void DrawArrays(); 1555 void DrawArrays();
1533 1556
1557 /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
1558 void ProcessTopologyOverride();
1559
 1534 // Handles an instance drawcall from MME 1560 // Handles an instance drawcall from MME
1535 void StepInstance(MMEDrawMode expected_mode, u32 count); 1561 void StepInstance(MMEDrawMode expected_mode, u32 count);
1536 1562
@@ -1555,20 +1581,10 @@ private:
1555 /// Interpreter for the macro codes uploaded to the GPU. 1581 /// Interpreter for the macro codes uploaded to the GPU.
1556 std::unique_ptr<MacroEngine> macro_engine; 1582 std::unique_ptr<MacroEngine> macro_engine;
1557 1583
1558 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1559 struct CBDataState {
1560 static constexpr size_t inline_size = 0x4000;
1561 std::array<std::array<u32, inline_size>, 16> buffer;
1562 u32 current{null_cb_data};
1563 u32 id{null_cb_data};
1564 u32 start_pos{};
1565 u32 counter{};
1566 };
1567 CBDataState cb_data_state;
1568
1569 Upload::State upload_state; 1584 Upload::State upload_state;
1570 1585
1571 bool execute_on{true}; 1586 bool execute_on{true};
1587 bool use_topology_override{false};
1572}; 1588};
1573 1589
1574#define ASSERT_REG_POSITION(field_name, position) \ 1590#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585);
1685ASSERT_REG_POSITION(primitive_restart, 0x591); 1701ASSERT_REG_POSITION(primitive_restart, 0x591);
1686ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); 1702ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
1687ASSERT_REG_POSITION(index_array, 0x5F2); 1703ASSERT_REG_POSITION(index_array, 0x5F2);
1704ASSERT_REG_POSITION(small_index, 0x5F9);
1688ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); 1705ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1689ASSERT_REG_POSITION(instanced_arrays, 0x620); 1706ASSERT_REG_POSITION(instanced_arrays, 0x620);
1690ASSERT_REG_POSITION(vp_point_size, 0x644); 1707ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
1694ASSERT_REG_POSITION(pixel_center_integer, 0x649); 1711ASSERT_REG_POSITION(pixel_center_integer, 0x649);
1695ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); 1712ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
1696ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); 1713ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
1714ASSERT_REG_POSITION(topology_override, 0x65C);
1697ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); 1715ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
1698ASSERT_REG_POSITION(logic_op, 0x671); 1716ASSERT_REG_POSITION(logic_op, 0x671);
1699ASSERT_REG_POSITION(clear_buffers, 0x674); 1717ASSERT_REG_POSITION(clear_buffers, 0x674);
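The new small_index register packs first and count into one 32-bit word via BitField. The same decode written without the BitField helper, with hypothetical names and the field widths from the union above:

```cpp
#include <cstdint>

// Bits [0,16) are `first`, bits [16,32) are `count`, mirroring the
// BitField<0,16>/BitField<16,16> union in the header.
struct SmallIndex {
    std::uint32_t first;
    std::uint32_t count;
};

constexpr SmallIndex DecodeSmallIndex(std::uint32_t raw) {
    return SmallIndex{raw & 0xFFFFu, raw >> 16};
}

static_assert(DecodeSmallIndex(0x0005'0002u).first == 2);
static_assert(DecodeSmallIndex(0x0005'0002u).count == 5);
```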
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
53 53
54 // TODO(Subv): Perform more research and implement all features of this engine. 54 // TODO(Subv): Perform more research and implement all features of this engine.
55 const LaunchDMA& launch = regs.launch_dma; 55 const LaunchDMA& launch = regs.launch_dma;
56 ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
57 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); 56 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
58 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); 57 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
59 ASSERT(regs.dst_params.origin.x == 0); 58 ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
79 CopyPitchToBlockLinear(); 78 CopyPitchToBlockLinear();
80 } 79 }
81 } 80 }
81 ReleaseSemaphore();
82} 82}
83 83
84void MaxwellDMA::CopyPitchToPitch() { 84void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
244 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 244 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
245} 245}
246 246
247void MaxwellDMA::ReleaseSemaphore() {
248 const auto type = regs.launch_dma.semaphore_type;
249 const GPUVAddr address = regs.semaphore.address;
250 switch (type) {
251 case LaunchDMA::SemaphoreType::NONE:
252 break;
253 case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
254 memory_manager.Write<u32>(address, regs.semaphore.payload);
255 break;
256 case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
257 memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
258 memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
259 break;
260 default:
261 UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
262 }
263}
264
247} // namespace Tegra::Engines 265} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..2692cac8a 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -224,6 +224,8 @@ private:
224 224
225 void FastCopyBlockLinearToPitch(); 225 void FastCopyBlockLinearToPitch();
226 226
227 void ReleaseSemaphore();
228
227 Core::System& system; 229 Core::System& system;
228 230
229 MemoryManager& memory_manager; 231 MemoryManager& memory_manager;
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 151290101..293ad7d59 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const {
31 return true; 31 return true;
32 } 32 }
33 ASSERT(sync_object.handle != 0); 33 ASSERT(sync_object.handle != 0);
34 GLsizei length;
35 GLint sync_status; 34 GLint sync_status;
36 glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); 35 glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
37 return sync_status == GL_SIGNALED; 36 return sync_status == GL_SIGNALED;
38} 37}
39 38
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..9e6732abd 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
243 case Settings::ShaderBackend::GLASM: 243 case Settings::ShaderBackend::GLASM:
244 if (!sources[stage].empty()) { 244 if (!sources[stage].empty()) {
245 assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); 245 assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
246 if (in_parallel) {
247 // Make sure program is built before continuing when building in parallel
248 glGetString(GL_PROGRAM_ERROR_STRING_NV);
249 }
250 } 246 }
251 break; 247 break;
252 case Settings::ShaderBackend::SPIRV: 248 case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
256 break; 252 break;
257 } 253 }
258 } 254 }
259 if (in_parallel && backend != Settings::ShaderBackend::GLASM) { 255 if (in_parallel) {
260 // Make sure programs have built if we are building shaders in parallel 256 std::lock_guard lock{built_mutex};
261 for (OGLProgram& program : source_programs) { 257 built_fence.Create();
262 if (program.handle != 0) { 258 // Flush this context to ensure compilation commands and fence are in the GPU pipe.
263 GLint status{}; 259 glFlush();
264 glGetProgramiv(program.handle, GL_LINK_STATUS, &status); 260 built_condvar.notify_one();
265 } 261 } else {
266 } 262 is_built = true;
267 } 263 }
268 if (shader_notify) { 264 if (shader_notify) {
269 shader_notify->MarkShaderComplete(); 265 shader_notify->MarkShaderComplete();
270 } 266 }
271 is_built = true;
272 built_condvar.notify_one();
273 }}; 267 }};
274 if (thread_worker) { 268 if (thread_worker) {
275 thread_worker->QueueWork(std::move(func)); 269 thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
440 buffer_cache.UpdateGraphicsBuffers(is_indexed); 434 buffer_cache.UpdateGraphicsBuffers(is_indexed);
441 buffer_cache.BindHostGeometryBuffers(is_indexed); 435 buffer_cache.BindHostGeometryBuffers(is_indexed);
442 436
443 if (!is_built.load(std::memory_order::relaxed)) { 437 if (!IsBuilt()) {
444 WaitForBuild(); 438 WaitForBuild();
445 } 439 }
446 const bool use_assembly{assembly_programs[0].handle != 0}; 440 const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
585} 579}
586 580
587void GraphicsPipeline::WaitForBuild() { 581void GraphicsPipeline::WaitForBuild() {
588 std::unique_lock lock{built_mutex}; 582 if (built_fence.handle == 0) {
589 built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); 583 std::unique_lock lock{built_mutex};
584 built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
585 }
586 ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
587 is_built = true;
588}
589
590bool GraphicsPipeline::IsBuilt() noexcept {
591 if (is_built) {
592 return true;
593 }
594 if (built_fence.handle == 0) {
595 return false;
596 }
597 // Timeout of zero means this is non-blocking
598 const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
599 ASSERT(sync_status != GL_WAIT_FAILED);
600 is_built = sync_status != GL_TIMEOUT_EXPIRED;
601 return is_built;
590} 602}
591 603
592} // namespace OpenGL 604} // namespace OpenGL
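The gl_graphics_pipeline.cpp changes above replace the atomic is_built flag with an OGLSync fence: the builder thread creates the fence after queuing compilation, flushes its context so those commands reach the driver, and waiters either block on glClientWaitSync or poll it with a zero timeout. A self-contained sketch of that pattern under the same assumptions (shared GL contexts, a loader header); the function names are illustrative, not the pipeline's API:

    #include <glad/glad.h>  // assumption: any loader exposing sync objects

    // Builder thread: publish a fence behind the queued compile/link commands.
    GLsync PublishBuildFence() {
        const GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        glFlush();  // make sure the fence and prior commands are submitted
        return fence;
    }

    // Consumer thread: a zero timeout makes this a non-blocking poll.
    bool IsBuildDone(GLsync fence) {
        const GLenum status = glClientWaitSync(fence, 0, 0);
        return status == GL_ALREADY_SIGNALED || status == GL_CONDITION_SATISFIED;
    }

    // Consumer thread: a very large timeout effectively blocks until signaled,
    // mirroring the GL_TIMEOUT_IGNORED wait used in WaitForBuild above.
    void WaitBuildDone(GLsync fence) {
        glClientWaitSync(fence, 0, GL_TIMEOUT_IGNORED);
    }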
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..311d49f3f 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -100,9 +100,7 @@ public:
100 return writes_global_memory; 100 return writes_global_memory;
101 } 101 }
102 102
103 [[nodiscard]] bool IsBuilt() const noexcept { 103 [[nodiscard]] bool IsBuilt() noexcept;
104 return is_built.load(std::memory_order::relaxed);
105 }
106 104
107 template <typename Spec> 105 template <typename Spec>
108 static auto MakeConfigureSpecFunc() { 106 static auto MakeConfigureSpecFunc() {
@@ -154,7 +152,8 @@ private:
154 152
155 std::mutex built_mutex; 153 std::mutex built_mutex;
156 std::condition_variable built_condvar; 154 std::condition_variable built_condvar;
157 std::atomic_bool is_built{false}; 155 OGLSync built_fence{};
156 bool is_built{false};
158}; 157};
159 158
160} // namespace OpenGL 159} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..4d73427b4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <cstring> 6#include <cstring>
6#include <memory> 7#include <memory>
7#include <optional> 8#include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
292 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 293 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
293 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, 294 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
294 }; 295 };
295 const std::array push_constants{base_vertex, index_shift}; 296 const std::array<u32, 2> push_constants{base_vertex, index_shift};
296 const VkDescriptorSet set = descriptor_allocator.Commit(); 297 const VkDescriptorSet set = descriptor_allocator.Commit();
297 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); 298 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
298 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); 299 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
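The vk_compute_pass.cpp hunk above spells out std::array<u32, 2> for the push constants (and adds the <array> include). One plausible reading, sketched with hypothetical names: class template argument deduction on std::array requires every initializer to share one type, so pinning the element type and count keeps the two-word push-constant layout stable even if the operands' declared types differ:

    #include <array>
    #include <cstdint>

    using u32 = std::uint32_t;

    void BuildPushConstants(u32 base_vertex, std::int32_t index_shift) {
        // std::array push_constants{base_vertex, index_shift};  // CTAD rejects mixed types
        const std::array<u32, 2> push_constants{base_vertex, static_cast<u32>(index_shift)};
        (void)push_constants;  // placeholder for the real push-constant upload
    }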
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0f62779de..ca6019a3a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
1067 } 1067 }
1068 break; 1068 break;
1069 case PixelFormat::A8B8G8R8_UNORM: 1069 case PixelFormat::A8B8G8R8_UNORM:
1070 if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { 1070 if (src_view.format == PixelFormat::S8_UINT_D24_UNORM ||
1071 src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
1071 return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); 1072 return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
1072 } 1073 }
1073 break; 1074 break;
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..2f2594585 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
50 gpu->BindRenderer(std::move(renderer)); 50 gpu->BindRenderer(std::move(renderer));
51 return gpu; 51 return gpu;
52 } catch (const std::runtime_error& exception) { 52 } catch (const std::runtime_error& exception) {
53 scope.Cancel();
53 LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); 54 LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
54 return nullptr; 55 return nullptr;
55 } 56 }
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index f915bd856..4b943c6ba 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
609 ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); 609 ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
610 ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); 610 ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
611 ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); 611 ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
612 ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
612 613
613 if (global) { 614 if (global) {
614 ReadBasicSetting(Settings::values.cpu_debug_mode); 615 ReadBasicSetting(Settings::values.cpu_debug_mode);
@@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
621 ReadBasicSetting(Settings::values.cpuopt_misc_ir); 622 ReadBasicSetting(Settings::values.cpuopt_misc_ir);
622 ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); 623 ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
623 ReadBasicSetting(Settings::values.cpuopt_fastmem); 624 ReadBasicSetting(Settings::values.cpuopt_fastmem);
625 ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
626 ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
624 } 627 }
625 628
626 qt_config->endGroup(); 629 qt_config->endGroup();
@@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
1139 WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr); 1142 WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
1140 WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan); 1143 WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
1141 WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check); 1144 WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
1145 WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
1142 1146
1143 if (global) { 1147 if (global) {
1144 WriteBasicSetting(Settings::values.cpu_debug_mode); 1148 WriteBasicSetting(Settings::values.cpu_debug_mode);
@@ -1151,6 +1155,8 @@ void Config::SaveCpuValues() {
1151 WriteBasicSetting(Settings::values.cpuopt_misc_ir); 1155 WriteBasicSetting(Settings::values.cpuopt_misc_ir);
1152 WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); 1156 WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
1153 WriteBasicSetting(Settings::values.cpuopt_fastmem); 1157 WriteBasicSetting(Settings::values.cpuopt_fastmem);
1158 WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
1159 WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives);
1154 } 1160 }
1155 1161
1156 qt_config->endGroup(); 1162 qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index f66cab5d4..bf74ccc7c 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
36 ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); 36 ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
37 ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); 37 ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
38 ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); 38 ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
39 ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
39 40
40 ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); 41 ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
41 ui->cpuopt_unsafe_reduce_fp_error->setChecked( 42 ui->cpuopt_unsafe_reduce_fp_error->setChecked(
@@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
46 Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); 47 Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
47 ui->cpuopt_unsafe_fastmem_check->setChecked( 48 ui->cpuopt_unsafe_fastmem_check->setChecked(
48 Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); 49 Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
50 ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
51 Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
49 52
50 if (Settings::IsConfiguringGlobal()) { 53 if (Settings::IsConfiguringGlobal()) {
51 ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); 54 ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
82 ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, 85 ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
83 ui->cpuopt_unsafe_fastmem_check, 86 ui->cpuopt_unsafe_fastmem_check,
84 cpuopt_unsafe_fastmem_check); 87 cpuopt_unsafe_fastmem_check);
88 ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
89 ui->cpuopt_unsafe_ignore_global_monitor,
90 cpuopt_unsafe_ignore_global_monitor);
85} 91}
86 92
87void ConfigureCpu::changeEvent(QEvent* event) { 93void ConfigureCpu::changeEvent(QEvent* event) {
@@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
120 ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, 126 ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
121 Settings::values.cpuopt_unsafe_fastmem_check, 127 Settings::values.cpuopt_unsafe_fastmem_check,
122 cpuopt_unsafe_fastmem_check); 128 cpuopt_unsafe_fastmem_check);
129 ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
130 Settings::values.cpuopt_unsafe_ignore_global_monitor,
131 cpuopt_unsafe_ignore_global_monitor);
123} 132}
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index ed9af0e9f..733e38be4 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -45,6 +45,7 @@ private:
45 ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; 45 ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
46 ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; 46 ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
47 ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; 47 ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
48 ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
48 49
49 const Core::System& system; 50 const Core::System& system;
50}; 51};
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index d8064db24..5d80a8c91 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -150,6 +150,18 @@
150 </property> 150 </property>
151 </widget> 151 </widget>
152 </item> 152 </item>
153 <item>
154 <widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
155 <property name="toolTip">
156 <string>
157 &lt;div&gt;This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.&lt;/div&gt;
158 </string>
159 </property>
160 <property name="text">
161 <string>Ignore global monitor</string>
162 </property>
163 </widget>
164 </item>
153 </layout> 165 </layout>
154 </widget> 166 </widget>
155 </item> 167 </item>
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp
index 05a90963d..616a0be75 100644
--- a/src/yuzu/configuration/configure_cpu_debug.cpp
+++ b/src/yuzu/configuration/configure_cpu_debug.cpp
@@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
44 Settings::values.cpuopt_reduce_misalign_checks.GetValue()); 44 Settings::values.cpuopt_reduce_misalign_checks.GetValue());
45 ui->cpuopt_fastmem->setEnabled(runtime_lock); 45 ui->cpuopt_fastmem->setEnabled(runtime_lock);
46 ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue()); 46 ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
47 ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
48 ui->cpuopt_fastmem_exclusives->setChecked(
49 Settings::values.cpuopt_fastmem_exclusives.GetValue());
50 ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
51 ui->cpuopt_recompile_exclusives->setChecked(
52 Settings::values.cpuopt_recompile_exclusives.GetValue());
47} 53}
48 54
49void ConfigureCpuDebug::ApplyConfiguration() { 55void ConfigureCpuDebug::ApplyConfiguration() {
@@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
56 Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); 62 Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
57 Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); 63 Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
58 Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); 64 Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
65 Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
66 Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
59} 67}
60 68
61void ConfigureCpuDebug::changeEvent(QEvent* event) { 69void ConfigureCpuDebug::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui
index 6e635bb2f..2bc268810 100644
--- a/src/yuzu/configuration/configure_cpu_debug.ui
+++ b/src/yuzu/configuration/configure_cpu_debug.ui
@@ -144,7 +144,34 @@
144 </string> 144 </string>
145 </property> 145 </property>
146 <property name="text"> 146 <property name="text">
147 <string>Enable Host MMU Emulation</string> 147 <string>Enable Host MMU Emulation (general memory instructions)</string>
148 </property>
149 </widget>
150 </item>
151 <item>
152 <widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
153 <property name="toolTip">
154 <string>
155 &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
156 &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
157 &lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all exclusive memory accesses to use Software MMU Emulation.&lt;/div&gt;
158 </string>
159 </property>
160 <property name="text">
161 <string>Enable Host MMU Emulation (exclusive memory instructions)</string>
162 </property>
163 </widget>
164 </item>
165 <item>
166 <widget class="QCheckBox" name="cpuopt_recompile_exclusives">
167 <property name="toolTip">
168 <string>
169 &lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
170 &lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.&lt;/div&gt;
171 </string>
172 </property>
173 <property name="text">
174 <string>Enable recompilation of exclusive memory instructions</string>
148 </property> 175 </property>
149 </widget> 176 </widget>
150 </item> 177 </item>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index d573829be..06774768d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -250,9 +250,9 @@ GMainWindow::GMainWindow()
250#ifdef ARCHITECTURE_x86_64 250#ifdef ARCHITECTURE_x86_64
251 const auto& caps = Common::GetCPUCaps(); 251 const auto& caps = Common::GetCPUCaps();
252 std::string cpu_string = caps.cpu_string; 252 std::string cpu_string = caps.cpu_string;
253 if (caps.avx || caps.avx2 || caps.avx512) { 253 if (caps.avx || caps.avx2 || caps.avx512f) {
254 cpu_string += " | AVX"; 254 cpu_string += " | AVX";
255 if (caps.avx512) { 255 if (caps.avx512f) {
256 cpu_string += "512"; 256 cpu_string += "512";
257 } else if (caps.avx2) { 257 } else if (caps.avx2) {
258 cpu_string += '2'; 258 cpu_string += '2';
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 30963a8bb..b74411c84 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -280,11 +280,14 @@ void Config::ReadValues() {
280 ReadSetting("Cpu", Settings::values.cpuopt_misc_ir); 280 ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
281 ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks); 281 ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
282 ReadSetting("Cpu", Settings::values.cpuopt_fastmem); 282 ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
283 ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
284 ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
283 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma); 285 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
284 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error); 286 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
285 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr); 287 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
286 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan); 288 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
287 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check); 289 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
290 ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
288 291
289 // Renderer 292 // Renderer
290 ReadSetting("Renderer", Settings::values.renderer_backend); 293 ReadSetting("Renderer", Settings::values.renderer_backend);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 6d613bf7a..34782c378 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -124,7 +124,11 @@ keyboard_enabled =
124[Core] 124[Core]
125# Whether to use multi-core for CPU emulation 125# Whether to use multi-core for CPU emulation
126# 0: Disabled, 1 (default): Enabled 126# 0: Disabled, 1 (default): Enabled
127use_multi_core= 127use_multi_core =
128
129# Enable extended guest system memory layout (6GB DRAM)
130# 0 (default): Disabled, 1: Enabled
131use_extended_memory_layout =
128 132
129[Cpu] 133[Cpu]
130# Adjusts various optimizations. 134# Adjusts various optimizations.
@@ -174,6 +178,14 @@ cpuopt_reduce_misalign_checks =
174# 0: Disabled, 1 (default): Enabled 178# 0: Disabled, 1 (default): Enabled
175cpuopt_fastmem = 179cpuopt_fastmem =
176 180
181# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
182# 0: Disabled, 1 (default): Enabled
183cpuopt_fastmem_exclusives =
184
185# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access)
186# 0: Disabled, 1 (default): Enabled
187cpuopt_recompile_exclusives =
188
177# Enable unfuse FMA (improve performance on CPUs without FMA) 189# Enable unfuse FMA (improve performance on CPUs without FMA)
178# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select. 190# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
179# 0: Disabled, 1 (default): Enabled 191# 0: Disabled, 1 (default): Enabled
@@ -199,6 +211,11 @@ cpuopt_unsafe_inaccurate_nan =
199# 0: Disabled, 1 (default): Enabled 211# 0: Disabled, 1 (default): Enabled
200cpuopt_unsafe_fastmem_check = 212cpuopt_unsafe_fastmem_check =
201 213
214# Enable faster exclusive instructions
215# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
216# 0: Disabled, 1 (default): Enabled
217cpuopt_unsafe_ignore_global_monitor =
218
202[Renderer] 219[Renderer]
203# Which backend API to use. 220# Which backend API to use.
204# 0 (default): OpenGL, 1: Vulkan 221# 0 (default): OpenGL, 1: Vulkan