Diffstat (limited to 'src')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 |
| -rw-r--r-- | src/common/host_memory.cpp | 538 |
| -rw-r--r-- | src/common/host_memory.h | 70 |
| -rw-r--r-- | src/common/page_table.h | 2 |
| -rw-r--r-- | src/common/settings.cpp | 8 |
| -rw-r--r-- | src/common/settings.h | 4 |
| -rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 |
| -rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_64.cpp | 12 |
| -rw-r--r-- | src/core/device_memory.cpp | 2 |
| -rw-r--r-- | src/core/device_memory.h | 17 |
| -rw-r--r-- | src/core/memory.cpp | 18 |
| -rw-r--r-- | src/tests/CMakeLists.txt | 1 |
| -rw-r--r-- | src/tests/common/host_memory.cpp | 183 |
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 24 |
| -rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 56 |
| -rw-r--r-- | src/yuzu/configuration/config.cpp | 7 |
| -rw-r--r-- | src/yuzu/configuration/configure_cpu.cpp | 9 |
| -rw-r--r-- | src/yuzu/configuration/configure_cpu.h | 1 |
| -rw-r--r-- | src/yuzu/configuration/configure_cpu.ui | 12 |
| -rw-r--r-- | src/yuzu/configuration/configure_cpu_debug.cpp | 3 |
| -rw-r--r-- | src/yuzu/configuration/configure_cpu_debug.ui | 14 |
| -rw-r--r-- | src/yuzu_cmd/default_ini.h | 4 |
22 files changed, 950 insertions, 43 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2d403d471..97fbdcbf9 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -131,6 +131,8 @@ add_library(common STATIC | |||
| 131 | hash.h | 131 | hash.h |
| 132 | hex_util.cpp | 132 | hex_util.cpp |
| 133 | hex_util.h | 133 | hex_util.h |
| 134 | host_memory.cpp | ||
| 135 | host_memory.h | ||
| 134 | intrusive_red_black_tree.h | 136 | intrusive_red_black_tree.h |
| 135 | logging/backend.cpp | 137 | logging/backend.cpp |
| 136 | logging/backend.h | 138 | logging/backend.h |
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp new file mode 100644 index 000000000..8bd70abc7 --- /dev/null +++ b/src/common/host_memory.cpp | |||
| @@ -0,0 +1,538 @@ | |||
| 1 | #ifdef _WIN32 | ||
| 2 | |||
| 3 | #include <iterator> | ||
| 4 | #include <unordered_map> | ||
| 5 | #include <boost/icl/separate_interval_set.hpp> | ||
| 6 | #include <windows.h> | ||
| 7 | #include "common/dynamic_library.h" | ||
| 8 | |||
| 9 | #elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv | ||
| 10 | |||
| 11 | #ifndef _GNU_SOURCE | ||
| 12 | #define _GNU_SOURCE | ||
| 13 | #endif | ||
| 14 | #include <fcntl.h> | ||
| 15 | #include <sys/mman.h> | ||
| 16 | #include <unistd.h> | ||
| 17 | |||
| 18 | #endif // ^^^ Linux ^^^ | ||
| 19 | |||
| 20 | #include <mutex> | ||
| 21 | |||
| 22 | #include "common/alignment.h" | ||
| 23 | #include "common/assert.h" | ||
| 24 | #include "common/host_memory.h" | ||
| 25 | #include "common/logging/log.h" | ||
| 26 | #include "common/scope_exit.h" | ||
| 27 | |||
| 28 | namespace Common { | ||
| 29 | |||
| 30 | constexpr size_t PageAlignment = 0x1000; | ||
| 31 | constexpr size_t HugePageSize = 0x200000; | ||
| 32 | |||
| 33 | #ifdef _WIN32 | ||
| 34 | |||
| 35 | // Manually imported for MinGW compatibility | ||
| 36 | #ifndef MEM_RESERVE_PLACEHOLDER | ||
| 37 | #define MEM_RESERVE_PLACEHOLDER 0x00040000 | ||
| 38 | #endif | ||
| 39 | #ifndef MEM_REPLACE_PLACEHOLDER | ||
| 40 | #define MEM_REPLACE_PLACEHOLDER 0x00004000 | ||
| 41 | #endif | ||
| 42 | #ifndef MEM_COALESCE_PLACEHOLDERS | ||
| 43 | #define MEM_COALESCE_PLACEHOLDERS 0x00000001 | ||
| 44 | #endif | ||
| 45 | #ifndef MEM_PRESERVE_PLACEHOLDER | ||
| 46 | #define MEM_PRESERVE_PLACEHOLDER 0x00000002 | ||
| 47 | #endif | ||
| 48 | |||
| 49 | using PFN_CreateFileMapping2 = _Ret_maybenull_ HANDLE(WINAPI*)( | ||
| 50 | _In_ HANDLE File, _In_opt_ SECURITY_ATTRIBUTES* SecurityAttributes, _In_ ULONG DesiredAccess, | ||
| 51 | _In_ ULONG PageProtection, _In_ ULONG AllocationAttributes, _In_ ULONG64 MaximumSize, | ||
| 52 | _In_opt_ PCWSTR Name, | ||
| 53 | _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, | ||
| 54 | _In_ ULONG ParameterCount); | ||
| 55 | |||
| 56 | using PFN_VirtualAlloc2 = _Ret_maybenull_ PVOID(WINAPI*)( | ||
| 57 | _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, _In_ SIZE_T Size, | ||
| 58 | _In_ ULONG AllocationType, _In_ ULONG PageProtection, | ||
| 59 | _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, | ||
| 60 | _In_ ULONG ParameterCount); | ||
| 61 | |||
| 62 | using PFN_MapViewOfFile3 = _Ret_maybenull_ PVOID(WINAPI*)( | ||
| 63 | _In_ HANDLE FileMapping, _In_opt_ HANDLE Process, _In_opt_ PVOID BaseAddress, | ||
| 64 | _In_ ULONG64 Offset, _In_ SIZE_T ViewSize, _In_ ULONG AllocationType, _In_ ULONG PageProtection, | ||
| 65 | _Inout_updates_opt_(ParameterCount) MEM_EXTENDED_PARAMETER* ExtendedParameters, | ||
| 66 | _In_ ULONG ParameterCount); | ||
| 67 | |||
| 68 | using PFN_UnmapViewOfFile2 = BOOL(WINAPI*)(_In_ HANDLE Process, _In_ PVOID BaseAddress, | ||
| 69 | _In_ ULONG UnmapFlags); | ||
| 70 | |||
| 71 | template <typename T> | ||
| 72 | static void GetFuncAddress(Common::DynamicLibrary& dll, const char* name, T& pfn) { | ||
| 73 | if (!dll.GetSymbol(name, &pfn)) { | ||
| 74 | LOG_CRITICAL(HW_Memory, "Failed to load {}", name); | ||
| 75 | throw std::bad_alloc{}; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | class HostMemory::Impl { | ||
| 80 | public: | ||
| 81 | explicit Impl(size_t backing_size_, size_t virtual_size_) | ||
| 82 | : backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()}, | ||
| 83 | kernelbase_dll("Kernelbase") { | ||
| 84 | if (!kernelbase_dll.IsOpen()) { | ||
| 85 | LOG_CRITICAL(HW_Memory, "Failed to load Kernelbase.dll"); | ||
| 86 | throw std::bad_alloc{}; | ||
| 87 | } | ||
| 88 | GetFuncAddress(kernelbase_dll, "CreateFileMapping2", pfn_CreateFileMapping2); | ||
| 89 | GetFuncAddress(kernelbase_dll, "VirtualAlloc2", pfn_VirtualAlloc2); | ||
| 90 | GetFuncAddress(kernelbase_dll, "MapViewOfFile3", pfn_MapViewOfFile3); | ||
| 91 | GetFuncAddress(kernelbase_dll, "UnmapViewOfFile2", pfn_UnmapViewOfFile2); | ||
| 92 | |||
| 93 | // Allocate backing file map | ||
| 94 | backing_handle = | ||
| 95 | pfn_CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ, | ||
| 96 | PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0); | ||
| 97 | if (!backing_handle) { | ||
| 98 | LOG_CRITICAL(HW_Memory, "Failed to allocate {} MiB of backing memory", | ||
| 99 | backing_size >> 20); | ||
| 100 | throw std::bad_alloc{}; | ||
| 101 | } | ||
| 102 | // Allocate virtual memory for the backing file map as a placeholder | ||
| 103 | backing_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, backing_size, | ||
| 104 | MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, | ||
| 105 | PAGE_NOACCESS, nullptr, 0)); | ||
| 106 | if (!backing_base) { | ||
| 107 | Release(); | ||
| 108 | LOG_CRITICAL(HW_Memory, "Failed to reserve {} MiB of virtual memory", | ||
| 109 | backing_size >> 20); | ||
| 110 | throw std::bad_alloc{}; | ||
| 111 | } | ||
| 112 | // Map backing placeholder | ||
| 113 | void* const ret = pfn_MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size, | ||
| 114 | MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); | ||
| 115 | if (ret != backing_base) { | ||
| 116 | Release(); | ||
| 117 | LOG_CRITICAL(HW_Memory, "Failed to map {} MiB of virtual memory", backing_size >> 20); | ||
| 118 | throw std::bad_alloc{}; | ||
| 119 | } | ||
| 120 | // Allocate virtual address placeholder | ||
| 121 | virtual_base = static_cast<u8*>(pfn_VirtualAlloc2(process, nullptr, virtual_size, | ||
| 122 | MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, | ||
| 123 | PAGE_NOACCESS, nullptr, 0)); | ||
| 124 | if (!virtual_base) { | ||
| 125 | Release(); | ||
| 126 | LOG_CRITICAL(HW_Memory, "Failed to reserve {} GiB of virtual memory", | ||
| 127 | virtual_size >> 30); | ||
| 128 | throw std::bad_alloc{}; | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | ~Impl() { | ||
| 133 | Release(); | ||
| 134 | } | ||
| 135 | |||
| 136 | void Map(size_t virtual_offset, size_t host_offset, size_t length) { | ||
| 137 | std::unique_lock lock{placeholder_mutex}; | ||
| 138 | if (!IsNiechePlaceholder(virtual_offset, length)) { | ||
| 139 | Split(virtual_offset, length); | ||
| 140 | } | ||
| 141 | ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end()); | ||
| 142 | TrackPlaceholder(virtual_offset, host_offset, length); | ||
| 143 | |||
| 144 | MapView(virtual_offset, host_offset, length); | ||
| 145 | } | ||
| 146 | |||
| 147 | void Unmap(size_t virtual_offset, size_t length) { | ||
| 148 | std::lock_guard lock{placeholder_mutex}; | ||
| 149 | |||
| 150 | // Unmap until there are no more placeholders | ||
| 151 | while (UnmapOnePlaceholder(virtual_offset, length)) { | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 155 | void Protect(size_t virtual_offset, size_t length, bool read, bool write) { | ||
| 156 | DWORD new_flags{}; | ||
| 157 | if (read && write) { | ||
| 158 | new_flags = PAGE_READWRITE; | ||
| 159 | } else if (read && !write) { | ||
| 160 | new_flags = PAGE_READONLY; | ||
| 161 | } else if (!read && !write) { | ||
| 162 | new_flags = PAGE_NOACCESS; | ||
| 163 | } else { | ||
| 164 | UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write); | ||
| 165 | } | ||
| 166 | const size_t virtual_end = virtual_offset + length; | ||
| 167 | |||
| 168 | std::lock_guard lock{placeholder_mutex}; | ||
| 169 | auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end}); | ||
| 170 | while (it != end) { | ||
| 171 | const size_t offset = std::max(it->lower(), virtual_offset); | ||
| 172 | const size_t protect_length = std::min(it->upper(), virtual_end) - offset; | ||
| 173 | DWORD old_flags{}; | ||
| 174 | if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) { | ||
| 175 | LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules"); | ||
| 176 | } | ||
| 177 | ++it; | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | const size_t backing_size; ///< Size of the backing memory in bytes | ||
| 182 | const size_t virtual_size; ///< Size of the virtual address placeholder in bytes | ||
| 183 | |||
| 184 | u8* backing_base{}; | ||
| 185 | u8* virtual_base{}; | ||
| 186 | |||
| 187 | private: | ||
| 188 | /// Release all resources in the object | ||
| 189 | void Release() { | ||
| 190 | if (!placeholders.empty()) { | ||
| 191 | for (const auto& placeholder : placeholders) { | ||
| 192 | if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder.lower(), | ||
| 193 | MEM_PRESERVE_PLACEHOLDER)) { | ||
| 194 | LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder"); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | Coalesce(0, virtual_size); | ||
| 198 | } | ||
| 199 | if (virtual_base) { | ||
| 200 | if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) { | ||
| 201 | LOG_CRITICAL(HW_Memory, "Failed to free virtual memory"); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | if (backing_base) { | ||
| 205 | if (!pfn_UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) { | ||
| 206 | LOG_CRITICAL(HW_Memory, "Failed to unmap backing memory placeholder"); | ||
| 207 | } | ||
| 208 | if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) { | ||
| 209 | LOG_CRITICAL(HW_Memory, "Failed to free backing memory"); | ||
| 210 | } | ||
| 211 | } | ||
| 212 | if (!CloseHandle(backing_handle)) { | ||
| 213 | LOG_CRITICAL(HW_Memory, "Failed to free backing memory file handle"); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | /// Unmap one placeholder in the given range (partial unmaps are supported) | ||
| 218 | /// Return true when there are no more placeholders to unmap | ||
| 219 | bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) { | ||
| 220 | const auto it = placeholders.find({virtual_offset, virtual_offset + length}); | ||
| 221 | const auto begin = placeholders.begin(); | ||
| 222 | const auto end = placeholders.end(); | ||
| 223 | if (it == end) { | ||
| 224 | return false; | ||
| 225 | } | ||
| 226 | const size_t placeholder_begin = it->lower(); | ||
| 227 | const size_t placeholder_end = it->upper(); | ||
| 228 | const size_t unmap_begin = std::max(virtual_offset, placeholder_begin); | ||
| 229 | const size_t unmap_end = std::min(virtual_offset + length, placeholder_end); | ||
| 230 | ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end); | ||
| 231 | ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin); | ||
| 232 | |||
| 233 | const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin); | ||
| 234 | ASSERT(host_pointer_it != placeholder_host_pointers.end()); | ||
| 235 | const size_t host_offset = host_pointer_it->second; | ||
| 236 | |||
| 237 | const bool split_left = unmap_begin > placeholder_begin; | ||
| 238 | const bool split_right = unmap_end < placeholder_end; | ||
| 239 | |||
| 240 | if (!pfn_UnmapViewOfFile2(process, virtual_base + placeholder_begin, | ||
| 241 | MEM_PRESERVE_PLACEHOLDER)) { | ||
| 242 | LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder"); | ||
| 243 | } | ||
| 244 | // If we have to remap memory regions due to partial unmaps, we are in a data race as | ||
| 245 | // Windows doesn't support remapping memory without unmapping first. Avoid adding any extra | ||
| 246 | // logic within the panic region described below. | ||
| 247 | |||
| 248 | // Panic region, we are in a data race right now | ||
| 249 | if (split_left || split_right) { | ||
| 250 | Split(unmap_begin, unmap_end - unmap_begin); | ||
| 251 | } | ||
| 252 | if (split_left) { | ||
| 253 | MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin); | ||
| 254 | } | ||
| 255 | if (split_right) { | ||
| 256 | MapView(unmap_end, host_offset + unmap_end - placeholder_begin, | ||
| 257 | placeholder_end - unmap_end); | ||
| 258 | } | ||
| 259 | // End panic region | ||
| 260 | |||
| 261 | size_t coalesce_begin = unmap_begin; | ||
| 262 | if (!split_left) { | ||
| 263 | // Try to coalesce pages to the left | ||
| 264 | coalesce_begin = it == begin ? 0 : std::prev(it)->upper(); | ||
| 265 | if (coalesce_begin != placeholder_begin) { | ||
| 266 | Coalesce(coalesce_begin, unmap_end - coalesce_begin); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | if (!split_right) { | ||
| 270 | // Try to coalesce pages to the right | ||
| 271 | const auto next = std::next(it); | ||
| 272 | const size_t next_begin = next == end ? virtual_size : next->lower(); | ||
| 273 | if (placeholder_end != next_begin) { | ||
| 274 | // We can coalesce to the right | ||
| 275 | Coalesce(coalesce_begin, next_begin - coalesce_begin); | ||
| 276 | } | ||
| 277 | } | ||
| 278 | // Remove and reinsert placeholder trackers | ||
| 279 | UntrackPlaceholder(it); | ||
| 280 | if (split_left) { | ||
| 281 | TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin); | ||
| 282 | } | ||
| 283 | if (split_right) { | ||
| 284 | TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin, | ||
| 285 | placeholder_end - unmap_end); | ||
| 286 | } | ||
| 287 | return true; | ||
| 288 | } | ||
| 289 | |||
| 290 | void MapView(size_t virtual_offset, size_t host_offset, size_t length) { | ||
| 291 | if (!pfn_MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset, | ||
| 292 | length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) { | ||
| 293 | LOG_CRITICAL(HW_Memory, "Failed to map placeholder"); | ||
| 294 | } | ||
| 295 | } | ||
| 296 | |||
| 297 | void Split(size_t virtual_offset, size_t length) { | ||
| 298 | if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, | ||
| 299 | MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) { | ||
| 300 | LOG_CRITICAL(HW_Memory, "Failed to split placeholder"); | ||
| 301 | } | ||
| 302 | } | ||
| 303 | |||
| 304 | void Coalesce(size_t virtual_offset, size_t length) { | ||
| 305 | if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length, | ||
| 306 | MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) { | ||
| 307 | LOG_CRITICAL(HW_Memory, "Failed to coalesce placeholders"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 311 | void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) { | ||
| 312 | placeholders.insert({virtual_offset, virtual_offset + length}); | ||
| 313 | placeholder_host_pointers.emplace(virtual_offset, host_offset); | ||
| 314 | } | ||
| 315 | |||
| 316 | void UntrackPlaceholder(boost::icl::separate_interval_set<size_t>::iterator it) { | ||
| 317 | placeholders.erase(it); | ||
| 318 | placeholder_host_pointers.erase(it->lower()); | ||
| 319 | } | ||
| 320 | |||
| 321 | /// Return true when a given memory region is a "nieche" and the placeholders don't have to be | ||
| 322 | /// split. | ||
| 323 | bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const { | ||
| 324 | const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length}); | ||
| 325 | if (it != placeholders.end() && it->lower() == virtual_offset + length) { | ||
| 326 | const bool is_root = it == placeholders.begin() && virtual_offset == 0; | ||
| 327 | return is_root || std::prev(it)->upper() == virtual_offset; | ||
| 328 | } | ||
| 329 | return false; | ||
| 330 | } | ||
| 331 | |||
| 332 | HANDLE process{}; ///< Current process handle | ||
| 333 | HANDLE backing_handle{}; ///< File based backing memory | ||
| 334 | |||
| 335 | DynamicLibrary kernelbase_dll; | ||
| 336 | PFN_CreateFileMapping2 pfn_CreateFileMapping2{}; | ||
| 337 | PFN_VirtualAlloc2 pfn_VirtualAlloc2{}; | ||
| 338 | PFN_MapViewOfFile3 pfn_MapViewOfFile3{}; | ||
| 339 | PFN_UnmapViewOfFile2 pfn_UnmapViewOfFile2{}; | ||
| 340 | |||
| 341 | std::mutex placeholder_mutex; ///< Mutex for placeholders | ||
| 342 | boost::icl::separate_interval_set<size_t> placeholders; ///< Mapped placeholders | ||
| 343 | std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset | ||
| 344 | }; | ||
| 345 | |||
| 346 | #elif defined(__linux__) // ^^^ Windows ^^^ vvv Linux vvv | ||
| 347 | |||
| 348 | class HostMemory::Impl { | ||
| 349 | public: | ||
| 350 | explicit Impl(size_t backing_size_, size_t virtual_size_) | ||
| 351 | : backing_size{backing_size_}, virtual_size{virtual_size_} { | ||
| 352 | bool good = false; | ||
| 353 | SCOPE_EXIT({ | ||
| 354 | if (!good) { | ||
| 355 | Release(); | ||
| 356 | } | ||
| 357 | }); | ||
| 358 | |||
| 359 | // Backing memory initialization | ||
| 360 | fd = memfd_create("HostMemory", 0); | ||
| 361 | if (fd == -1) { | ||
| 362 | LOG_CRITICAL(HW_Memory, "memfd_create failed: {}", strerror(errno)); | ||
| 363 | throw std::bad_alloc{}; | ||
| 364 | } | ||
| 365 | |||
| 366 | // ftruncate is specified to extend the file with zeros | ||
| 367 | int ret = ftruncate(fd, backing_size); | ||
| 368 | if (ret != 0) { | ||
| 369 | LOG_CRITICAL(HW_Memory, "ftruncate failed with {}, are you out-of-memory?", | ||
| 370 | strerror(errno)); | ||
| 371 | throw std::bad_alloc{}; | ||
| 372 | } | ||
| 373 | |||
| 374 | backing_base = static_cast<u8*>( | ||
| 375 | mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); | ||
| 376 | if (backing_base == MAP_FAILED) { | ||
| 377 | LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); | ||
| 378 | throw std::bad_alloc{}; | ||
| 379 | } | ||
| 380 | |||
| 381 | // Virtual memory initialization | ||
| 382 | virtual_base = static_cast<u8*>( | ||
| 383 | mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); | ||
| 384 | if (virtual_base == MAP_FAILED) { | ||
| 385 | LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); | ||
| 386 | throw std::bad_alloc{}; | ||
| 387 | } | ||
| 388 | |||
| 389 | good = true; | ||
| 390 | } | ||
| 391 | |||
| 392 | ~Impl() { | ||
| 393 | Release(); | ||
| 394 | } | ||
| 395 | |||
| 396 | void Map(size_t virtual_offset, size_t host_offset, size_t length) { | ||
| 397 | |||
| 398 | void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE, | ||
| 399 | MAP_SHARED | MAP_FIXED, fd, host_offset); | ||
| 400 | ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); | ||
| 401 | } | ||
| 402 | |||
| 403 | void Unmap(size_t virtual_offset, size_t length) { | ||
| 404 | // Despite the name, this does not release the virtual range. It remaps it as an | ||
| 405 | // anonymous PROT_NONE reservation so the address space stays claimed. | ||
| 406 | |||
| 407 | void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE, | ||
| 408 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); | ||
| 409 | ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); | ||
| 410 | } | ||
| 411 | |||
| 412 | void Protect(size_t virtual_offset, size_t length, bool read, bool write) { | ||
| 413 | int flags = 0; | ||
| 414 | if (read) { | ||
| 415 | flags |= PROT_READ; | ||
| 416 | } | ||
| 417 | if (write) { | ||
| 418 | flags |= PROT_WRITE; | ||
| 419 | } | ||
| 420 | int ret = mprotect(virtual_base + virtual_offset, length, flags); | ||
| 421 | ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); | ||
| 422 | } | ||
| 423 | |||
| 424 | const size_t backing_size; ///< Size of the backing memory in bytes | ||
| 425 | const size_t virtual_size; ///< Size of the virtual address placeholder in bytes | ||
| 426 | |||
| 427 | u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)}; | ||
| 428 | u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)}; | ||
| 429 | |||
| 430 | private: | ||
| 431 | /// Release all resources in the object | ||
| 432 | void Release() { | ||
| 433 | if (virtual_base != MAP_FAILED) { | ||
| 434 | int ret = munmap(virtual_base, virtual_size); | ||
| 435 | ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); | ||
| 436 | } | ||
| 437 | |||
| 438 | if (backing_base != MAP_FAILED) { | ||
| 439 | int ret = munmap(backing_base, backing_size); | ||
| 440 | ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); | ||
| 441 | } | ||
| 442 | |||
| 443 | if (fd != -1) { | ||
| 444 | int ret = close(fd); | ||
| 445 | ASSERT_MSG(ret == 0, "close failed: {}", strerror(errno)); | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create | ||
| 450 | }; | ||
| 451 | |||
| 452 | #else // ^^^ Linux ^^^ vvv Generic vvv | ||
| 453 | |||
| 454 | class HostMemory::Impl { | ||
| 455 | public: | ||
| 456 | explicit Impl(size_t /* backing_size */, size_t /* virtual_size */) { | ||
| 457 | // This is just a placeholder. | ||
| 458 | // Please implement fastmem in a proper way on your platform. | ||
| 459 | throw std::bad_alloc{}; | ||
| 460 | } | ||
| 461 | |||
| 462 | void Map(size_t virtual_offset, size_t host_offset, size_t length) {} | ||
| 463 | |||
| 464 | void Unmap(size_t virtual_offset, size_t length) {} | ||
| 465 | |||
| 466 | void Protect(size_t virtual_offset, size_t length, bool read, bool write) {} | ||
| 467 | |||
| 468 | u8* backing_base{nullptr}; | ||
| 469 | u8* virtual_base{nullptr}; | ||
| 470 | }; | ||
| 471 | |||
| 472 | #endif // ^^^ Generic ^^^ | ||
| 473 | |||
| 474 | HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_) | ||
| 475 | : backing_size(backing_size_), virtual_size(virtual_size_) { | ||
| 476 | try { | ||
| 477 | // Try to allocate a fastmem arena. | ||
| 478 | // The implementation will fail with std::bad_alloc on errors. | ||
| 479 | impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment), | ||
| 480 | AlignUp(virtual_size, PageAlignment) + | ||
| 481 | 3 * HugePageSize); | ||
| 482 | backing_base = impl->backing_base; | ||
| 483 | virtual_base = impl->virtual_base; | ||
| 484 | |||
| 485 | if (virtual_base) { | ||
| 486 | virtual_base += 2 * HugePageSize - 1; | ||
| 487 | virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1); | ||
| 488 | virtual_base_offset = virtual_base - impl->virtual_base; | ||
| 489 | } | ||
| 490 | |||
| 491 | } catch (const std::bad_alloc&) { | ||
| 492 | LOG_CRITICAL(HW_Memory, | ||
| 493 | "Fastmem unavailable, falling back to VirtualBuffer for memory allocation"); | ||
| 494 | fallback_buffer = std::make_unique<Common::VirtualBuffer<u8>>(backing_size); | ||
| 495 | backing_base = fallback_buffer->data(); | ||
| 496 | virtual_base = nullptr; | ||
| 497 | } | ||
| 498 | } | ||
| 499 | |||
| 500 | HostMemory::~HostMemory() = default; | ||
| 501 | |||
| 502 | HostMemory::HostMemory(HostMemory&&) noexcept = default; | ||
| 503 | |||
| 504 | HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default; | ||
| 505 | |||
| 506 | void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { | ||
| 507 | ASSERT(virtual_offset % PageAlignment == 0); | ||
| 508 | ASSERT(host_offset % PageAlignment == 0); | ||
| 509 | ASSERT(length % PageAlignment == 0); | ||
| 510 | ASSERT(virtual_offset + length <= virtual_size); | ||
| 511 | ASSERT(host_offset + length <= backing_size); | ||
| 512 | if (length == 0 || !virtual_base || !impl) { | ||
| 513 | return; | ||
| 514 | } | ||
| 515 | impl->Map(virtual_offset + virtual_base_offset, host_offset, length); | ||
| 516 | } | ||
| 517 | |||
| 518 | void HostMemory::Unmap(size_t virtual_offset, size_t length) { | ||
| 519 | ASSERT(virtual_offset % PageAlignment == 0); | ||
| 520 | ASSERT(length % PageAlignment == 0); | ||
| 521 | ASSERT(virtual_offset + length <= virtual_size); | ||
| 522 | if (length == 0 || !virtual_base || !impl) { | ||
| 523 | return; | ||
| 524 | } | ||
| 525 | impl->Unmap(virtual_offset + virtual_base_offset, length); | ||
| 526 | } | ||
| 527 | |||
| 528 | void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) { | ||
| 529 | ASSERT(virtual_offset % PageAlignment == 0); | ||
| 530 | ASSERT(length % PageAlignment == 0); | ||
| 531 | ASSERT(virtual_offset + length <= virtual_size); | ||
| 532 | if (length == 0 || !virtual_base || !impl) { | ||
| 533 | return; | ||
| 534 | } | ||
| 535 | impl->Protect(virtual_offset + virtual_base_offset, length, read, write); | ||
| 536 | } | ||
| 537 | |||
| 538 | } // namespace Common | ||
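The HostMemory constructor at the end of this file over-allocates the virtual arena by three 2 MiB huge pages, rounds the exposed base pointer up to a huge-page boundary, and records the offset so Map/Unmap/Protect can translate back into the implementation's coordinates. The rounded base always lands between one and two huge pages into the arena, which is why three pages of slack are reserved. A minimal sketch of that alignment arithmetic (illustrative only; AlignToHugePage is a hypothetical helper, not part of this patch):

    #include <cstddef>
    #include <cstdint>

    // u8 mirrors yuzu's common_types alias.
    using u8 = std::uint8_t;

    // Round 'base' up to the next 2 MiB boundary and report how far we moved,
    // mirroring the pointer arithmetic in HostMemory's constructor.
    u8* AlignToHugePage(u8* base, std::size_t& offset_out) {
        constexpr std::size_t HugePageSize = 0x200000; // 2 MiB
        u8* aligned = base + 2 * HugePageSize - 1;
        aligned -= reinterpret_cast<std::size_t>(aligned) & (HugePageSize - 1);
        offset_out = static_cast<std::size_t>(aligned - base); // between 1x and 2x HugePageSize
        return aligned;
    }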
diff --git a/src/common/host_memory.h b/src/common/host_memory.h new file mode 100644 index 000000000..9b8326d0f --- /dev/null +++ b/src/common/host_memory.h | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/virtual_buffer.h" | ||
| 10 | |||
| 11 | namespace Common { | ||
| 12 | |||
| 13 | /** | ||
| 14 | * A low-level linear memory buffer, which supports multiple mappings. | ||
| 15 | * Its purpose is to rebuild a given sparse memory layout, including mirrors. | ||
| 16 | */ | ||
| 17 | class HostMemory { | ||
| 18 | public: | ||
| 19 | explicit HostMemory(size_t backing_size_, size_t virtual_size_); | ||
| 20 | ~HostMemory(); | ||
| 21 | |||
| 22 | /** | ||
| 23 | * Copy constructors. They shall return a copy of the buffer without the mappings. | ||
| 24 | * TODO: Implement them with COW if needed. | ||
| 25 | */ | ||
| 26 | HostMemory(const HostMemory& other) = delete; | ||
| 27 | HostMemory& operator=(const HostMemory& other) = delete; | ||
| 28 | |||
| 29 | /** | ||
| 30 | * Move constructors. They will move the buffer and the mappings to the new object. | ||
| 31 | */ | ||
| 32 | HostMemory(HostMemory&& other) noexcept; | ||
| 33 | HostMemory& operator=(HostMemory&& other) noexcept; | ||
| 34 | |||
| 35 | void Map(size_t virtual_offset, size_t host_offset, size_t length); | ||
| 36 | |||
| 37 | void Unmap(size_t virtual_offset, size_t length); | ||
| 38 | |||
| 39 | void Protect(size_t virtual_offset, size_t length, bool read, bool write); | ||
| 40 | |||
| 41 | [[nodiscard]] u8* BackingBasePointer() noexcept { | ||
| 42 | return backing_base; | ||
| 43 | } | ||
| 44 | [[nodiscard]] const u8* BackingBasePointer() const noexcept { | ||
| 45 | return backing_base; | ||
| 46 | } | ||
| 47 | |||
| 48 | [[nodiscard]] u8* VirtualBasePointer() noexcept { | ||
| 49 | return virtual_base; | ||
| 50 | } | ||
| 51 | [[nodiscard]] const u8* VirtualBasePointer() const noexcept { | ||
| 52 | return virtual_base; | ||
| 53 | } | ||
| 54 | |||
| 55 | private: | ||
| 56 | size_t backing_size{}; | ||
| 57 | size_t virtual_size{}; | ||
| 58 | |||
| 59 | // Low level handler for the platform dependent memory routines | ||
| 60 | class Impl; | ||
| 61 | std::unique_ptr<Impl> impl; | ||
| 62 | u8* backing_base{}; | ||
| 63 | u8* virtual_base{}; | ||
| 64 | size_t virtual_base_offset{}; | ||
| 65 | |||
| 66 | // Fallback if fastmem is not supported on this platform | ||
| 67 | std::unique_ptr<Common::VirtualBuffer<u8>> fallback_buffer; | ||
| 68 | }; | ||
| 69 | |||
| 70 | } // namespace Common | ||
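As an illustration of this interface, a minimal usage sketch assuming a 4 GiB backing store and the 512 GiB arena used elsewhere in this change; the offsets mirror the "Simple mirror map" test added below, and MirrorExample is a hypothetical function:

    #include "common/host_memory.h"

    void MirrorExample() {
        Common::HostMemory mem(0x100000000ULL, 1ULL << 39); // 4 GiB backing, 512 GiB arena
        u8* const base = mem.VirtualBasePointer();
        if (base == nullptr) {
            return; // fastmem unavailable on this host, fallback buffer in use
        }
        mem.Map(0x5000, 0x3000, 0x2000); // virtual 0x5000..0x7000 -> backing 0x3000..0x5000
        mem.Map(0x8000, 0x4000, 0x1000); // virtual 0x8000..0x9000 -> backing 0x4000..0x5000
        base[0x8000] = 76;               // written through the second view...
        // ...and visible through the first one, one page in: base[0x6000] == 76
        mem.Unmap(0x8000, 0x1000);
        mem.Unmap(0x5000, 0x2000);
    }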
diff --git a/src/common/page_table.h b/src/common/page_table.h index e92b66b2b..8267e8b4d 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h | |||
| @@ -111,6 +111,8 @@ struct PageTable { | |||
| 111 | VirtualBuffer<u64> backing_addr; | 111 | VirtualBuffer<u64> backing_addr; |
| 112 | 112 | ||
| 113 | size_t current_address_space_width_in_bits; | 113 | size_t current_address_space_width_in_bits; |
| 114 | |||
| 115 | u8* fastmem_arena; | ||
| 114 | }; | 116 | }; |
| 115 | 117 | ||
| 116 | } // namespace Common | 118 | } // namespace Common |
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bcb4e4be1..360e878d6 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -90,6 +90,13 @@ bool IsGPULevelHigh() { | |||
| 90 | values.gpu_accuracy.GetValue() == GPUAccuracy::High; | 90 | values.gpu_accuracy.GetValue() == GPUAccuracy::High; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | bool IsFastmemEnabled() { | ||
| 94 | if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) { | ||
| 95 | return values.cpuopt_fastmem; | ||
| 96 | } | ||
| 97 | return true; | ||
| 98 | } | ||
| 99 | |||
| 93 | float Volume() { | 100 | float Volume() { |
| 94 | if (values.audio_muted) { | 101 | if (values.audio_muted) { |
| 95 | return 0.0f; | 102 | return 0.0f; |
| @@ -115,6 +122,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 115 | values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); | 122 | values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); |
| 116 | values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); | 123 | values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); |
| 117 | values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); | 124 | values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); |
| 125 | values.cpuopt_unsafe_fastmem_check.SetGlobal(true); | ||
| 118 | 126 | ||
| 119 | // Renderer | 127 | // Renderer |
| 120 | values.renderer_backend.SetGlobal(true); | 128 | values.renderer_backend.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index 48085b9a9..1af8c5ac2 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -125,10 +125,12 @@ struct Values { | |||
| 125 | bool cpuopt_const_prop; | 125 | bool cpuopt_const_prop; |
| 126 | bool cpuopt_misc_ir; | 126 | bool cpuopt_misc_ir; |
| 127 | bool cpuopt_reduce_misalign_checks; | 127 | bool cpuopt_reduce_misalign_checks; |
| 128 | bool cpuopt_fastmem; | ||
| 128 | 129 | ||
| 129 | Setting<bool> cpuopt_unsafe_unfuse_fma; | 130 | Setting<bool> cpuopt_unsafe_unfuse_fma; |
| 130 | Setting<bool> cpuopt_unsafe_reduce_fp_error; | 131 | Setting<bool> cpuopt_unsafe_reduce_fp_error; |
| 131 | Setting<bool> cpuopt_unsafe_inaccurate_nan; | 132 | Setting<bool> cpuopt_unsafe_inaccurate_nan; |
| 133 | Setting<bool> cpuopt_unsafe_fastmem_check; | ||
| 132 | 134 | ||
| 133 | // Renderer | 135 | // Renderer |
| 134 | Setting<RendererBackend> renderer_backend; | 136 | Setting<RendererBackend> renderer_backend; |
| @@ -249,6 +251,8 @@ void SetConfiguringGlobal(bool is_global); | |||
| 249 | bool IsGPULevelExtreme(); | 251 | bool IsGPULevelExtreme(); |
| 250 | bool IsGPULevelHigh(); | 252 | bool IsGPULevelHigh(); |
| 251 | 253 | ||
| 254 | bool IsFastmemEnabled(); | ||
| 255 | |||
| 252 | float Volume(); | 256 | float Volume(); |
| 253 | 257 | ||
| 254 | std::string GetTimeZoneString(); | 258 | std::string GetTimeZoneString(); |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index cea7f0fb1..c8f6dc765 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp | |||
| @@ -128,6 +128,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* | |||
| 128 | if (page_table) { | 128 | if (page_table) { |
| 129 | config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( | 129 | config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( |
| 130 | page_table->pointers.data()); | 130 | page_table->pointers.data()); |
| 131 | config.fastmem_pointer = page_table->fastmem_arena; | ||
| 131 | } | 132 | } |
| 132 | config.absolute_offset_page_table = true; | 133 | config.absolute_offset_page_table = true; |
| 133 | config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; | 134 | config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; |
| @@ -143,7 +144,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* | |||
| 143 | 144 | ||
| 144 | // Code cache size | 145 | // Code cache size |
| 145 | config.code_cache_size = 512 * 1024 * 1024; | 146 | config.code_cache_size = 512 * 1024 * 1024; |
| 146 | config.far_code_offset = 256 * 1024 * 1024; | 147 | config.far_code_offset = 400 * 1024 * 1024; |
| 147 | 148 | ||
| 148 | // Safe optimizations | 149 | // Safe optimizations |
| 149 | if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { | 150 | if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { |
| @@ -171,6 +172,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* | |||
| 171 | if (!Settings::values.cpuopt_reduce_misalign_checks) { | 172 | if (!Settings::values.cpuopt_reduce_misalign_checks) { |
| 172 | config.only_detect_misalignment_via_page_table_on_page_boundary = false; | 173 | config.only_detect_misalignment_via_page_table_on_page_boundary = false; |
| 173 | } | 174 | } |
| 175 | if (!Settings::values.cpuopt_fastmem) { | ||
| 176 | config.fastmem_pointer = nullptr; | ||
| 177 | } | ||
| 174 | } | 178 | } |
| 175 | 179 | ||
| 176 | // Unsafe optimizations | 180 | // Unsafe optimizations |
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 63193dcb1..ba524cd05 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp | |||
| @@ -160,6 +160,10 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* | |||
| 160 | config.absolute_offset_page_table = true; | 160 | config.absolute_offset_page_table = true; |
| 161 | config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; | 161 | config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; |
| 162 | config.only_detect_misalignment_via_page_table_on_page_boundary = true; | 162 | config.only_detect_misalignment_via_page_table_on_page_boundary = true; |
| 163 | |||
| 164 | config.fastmem_pointer = page_table->fastmem_arena; | ||
| 165 | config.fastmem_address_space_bits = address_space_bits; | ||
| 166 | config.silently_mirror_fastmem = false; | ||
| 163 | } | 167 | } |
| 164 | 168 | ||
| 165 | // Multi-process state | 169 | // Multi-process state |
| @@ -181,7 +185,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* | |||
| 181 | 185 | ||
| 182 | // Code cache size | 186 | // Code cache size |
| 183 | config.code_cache_size = 512 * 1024 * 1024; | 187 | config.code_cache_size = 512 * 1024 * 1024; |
| 184 | config.far_code_offset = 256 * 1024 * 1024; | 188 | config.far_code_offset = 400 * 1024 * 1024; |
| 185 | 189 | ||
| 186 | // Safe optimizations | 190 | // Safe optimizations |
| 187 | if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { | 191 | if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { |
| @@ -209,6 +213,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* | |||
| 209 | if (!Settings::values.cpuopt_reduce_misalign_checks) { | 213 | if (!Settings::values.cpuopt_reduce_misalign_checks) { |
| 210 | config.only_detect_misalignment_via_page_table_on_page_boundary = false; | 214 | config.only_detect_misalignment_via_page_table_on_page_boundary = false; |
| 211 | } | 215 | } |
| 216 | if (!Settings::values.cpuopt_fastmem) { | ||
| 217 | config.fastmem_pointer = nullptr; | ||
| 218 | } | ||
| 212 | } | 219 | } |
| 213 | 220 | ||
| 214 | // Unsafe optimizations | 221 | // Unsafe optimizations |
| @@ -223,6 +230,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* | |||
| 223 | if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { | 230 | if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { |
| 224 | config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; | 231 | config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; |
| 225 | } | 232 | } |
| 233 | if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) { | ||
| 234 | config.fastmem_address_space_bits = 64; | ||
| 235 | } | ||
| 226 | } | 236 | } |
| 227 | 237 | ||
| 228 | return std::make_shared<Dynarmic::A64::Jit>(config); | 238 | return std::make_shared<Dynarmic::A64::Jit>(config); |
diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp index 0c4b440ed..f19c0515f 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | namespace Core { | 7 | namespace Core { |
| 8 | 8 | ||
| 9 | DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size} {} | 9 | DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {} |
| 10 | DeviceMemory::~DeviceMemory() = default; | 10 | DeviceMemory::~DeviceMemory() = default; |
| 11 | 11 | ||
| 12 | } // namespace Core | 12 | } // namespace Core |
diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 5b1ae28f3..c4d17705f 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "common/virtual_buffer.h" | 8 | #include "common/host_memory.h" |
| 9 | 9 | ||
| 10 | namespace Core { | 10 | namespace Core { |
| 11 | 11 | ||
| @@ -21,27 +21,30 @@ enum : u64 { | |||
| 21 | }; | 21 | }; |
| 22 | }; // namespace DramMemoryMap | 22 | }; // namespace DramMemoryMap |
| 23 | 23 | ||
| 24 | class DeviceMemory : NonCopyable { | 24 | class DeviceMemory { |
| 25 | public: | 25 | public: |
| 26 | explicit DeviceMemory(); | 26 | explicit DeviceMemory(); |
| 27 | ~DeviceMemory(); | 27 | ~DeviceMemory(); |
| 28 | 28 | ||
| 29 | DeviceMemory& operator=(const DeviceMemory&) = delete; | ||
| 30 | DeviceMemory(const DeviceMemory&) = delete; | ||
| 31 | |||
| 29 | template <typename T> | 32 | template <typename T> |
| 30 | PAddr GetPhysicalAddr(const T* ptr) const { | 33 | PAddr GetPhysicalAddr(const T* ptr) const { |
| 31 | return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) + | 34 | return (reinterpret_cast<uintptr_t>(ptr) - |
| 35 | reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())) + | ||
| 32 | DramMemoryMap::Base; | 36 | DramMemoryMap::Base; |
| 33 | } | 37 | } |
| 34 | 38 | ||
| 35 | u8* GetPointer(PAddr addr) { | 39 | u8* GetPointer(PAddr addr) { |
| 36 | return buffer.data() + (addr - DramMemoryMap::Base); | 40 | return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); |
| 37 | } | 41 | } |
| 38 | 42 | ||
| 39 | const u8* GetPointer(PAddr addr) const { | 43 | const u8* GetPointer(PAddr addr) const { |
| 40 | return buffer.data() + (addr - DramMemoryMap::Base); | 44 | return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base); |
| 41 | } | 45 | } |
| 42 | 46 | ||
| 43 | private: | 47 | Common::HostMemory buffer; |
| 44 | Common::VirtualBuffer<u8> buffer; | ||
| 45 | }; | 48 | }; |
| 46 | 49 | ||
| 47 | } // namespace Core | 50 | } // namespace Core |
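A short sketch of the round trip the two accessors above implement; RoundTripExample is a hypothetical function and the 0x1000 offset is arbitrary:

    #include "core/device_memory.h"

    // A guest physical address maps to a host pointer and back again.
    void RoundTripExample(Core::DeviceMemory& dram) {
        u8* const ptr = dram.GetPointer(Core::DramMemoryMap::Base + 0x1000);
        const PAddr physical = dram.GetPhysicalAddr(ptr); // == DramMemoryMap::Base + 0x1000
        (void)physical;
    }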
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9857278f6..f285c6f63 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "common/page_table.h" | 14 | #include "common/page_table.h" |
| 15 | #include "common/settings.h" | ||
| 15 | #include "common/swap.h" | 16 | #include "common/swap.h" |
| 16 | #include "core/arm/arm_interface.h" | 17 | #include "core/arm/arm_interface.h" |
| 17 | #include "core/core.h" | 18 | #include "core/core.h" |
| @@ -32,6 +33,7 @@ struct Memory::Impl { | |||
| 32 | 33 | ||
| 33 | void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { | 34 | void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { |
| 34 | current_page_table = &process.PageTable().PageTableImpl(); | 35 | current_page_table = &process.PageTable().PageTableImpl(); |
| 36 | current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); | ||
| 35 | 37 | ||
| 36 | const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); | 38 | const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); |
| 37 | 39 | ||
| @@ -41,13 +43,23 @@ struct Memory::Impl { | |||
| 41 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { | 43 | void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { |
| 42 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); | 44 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); |
| 43 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); | 45 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); |
| 46 | ASSERT_MSG(target >= DramMemoryMap::Base && target < DramMemoryMap::End, | ||
| 47 | "Out of bounds target: {:016X}", target); | ||
| 44 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); | 48 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); |
| 49 | |||
| 50 | if (Settings::IsFastmemEnabled()) { | ||
| 51 | system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size); | ||
| 52 | } | ||
| 45 | } | 53 | } |
| 46 | 54 | ||
| 47 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { | 55 | void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { |
| 48 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); | 56 | ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); |
| 49 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); | 57 | ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); |
| 50 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); | 58 | MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); |
| 59 | |||
| 60 | if (Settings::IsFastmemEnabled()) { | ||
| 61 | system.DeviceMemory().buffer.Unmap(base, size); | ||
| 62 | } | ||
| 51 | } | 63 | } |
| 52 | 64 | ||
| 53 | bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { | 65 | bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { |
| @@ -466,6 +478,12 @@ struct Memory::Impl { | |||
| 466 | if (vaddr == 0) { | 478 | if (vaddr == 0) { |
| 467 | return; | 479 | return; |
| 468 | } | 480 | } |
| 481 | |||
| 482 | if (Settings::IsFastmemEnabled()) { | ||
| 483 | const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; | ||
| 484 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); | ||
| 485 | } | ||
| 486 | |||
| 469 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU | 487 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU |
| 470 | // address space, marking the region as un/cached. The region is marked un/cached at a | 488 | // address space, marking the region as un/cached. The region is marked un/cached at a |
| 471 | // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size | 489 | // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size |
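The new Protect call above encodes GPU cache state into host page protections: cached regions lose write access so CPU writes trap, and lose read access as well unless the GPU accuracy level is High or above. A condensed sketch of that decision (ApplyCacheProtection is a hypothetical helper, not part of the patch):

    // 'buffer' is the DeviceMemory HostMemory arena used in the hunk above.
    void ApplyCacheProtection(Common::HostMemory& buffer, VAddr vaddr, u64 size, bool cached) {
        // Cached regions must trap CPU writes so GPU caches can be invalidated;
        // reads stay permitted only when the GPU accuracy level is High or above.
        const bool read_enable = Settings::IsGPULevelHigh() || !cached;
        buffer.Protect(vaddr, size, read_enable, !cached);
    }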
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index d875c4fee..96bc30cac 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -2,6 +2,7 @@ add_executable(tests | |||
| 2 | common/bit_field.cpp | 2 | common/bit_field.cpp |
| 3 | common/cityhash.cpp | 3 | common/cityhash.cpp |
| 4 | common/fibers.cpp | 4 | common/fibers.cpp |
| 5 | common/host_memory.cpp | ||
| 5 | common/param_package.cpp | 6 | common/param_package.cpp |
| 6 | common/ring_buffer.cpp | 7 | common/ring_buffer.cpp |
| 7 | core/core_timing.cpp | 8 | core/core_timing.cpp |
diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp new file mode 100644 index 000000000..e241f8be5 --- /dev/null +++ b/src/tests/common/host_memory.cpp | |||
| @@ -0,0 +1,183 @@ | |||
| 1 | // Copyright 2021 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <catch2/catch.hpp> | ||
| 6 | |||
| 7 | #include "common/host_memory.h" | ||
| 8 | |||
| 9 | using Common::HostMemory; | ||
| 10 | |||
| 11 | static constexpr size_t VIRTUAL_SIZE = 1ULL << 39; | ||
| 12 | static constexpr size_t BACKING_SIZE = 4ULL * 1024 * 1024 * 1024; | ||
| 13 | |||
| 14 | TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") { | ||
| 15 | { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } | ||
| 16 | { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } | ||
| 17 | } | ||
| 18 | |||
| 19 | TEST_CASE("HostMemory: Simple map", "[common]") { | ||
| 20 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 21 | mem.Map(0x5000, 0x8000, 0x1000); | ||
| 22 | |||
| 23 | volatile u8* const data = mem.VirtualBasePointer() + 0x5000; | ||
| 24 | data[0] = 50; | ||
| 25 | REQUIRE(data[0] == 50); | ||
| 26 | } | ||
| 27 | |||
| 28 | TEST_CASE("HostMemory: Simple mirror map", "[common]") { | ||
| 29 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 30 | mem.Map(0x5000, 0x3000, 0x2000); | ||
| 31 | mem.Map(0x8000, 0x4000, 0x1000); | ||
| 32 | |||
| 33 | volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000; | ||
| 34 | volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000; | ||
| 35 | mirror_b[0] = 76; | ||
| 36 | REQUIRE(mirror_a[0x1000] == 76); | ||
| 37 | } | ||
| 38 | |||
| 39 | TEST_CASE("HostMemory: Simple unmap", "[common]") { | ||
| 40 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 41 | mem.Map(0x5000, 0x3000, 0x2000); | ||
| 42 | |||
| 43 | volatile u8* const data = mem.VirtualBasePointer() + 0x5000; | ||
| 44 | data[75] = 50; | ||
| 45 | REQUIRE(data[75] == 50); | ||
| 46 | |||
| 47 | mem.Unmap(0x5000, 0x2000); | ||
| 48 | } | ||
| 49 | |||
| 50 | TEST_CASE("HostMemory: Simple unmap and remap", "[common]") { | ||
| 51 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 52 | mem.Map(0x5000, 0x3000, 0x2000); | ||
| 53 | |||
| 54 | volatile u8* const data = mem.VirtualBasePointer() + 0x5000; | ||
| 55 | data[0] = 50; | ||
| 56 | REQUIRE(data[0] == 50); | ||
| 57 | |||
| 58 | mem.Unmap(0x5000, 0x2000); | ||
| 59 | |||
| 60 | mem.Map(0x5000, 0x3000, 0x2000); | ||
| 61 | REQUIRE(data[0] == 50); | ||
| 62 | |||
| 63 | mem.Map(0x7000, 0x2000, 0x5000); | ||
| 64 | REQUIRE(data[0x3000] == 50); | ||
| 65 | } | ||
| 66 | |||
| 67 | TEST_CASE("HostMemory: Nieche allocation", "[common]") { | ||
| 68 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 69 | mem.Map(0x0000, 0, 0x20000); | ||
| 70 | mem.Unmap(0x0000, 0x4000); | ||
| 71 | mem.Map(0x1000, 0, 0x2000); | ||
| 72 | mem.Map(0x3000, 0, 0x1000); | ||
| 73 | mem.Map(0, 0, 0x1000); | ||
| 74 | } | ||
| 75 | |||
| 76 | TEST_CASE("HostMemory: Full unmap", "[common]") { | ||
| 77 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 78 | mem.Map(0x8000, 0, 0x4000); | ||
| 79 | mem.Unmap(0x8000, 0x4000); | ||
| 80 | mem.Map(0x6000, 0, 0x16000); | ||
| 81 | } | ||
| 82 | |||
| 83 | TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") { | ||
| 84 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 85 | mem.Map(0x0000, 0, 0x4000); | ||
| 86 | mem.Unmap(0x2000, 0x4000); | ||
| 87 | mem.Map(0x2000, 0x80000, 0x4000); | ||
| 88 | } | ||
| 89 | |||
| 90 | TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") { | ||
| 91 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 92 | mem.Map(0x8000, 0, 0x4000); | ||
| 93 | mem.Unmap(0x6000, 0x4000); | ||
| 94 | mem.Map(0x8000, 0, 0x2000); | ||
| 95 | } | ||
| 96 | |||
| 97 | TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") { | ||
| 98 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 99 | mem.Map(0x0000, 0, 0x4000); | ||
| 100 | mem.Map(0x4000, 0, 0x1b000); | ||
| 101 | mem.Unmap(0x3000, 0x1c000); | ||
| 102 | mem.Map(0x3000, 0, 0x20000); | ||
| 103 | } | ||
| 104 | |||
| 105 | TEST_CASE("HostMemory: Unmap between placeholders", "[common]") { | ||
| 106 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 107 | mem.Map(0x0000, 0, 0x4000); | ||
| 108 | mem.Map(0x4000, 0, 0x4000); | ||
| 109 | mem.Unmap(0x2000, 0x4000); | ||
| 110 | mem.Map(0x2000, 0, 0x4000); | ||
| 111 | } | ||
| 112 | |||
| 113 | TEST_CASE("HostMemory: Unmap to origin", "[common]") { | ||
| 114 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 115 | mem.Map(0x4000, 0, 0x4000); | ||
| 116 | mem.Map(0x8000, 0, 0x4000); | ||
| 117 | mem.Unmap(0x4000, 0x4000); | ||
| 118 | mem.Map(0, 0, 0x4000); | ||
| 119 | mem.Map(0x4000, 0, 0x4000); | ||
| 120 | } | ||
| 121 | |||
| 122 | TEST_CASE("HostMemory: Unmap to right", "[common]") { | ||
| 123 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 124 | mem.Map(0x4000, 0, 0x4000); | ||
| 125 | mem.Map(0x8000, 0, 0x4000); | ||
| 126 | mem.Unmap(0x8000, 0x4000); | ||
| 127 | mem.Map(0x8000, 0, 0x4000); | ||
| 128 | } | ||
| 129 | |||
| 130 | TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") { | ||
| 131 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 132 | mem.Map(0x4000, 0x10000, 0x4000); | ||
| 133 | |||
| 134 | volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; | ||
| 135 | ptr[0x1000] = 17; | ||
| 136 | |||
| 137 | mem.Unmap(0x6000, 0x2000); | ||
| 138 | |||
| 139 | REQUIRE(ptr[0x1000] == 17); | ||
| 140 | } | ||
| 141 | |||
| 142 | TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") { | ||
| 143 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 144 | mem.Map(0x4000, 0x10000, 0x4000); | ||
| 145 | |||
| 146 | volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; | ||
| 147 | ptr[0x3000] = 19; | ||
| 148 | ptr[0x3fff] = 12; | ||
| 149 | |||
| 150 | mem.Unmap(0x4000, 0x2000); | ||
| 151 | |||
| 152 | REQUIRE(ptr[0x3000] == 19); | ||
| 153 | REQUIRE(ptr[0x3fff] == 12); | ||
| 154 | } | ||
| 155 | |||
| 156 | TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") { | ||
| 157 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 158 | mem.Map(0x4000, 0x10000, 0x4000); | ||
| 159 | |||
| 160 | volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; | ||
| 161 | ptr[0x0000] = 19; | ||
| 162 | ptr[0x3fff] = 12; | ||
| 163 | |||
| 164 | mem.Unmap(0x1000, 0x2000); | ||
| 165 | |||
| 166 | REQUIRE(ptr[0x0000] == 19); | ||
| 167 | REQUIRE(ptr[0x3fff] == 12); | ||
| 168 | } | ||
| 169 | |||
| 170 | TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") { | ||
| 171 | HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); | ||
| 172 | mem.Map(0x4000, 0x10000, 0x2000); | ||
| 173 | mem.Map(0x6000, 0x20000, 0x2000); | ||
| 174 | |||
| 175 | volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; | ||
| 176 | ptr[0x0000] = 19; | ||
| 177 | ptr[0x3fff] = 12; | ||
| 178 | |||
| 179 | mem.Unmap(0x5000, 0x2000); | ||
| 180 | |||
| 181 | REQUIRE(ptr[0x0000] == 19); | ||
| 182 | REQUIRE(ptr[0x3fff] == 12); | ||
| 183 | } | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index cd1fbb9bf..46f642b19 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -99,25 +99,13 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 99 | PushCommand(FlushRegionCommand(addr, size)); | 99 | PushCommand(FlushRegionCommand(addr, size)); |
| 100 | return; | 100 | return; |
| 101 | } | 101 | } |
| 102 | 102 | if (!Settings::IsGPULevelExtreme()) { | |
| 103 | // Asynchronous GPU mode | 103 | return; |
| 104 | switch (Settings::values.gpu_accuracy.GetValue()) { | ||
| 105 | case Settings::GPUAccuracy::Normal: | ||
| 106 | PushCommand(FlushRegionCommand(addr, size)); | ||
| 107 | break; | ||
| 108 | case Settings::GPUAccuracy::High: | ||
| 109 | // TODO(bunnei): Is this right? Preserving existing behavior for now | ||
| 110 | break; | ||
| 111 | case Settings::GPUAccuracy::Extreme: { | ||
| 112 | auto& gpu = system.GPU(); | ||
| 113 | u64 fence = gpu.RequestFlush(addr, size); | ||
| 114 | PushCommand(GPUTickCommand(), true); | ||
| 115 | ASSERT(fence <= gpu.CurrentFlushRequestFence()); | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | default: | ||
| 119 | UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); | ||
| 120 | } | 104 | } |
| 105 | auto& gpu = system.GPU(); | ||
| 106 | u64 fence = gpu.RequestFlush(addr, size); | ||
| 107 | PushCommand(GPUTickCommand(), true); | ||
| 108 | ASSERT(fence <= gpu.CurrentFlushRequestFence()); | ||
| 121 | } | 109 | } |
| 122 | 110 | ||
| 123 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 111 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 6decd2546..4c9524702 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | |||
| 5 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 6 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 7 | #include "common/div_ceil.h" | 9 | #include "common/div_ceil.h" |
| @@ -10,35 +12,59 @@ | |||
| 10 | 12 | ||
| 11 | namespace VideoCore { | 13 | namespace VideoCore { |
| 12 | 14 | ||
| 13 | RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) | 15 | using namespace Core::Memory; |
| 14 | : cpu_memory{cpu_memory_} {} | 16 | |
| 17 | RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} | ||
| 15 | 18 | ||
| 16 | RasterizerAccelerated::~RasterizerAccelerated() = default; | 19 | RasterizerAccelerated::~RasterizerAccelerated() = default; |
| 17 | 20 | ||
| 18 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 21 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 19 | const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE); | 22 | u64 uncache_begin = 0; |
| 20 | for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { | 23 | u64 cache_begin = 0; |
| 21 | auto& count = cached_pages.at(page >> 2).Count(page); | 24 | u64 uncache_bytes = 0; |
| 25 | u64 cache_bytes = 0; | ||
| 26 | |||
| 27 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 28 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); | ||
| 29 | for (u64 page = addr >> PAGE_BITS; page != page_end; ++page) { | ||
| 30 | std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page); | ||
| 22 | 31 | ||
| 23 | if (delta > 0) { | 32 | if (delta > 0) { |
| 24 | ASSERT_MSG(count < UINT16_MAX, "Count may overflow!"); | 33 | ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); |
| 25 | } else if (delta < 0) { | 34 | } else if (delta < 0) { |
| 26 | ASSERT_MSG(count > 0, "Count may underflow!"); | 35 | ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); |
| 27 | } else { | 36 | } else { |
| 28 | ASSERT_MSG(true, "Delta must be non-zero!"); | 37 | ASSERT_MSG(false, "Delta must be non-zero!"); |
| 29 | } | 38 | } |
| 30 | 39 | ||
| 31 | // Adds or subtracts 1, as count is an unsigned 16-bit value | 40 | ||
| 32 | count += static_cast<u16>(delta); | 41 | count.fetch_add(static_cast<u16>(delta), std::memory_order_release); |
| 33 | 42 | ||
| 34 | // Assume delta is either -1 or 1 | 43 | // Assume delta is either -1 or 1 |
| 35 | if (count == 0) { | 44 | if (count.load(std::memory_order::relaxed) == 0) { |
| 36 | cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, | 45 | if (uncache_bytes == 0) { |
| 37 | Core::Memory::PAGE_SIZE, false); | 46 | uncache_begin = page; |
| 38 | } else if (count == 1 && delta > 0) { | 47 | } |
| 39 | cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, | 48 | uncache_bytes += PAGE_SIZE; |
| 40 | Core::Memory::PAGE_SIZE, true); | 49 | } else if (uncache_bytes > 0) { |
| 50 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false); | ||
| 51 | uncache_bytes = 0; | ||
| 41 | } | 52 | } |
| 53 | if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | ||
| 54 | if (cache_bytes == 0) { | ||
| 55 | cache_begin = page; | ||
| 56 | } | ||
| 57 | cache_bytes += PAGE_SIZE; | ||
| 58 | } else if (cache_bytes > 0) { | ||
| 59 | cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true); | ||
| 60 | cache_bytes = 0; | ||
| 61 | } | ||
| 62 | } | ||
| 63 | if (uncache_bytes > 0) { | ||
| 64 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false); | ||
| 65 | } | ||
| 66 | if (cache_bytes > 0) { | ||
| 67 | cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true); | ||
| 42 | } | 68 | } |
| 43 | } | 69 | } |
| 44 | 70 | ||
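The rewritten UpdatePagesCachedCount above batches contiguous page transitions so RasterizerMarkRegionCached is invoked once per run of pages instead of once per page. Below is a minimal sketch of that run-length batching under simplified assumptions: plain u16 counters indexed directly by page and a stand-in MarkRegionCached, in place of the atomic counters and Core::Memory::Memory API used by the real code.

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr std::uint64_t PAGE_BITS = 12;
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

// Stand-in for cpu_memory.RasterizerMarkRegionCached.
void MarkRegionCached(std::uint64_t addr, std::uint64_t size, bool cached) {
    std::printf("mark addr=%#llx size=%#llx cached=%d\n",
                static_cast<unsigned long long>(addr),
                static_cast<unsigned long long>(size), cached);
}

// Applies `delta` (+1/-1) to each page's reference count and merges contiguous
// cache/uncache transitions into single MarkRegionCached calls, following the
// cache_bytes/uncache_bytes runs in the loop above.
void UpdatePagesCachedCount(std::vector<std::uint16_t>& counts, std::uint64_t first_page,
                            std::uint64_t num_pages, int delta) {
    std::uint64_t uncache_begin = 0, cache_begin = 0;
    std::uint64_t uncache_bytes = 0, cache_bytes = 0;
    for (std::uint64_t page = first_page; page != first_page + num_pages; ++page) {
        std::uint16_t& count = counts.at(page);
        count = static_cast<std::uint16_t>(count + delta);
        if (count == 0) { // last user gone: page leaves the cached state
            if (uncache_bytes == 0) {
                uncache_begin = page;
            }
            uncache_bytes += PAGE_SIZE;
        } else if (uncache_bytes > 0) {
            MarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
            uncache_bytes = 0;
        }
        if (count == 1 && delta > 0) { // first user: page enters the cached state
            if (cache_bytes == 0) {
                cache_begin = page;
            }
            cache_bytes += PAGE_SIZE;
        } else if (cache_bytes > 0) {
            MarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
            cache_bytes = 0;
        }
    }
    if (uncache_bytes > 0) {
        MarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
    }
    if (cache_bytes > 0) {
        MarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
    }
}

int main() {
    std::vector<std::uint16_t> counts(8, 0);
    UpdatePagesCachedCount(counts, 2, 4, +1); // one "cached" call covering four pages
    UpdatePagesCachedCount(counts, 2, 4, -1); // one "uncached" call covering four pages
}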
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9d4bef60..a59b36e13 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -756,6 +756,8 @@ void Config::ReadCpuValues() { | |||
| 756 | QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); | 756 | QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); |
| 757 | ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, | 757 | ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, |
| 758 | QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); | 758 | QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); |
| 759 | ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check, | ||
| 760 | QStringLiteral("cpuopt_unsafe_fastmem_check"), true); | ||
| 759 | 761 | ||
| 760 | if (global) { | 762 | if (global) { |
| 761 | Settings::values.cpuopt_page_tables = | 763 | Settings::values.cpuopt_page_tables = |
| @@ -774,6 +776,8 @@ void Config::ReadCpuValues() { | |||
| 774 | ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool(); | 776 | ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool(); |
| 775 | Settings::values.cpuopt_reduce_misalign_checks = | 777 | Settings::values.cpuopt_reduce_misalign_checks = |
| 776 | ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool(); | 778 | ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool(); |
| 779 | Settings::values.cpuopt_fastmem = | ||
| 780 | ReadSetting(QStringLiteral("cpuopt_fastmem"), true).toBool(); | ||
| 777 | } | 781 | } |
| 778 | 782 | ||
| 779 | qt_config->endGroup(); | 783 | qt_config->endGroup(); |
| @@ -1332,6 +1336,8 @@ void Config::SaveCpuValues() { | |||
| 1332 | Settings::values.cpuopt_unsafe_reduce_fp_error, true); | 1336 | Settings::values.cpuopt_unsafe_reduce_fp_error, true); |
| 1333 | WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), | 1337 | WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), |
| 1334 | Settings::values.cpuopt_unsafe_inaccurate_nan, true); | 1338 | Settings::values.cpuopt_unsafe_inaccurate_nan, true); |
| 1339 | WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"), | ||
| 1340 | Settings::values.cpuopt_unsafe_fastmem_check, true); | ||
| 1335 | 1341 | ||
| 1336 | if (global) { | 1342 | if (global) { |
| 1337 | WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables, | 1343 | WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables, |
| @@ -1348,6 +1354,7 @@ void Config::SaveCpuValues() { | |||
| 1348 | WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true); | 1354 | WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true); |
| 1349 | WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), | 1355 | WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), |
| 1350 | Settings::values.cpuopt_reduce_misalign_checks, true); | 1356 | Settings::values.cpuopt_reduce_misalign_checks, true); |
| 1357 | WriteSetting(QStringLiteral("cpuopt_fastmem"), Settings::values.cpuopt_fastmem, true); | ||
| 1351 | } | 1358 | } |
| 1352 | 1359 | ||
| 1353 | qt_config->endGroup(); | 1360 | qt_config->endGroup(); |
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 525c42ff0..22219cbad 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp | |||
| @@ -35,12 +35,15 @@ void ConfigureCpu::SetConfiguration() { | |||
| 35 | ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); | 35 | ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); |
| 36 | ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); | 36 | ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); |
| 37 | ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); | 37 | ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); |
| 38 | ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); | ||
| 38 | 39 | ||
| 39 | ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); | 40 | ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); |
| 40 | ui->cpuopt_unsafe_reduce_fp_error->setChecked( | 41 | ui->cpuopt_unsafe_reduce_fp_error->setChecked( |
| 41 | Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); | 42 | Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); |
| 42 | ui->cpuopt_unsafe_inaccurate_nan->setChecked( | 43 | ui->cpuopt_unsafe_inaccurate_nan->setChecked( |
| 43 | Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); | 44 | Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); |
| 45 | ui->cpuopt_unsafe_fastmem_check->setChecked( | ||
| 46 | Settings::values.cpuopt_unsafe_fastmem_check.GetValue()); | ||
| 44 | 47 | ||
| 45 | if (Settings::IsConfiguringGlobal()) { | 48 | if (Settings::IsConfiguringGlobal()) { |
| 46 | ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); | 49 | ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); |
| @@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() { | |||
| 84 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, | 87 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, |
| 85 | ui->cpuopt_unsafe_inaccurate_nan, | 88 | ui->cpuopt_unsafe_inaccurate_nan, |
| 86 | cpuopt_unsafe_inaccurate_nan); | 89 | cpuopt_unsafe_inaccurate_nan); |
| 90 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check, | ||
| 91 | ui->cpuopt_unsafe_fastmem_check, | ||
| 92 | cpuopt_unsafe_fastmem_check); | ||
| 87 | 93 | ||
| 88 | if (Settings::IsConfiguringGlobal()) { | 94 | if (Settings::IsConfiguringGlobal()) { |
| 89 | // Guard if during game and set to game-specific value | 95 | // Guard if during game and set to game-specific value |
| @@ -134,4 +140,7 @@ void ConfigureCpu::SetupPerGameUI() { | |||
| 134 | ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, | 140 | ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, |
| 135 | Settings::values.cpuopt_unsafe_inaccurate_nan, | 141 | Settings::values.cpuopt_unsafe_inaccurate_nan, |
| 136 | cpuopt_unsafe_inaccurate_nan); | 142 | cpuopt_unsafe_inaccurate_nan); |
| 143 | ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check, | ||
| 144 | Settings::values.cpuopt_unsafe_fastmem_check, | ||
| 145 | cpuopt_unsafe_fastmem_check); | ||
| 137 | } | 146 | } |
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index 8e2eeb7a6..57ff2772a 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h | |||
| @@ -41,4 +41,5 @@ private: | |||
| 41 | ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; | 41 | ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; |
| 42 | ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; | 42 | ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; |
| 43 | ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; | 43 | ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; |
| 44 | ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; | ||
| 44 | }; | 45 | }; |
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 99b573640..31ef9e3f5 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui | |||
| @@ -123,6 +123,18 @@ | |||
| 123 | </property> | 123 | </property> |
| 124 | </widget> | 124 | </widget> |
| 125 | </item> | 125 | </item> |
| 126 | <item> | ||
| 127 | <widget class="QCheckBox" name="cpuopt_unsafe_fastmem_check"> | ||
| 128 | <property name="toolTip"> | ||
| 129 | <string> | ||
| 130 | <div>This option improves speed by eliminating a safety check before every memory read/write in guest. Disabling it may allow a game to read/write the emulator's memory.</div> | ||
| 131 | </string> | ||
| 132 | </property> | ||
| 133 | <property name="text"> | ||
| 134 | <string>Disable address space checks</string> | ||
| 135 | </property> | ||
| 136 | </widget> | ||
| 137 | </item> | ||
| 126 | </layout> | 138 | </layout> |
| 127 | </widget> | 139 | </widget> |
| 128 | </item> | 140 | </item> |
diff --git a/src/yuzu/configuration/configure_cpu_debug.cpp b/src/yuzu/configuration/configure_cpu_debug.cpp index c925c023c..e25c52baf 100644 --- a/src/yuzu/configuration/configure_cpu_debug.cpp +++ b/src/yuzu/configuration/configure_cpu_debug.cpp | |||
| @@ -39,6 +39,8 @@ void ConfigureCpuDebug::SetConfiguration() { | |||
| 39 | ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir); | 39 | ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir); |
| 40 | ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock); | 40 | ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock); |
| 41 | ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks); | 41 | ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks); |
| 42 | ui->cpuopt_fastmem->setEnabled(runtime_lock); | ||
| 43 | ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem); | ||
| 42 | } | 44 | } |
| 43 | 45 | ||
| 44 | void ConfigureCpuDebug::ApplyConfiguration() { | 46 | void ConfigureCpuDebug::ApplyConfiguration() { |
| @@ -50,6 +52,7 @@ void ConfigureCpuDebug::ApplyConfiguration() { | |||
| 50 | Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked(); | 52 | Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked(); |
| 51 | Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); | 53 | Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); |
| 52 | Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); | 54 | Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); |
| 55 | Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked(); | ||
| 53 | } | 56 | } |
| 54 | 57 | ||
| 55 | void ConfigureCpuDebug::changeEvent(QEvent* event) { | 58 | void ConfigureCpuDebug::changeEvent(QEvent* event) { |
diff --git a/src/yuzu/configuration/configure_cpu_debug.ui b/src/yuzu/configuration/configure_cpu_debug.ui index a90dc64fe..11ee19a12 100644 --- a/src/yuzu/configuration/configure_cpu_debug.ui +++ b/src/yuzu/configuration/configure_cpu_debug.ui | |||
| @@ -139,6 +139,20 @@ | |||
| 139 | </property> | 139 | </property> |
| 140 | </widget> | 140 | </widget> |
| 141 | </item> | 141 | </item> |
| 142 | <item> | ||
| 143 | <widget class="QCheckBox" name="cpuopt_fastmem"> | ||
| 144 | <property name="text"> | ||
| 145 | <string>Enable Host MMU Emulation</string> | ||
| 146 | </property> | ||
| 147 | <property name="toolTip"> | ||
| 148 | <string> | ||
| 149 | <div style="white-space: nowrap">This optimization speeds up memory accesses by the guest program.</div> | ||
| 150 | <div style="white-space: nowrap">Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.</div> | ||
| 151 | <div style="white-space: nowrap">Disabling this forces all memory accesses to use Software MMU Emulation.</div> | ||
| 152 | </string> | ||
| 153 | </property> | ||
| 154 | </widget> | ||
| 155 | </item> | ||
| 142 | </layout> | 156 | </layout> |
| 143 | </widget> | 157 | </widget> |
| 144 | </item> | 158 | </item> |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 8ce2967ac..f48d935a1 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -150,6 +150,10 @@ cpuopt_misc_ir = | |||
| 150 | # 0: Disabled, 1 (default): Enabled | 150 | # 0: Disabled, 1 (default): Enabled |
| 151 | cpuopt_reduce_misalign_checks = | 151 | cpuopt_reduce_misalign_checks = |
| 152 | 152 | ||
| 153 | # Enable Host MMU Emulation (faster guest memory access) | ||
| 154 | # 0: Disabled, 1 (default): Enabled | ||
| 155 | cpuopt_fastmem = | ||
| 156 | |||
| 153 | [Renderer] | 157 | [Renderer] |
| 154 | # Which backend API to use. | 158 | # Which backend API to use. |
| 155 | # 0 (default): OpenGL, 1: Vulkan | 159 | # 0 (default): OpenGL, 1: Vulkan |