diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/device_memory_manager.h | 51 | ||||
| -rw-r--r-- | src/core/device_memory_manager.inc | 82 | ||||
| -rw-r--r-- | src/video_core/host1x/gpu_device_memory_manager.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/host1x/gpu_device_memory_manager.h | 3 |
4 files changed, 141 insertions, 6 deletions
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 0861b792d..71b95016c 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include <deque> | 6 | #include <deque> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <array> | ||
| 9 | #include <atomic> | ||
| 8 | 10 | ||
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | #include "common/virtual_buffer.h" | 12 | #include "common/virtual_buffer.h" |
| @@ -23,6 +25,7 @@ struct DeviceMemoryManagerAllocator; | |||
| 23 | template <typename Traits> | 25 | template <typename Traits> |
| 24 | class DeviceMemoryManager { | 26 | class DeviceMemoryManager { |
| 25 | using DeviceInterface = typename Traits::DeviceInterface; | 27 | using DeviceInterface = typename Traits::DeviceInterface; |
| 28 | using DeviceMethods = typename Traits::DeviceMethods; /* static device hooks supplied by Traits; 'typename' kept for pre-P0634 compilers and to match DeviceInterface */ | ||
| 26 | 29 | ||
| 27 | public: | 30 | public: |
| 28 | DeviceMemoryManager(const DeviceMemory& device_memory); | 31 | DeviceMemoryManager(const DeviceMemory& device_memory); |
| @@ -35,7 +38,7 @@ public: | |||
| 35 | DAddr AllocatePinned(size_t size); | 38 | DAddr AllocatePinned(size_t size); |
| 36 | void Free(DAddr start, size_t size); | 39 | void Free(DAddr start, size_t size); |
| 37 | 40 | ||
| 38 | void Map(DAddr address, VAddr virtual_address, size_t size, size_t p_id); | 41 | void Map(DAddr address, VAddr virtual_address, size_t size, size_t process_id); |
| 39 | void Unmap(DAddr address, size_t size); | 42 | void Unmap(DAddr address, size_t size); |
| 40 | 43 | ||
| 41 | // Write / Read | 44 | // Write / Read |
| @@ -57,6 +60,8 @@ public: | |||
| 57 | size_t RegisterProcess(Memory::Memory* memory); | 60 | size_t RegisterProcess(Memory::Memory* memory); |
| 58 | void UnregisterProcess(size_t id); | 61 | void UnregisterProcess(size_t id); |
| 59 | 62 | ||
| 63 | void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); | ||
| 64 | |||
| 60 | private: | 65 | private: |
| 61 | static constexpr bool supports_pinning = Traits::supports_pinning; | 66 | static constexpr bool supports_pinning = Traits::supports_pinning; |
| 62 | static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; | 67 | static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; |
| @@ -90,8 +95,52 @@ private: | |||
| 90 | Common::VirtualBuffer<u32> compressed_physical_ptr; | 95 | Common::VirtualBuffer<u32> compressed_physical_ptr; |
| 91 | Common::VirtualBuffer<u32> compressed_device_addr; | 96 | Common::VirtualBuffer<u32> compressed_device_addr; |
| 92 | 97 | ||
| 98 | // Process memory interfaces | ||
| 99 | |||
| 93 | std::deque<size_t> id_pool; | 100 | std::deque<size_t> id_pool; |
| 94 | std::deque<Memory::Memory*> registered_processes; | 101 | std::deque<Memory::Memory*> registered_processes; |
| 102 | |||
| 103 | // Memory protection management | ||
| 104 | |||
| 105 | static constexpr size_t guest_max_as_bits = 39; | ||
| 106 | static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits; | ||
| 107 | static constexpr size_t guest_mask = guest_as_size - 1ULL; | ||
| 108 | static constexpr size_t process_id_start_bit = guest_max_as_bits; | ||
| 109 | |||
| 110 | std::pair<size_t, VAddr> ExtractCPUBacking(size_t page_index) { /* Decodes one backing entry into (process id, guest address). */ | ||
| 111 | const auto packed_entry = cpu_backing_address[page_index]; | ||
| 112 | const auto owner_id = static_cast<size_t>(packed_entry >> process_id_start_bit); /* bits at and above guest_max_as_bits */ | ||
| 113 | const VAddr guest_address = packed_entry & guest_mask; /* low guest_max_as_bits bits */ | ||
| 114 | return {owner_id, guest_address}; | ||
| 115 | } | ||
| 116 | |||
| 117 | void InsertCPUBacking(size_t page_index, VAddr address, size_t process_id) { /* Stores (process_id, address) packed into the backing entry of page_index. */ | ||
| 118 | cpu_backing_address[page_index] = address | (process_id << process_id_start_bit); /* id goes above the guest address bits so ExtractCPUBacking can decode it; the shift amount must not depend on page_index */ | ||
| 119 | } | ||
| 120 | |||
| 121 | Common::VirtualBuffer<VAddr> cpu_backing_address; | ||
| 122 | static constexpr size_t subentries = 4; | ||
| 123 | static constexpr size_t subentries_mask = subentries - 1; | ||
| 124 | class CounterEntry final { /* Groups `subentries` 16-bit atomic counters; one counter per page. */ | ||
| 125 | public: | ||
| 126 | CounterEntry() = default; | ||
| 127 | |||
| 128 | std::atomic_uint16_t& Count(std::size_t page) { /* Mutable counter for `page` within this entry. */ | ||
| 129 | return values[page & subentries_mask]; /* only the low bits of the page index select the slot */ | ||
| 130 | } | ||
| 131 | |||
| 132 | const std::atomic_uint16_t& Count(std::size_t page) const { /* Read-only overload of the accessor above. */ | ||
| 133 | return values[page & subentries_mask]; | ||
| 134 | } | ||
| 135 | |||
| 136 | private: | ||
| 137 | std::array<std::atomic_uint16_t, subentries> values{}; /* value-initialized storage for the counters */ | ||
| 138 | }; | ||
| 139 | static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); | ||
| 140 | |||
| 141 | static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||
| 142 | using CachedPages = std::array<CounterEntry, num_counter_entries>; | ||
| 143 | std::unique_ptr<CachedPages> cached_pages; | ||
| 95 | }; | 144 | }; |
| 96 | 145 | ||
| 97 | } // namespace Core \ No newline at end of file | 146 | } // namespace Core \ No newline at end of file |
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 1f52b92d5..77410f72f 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc | |||
| @@ -2,12 +2,15 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <atomic> | 4 | #include <atomic> |
| 5 | #include <limits> | ||
| 5 | #include <memory> | 6 | #include <memory> |
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | #include "common/address_space.h" | 9 | #include "common/address_space.h" |
| 9 | #include "common/address_space.inc" | 10 | #include "common/address_space.inc" |
| 10 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/div_ceil.h" | ||
| 11 | #include "common/scope_exit.h" | 14 | #include "common/scope_exit.h" |
| 12 | #include "core/device_memory.h" | 15 | #include "core/device_memory.h" |
| 13 | #include "core/device_memory_manager.h" | 16 | #include "core/device_memory_manager.h" |
| @@ -51,7 +54,11 @@ struct DeviceMemoryManagerAllocator { | |||
| 51 | } | 54 | } |
| 52 | 55 | ||
| 53 | DAddr AllocatePinned(size_t size) { | 56 | DAddr AllocatePinned(size_t size) { |
| 54 | return pin_allocator.Allocate(size); | 57 | if constexpr (supports_pinning) { |
| 58 | return pin_allocator.Allocate(size); /* pinning enabled: forward the request to pin_allocator */ | ||
| 59 | } else { | ||
| 60 | return DAddr{}; /* supports_pinning is false: hand back a value-initialized address instead of allocating */ | ||
| 61 | } | ||
| 55 | } | 62 | } |
| 56 | 63 | ||
| 57 | void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { | 64 | void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { |
| @@ -100,6 +107,7 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo | |||
| 100 | interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), | 107 | interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), |
| 101 | compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { | 108 | compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { |
| 102 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | 109 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); |
| 110 | cached_pages = std::make_unique<CachedPages>(); | ||
| 103 | } | 111 | } |
| 104 | 112 | ||
| 105 | template <typename Traits> | 113 | template <typename Traits> |
| @@ -132,14 +140,14 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) { | |||
| 132 | 140 | ||
| 133 | template <typename Traits> | 141 | template <typename Traits> |
| 134 | void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, | 142 | void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, |
| 135 | size_t p_id) { | 143 | size_t process_id) { |
| 136 | Core::Memory::Memory* process_memory = registered_processes[p_id]; | 144 | Core::Memory::Memory* process_memory = registered_processes[process_id]; |
| 137 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | 145 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |
| 138 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | 146 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |
| 139 | std::atomic_thread_fence(std::memory_order_acquire); | 147 | std::atomic_thread_fence(std::memory_order_acquire); |
| 140 | for (size_t i = 0; i < num_pages; i++) { | 148 | for (size_t i = 0; i < num_pages; i++) { |
| 141 | auto* ptr = process_memory->GetPointer( | 149 | const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; |
| 142 | Common::ProcessAddress(virtual_address + i * Memory::YUZU_PAGESIZE)); | 150 | auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); |
| 143 | if (ptr == nullptr) [[unlikely]] { | 151 | if (ptr == nullptr) [[unlikely]] { |
| 144 | compressed_physical_ptr[start_page_d + i] = 0; | 152 | compressed_physical_ptr[start_page_d + i] = 0; |
| 145 | continue; | 153 | continue; |
| @@ -147,6 +155,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | |||
| 147 | auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; | 155 | auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; |
| 148 | compressed_physical_ptr[start_page_d + i] = phys_addr; | 156 | compressed_physical_ptr[start_page_d + i] = phys_addr; |
| 149 | compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); | 157 | compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); |
| 158 | InsertCPUBacking(start_page_d + i, new_vaddress, process_id); | ||
| 150 | } | 159 | } |
| 151 | std::atomic_thread_fence(std::memory_order_release); | 160 | std::atomic_thread_fence(std::memory_order_release); |
| 152 | } | 161 | } |
| @@ -159,6 +168,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | |||
| 159 | for (size_t i = 0; i < num_pages; i++) { | 168 | for (size_t i = 0; i < num_pages; i++) { |
| 160 | auto phys_addr = compressed_physical_ptr[start_page_d + i]; | 169 | auto phys_addr = compressed_physical_ptr[start_page_d + i]; |
| 161 | compressed_physical_ptr[start_page_d + i] = 0; | 170 | compressed_physical_ptr[start_page_d + i] = 0; |
| 171 | cpu_backing_address[start_page_d + i] = 0; | ||
| 162 | if (phys_addr != 0) { | 172 | if (phys_addr != 0) { |
| 163 | compressed_device_addr[phys_addr - 1] = 0; | 173 | compressed_device_addr[phys_addr - 1] = 0; |
| 164 | } | 174 | } |
| @@ -301,4 +311,66 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) { | |||
| 301 | id_pool.push_front(id); | 311 | id_pool.push_front(id); |
| 302 | } | 312 | } |
| 303 | 313 | ||
| 314 | // Applies delta to the cached-count of every device page in [addr, addr + size) and marks the | ||
| 315 | // guest region as cached / uncached when a count reaches 1 / drops to 0. The process and guest | ||
| 316 | // address are taken from the first page's CPU backing and advanced one page per iteration. | ||
| 317 | template <typename Traits> | ||
| 318 | void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | ||
| 319 | u64 uncache_begin = 0; | ||
| 320 | u64 cache_begin = 0; | ||
| 321 | u64 uncache_bytes = 0; | ||
| 322 | u64 cache_bytes = 0; | ||
| 323 | const auto MarkRegionCaching = &DeviceMethods::MarkRegionCaching; | ||
| 324 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 325 | const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); | ||
| 326 | size_t page = addr >> Memory::YUZU_PAGEBITS; | ||
| 327 | auto [process_id, base_vaddress] = ExtractCPUBacking(page); | ||
| 328 | size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; | ||
| 329 | auto* memory_interface = registered_processes[process_id]; | ||
| 330 | for (; page != page_end; ++page) { | ||
| 331 | std::atomic_uint16_t& count = cached_pages->at(page / subentries).Count(page); | ||
| 332 | if (delta > 0) { | ||
| 333 | ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u16>::max(), | ||
| 334 | "Count may overflow!"); | ||
| 335 | } else if (delta < 0) { | ||
| 336 | ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||
| 337 | } else { | ||
| 338 | ASSERT_MSG(false, "Delta must be non-zero!"); | ||
| 339 | } | ||
| 340 | |||
| 341 | // Adds or subtracts 1 (delta is assumed to be +1 or -1); count is an unsigned 16-bit value. | ||
| 342 | const u16 previous = count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | ||
| 343 | const u16 current = static_cast<u16>(previous + static_cast<u16>(delta)); /* value produced by this update, not a later re-load */ | ||
| 344 | if (current == 0) { | ||
| 345 | if (uncache_bytes == 0) { | ||
| 346 | uncache_begin = vpage; | ||
| 347 | } | ||
| 348 | uncache_bytes += Memory::YUZU_PAGESIZE; | ||
| 349 | } else if (uncache_bytes > 0) { | ||
| 350 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, | ||
| 351 | uncache_bytes, false); | ||
| 352 | uncache_bytes = 0; | ||
| 353 | } | ||
| 354 | if (current == 1 && delta > 0) { | ||
| 355 | if (cache_bytes == 0) { | ||
| 356 | cache_begin = vpage; | ||
| 357 | } | ||
| 358 | cache_bytes += Memory::YUZU_PAGESIZE; | ||
| 359 | } else if (cache_bytes > 0) { | ||
| 360 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||
| 361 | true); | ||
| 362 | cache_bytes = 0; | ||
| 363 | } | ||
| 364 | vpage++; | ||
| 365 | } | ||
| 366 | if (uncache_bytes > 0) { | ||
| 367 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | ||
| 368 | false); | ||
| 369 | } | ||
| 370 | if (cache_bytes > 0) { | ||
| 371 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||
| 372 | true); | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 304 | } // namespace Core \ No newline at end of file | 376 | } // namespace Core \ No newline at end of file |
diff --git a/src/video_core/host1x/gpu_device_memory_manager.cpp b/src/video_core/host1x/gpu_device_memory_manager.cpp index 2ca445081..668c2f08b 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.cpp +++ b/src/video_core/host1x/gpu_device_memory_manager.cpp | |||
| @@ -5,6 +5,17 @@ | |||
| 5 | #include "video_core/host1x/gpu_device_memory_manager.h" | 5 | #include "video_core/host1x/gpu_device_memory_manager.h" |
| 6 | #include "video_core/rasterizer_interface.h" | 6 | #include "video_core/rasterizer_interface.h" |
| 7 | 7 | ||
| 8 | namespace Tegra { | ||
| 9 | |||
| 10 | struct MaxwellDeviceMethods { /* Static hooks bound to the Maxwell traits via DeviceMethods. */ | ||
| 11 | static void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address, size_t size, | ||
| 12 | bool caching) { /* forwards the region and flag unchanged to the process memory */ | ||
| 13 | interface->RasterizerMarkRegionCached(address, size, caching); | ||
| 14 | } | ||
| 15 | }; | ||
| 16 | |||
| 17 | } // namespace Tegra | ||
| 18 | |||
| 8 | template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; | 19 | template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; |
| 9 | template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; | 20 | template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; |
| 10 | 21 | ||
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index 30ad52017..2fb77605e 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h | |||
| @@ -9,10 +9,13 @@ class RasterizerInterface; | |||
| 9 | 9 | ||
| 10 | namespace Tegra { | 10 | namespace Tegra { |
| 11 | 11 | ||
| 12 | struct MaxwellDeviceMethods; | ||
| 13 | |||
| 12 | struct MaxwellDeviceTraits { | 14 | struct MaxwellDeviceTraits { |
| 13 | static constexpr bool supports_pinning = true; | 15 | static constexpr bool supports_pinning = true; |
| 14 | static constexpr size_t device_virtual_bits = 34; | 16 | static constexpr size_t device_virtual_bits = 34; |
| 15 | using DeviceInterface = typename VideoCore::RasterizerInterface; | 17 | using DeviceInterface = typename VideoCore::RasterizerInterface; |
| 18 | using DeviceMethods = MaxwellDeviceMethods; /* forward-declared above, defined in the .cpp; no 'typename' because the name is unqualified and non-dependent */ | ||
| 16 | }; | 19 | }; |
| 17 | 20 | ||
| 18 | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; | 21 | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; |