summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/device_memory_manager.h51
-rw-r--r--src/core/device_memory_manager.inc82
-rw-r--r--src/video_core/host1x/gpu_device_memory_manager.cpp11
-rw-r--r--src/video_core/host1x/gpu_device_memory_manager.h3
4 files changed, 141 insertions, 6 deletions
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 0861b792d..71b95016c 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -5,6 +5,8 @@
5 5
6#include <deque> 6#include <deque>
7#include <memory> 7#include <memory>
8#include <array>
9#include <atomic>
8 10
9#include "common/common_types.h" 11#include "common/common_types.h"
10#include "common/virtual_buffer.h" 12#include "common/virtual_buffer.h"
@@ -23,6 +25,7 @@ struct DeviceMemoryManagerAllocator;
23template <typename Traits> 25template <typename Traits>
24class DeviceMemoryManager { 26class DeviceMemoryManager {
25 using DeviceInterface = typename Traits::DeviceInterface; 27 using DeviceInterface = typename Traits::DeviceInterface;
28 using DeviceMethods = Traits::DeviceMethods;
26 29
27public: 30public:
28 DeviceMemoryManager(const DeviceMemory& device_memory); 31 DeviceMemoryManager(const DeviceMemory& device_memory);
@@ -35,7 +38,7 @@ public:
35 DAddr AllocatePinned(size_t size); 38 DAddr AllocatePinned(size_t size);
36 void Free(DAddr start, size_t size); 39 void Free(DAddr start, size_t size);
37 40
38 void Map(DAddr address, VAddr virtual_address, size_t size, size_t p_id); 41 void Map(DAddr address, VAddr virtual_address, size_t size, size_t process_id);
39 void Unmap(DAddr address, size_t size); 42 void Unmap(DAddr address, size_t size);
40 43
41 // Write / Read 44 // Write / Read
@@ -57,6 +60,8 @@ public:
57 size_t RegisterProcess(Memory::Memory* memory); 60 size_t RegisterProcess(Memory::Memory* memory);
58 void UnregisterProcess(size_t id); 61 void UnregisterProcess(size_t id);
59 62
63 void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
64
60private: 65private:
61 static constexpr bool supports_pinning = Traits::supports_pinning; 66 static constexpr bool supports_pinning = Traits::supports_pinning;
62 static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; 67 static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
@@ -90,8 +95,52 @@ private:
90 Common::VirtualBuffer<u32> compressed_physical_ptr; 95 Common::VirtualBuffer<u32> compressed_physical_ptr;
91 Common::VirtualBuffer<u32> compressed_device_addr; 96 Common::VirtualBuffer<u32> compressed_device_addr;
92 97
98 // Process memory interfaces
99
93 std::deque<size_t> id_pool; 100 std::deque<size_t> id_pool;
94 std::deque<Memory::Memory*> registered_processes; 101 std::deque<Memory::Memory*> registered_processes;
102
103 // Memory protection management
104
105 static constexpr size_t guest_max_as_bits = 39;
106 static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
107 static constexpr size_t guest_mask = guest_as_size - 1ULL;
108 static constexpr size_t process_id_start_bit = guest_max_as_bits;
109
110 std::pair<size_t, VAddr> ExtractCPUBacking(size_t page_index) {
111 auto content = cpu_backing_address[page_index];
112 const VAddr address = content & guest_mask;
113 const size_t process_id = static_cast<size_t>(content >> process_id_start_bit);
114 return std::make_pair(process_id, address);
115 }
116
117 void InsertCPUBacking(size_t page_index, VAddr address, size_t process_id) {
 118        cpu_backing_address[page_index] = address | (process_id << process_id_start_bit);
119 }
120
121 Common::VirtualBuffer<VAddr> cpu_backing_address;
122 static constexpr size_t subentries = 4;
123 static constexpr size_t subentries_mask = subentries - 1;
124 class CounterEntry final {
125 public:
126 CounterEntry() = default;
127
128 std::atomic_uint16_t& Count(std::size_t page) {
129 return values[page & subentries_mask];
130 }
131
132 const std::atomic_uint16_t& Count(std::size_t page) const {
133 return values[page & subentries_mask];
134 }
135
136 private:
137 std::array<std::atomic_uint16_t, subentries> values{};
138 };
139 static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!");
140
141 static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries;
142 using CachedPages = std::array<CounterEntry, num_counter_entries>;
143 std::unique_ptr<CachedPages> cached_pages;
95}; 144};
96 145
97} // namespace Core \ No newline at end of file 146} // namespace Core \ No newline at end of file
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 1f52b92d5..77410f72f 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -2,12 +2,15 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <atomic> 4#include <atomic>
5#include <limits>
5#include <memory> 6#include <memory>
6#include <type_traits> 7#include <type_traits>
7 8
8#include "common/address_space.h" 9#include "common/address_space.h"
9#include "common/address_space.inc" 10#include "common/address_space.inc"
10#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/div_ceil.h"
11#include "common/scope_exit.h" 14#include "common/scope_exit.h"
12#include "core/device_memory.h" 15#include "core/device_memory.h"
13#include "core/device_memory_manager.h" 16#include "core/device_memory_manager.h"
@@ -51,7 +54,11 @@ struct DeviceMemoryManagerAllocator {
51 } 54 }
52 55
53 DAddr AllocatePinned(size_t size) { 56 DAddr AllocatePinned(size_t size) {
54 return pin_allocator.Allocate(size); 57 if constexpr (supports_pinning) {
58 return pin_allocator.Allocate(size);
59 } else {
60 return DAddr{};
61 }
55 } 62 }
56 63
57 void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { 64 void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) {
@@ -100,6 +107,7 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
100 interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), 107 interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
101 compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { 108 compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) {
102 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); 109 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
110 cached_pages = std::make_unique<CachedPages>();
103} 111}
104 112
105template <typename Traits> 113template <typename Traits>
@@ -132,14 +140,14 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
132 140
133template <typename Traits> 141template <typename Traits>
134void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, 142void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
135 size_t p_id) { 143 size_t process_id) {
136 Core::Memory::Memory* process_memory = registered_processes[p_id]; 144 Core::Memory::Memory* process_memory = registered_processes[process_id];
137 size_t start_page_d = address >> Memory::YUZU_PAGEBITS; 145 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
138 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; 146 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
139 std::atomic_thread_fence(std::memory_order_acquire); 147 std::atomic_thread_fence(std::memory_order_acquire);
140 for (size_t i = 0; i < num_pages; i++) { 148 for (size_t i = 0; i < num_pages; i++) {
141 auto* ptr = process_memory->GetPointer( 149 const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
142 Common::ProcessAddress(virtual_address + i * Memory::YUZU_PAGESIZE)); 150 auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress));
143 if (ptr == nullptr) [[unlikely]] { 151 if (ptr == nullptr) [[unlikely]] {
144 compressed_physical_ptr[start_page_d + i] = 0; 152 compressed_physical_ptr[start_page_d + i] = 0;
145 continue; 153 continue;
@@ -147,6 +155,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
147 auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; 155 auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
148 compressed_physical_ptr[start_page_d + i] = phys_addr; 156 compressed_physical_ptr[start_page_d + i] = phys_addr;
149 compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); 157 compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
158 InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
150 } 159 }
151 std::atomic_thread_fence(std::memory_order_release); 160 std::atomic_thread_fence(std::memory_order_release);
152} 161}
@@ -159,6 +168,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
159 for (size_t i = 0; i < num_pages; i++) { 168 for (size_t i = 0; i < num_pages; i++) {
160 auto phys_addr = compressed_physical_ptr[start_page_d + i]; 169 auto phys_addr = compressed_physical_ptr[start_page_d + i];
161 compressed_physical_ptr[start_page_d + i] = 0; 170 compressed_physical_ptr[start_page_d + i] = 0;
171 cpu_backing_address[start_page_d + i] = 0;
162 if (phys_addr != 0) { 172 if (phys_addr != 0) {
163 compressed_device_addr[phys_addr - 1] = 0; 173 compressed_device_addr[phys_addr - 1] = 0;
164 } 174 }
@@ -301,4 +311,66 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) {
301 id_pool.push_front(id); 311 id_pool.push_front(id);
302} 312}
303 313
314template <typename Traits>
315void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
316 u64 uncache_begin = 0;
317 u64 cache_begin = 0;
318 u64 uncache_bytes = 0;
319 u64 cache_bytes = 0;
320 const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
321
322 std::atomic_thread_fence(std::memory_order_acquire);
323 const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
324 size_t page = addr >> Memory::YUZU_PAGEBITS;
325 auto [process_id, base_vaddress] = ExtractCPUBacking(page);
326 size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
327 auto* memory_interface = registered_processes[process_id];
328 for (; page != page_end; ++page) {
329 std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
330
331 if (delta > 0) {
332 ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u16>::max(),
333 "Count may overflow!");
334 } else if (delta < 0) {
335 ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
336 } else {
337 ASSERT_MSG(false, "Delta must be non-zero!");
338 }
339
 340        // Adds or subtracts 1; count is an unsigned 16-bit value
341 count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
342
343 // Assume delta is either -1 or 1
344 if (count.load(std::memory_order::relaxed) == 0) {
345 if (uncache_bytes == 0) {
346 uncache_begin = vpage;
347 }
348 uncache_bytes += Memory::YUZU_PAGESIZE;
349 } else if (uncache_bytes > 0) {
350 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS,
351 uncache_bytes, false);
352 uncache_bytes = 0;
353 }
354 if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
355 if (cache_bytes == 0) {
356 cache_begin = vpage;
357 }
358 cache_bytes += Memory::YUZU_PAGESIZE;
359 } else if (cache_bytes > 0) {
360 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
361 true);
362 cache_bytes = 0;
363 }
364 vpage++;
365 }
366 if (uncache_bytes > 0) {
367 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
368 false);
369 }
370 if (cache_bytes > 0) {
371 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
372 true);
373 }
374}
375
304} // namespace Core \ No newline at end of file 376} // namespace Core \ No newline at end of file
diff --git a/src/video_core/host1x/gpu_device_memory_manager.cpp b/src/video_core/host1x/gpu_device_memory_manager.cpp
index 2ca445081..668c2f08b 100644
--- a/src/video_core/host1x/gpu_device_memory_manager.cpp
+++ b/src/video_core/host1x/gpu_device_memory_manager.cpp
@@ -5,6 +5,17 @@
5#include "video_core/host1x/gpu_device_memory_manager.h" 5#include "video_core/host1x/gpu_device_memory_manager.h"
6#include "video_core/rasterizer_interface.h" 6#include "video_core/rasterizer_interface.h"
7 7
8namespace Tegra {
9
10struct MaxwellDeviceMethods {
11 static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address,
12 size_t size, bool caching) {
13 interface->RasterizerMarkRegionCached(address, size, caching);
14 }
15};
16
17} // namespace Tegra
18
8template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; 19template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>;
9template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; 20template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>;
10 21
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h
index 30ad52017..2fb77605e 100644
--- a/src/video_core/host1x/gpu_device_memory_manager.h
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@@ -9,10 +9,13 @@ class RasterizerInterface;
9 9
10namespace Tegra { 10namespace Tegra {
11 11
12struct MaxwellDeviceMethods;
13
12struct MaxwellDeviceTraits { 14struct MaxwellDeviceTraits {
13 static constexpr bool supports_pinning = true; 15 static constexpr bool supports_pinning = true;
14 static constexpr size_t device_virtual_bits = 34; 16 static constexpr size_t device_virtual_bits = 34;
15 using DeviceInterface = typename VideoCore::RasterizerInterface; 17 using DeviceInterface = typename VideoCore::RasterizerInterface;
18 using DeviceMethods = typename MaxwellDeviceMethods;
16}; 19};
17 20
18using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; 21using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;