Diffstat (limited to 'src')
-rw-r--r--  src/common/multi_level_page_table.inc                 |   3
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp  |  45
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h    |   1
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp          |  10
-rw-r--r--  src/video_core/memory_manager.cpp                     | 436
-rw-r--r--  src/video_core/memory_manager.h                       |  57
6 files changed, 343 insertions(+), 209 deletions(-)
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index 7fbcb908a..9a68cad93 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -19,6 +19,9 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
19 std::size_t page_bits_) 19 std::size_t page_bits_)
20 : address_space_bits{address_space_bits_}, 20 : address_space_bits{address_space_bits_},
21 first_level_bits{first_level_bits_}, page_bits{page_bits_} { 21 first_level_bits{first_level_bits_}, page_bits{page_bits_} {
22 if (page_bits == 0) {
23 return;
24 }
22 first_level_shift = address_space_bits - first_level_bits; 25 first_level_shift = address_space_bits - first_level_bits;
23 first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr); 26 first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
24 alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr); 27 alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
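The new guard lets page_bits == 0 act as a "table disabled" sentinel: the MemoryManager constructor further down passes 0 for the small-page level when page_bits == big_page_bits, so none of the derived sizes should be computed. A minimal sketch of the arithmetic the early return protects, using the 40-bit space and the 14-bit first level implied by that constructor (40 + 12 - 38 = 14); the concrete figures are illustrative, not taken from the diff:

#include <cstddef>
#include <cstdint>

// Derived-size math from MultiLevelPageTable's constructor, spelled out for
// one concrete configuration (u32 entries, 40-bit space, 4 KiB pages).
constexpr std::size_t address_space_bits = 40;
constexpr std::size_t first_level_bits = 14; // 40 + 12 - 38, as passed below
constexpr std::size_t page_bits = 12;        // 4 KiB small pages

constexpr std::size_t first_level_shift = address_space_bits - first_level_bits; // 26
// Bytes of second-level entries owned by one first-level slot:
constexpr std::size_t first_level_chunk_size =
    (1ULL << (first_level_shift - page_bits)) * sizeof(std::uint32_t); // 64 KiB
// Backing reservation if every page in the space had an entry:
constexpr std::size_t alloc_size =
    (1ULL << (address_space_bits - page_bits)) * sizeof(std::uint32_t); // 1 GiB

// With page_bits == 0 the reservation would grow to 2^40 * 4 bytes (4 TiB)
// and the chunk math would be meaningless, hence the new early return.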
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index db2a6c3b2..d95a88393 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -133,7 +133,8 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
133 const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; 133 const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
134 vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages); 134 vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
135 135
136 gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS); 136 gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
137 VM::PAGE_SIZE_BITS);
137 system.GPU().InitAddressSpace(*gmmu); 138 system.GPU().InitAddressSpace(*gmmu);
138 vm.initialised = true; 139 vm.initialised = true;
139 140
@@ -189,6 +190,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
189 .size = size, 190 .size = size,
190 .page_size = params.page_size, 191 .page_size = params.page_size,
191 .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, 192 .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
193 .big_pages = params.page_size != VM::YUZU_PAGESIZE,
192 }; 194 };
193 195
194 std::memcpy(output.data(), &params, output.size()); 196 std::memcpy(output.data(), &params, output.size());
@@ -209,7 +211,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
209 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state 211 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
210 // Only FreeSpace can unmap them fully 212 // Only FreeSpace can unmap them fully
211 if (mapping->sparse_alloc) 213 if (mapping->sparse_alloc)
212 gmmu->MapSparse(offset, mapping->size); 214 gmmu->MapSparse(offset, mapping->size, mapping->big_page);
213 else 215 else
214 gmmu->Unmap(offset, mapping->size); 216 gmmu->Unmap(offset, mapping->size);
215 217
@@ -294,8 +296,9 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
294 return NvResult::BadValue; 296 return NvResult::BadValue;
295 } 297 }
296 298
299 const bool use_big_pages = alloc->second.big_pages;
297 if (!entry.handle) { 300 if (!entry.handle) {
298 gmmu->MapSparse(virtual_address, size); 301 gmmu->MapSparse(virtual_address, size, use_big_pages);
299 } else { 302 } else {
300 auto handle{nvmap.GetHandle(entry.handle)}; 303 auto handle{nvmap.GetHandle(entry.handle)};
301 if (!handle) { 304 if (!handle) {
@@ -306,7 +309,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
306 handle->address + 309 handle->address +
307 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; 310 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
308 311
309 gmmu->Map(virtual_address, cpu_address, size); 312 gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
310 } 313 }
311 } 314 }
312 315
@@ -345,7 +348,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
345 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; 348 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
346 VAddr cpu_address{mapping->ptr + params.buffer_offset}; 349 VAddr cpu_address{mapping->ptr + params.buffer_offset};
347 350
348 gmmu->Map(gpu_address, cpu_address, params.mapping_size); 351 gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
349 352
350 return NvResult::Success; 353 return NvResult::Success;
351 } catch ([[maybe_unused]] const std::out_of_range& e) { 354 } catch ([[maybe_unused]] const std::out_of_range& e) {
@@ -363,6 +366,17 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
363 VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; 366 VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
364 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; 367 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
365 368
369 bool big_page{[&]() {
370 if (Common::IsAligned(handle->align, vm.big_page_size))
371 return true;
372 else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
373 return false;
374 else {
375 UNREACHABLE();
376 return false;
377 }
378 }()};
379
366 if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { 380 if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
367 auto alloc{allocation_map.upper_bound(params.offset)}; 381 auto alloc{allocation_map.upper_bound(params.offset)};
368 382
@@ -372,23 +386,14 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
372 return NvResult::BadValue; 386 return NvResult::BadValue;
373 } 387 }
374 388
375 gmmu->Map(params.offset, cpu_address, size); 389 const bool use_big_pages = alloc->second.big_pages && big_page;
390 gmmu->Map(params.offset, cpu_address, size, use_big_pages);
376 391
377 auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false, 392 auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
378 alloc->second.sparse)}; 393 use_big_pages, alloc->second.sparse)};
379 alloc->second.mappings.push_back(mapping); 394 alloc->second.mappings.push_back(mapping);
380 mapping_map[params.offset] = mapping; 395 mapping_map[params.offset] = mapping;
381 } else { 396 } else {
382 bool big_page{[&]() {
383 if (Common::IsAligned(handle->align, vm.big_page_size))
384 return true;
385 else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
386 return false;
387 else {
388 UNREACHABLE();
389 return false;
390 }
391 }()};
392 397
393 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; 398 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
394 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; 399 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
@@ -402,7 +407,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
402 return NvResult::InsufficientMemory; 407 return NvResult::InsufficientMemory;
403 } 408 }
404 409
405 gmmu->Map(params.offset, cpu_address, size); 410 gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
406 411
407 auto mapping{ 412 auto mapping{
408 std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; 413 std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
@@ -439,7 +444,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
439 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state 444 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
440 // Only FreeSpace can unmap them fully 445 // Only FreeSpace can unmap them fully
441 if (mapping->sparse_alloc) { 446 if (mapping->sparse_alloc) {
442 gmmu->MapSparse(params.offset, mapping->size); 447 gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
443 } else { 448 } else {
444 gmmu->Unmap(params.offset, mapping->size); 449 gmmu->Unmap(params.offset, mapping->size);
445 } 450 }
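The handle's nvmap alignment decides whether a buffer may be mapped with 64 KiB PTEs at all, and a fixed-offset map additionally has to respect the page size chosen when the allocation was created (alloc->second.big_pages && big_page above). A simplified restatement of that policy, reusing the same helpers as the diff; the free function is only an illustration:

#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"

// A buffer qualifies for big pages only if its alignment allows it; otherwise
// it must at least be small-page aligned.
bool CanUseBigPages(u32 handle_align, u32 big_page_size, u32 small_page_size) {
    if (Common::IsAligned(handle_align, big_page_size)) {
        return true; // big-page aligned (64 KiB by default): eligible
    }
    ASSERT(Common::IsAligned(handle_align, small_page_size));
    return false;    // fall back to 4 KiB pages
}

// Fixed mapping into an AllocateSpace region:
//   use_big_pages = alloc->second.big_pages && CanUseBigPages(...);
// Free-range mapping: the allocator and page size follow CanUseBigPages(...)
// alone, and the mapped size is rounded up to that page size (AlignUp above).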
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1d27739e2..12e881f0d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -177,6 +177,7 @@ private:
177 std::list<std::shared_ptr<Mapping>> mappings; 177 std::list<std::shared_ptr<Mapping>> mappings;
178 u32 page_size; 178 u32 page_size;
179 bool sparse; 179 bool sparse;
180 bool big_pages;
180 }; 181 };
181 182
182 std::map<u64, std::shared_ptr<Mapping>> 183 std::map<u64, std::shared_ptr<Mapping>>
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 279997e81..992c117f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -9,6 +9,8 @@
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "core/core.h" 11#include "core/core.h"
12#include "core/hle/kernel/k_page_table.h"
13#include "core/hle/kernel/k_process.h"
12#include "core/hle/service/nvdrv/core/container.h" 14#include "core/hle/service/nvdrv/core/container.h"
13#include "core/hle/service/nvdrv/core/nvmap.h" 15#include "core/hle/service/nvdrv/core/nvmap.h"
14#include "core/hle/service/nvdrv/devices/nvmap.h" 16#include "core/hle/service/nvdrv/devices/nvmap.h"
@@ -136,6 +138,10 @@ NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output)
136 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); 138 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
137 return result; 139 return result;
138 } 140 }
141 ASSERT(system.CurrentProcess()
142 ->PageTable()
143 .LockForDeviceAddressSpace(handle_description->address, handle_description->size)
144 .IsSuccess());
139 std::memcpy(output.data(), &params, sizeof(params)); 145 std::memcpy(output.data(), &params, sizeof(params));
140 return result; 146 return result;
141} 147}
@@ -256,6 +262,10 @@ NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
256 } 262 }
257 263
258 if (auto freeInfo{file.FreeHandle(params.handle, false)}) { 264 if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
265 ASSERT(system.CurrentProcess()
266 ->PageTable()
267 .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
268 .IsSuccess());
259 params.address = freeInfo->address; 269 params.address = freeInfo->address;
260 params.size = static_cast<u32>(freeInfo->size); 270 params.size = static_cast<u32>(freeInfo->size);
261 params.flags.raw = 0; 271 params.flags.raw = 0;
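IocAlloc now pins the handle's backing guest range for device access and IocFree releases it, so every successful LockForDeviceAddressSpace is expected to have exactly one matching UnlockForDeviceAddressSpace over the handle's lifetime. A hypothetical RAII helper (not part of the diff) that makes the pairing explicit, using only the calls shown above:

#include "common/assert.h"
#include "common/common_types.h"
#include "core/hle/kernel/k_page_table.h"

// Illustrative only: the driver code above performs the lock at alloc time
// and the unlock at free time rather than through a scope guard.
class ScopedDeviceAddressSpaceLock {
public:
    ScopedDeviceAddressSpaceLock(Kernel::KPageTable& table_, VAddr address_, u64 size_)
        : table{table_}, address{address_}, size{size_} {
        ASSERT(table.LockForDeviceAddressSpace(address, size).IsSuccess());
    }
    ~ScopedDeviceAddressSpaceLock() {
        ASSERT(table.UnlockForDeviceAddressSpace(address, size).IsSuccess());
    }

private:
    Kernel::KPageTable& table;
    VAddr address;
    u64 size;
};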
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index b36067613..836ece136 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,6 +7,7 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/device_memory.h"
10#include "core/hle/kernel/k_page_table.h" 11#include "core/hle/kernel/k_page_table.h"
11#include "core/hle/kernel/k_process.h" 12#include "core/hle/kernel/k_process.h"
12#include "core/memory.h" 13#include "core/memory.h"
@@ -14,40 +15,69 @@
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
16 17
18#pragma optimize("", off)
19
17namespace Tegra { 20namespace Tegra {
18 21
19std::atomic<size_t> MemoryManager::unique_identifier_generator{}; 22std::atomic<size_t> MemoryManager::unique_identifier_generator{};
20 23
21MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_) 24MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
22 : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{}, 25 u64 page_bits_)
23 page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits}, 26 : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
27 address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
28 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
29 page_bits != big_page_bits ? page_bits : 0},
24 unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} { 30 unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
25 address_space_size = 1ULL << address_space_bits; 31 address_space_size = 1ULL << address_space_bits;
26 allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
27 page_size = 1ULL << page_bits; 32 page_size = 1ULL << page_bits;
28 page_mask = page_size - 1ULL; 33 page_mask = page_size - 1ULL;
29 const u64 page_table_bits = address_space_bits - cpu_page_bits; 34 big_page_size = 1ULL << big_page_bits;
35 big_page_mask = big_page_size - 1ULL;
36 const u64 page_table_bits = address_space_bits - page_bits;
37 const u64 big_page_table_bits = address_space_bits - big_page_bits;
30 const u64 page_table_size = 1ULL << page_table_bits; 38 const u64 page_table_size = 1ULL << page_table_bits;
39 const u64 big_page_table_size = 1ULL << big_page_table_bits;
31 page_table_mask = page_table_size - 1; 40 page_table_mask = page_table_size - 1;
41 big_page_table_mask = big_page_table_size - 1;
32 42
43 big_entries.resize(big_page_table_size / 32, 0);
44 big_page_table_cpu.resize(big_page_table_size);
45 big_page_table_physical.resize(big_page_table_size);
33 entries.resize(page_table_size / 32, 0); 46 entries.resize(page_table_size / 32, 0);
34} 47}
35 48
36MemoryManager::~MemoryManager() = default; 49MemoryManager::~MemoryManager() = default;
37 50
51template <bool is_big_page>
38MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const { 52MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
39 position = position >> page_bits; 53 if constexpr (is_big_page) {
40 const u64 entry_mask = entries[position / 32]; 54 position = position >> big_page_bits;
41 const size_t sub_index = position % 32; 55 const u64 entry_mask = big_entries[position / 32];
42 return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL); 56 const size_t sub_index = position % 32;
57 return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
58 } else {
59 position = position >> page_bits;
60 const u64 entry_mask = entries[position / 32];
61 const size_t sub_index = position % 32;
62 return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
63 }
43} 64}
44 65
66template <bool is_big_page>
45void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { 67void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
46 position = position >> page_bits; 68 if constexpr (is_big_page) {
47 const u64 entry_mask = entries[position / 32]; 69 position = position >> big_page_bits;
48 const size_t sub_index = position % 32; 70 const u64 entry_mask = big_entries[position / 32];
49 entries[position / 32] = 71 const size_t sub_index = position % 32;
50 (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2); 72 big_entries[position / 32] =
73 (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
74 } else {
75 position = position >> page_bits;
76 const u64 entry_mask = entries[position / 32];
77 const size_t sub_index = position % 32;
78 entries[position / 32] =
79 (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
80 }
51} 81}
52 82
53template <MemoryManager::EntryType entry_type> 83template <MemoryManager::EntryType entry_type>
@@ -59,48 +89,66 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
59 } 89 }
60 for (u64 offset{}; offset < size; offset += page_size) { 90 for (u64 offset{}; offset < size; offset += page_size) {
61 const GPUVAddr current_gpu_addr = gpu_addr + offset; 91 const GPUVAddr current_gpu_addr = gpu_addr + offset;
62 [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr); 92 [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
63 SetEntry(current_gpu_addr, entry_type); 93 SetEntry<false>(current_gpu_addr, entry_type);
64 if (current_entry_type != entry_type) { 94 if (current_entry_type != entry_type) {
65 rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); 95 rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
66 } 96 }
67 if constexpr (entry_type == EntryType::Mapped) { 97 if constexpr (entry_type == EntryType::Mapped) {
68 const VAddr current_cpu_addr = cpu_addr + offset; 98 const VAddr current_cpu_addr = cpu_addr + offset;
69 const auto index = PageEntryIndex(current_gpu_addr); 99 const auto index = PageEntryIndex<false>(current_gpu_addr);
70 const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL); 100 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
71 if (current_entry_type == entry_type && sub_value != page_table[index]) { 101 page_table[index] = sub_value;
72 rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
73 page_size);
74 }
75 page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
76 } 102 }
77 remaining_size -= page_size; 103 remaining_size -= page_size;
78 } 104 }
79 return gpu_addr; 105 return gpu_addr;
80} 106}
81 107
108template <MemoryManager::EntryType entry_type>
109GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
110 size_t size) {
111 u64 remaining_size{size};
112 for (u64 offset{}; offset < size; offset += big_page_size) {
113 const GPUVAddr current_gpu_addr = gpu_addr + offset;
114 [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
115 SetEntry<true>(current_gpu_addr, entry_type);
116 if (current_entry_type != entry_type) {
117 rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
118 }
119 if constexpr (entry_type == EntryType::Mapped) {
120 const VAddr current_cpu_addr = cpu_addr + offset;
121 const auto index = PageEntryIndex<true>(current_gpu_addr);
122 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
123 big_page_table_cpu[index] = sub_value;
124 const PAddr phys_address =
125 device_memory.GetPhysicalAddr(memory.GetPointer(current_cpu_addr));
126 big_page_table_physical[index] = static_cast<u32>(phys_address);
127 }
128 remaining_size -= big_page_size;
129 }
130 return gpu_addr;
131}
132
82void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { 133void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
83 rasterizer = rasterizer_; 134 rasterizer = rasterizer_;
84} 135}
85 136
86GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { 137GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
138 bool is_big_pages) {
139 if (is_big_pages) [[likely]] {
140 return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
141 }
87 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); 142 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
88} 143}
89 144
90GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) { 145GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
146 if (is_big_pages) [[likely]] {
147 return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
148 }
91 return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size); 149 return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
92} 150}
93 151
94GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
95 return Map(*FindFreeRange(size, align), cpu_addr, size);
96}
97
98GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
99 const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
100 ASSERT(gpu_addr);
101 return Map(*gpu_addr, cpu_addr, size);
102}
103
104void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { 152void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
105 if (size == 0) { 153 if (size == 0) {
106 return; 154 return;
@@ -115,61 +163,24 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
115 rasterizer->UnmapMemory(*cpu_addr, map_size); 163 rasterizer->UnmapMemory(*cpu_addr, map_size);
116 } 164 }
117 165
166 BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
118 PageTableOp<EntryType::Free>(gpu_addr, 0, size); 167 PageTableOp<EntryType::Free>(gpu_addr, 0, size);
119} 168}
120 169
121std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { 170std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
122 for (u64 offset{}; offset < size; offset += page_size) { 171 if (GetEntry<true>(gpu_addr) != EntryType::Mapped) [[unlikely]] {
123 if (GetEntry(gpu_addr + offset) != EntryType::Free) { 172 if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
124 return std::nullopt; 173 return std::nullopt;
125 } 174 }
126 }
127 175
128 return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size); 176 const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
129} 177 << cpu_page_bits;
130 178 return cpu_addr_base + (gpu_addr & page_mask);
131GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
132 return *AllocateFixed(*FindFreeRange(size, align), size);
133}
134
135std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
136 bool start_32bit_address) const {
137 if (!align) {
138 align = page_size;
139 } else {
140 align = Common::AlignUp(align, page_size);
141 }
142
143 u64 available_size{};
144 GPUVAddr gpu_addr{start_32bit_address ? 0 : allocate_start};
145 while (gpu_addr + available_size < address_space_size) {
146 if (GetEntry(gpu_addr + available_size) == EntryType::Free) {
147 available_size += page_size;
148
149 if (available_size >= size) {
150 return gpu_addr;
151 }
152 } else {
153 gpu_addr += available_size + page_size;
154 available_size = 0;
155
156 const auto remainder{gpu_addr % align};
157 if (remainder) {
158 gpu_addr = (gpu_addr - remainder) + align;
159 }
160 }
161 }
162
163 return std::nullopt;
164}
165
166std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
167 if (GetEntry(gpu_addr) != EntryType::Mapped) {
168 return std::nullopt;
169 } 179 }
170 180
171 const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex(gpu_addr)]) << 12ULL; 181 const VAddr cpu_addr_base =
172 return cpu_addr_base + (gpu_addr & page_mask); 182 static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
183 return cpu_addr_base + (gpu_addr & big_page_mask);
173} 184}
174 185
175std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { 186std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
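Both tables store the mapped CPU page number in a 32-bit PTE, which covers host address spaces up to 2^44 with 4 KiB CPU pages; GpuToCpuAddress tries the big-page table first and only falls back to the small-page table when the big entry is not Mapped. A sketch of the encode/decode round trip (the local aliases stand in for common_types.h):

#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;
using VAddr = u64;
using GPUVAddr = u64;

constexpr u64 cpu_page_bits = 12; // same constant as memory_manager.h

// Map() stores the CPU page number of the backing address.
u32 EncodePte(VAddr cpu_addr) {
    return static_cast<u32>(cpu_addr >> cpu_page_bits);
}

// Lookup rebuilds the address from the PTE plus the offset inside the GPU
// page; the mask is big_page_mask or page_mask depending on which table hit.
VAddr DecodePte(u32 pte, GPUVAddr gpu_addr, u64 page_mask) {
    return (static_cast<VAddr>(pte) << cpu_page_bits) + (gpu_addr & page_mask);
}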
@@ -225,7 +236,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
225 return {}; 236 return {};
226 } 237 }
227 238
228 return system.Memory().GetPointer(*address); 239 return memory.GetPointer(*address);
229} 240}
230 241
231const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { 242const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -234,98 +245,161 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
234 return {}; 245 return {};
235 } 246 }
236 247
237 return system.Memory().GetPointer(*address); 248 return memory.GetPointer(*address);
238} 249}
239 250
240void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, 251#pragma inline_recursion(on)
241 bool is_safe) const { 252
253template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
254inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
255 FuncMapped&& func_mapped, FuncReserved&& func_reserved,
256 FuncUnmapped&& func_unmapped) const {
257 u64 used_page_size;
258 u64 used_page_mask;
259 u64 used_page_bits;
260 if constexpr (is_big_pages) {
261 used_page_size = big_page_size;
262 used_page_mask = big_page_mask;
263 used_page_bits = big_page_bits;
264 } else {
265 used_page_size = page_size;
266 used_page_mask = page_mask;
267 used_page_bits = page_bits;
268 }
242 std::size_t remaining_size{size}; 269 std::size_t remaining_size{size};
243 std::size_t page_index{gpu_src_addr >> page_bits}; 270 std::size_t page_index{gpu_src_addr >> used_page_bits};
244 std::size_t page_offset{gpu_src_addr & page_mask}; 271 std::size_t page_offset{gpu_src_addr & used_page_mask};
272 GPUVAddr current_address = gpu_src_addr;
245 273
246 while (remaining_size > 0) { 274 while (remaining_size > 0) {
247 const std::size_t copy_amount{ 275 const std::size_t copy_amount{
248 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 276 std::min(static_cast<std::size_t>(used_page_size) - page_offset, remaining_size)};
249 const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; 277 auto entry = GetEntry<is_big_pages>(current_address);
250 if (page_addr) { 278 if (entry == EntryType::Mapped) [[likely]] {
251 const auto src_addr{*page_addr + page_offset}; 279 func_mapped(page_index, page_offset, copy_amount);
252 if (is_safe) { 280 } else if (entry == EntryType::Reserved) {
253 // Flush must happen on the rasterizer interface, such that memory is always 281 func_reserved(page_index, page_offset, copy_amount);
254 // synchronous when it is read (even when in asynchronous GPU mode). 282 } else [[unlikely]] {
255 // Fixes Dead Cells title menu. 283 func_unmapped(page_index, page_offset, copy_amount);
256 rasterizer->FlushRegion(src_addr, copy_amount);
257 }
258 system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
259 } else {
260 std::memset(dest_buffer, 0, copy_amount);
261 } 284 }
262
263 page_index++; 285 page_index++;
264 page_offset = 0; 286 page_offset = 0;
265 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
266 remaining_size -= copy_amount; 287 remaining_size -= copy_amount;
288 current_address += copy_amount;
267 } 289 }
268} 290}
269 291
292template <bool is_safe>
293void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
294 std::size_t size) const {
295 auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
296 [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
297 std::memset(dest_buffer, 0, copy_amount);
298 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
299 };
300 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
301 const VAddr cpu_addr_base =
302 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
303 if constexpr (is_safe) {
304 rasterizer->FlushRegion(cpu_addr_base, copy_amount);
305 }
306 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
307 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
308 };
309 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
310 const VAddr cpu_addr_base =
311 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
312 if constexpr (is_safe) {
313 rasterizer->FlushRegion(cpu_addr_base, copy_amount);
314 }
315 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
316 // u8* physical = device_memory.GetPointer(big_page_table_physical[page_index] + offset);
317 // std::memcpy(dest_buffer, physical, copy_amount);
318 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
319 };
320 auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
321 std::size_t copy_amount) {
322 GPUVAddr base = (page_index << big_page_bits) + offset;
323 MemoryOperation<false>(base, copy_amount, mapped_normal, set_to_zero, set_to_zero);
324 };
325 MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
326}
327
270void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { 328void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
271 ReadBlockImpl(gpu_src_addr, dest_buffer, size, true); 329 ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
272} 330}
273 331
274void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, 332void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
275 const std::size_t size) const { 333 const std::size_t size) const {
276 ReadBlockImpl(gpu_src_addr, dest_buffer, size, false); 334 ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
277} 335}
278 336
279void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, 337template <bool is_safe>
280 bool is_safe) { 338void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
281 std::size_t remaining_size{size}; 339 std::size_t size) {
282 std::size_t page_index{gpu_dest_addr >> page_bits}; 340 auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
283 std::size_t page_offset{gpu_dest_addr & page_mask}; 341 [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
284 342 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
285 while (remaining_size > 0) { 343 };
286 const std::size_t copy_amount{ 344 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
287 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 345 const VAddr cpu_addr_base =
288 const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; 346 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
289 if (page_addr) { 347 if constexpr (is_safe) {
290 const auto dest_addr{*page_addr + page_offset}; 348 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
291
292 if (is_safe) {
293 // Invalidate must happen on the rasterizer interface, such that memory is always
294 // synchronous when it is written (even when in asynchronous GPU mode).
295 rasterizer->InvalidateRegion(dest_addr, copy_amount);
296 }
297 system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
298 } 349 }
299 350 memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
300 page_index++;
301 page_offset = 0;
302 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 351 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
303 remaining_size -= copy_amount; 352 };
304 } 353 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
354 const VAddr cpu_addr_base =
355 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
356 if constexpr (is_safe) {
357 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
358 }
359 memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
360 /*u8* physical =
361 device_memory.GetPointer(big_page_table_physical[page_index] << cpu_page_bits) + offset;
362 std::memcpy(physical, src_buffer, copy_amount);*/
363 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
364 };
365 auto write_short_pages = [&](std::size_t page_index, std::size_t offset,
366 std::size_t copy_amount) {
367 GPUVAddr base = (page_index << big_page_bits) + offset;
368 MemoryOperation<false>(base, copy_amount, mapped_normal, just_advance, just_advance);
369 };
370 MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
305} 371}
306 372
307void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { 373void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
308 WriteBlockImpl(gpu_dest_addr, src_buffer, size, true); 374 WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
309} 375}
310 376
311void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, 377void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
312 std::size_t size) { 378 std::size_t size) {
313 WriteBlockImpl(gpu_dest_addr, src_buffer, size, false); 379 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
314} 380}
315 381
316void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { 382void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
317 size_t remaining_size{size}; 383 auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
318 size_t page_index{gpu_addr >> page_bits}; 384 [[maybe_unused]] std::size_t offset,
319 size_t page_offset{gpu_addr & page_mask}; 385 [[maybe_unused]] std::size_t copy_amount) {};
320 while (remaining_size > 0) { 386
321 const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; 387 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
322 if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { 388 const VAddr cpu_addr_base =
323 rasterizer->FlushRegion(*page_addr + page_offset, num_bytes); 389 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
324 } 390 rasterizer->FlushRegion(cpu_addr_base, copy_amount);
325 ++page_index; 391 };
326 page_offset = 0; 392 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
327 remaining_size -= num_bytes; 393 const VAddr cpu_addr_base =
328 } 394 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
395 rasterizer->FlushRegion(cpu_addr_base, copy_amount);
396 };
397 auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
398 std::size_t copy_amount) {
399 GPUVAddr base = (page_index << big_page_bits) + offset;
400 MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
401 };
402 MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
329} 403}
330 404
331void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { 405void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
@@ -348,7 +422,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
348} 422}
349 423
350bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { 424bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
351 size_t page_index{gpu_addr >> page_bits}; 425 size_t page_index{gpu_addr >> big_page_bits};
352 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; 426 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
353 std::optional<VAddr> old_page_addr{}; 427 std::optional<VAddr> old_page_addr{};
354 while (page_index != page_last) { 428 while (page_index != page_last) {
@@ -371,7 +445,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
371 size_t page_index{gpu_addr >> page_bits}; 445 size_t page_index{gpu_addr >> page_bits};
372 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; 446 const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
373 while (page_index < page_last) { 447 while (page_index < page_last) {
374 if (GetEntry(page_index << page_bits) == EntryType::Free) { 448 if (GetEntry<false>(page_index << page_bits) == EntryType::Free) {
375 return false; 449 return false;
376 } 450 }
377 ++page_index; 451 ++page_index;
@@ -379,47 +453,63 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
379 return true; 453 return true;
380} 454}
381 455
456#pragma inline_recursion(on)
457
382std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( 458std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
383 GPUVAddr gpu_addr, std::size_t size) const { 459 GPUVAddr gpu_addr, std::size_t size) const {
384 std::vector<std::pair<GPUVAddr, std::size_t>> result{}; 460 std::vector<std::pair<GPUVAddr, std::size_t>> result{};
385 size_t page_index{gpu_addr >> page_bits};
386 size_t remaining_size{size};
387 size_t page_offset{gpu_addr & page_mask};
388 std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; 461 std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
389 std::optional<VAddr> old_page_addr{}; 462 std::optional<VAddr> old_page_addr{};
390 const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) { 463 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
391 if (!last_segment) { 464 [[maybe_unused]] std::size_t offset,
392 const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset; 465 [[maybe_unused]] std::size_t copy_amount) {
393 last_segment = {new_base_addr, bytes};
394 } else {
395 last_segment->second += bytes;
396 }
397 };
398 const auto split = [&last_segment, &result] {
399 if (last_segment) { 466 if (last_segment) {
400 result.push_back(*last_segment); 467 result.push_back(*last_segment);
401 last_segment = std::nullopt; 468 last_segment = std::nullopt;
402 } 469 }
403 }; 470 };
404 while (remaining_size > 0) { 471 const auto extend_size_big = [this, &split, &old_page_addr,
405 const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; 472 &last_segment](std::size_t page_index, std::size_t offset,
406 const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; 473 std::size_t copy_amount) {
407 if (!page_addr || *page_addr == 0) { 474 const VAddr cpu_addr_base =
408 split(); 475 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
409 } else if (old_page_addr) { 476 if (old_page_addr) {
410 if (*old_page_addr + page_size != *page_addr) { 477 if (*old_page_addr != cpu_addr_base) {
411 split(); 478 split(0, 0, 0);
412 } 479 }
413 extend_size(num_bytes); 480 }
481 old_page_addr = {cpu_addr_base + copy_amount};
482 if (!last_segment) {
483 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
484 last_segment = {new_base_addr, copy_amount};
414 } else { 485 } else {
415 extend_size(num_bytes); 486 last_segment->second += copy_amount;
416 } 487 }
417 ++page_index; 488 };
418 page_offset = 0; 489 const auto extend_size_short = [this, &split, &old_page_addr,
419 remaining_size -= num_bytes; 490 &last_segment](std::size_t page_index, std::size_t offset,
420 old_page_addr = page_addr; 491 std::size_t copy_amount) {
421 } 492 const VAddr cpu_addr_base =
422 split(); 493 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
494 if (old_page_addr) {
495 if (*old_page_addr != cpu_addr_base) {
496 split(0, 0, 0);
497 }
498 }
499 old_page_addr = {cpu_addr_base + copy_amount};
500 if (!last_segment) {
501 const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
502 last_segment = {new_base_addr, copy_amount};
503 } else {
504 last_segment->second += copy_amount;
505 }
506 };
507 auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
508 GPUVAddr base = (page_index << big_page_bits) + offset;
509 MemoryOperation<false>(base, copy_amount, extend_size_short, split, split);
510 };
511 MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
512 split(0, 0, 0);
423 return result; 513 return result;
424} 514}
425 515
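GetSubmappedRange reuses the same traversal to build a list of (gpu_addr, size) segments: a segment keeps growing while consecutive pages are backed by contiguous CPU addresses and is flushed whenever the backing jumps or a page is not mapped, with unmapped big pages re-examined at small-page granularity. A sketch of just the coalescing step (the real code keeps one such lambda per table granularity):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct Segment {
    std::uint64_t gpu_addr;
    std::size_t size;
};

// Extend the open segment if cpu_base continues where the previous chunk
// ended; otherwise emit it and start a new one.
void ExtendOrSplit(std::optional<Segment>& last, std::optional<std::uint64_t>& next_cpu,
                   std::vector<Segment>& out, std::uint64_t gpu_base,
                   std::uint64_t cpu_base, std::size_t bytes) {
    if (last && next_cpu && *next_cpu != cpu_base) {
        out.push_back(*last); // CPU backing is no longer contiguous
        last.reset();
    }
    next_cpu = cpu_base + bytes; // where the next chunk must begin
    if (!last) {
        last = Segment{gpu_base, bytes};
    } else {
        last->size += bytes;
    }
}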
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 56604ef3e..9c388a06e 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,21 +10,26 @@
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/multi_level_page_table.h" 12#include "common/multi_level_page_table.h"
13#include "common/virtual_buffer.h"
13 14
14namespace VideoCore { 15namespace VideoCore {
15class RasterizerInterface; 16class RasterizerInterface;
16} 17}
17 18
18namespace Core { 19namespace Core {
20class DeviceMemory;
21namespace Memory {
22class Memory;
23} // namespace Memory
19class System; 24class System;
20} 25} // namespace Core
21 26
22namespace Tegra { 27namespace Tegra {
23 28
24class MemoryManager final { 29class MemoryManager final {
25public: 30public:
26 explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, 31 explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
27 u64 page_bits_ = 16); 32 u64 big_page_bits_ = 16, u64 page_bits_ = 12);
28 ~MemoryManager(); 33 ~MemoryManager();
29 34
30 size_t GetID() const { 35 size_t GetID() const {
@@ -93,12 +98,8 @@ public:
93 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, 98 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
94 std::size_t size) const; 99 std::size_t size) const;
95 100
96 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size); 101 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
97 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size); 102 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
98 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
99 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
100 [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
101 [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
102 void Unmap(GPUVAddr gpu_addr, std::size_t size); 103 void Unmap(GPUVAddr gpu_addr, std::size_t size);
103 104
104 void FlushRegion(GPUVAddr gpu_addr, size_t size) const; 105 void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
@@ -107,26 +108,42 @@ private:
107 [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align, 108 [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
108 bool start_32bit_address = false) const; 109 bool start_32bit_address = false) const;
109 110
110 void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, 111 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
111 bool is_safe) const; 112 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
112 void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, 113 FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
113 bool is_safe); 114
115 template <bool is_safe>
116 void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
117
118 template <bool is_safe>
119 void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
114 120
121 template <bool is_big_page>
115 [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { 122 [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
116 return (gpu_addr >> page_bits) & page_table_mask; 123 if constexpr (is_big_page) {
124 return (gpu_addr >> big_page_bits) & big_page_table_mask;
125 } else {
126 return (gpu_addr >> page_bits) & page_table_mask;
127 }
117 } 128 }
118 129
119 Core::System& system; 130 Core::System& system;
131 Core::Memory::Memory& memory;
132 Core::DeviceMemory& device_memory;
120 133
121 const u64 address_space_bits; 134 const u64 address_space_bits;
122 const u64 page_bits; 135 const u64 page_bits;
123 u64 address_space_size; 136 u64 address_space_size;
124 u64 allocate_start;
125 u64 page_size; 137 u64 page_size;
126 u64 page_mask; 138 u64 page_mask;
127 u64 page_table_mask; 139 u64 page_table_mask;
128 static constexpr u64 cpu_page_bits{12}; 140 static constexpr u64 cpu_page_bits{12};
129 141
142 const u64 big_page_bits;
143 u64 big_page_size;
144 u64 big_page_mask;
145 u64 big_page_table_mask;
146
130 VideoCore::RasterizerInterface* rasterizer = nullptr; 147 VideoCore::RasterizerInterface* rasterizer = nullptr;
131 148
132 enum class EntryType : u64 { 149 enum class EntryType : u64 {
@@ -136,15 +153,23 @@ private:
136 }; 153 };
137 154
138 std::vector<u64> entries; 155 std::vector<u64> entries;
156 std::vector<u64> big_entries;
139 157
140 template <EntryType entry_type> 158 template <EntryType entry_type>
141 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size); 159 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
142 160
143 EntryType GetEntry(size_t position) const; 161 template <EntryType entry_type>
162 GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
163
164 template <bool is_big_page>
165 inline EntryType GetEntry(size_t position) const;
144 166
145 void SetEntry(size_t position, EntryType entry); 167 template <bool is_big_page>
168 inline void SetEntry(size_t position, EntryType entry);
146 169
147 Common::MultiLevelPageTable<u32> page_table; 170 Common::MultiLevelPageTable<u32> page_table;
171 Common::VirtualBuffer<u32> big_page_table_cpu;
172 Common::VirtualBuffer<u32> big_page_table_physical;
148 173
149 const size_t unique_identifier; 174 const size_t unique_identifier;
150 175
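Taken together, the header changes replace the allocator-style interface (MapAllocate, AllocateFixed, FindFreeRange) with explicit Map/MapSparse/Unmap calls that carry a page-size hint, and the constructor now takes big_page_bits between the address-space width and the small-page width. A usage sketch under those signatures; gpu_addr, cpu_addr and size are caller-supplied placeholders:

#include <memory>

#include "core/core.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"

void ExampleAddressSpaceSetup(Core::System& system, GPUVAddr gpu_addr, VAddr cpu_addr,
                              std::size_t size) {
    // 40-bit GPU address space, 64 KiB big pages, 4 KiB small pages
    // (the defaults declared above).
    auto gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, 16, 12);
    system.GPU().InitAddressSpace(*gmmu);

    gmmu->Map(gpu_addr, cpu_addr, size, /*is_big_pages=*/true);     // 64 KiB PTEs
    gmmu->MapSparse(gpu_addr + size, size, /*is_big_pages=*/false); // 4 KiB sparse
    gmmu->Unmap(gpu_addr, size); // clears both big- and small-page entries
}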