Diffstat (limited to 'src')
 src/common/multi_level_page_table.inc                 |   3
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp  |  45
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h    |   1
 src/core/hle/service/nvdrv/devices/nvmap.cpp          |  10
 src/video_core/memory_manager.cpp                     | 436
 src/video_core/memory_manager.h                       |  57
6 files changed, 343 insertions, 209 deletions
diff --git a/src/common/multi_level_page_table.inc b/src/common/multi_level_page_table.inc
index 7fbcb908a..9a68cad93 100644
--- a/src/common/multi_level_page_table.inc
+++ b/src/common/multi_level_page_table.inc
@@ -19,6 +19,9 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
                                                    std::size_t page_bits_)
     : address_space_bits{address_space_bits_},
       first_level_bits{first_level_bits_}, page_bits{page_bits_} {
+    if (page_bits == 0) {
+        return;
+    }
     first_level_shift = address_space_bits - first_level_bits;
     first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
     alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);
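The guard added here lets a MultiLevelPageTable be constructed with page_bits == 0 as an intentional no-op; the GPU MemoryManager below relies on this when the small page size equals the big page size. A rough, self-contained sketch of that "disabled table" idea (the class and constants are illustrative, not the yuzu types):

    // Illustrative only: a tiny stand-in showing why the early return matters.
    // The real class is Common::MultiLevelPageTable; this sketch just models the
    // "page_bits == 0 disables the table" behaviour.
    #include <cstdint>

    struct TinyTable {
        std::uint64_t page_bits{};
        std::uint64_t alloc_size{};
        explicit TinyTable(std::uint64_t page_bits_) : page_bits{page_bits_} {
            if (page_bits == 0) {
                return; // skip sizing math that would otherwise use a zero shift
            }
            alloc_size = (1ULL << (40 - page_bits)) * sizeof(std::uint32_t);
        }
    };

    int main() {
        TinyTable enabled{12}; // normal 4 KiB small-page table
        TinyTable disabled{0}; // used when small pages equal big pages
        return (enabled.alloc_size != 0 && disabled.alloc_size == 0) ? 0 : 1;
    }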
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index db2a6c3b2..d95a88393 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -133,7 +133,8 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
 
-    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
+                                                  VM::PAGE_SIZE_BITS);
     system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;
 
@@ -189,6 +190,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
         .size = size,
         .page_size = params.page_size,
         .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
+        .big_pages = params.page_size != VM::YUZU_PAGESIZE,
     };
 
     std::memcpy(output.data(), &params, output.size());
@@ -209,7 +211,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
     if (mapping->sparse_alloc)
-        gmmu->MapSparse(offset, mapping->size);
+        gmmu->MapSparse(offset, mapping->size, mapping->big_page);
     else
         gmmu->Unmap(offset, mapping->size);
 
@@ -294,8 +296,9 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             return NvResult::BadValue;
         }
 
+        const bool use_big_pages = alloc->second.big_pages;
         if (!entry.handle) {
-            gmmu->MapSparse(virtual_address, size);
+            gmmu->MapSparse(virtual_address, size, use_big_pages);
         } else {
             auto handle{nvmap.GetHandle(entry.handle)};
             if (!handle) {
@@ -306,7 +309,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
                 handle->address +
                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
 
-            gmmu->Map(virtual_address, cpu_address, size);
+            gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
         }
     }
 
@@ -345,7 +348,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
         u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
         VAddr cpu_address{mapping->ptr + params.buffer_offset};
 
-        gmmu->Map(gpu_address, cpu_address, params.mapping_size);
+        gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
 
         return NvResult::Success;
     } catch ([[maybe_unused]] const std::out_of_range& e) {
@@ -363,6 +366,17 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
     VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
+    bool big_page{[&]() {
+        if (Common::IsAligned(handle->align, vm.big_page_size))
+            return true;
+        else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
+            return false;
+        else {
+            UNREACHABLE();
+            return false;
+        }
+    }()};
+
     if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
         auto alloc{allocation_map.upper_bound(params.offset)};
 
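The big_page decision is now computed once, up front, from the nvmap handle's alignment: a handle whose alignment is a multiple of the big page size may be mapped with big pages, while a handle only aligned to the small page size may not. A standalone sketch of that check, assuming the 64 KiB / 4 KiB sizes used elsewhere in this patch (IsAligned is a stand-in for Common::IsAligned):

    // Standalone sketch of the alignment-based page-size choice above.
    #include <cstdint>

    constexpr std::uint64_t kSmallPage = 0x1000; // 4 KiB
    constexpr std::uint64_t kBigPage = 0x10000;  // 64 KiB

    constexpr bool IsAligned(std::uint64_t value, std::uint64_t alignment) {
        return (value % alignment) == 0;
    }

    constexpr bool UseBigPages(std::uint64_t handle_align) {
        if (IsAligned(handle_align, kBigPage)) {
            return true;
        }
        if (IsAligned(handle_align, kSmallPage)) {
            return false;
        }
        return false; // the real code treats this case as unreachable
    }

    static_assert(UseBigPages(0x10000));
    static_assert(!UseBigPages(0x1000));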
@@ -372,23 +386,14 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::BadValue;
         }
 
-        gmmu->Map(params.offset, cpu_address, size);
+        const bool use_big_pages = alloc->second.big_pages && big_page;
+        gmmu->Map(params.offset, cpu_address, size, use_big_pages);
 
-        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
-                                               alloc->second.sparse)};
+        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
+                                               use_big_pages, alloc->second.sparse)};
         alloc->second.mappings.push_back(mapping);
         mapping_map[params.offset] = mapping;
     } else {
-        bool big_page{[&]() {
-            if (Common::IsAligned(handle->align, vm.big_page_size))
-                return true;
-            else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
-                return false;
-            else {
-                UNREACHABLE();
-                return false;
-            }
-        }()};
 
         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
@@ -402,7 +407,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::InsufficientMemory;
         }
 
-        gmmu->Map(params.offset, cpu_address, size);
+        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
 
         auto mapping{
             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
@@ -439,7 +444,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
         // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
         // Only FreeSpace can unmap them fully
         if (mapping->sparse_alloc) {
-            gmmu->MapSparse(params.offset, mapping->size);
+            gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
        } else {
            gmmu->Unmap(params.offset, mapping->size);
        }
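Note that the non-fixed path now maps Common::AlignUp(size, page_size) rather than the raw size, so a big-page mapping always covers whole big pages even when the buffer is smaller. A minimal sketch of the assumed AlignUp semantics (not the yuzu implementation):

    // Assumed semantics of Common::AlignUp, shown standalone.
    #include <cstdint>

    constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t alignment) {
        return (value + alignment - 1) / alignment * alignment;
    }

    static_assert(AlignUp(0x1800, 0x10000) == 0x10000); // 6 KiB buffer -> one 64 KiB big page
    static_assert(AlignUp(0x1800, 0x1000) == 0x2000);   // same buffer -> two 4 KiB small pages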
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1d27739e2..12e881f0d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -177,6 +177,7 @@ private:
         std::list<std::shared_ptr<Mapping>> mappings;
         u32 page_size;
         bool sparse;
+        bool big_pages;
     };
 
     std::map<u64, std::shared_ptr<Mapping>>
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 279997e81..992c117f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -9,6 +9,8 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
@@ -136,6 +138,10 @@ NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output)
         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
         return result;
     }
+    ASSERT(system.CurrentProcess()
+               ->PageTable()
+               .LockForDeviceAddressSpace(handle_description->address, handle_description->size)
+               .IsSuccess());
     std::memcpy(output.data(), &params, sizeof(params));
     return result;
 }
@@ -256,6 +262,10 @@ NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
     }
 
     if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
+        ASSERT(system.CurrentProcess()
+                   ->PageTable()
+                   .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
+                   .IsSuccess());
         params.address = freeInfo->address;
         params.size = static_cast<u32>(freeInfo->size);
         params.flags.raw = 0;
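IocAlloc now pins the freshly allocated guest range for device access and IocFree releases it, so every LockForDeviceAddressSpace call is paired with an UnlockForDeviceAddressSpace over the same address and size. A toy, self-contained sketch of that pairing invariant (this is not the kernel API, only the bookkeeping idea):

    // Toy illustration of the lock/unlock pairing the two hunks above establish.
    #include <cassert>
    #include <cstdint>
    #include <map>

    class DeviceLockTracker {
    public:
        void Lock(std::uint64_t address, std::uint64_t size) {
            locked_[address] = size; // real code locks the page-table range for the device
        }
        void Unlock(std::uint64_t address, std::uint64_t size) {
            auto it = locked_.find(address);
            assert(it != locked_.end() && it->second == size); // must match the Lock call
            locked_.erase(it);
        }
        bool Empty() const { return locked_.empty(); }

    private:
        std::map<std::uint64_t, std::uint64_t> locked_;
    };

    int main() {
        DeviceLockTracker tracker;
        tracker.Lock(0x8000000, 0x10000);   // IocAlloc path
        tracker.Unlock(0x8000000, 0x10000); // IocFree path
        return tracker.Empty() ? 0 : 1;
    }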
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index b36067613..836ece136 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,6 +7,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/device_memory.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
@@ -14,40 +15,69 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
 
+#pragma optimize("", off)
+
 namespace Tegra {
 
 std::atomic<size_t> MemoryManager::unique_identifier_generator{};
 
-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_)
-    : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{},
-      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits},
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+                             u64 page_bits_)
+    : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
+      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
+                                           page_bits != big_page_bits ? page_bits : 0},
       unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
     address_space_size = 1ULL << address_space_bits;
-    allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
     page_size = 1ULL << page_bits;
     page_mask = page_size - 1ULL;
-    const u64 page_table_bits = address_space_bits - cpu_page_bits;
+    big_page_size = 1ULL << big_page_bits;
+    big_page_mask = big_page_size - 1ULL;
+    const u64 page_table_bits = address_space_bits - page_bits;
+    const u64 big_page_table_bits = address_space_bits - big_page_bits;
     const u64 page_table_size = 1ULL << page_table_bits;
+    const u64 big_page_table_size = 1ULL << big_page_table_bits;
     page_table_mask = page_table_size - 1;
+    big_page_table_mask = big_page_table_size - 1;
 
+    big_entries.resize(big_page_table_size / 32, 0);
+    big_page_table_cpu.resize(big_page_table_size);
+    big_page_table_physical.resize(big_page_table_size);
     entries.resize(page_table_size / 32, 0);
 }
 
 MemoryManager::~MemoryManager() = default;
 
+template <bool is_big_page>
 MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
-    position = position >> page_bits;
-    const u64 entry_mask = entries[position / 32];
-    const size_t sub_index = position % 32;
-    return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    } else {
+        position = position >> page_bits;
+        const u64 entry_mask = entries[position / 32];
+        const size_t sub_index = position % 32;
+        return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    }
 }
 
+template <bool is_big_page>
 void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
-    position = position >> page_bits;
-    const u64 entry_mask = entries[position / 32];
-    const size_t sub_index = position % 32;
-    entries[position / 32] =
-        (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        big_entries[position / 32] =
+            (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    } else {
+        position = position >> page_bits;
+        const u64 entry_mask = entries[position / 32];
+        const size_t sub_index = position % 32;
+        entries[position / 32] =
+            (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    }
 }
 
 template <MemoryManager::EntryType entry_type>
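Both entries and big_entries pack 32 two-bit EntryType values into each u64, which is why GetEntry and SetEntry divide the page index by 32 and shift by 2 * (index % 32). A self-contained model of that packing (same bit layout, simplified types):

    // Self-contained model of the 2-bit-per-page state packing used above.
    #include <cstdint>
    #include <vector>

    enum class EntryType : std::uint64_t { Free = 0, Reserved = 1, Mapped = 2 };

    class EntryBitmap {
    public:
        explicit EntryBitmap(std::size_t num_pages) : words_((num_pages + 31) / 32, 0) {}

        EntryType Get(std::size_t page) const {
            const std::size_t sub_index = page % 32;
            return static_cast<EntryType>((words_[page / 32] >> (2 * sub_index)) & 0x03ULL);
        }

        void Set(std::size_t page, EntryType entry) {
            const std::size_t sub_index = page % 32;
            const std::uint64_t keep = ~(3ULL << (sub_index * 2));
            words_[page / 32] =
                (words_[page / 32] & keep) | (static_cast<std::uint64_t>(entry) << (sub_index * 2));
        }

    private:
        std::vector<std::uint64_t> words_;
    };

    int main() {
        EntryBitmap map(1 << 10);
        map.Set(33, EntryType::Mapped); // lands in words_[1], bits 2..3
        return map.Get(33) == EntryType::Mapped && map.Get(32) == EntryType::Free ? 0 : 1;
    }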
@@ -59,48 +89,66 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
     }
     for (u64 offset{}; offset < size; offset += page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
-        [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
-        SetEntry(current_gpu_addr, entry_type);
+        [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
+        SetEntry<false>(current_gpu_addr, entry_type);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
-            const auto index = PageEntryIndex(current_gpu_addr);
-            const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL);
-            if (current_entry_type == entry_type && sub_value != page_table[index]) {
-                rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
-                                             page_size);
-            }
-            page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
+            const auto index = PageEntryIndex<false>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            page_table[index] = sub_value;
         }
         remaining_size -= page_size;
     }
     return gpu_addr;
 }
 
+template <MemoryManager::EntryType entry_type>
+GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+                                       size_t size) {
+    u64 remaining_size{size};
+    for (u64 offset{}; offset < size; offset += big_page_size) {
+        const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
+        SetEntry<true>(current_gpu_addr, entry_type);
+        if (current_entry_type != entry_type) {
+            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+        }
+        if constexpr (entry_type == EntryType::Mapped) {
+            const VAddr current_cpu_addr = cpu_addr + offset;
+            const auto index = PageEntryIndex<true>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            big_page_table_cpu[index] = sub_value;
+            const PAddr phys_address =
+                device_memory.GetPhysicalAddr(memory.GetPointer(current_cpu_addr));
+            big_page_table_physical[index] = static_cast<u32>(phys_address);
+        }
+        remaining_size -= big_page_size;
+    }
+    return gpu_addr;
+}
+
 void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
     rasterizer = rasterizer_;
 }
 
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+                            bool is_big_pages) {
+    if (is_big_pages) [[likely]] {
+        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+    }
     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
 }
 
-GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+    if (is_big_pages) [[likely]] {
+        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+    }
     return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
 }
 
-GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
-    return Map(*FindFreeRange(size, align), cpu_addr, size);
-}
-
-GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
-    const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
-    ASSERT(gpu_addr);
-    return Map(*gpu_addr, cpu_addr, size);
-}
-
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
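BigPageTableOp stores two things per big page: the CPU address compressed to a 32-bit page frame number (cpu_addr >> cpu_page_bits) in big_page_table_cpu, and the corresponding physical address in big_page_table_physical. A minimal sketch of the compress/reconstruct round trip the 32-bit entries rely on (the shift matches the patch's cpu_page_bits, the rest is illustrative):

    // Sketch of the 32-bit page-frame compression used by the tables above.
    #include <cstdint>

    constexpr std::uint64_t kCpuPageBits = 12;

    constexpr std::uint32_t Compress(std::uint64_t cpu_addr) {
        return static_cast<std::uint32_t>(cpu_addr >> kCpuPageBits);
    }

    constexpr std::uint64_t Reconstruct(std::uint32_t entry, std::uint64_t offset_in_page) {
        return (static_cast<std::uint64_t>(entry) << kCpuPageBits) + offset_in_page;
    }

    static_assert(Reconstruct(Compress(0x2345678ABCULL), 0xABC) == 0x2345678ABCULL);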
@@ -115,61 +163,24 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }
 
+    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
     PageTableOp<EntryType::Free>(gpu_addr, 0, size);
 }
 
-std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
-    for (u64 offset{}; offset < size; offset += page_size) {
-        if (GetEntry(gpu_addr + offset) != EntryType::Free) {
-            return std::nullopt;
-        }
-    }
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
+    if (GetEntry<true>(gpu_addr) != EntryType::Mapped) [[unlikely]] {
+        if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
+            return std::nullopt;
+        }
 
-    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
-}
-
-GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
-    return *AllocateFixed(*FindFreeRange(size, align), size);
-}
-
-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
-                                                     bool start_32bit_address) const {
-    if (!align) {
-        align = page_size;
-    } else {
-        align = Common::AlignUp(align, page_size);
-    }
-
-    u64 available_size{};
-    GPUVAddr gpu_addr{start_32bit_address ? 0 : allocate_start};
-    while (gpu_addr + available_size < address_space_size) {
-        if (GetEntry(gpu_addr + available_size) == EntryType::Free) {
-            available_size += page_size;
-
-            if (available_size >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += available_size + page_size;
-            available_size = 0;
-
-            const auto remainder{gpu_addr % align};
-            if (remainder) {
-                gpu_addr = (gpu_addr - remainder) + align;
-            }
-        }
-    }
-
-    return std::nullopt;
-}
-
-std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
-    if (GetEntry(gpu_addr) != EntryType::Mapped) {
-        return std::nullopt;
+        const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
+                                    << cpu_page_bits;
+        return cpu_addr_base + (gpu_addr & page_mask);
     }
 
-    const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex(gpu_addr)]) << 12ULL;
-    return cpu_addr_base + (gpu_addr & page_mask);
+    const VAddr cpu_addr_base =
+        static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
+    return cpu_addr_base + (gpu_addr & big_page_mask);
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
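GpuToCpuAddress now consults the big-page table first and only falls back to the small-page table when the big-page entry is not mapped. A standalone sketch of that two-level lookup order, assuming 64 KiB big pages and 4 KiB small pages and using plain maps as stand-ins for the real tables:

    // Simplified two-level translation: big pages first, small pages as fallback.
    #include <cstdint>
    #include <optional>
    #include <unordered_map>

    struct TwoLevelLookup {
        std::unordered_map<std::uint64_t, std::uint64_t> big;   // key: addr >> 16, value: CPU base
        std::unordered_map<std::uint64_t, std::uint64_t> small; // key: addr >> 12, value: CPU base

        std::optional<std::uint64_t> Translate(std::uint64_t gpu_addr) const {
            if (auto it = big.find(gpu_addr >> 16); it != big.end()) {
                return it->second + (gpu_addr & 0xFFFF); // offset within the 64 KiB page
            }
            if (auto it = small.find(gpu_addr >> 12); it != small.end()) {
                return it->second + (gpu_addr & 0xFFF); // offset within the 4 KiB page
            }
            return std::nullopt;
        }
    };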
@@ -225,7 +236,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
         return {};
     }
 
-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }
 
 const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -234,98 +245,161 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
         return {};
     }
 
-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }
 
-void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                                  bool is_safe) const {
+#pragma inline_recursion(on)
+
+template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
+                                           FuncMapped&& func_mapped, FuncReserved&& func_reserved,
+                                           FuncUnmapped&& func_unmapped) const {
+    u64 used_page_size;
+    u64 used_page_mask;
+    u64 used_page_bits;
+    if constexpr (is_big_pages) {
+        used_page_size = big_page_size;
+        used_page_mask = big_page_mask;
+        used_page_bits = big_page_bits;
+    } else {
+        used_page_size = page_size;
+        used_page_mask = page_mask;
+        used_page_bits = page_bits;
+    }
     std::size_t remaining_size{size};
-    std::size_t page_index{gpu_src_addr >> page_bits};
-    std::size_t page_offset{gpu_src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> used_page_bits};
+    std::size_t page_offset{gpu_src_addr & used_page_mask};
+    GPUVAddr current_address = gpu_src_addr;
 
     while (remaining_size > 0) {
         const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto src_addr{*page_addr + page_offset};
-            if (is_safe) {
-                // Flush must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is read (even when in asynchronous GPU mode).
-                // Fixes Dead Cells title menu.
-                rasterizer->FlushRegion(src_addr, copy_amount);
-            }
-            system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
-        } else {
-            std::memset(dest_buffer, 0, copy_amount);
+            std::min(static_cast<std::size_t>(used_page_size) - page_offset, remaining_size)};
+        auto entry = GetEntry<is_big_pages>(current_address);
+        if (entry == EntryType::Mapped) [[likely]] {
+            func_mapped(page_index, page_offset, copy_amount);
+        } else if (entry == EntryType::Reserved) {
+            func_reserved(page_index, page_offset, copy_amount);
+        } else [[unlikely]] {
+            func_unmapped(page_index, page_offset, copy_amount);
         }
-
         page_index++;
         page_offset = 0;
-        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
         remaining_size -= copy_amount;
+        current_address += copy_amount;
     }
 }
 
+template <bool is_safe>
+void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
+                                  std::size_t size) const {
+    auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
+                           [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        std::memset(dest_buffer, 0, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        // u8* physical = device_memory.GetPointer(big_page_table_physical[page_index] + offset);
+        // std::memcpy(dest_buffer, physical, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, set_to_zero, set_to_zero);
+    };
+    MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
+}
+
 void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, true);
+    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
 }
 
 void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, false);
+    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
 }
 
-void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                                   bool is_safe) {
-    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_dest_addr >> page_bits};
-    std::size_t page_offset{gpu_dest_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto dest_addr{*page_addr + page_offset};
-
-            if (is_safe) {
-                // Invalidate must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is written (even when in asynchronous GPU mode).
-                rasterizer->InvalidateRegion(dest_addr, copy_amount);
-            }
-            system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
+template <bool is_safe>
+void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                                   std::size_t size) {
+    auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
+                            [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
         }
-
-        page_index++;
-        page_offset = 0;
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
-        remaining_size -= copy_amount;
-    }
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        }
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
+        /*u8* physical =
+            device_memory.GetPointer(big_page_table_physical[page_index] << cpu_page_bits) + offset;
+        std::memcpy(physical, src_buffer, copy_amount);*/
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto write_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, just_advance, just_advance);
+    };
+    MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
 }
 
 void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, true);
+    WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
 }
 
 void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                      std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, false);
+    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
 }
 
 void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
-    size_t remaining_size{size};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t page_offset{gpu_addr & page_mask};
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
-            rasterizer->FlushRegion(*page_addr + page_offset, num_bytes);
-        }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-    }
+    auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+                          [[maybe_unused]] std::size_t offset,
+                          [[maybe_unused]] std::size_t copy_amount) {};
+
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+    };
+    MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
 }
 
 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
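MemoryOperation is the new central walker: it splits a GPU range into big or small pages and hands each chunk to one of three callbacks depending on whether the entry is Mapped, Reserved, or Free; ReadBlockImpl, WriteBlockImpl, FlushRegion and GetSubmappedRange only differ in the callbacks they supply, and unmapped big pages are re-walked at small-page granularity. A simplified, self-contained model of that pattern (not the member function itself):

    // Simplified model of the callback-driven page walker.
    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <vector>

    enum class EntryType { Free, Reserved, Mapped };

    using Chunk = std::function<void(std::size_t page_index, std::size_t offset, std::size_t len)>;

    // Walks [addr, addr + size) in pages of (1 << page_bits) and dispatches each
    // chunk based on the page's state.
    void Walk(const std::vector<EntryType>& states, std::uint64_t addr, std::size_t size,
              std::size_t page_bits, const Chunk& mapped, const Chunk& reserved,
              const Chunk& unmapped) {
        const std::size_t page_size = std::size_t{1} << page_bits;
        std::size_t page_index = static_cast<std::size_t>(addr >> page_bits);
        std::size_t offset = static_cast<std::size_t>(addr & (page_size - 1));
        while (size > 0) {
            const std::size_t len = std::min(page_size - offset, size);
            switch (states[page_index]) {
            case EntryType::Mapped:
                mapped(page_index, offset, len);
                break;
            case EntryType::Reserved:
                reserved(page_index, offset, len);
                break;
            default:
                unmapped(page_index, offset, len);
                break;
            }
            ++page_index;
            offset = 0;
            size -= len;
        }
    }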
@@ -348,7 +422,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
 }
 
 bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
-    size_t page_index{gpu_addr >> page_bits};
+    size_t page_index{gpu_addr >> big_page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     std::optional<VAddr> old_page_addr{};
     while (page_index != page_last) {
@@ -371,7 +445,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     size_t page_index{gpu_addr >> page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
-        if (GetEntry(page_index << page_bits) == EntryType::Free) {
+        if (GetEntry<false>(page_index << page_bits) == EntryType::Free) {
             return false;
         }
         ++page_index;
@@ -379,47 +453,63 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     return true;
 }
 
+#pragma inline_recursion(on)
+
 std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     GPUVAddr gpu_addr, std::size_t size) const {
     std::vector<std::pair<GPUVAddr, std::size_t>> result{};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t remaining_size{size};
-    size_t page_offset{gpu_addr & page_mask};
     std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
     std::optional<VAddr> old_page_addr{};
-    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
-        if (!last_segment) {
-            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
-            last_segment = {new_base_addr, bytes};
-        } else {
-            last_segment->second += bytes;
-        }
-    };
-    const auto split = [&last_segment, &result] {
+    const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
+                                                [[maybe_unused]] std::size_t offset,
+                                                [[maybe_unused]] std::size_t copy_amount) {
         if (last_segment) {
             result.push_back(*last_segment);
             last_segment = std::nullopt;
         }
     };
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (!page_addr || *page_addr == 0) {
-            split();
-        } else if (old_page_addr) {
-            if (*old_page_addr + page_size != *page_addr) {
-                split();
+    const auto extend_size_big = [this, &split, &old_page_addr,
+                                  &last_segment](std::size_t page_index, std::size_t offset,
+                                                 std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
             }
-            extend_size(num_bytes);
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
         } else {
-            extend_size(num_bytes);
+            last_segment->second += copy_amount;
         }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-        old_page_addr = page_addr;
-    }
-    split();
+    };
+    const auto extend_size_short = [this, &split, &old_page_addr,
+                                    &last_segment](std::size_t page_index, std::size_t offset,
+                                                   std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
+            }
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
+        } else {
+            last_segment->second += copy_amount;
+        }
+    };
+    auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, extend_size_short, split, split);
+    };
+    MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
+    split(0, 0, 0);
     return result;
 }
 
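GetSubmappedRange reuses the walker to build contiguous GPU sub-ranges: extend_size_big and extend_size_short grow the current segment while the backing CPU addresses remain contiguous, and split flushes it whenever contiguity breaks or an unmapped page is reached. A toy version of that segment merging (simplified types, same idea):

    // Merge consecutive pages whose CPU backing is contiguous, flush otherwise.
    #include <cstdint>
    #include <optional>
    #include <utility>
    #include <vector>

    using Segment = std::pair<std::uint64_t, std::size_t>; // {gpu_base, length}

    std::vector<Segment> MergeContiguous(const std::vector<std::optional<std::uint64_t>>& cpu_pages,
                                         std::size_t page_size) {
        std::vector<Segment> result;
        std::optional<Segment> current;
        std::optional<std::uint64_t> expected_cpu;
        for (std::size_t i = 0; i < cpu_pages.size(); ++i) {
            const auto cpu = cpu_pages[i];
            const bool contiguous = cpu && (!expected_cpu || *expected_cpu == *cpu);
            if (!cpu || !contiguous) {
                if (current) {
                    result.push_back(*current); // split()
                    current.reset();
                }
            }
            if (cpu) {
                if (!current) {
                    current = {i * page_size, 0};
                }
                current->second += page_size; // extend_size()
                expected_cpu = *cpu + page_size;
            } else {
                expected_cpu.reset();
            }
        }
        if (current) {
            result.push_back(*current);
        }
        return result;
    }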
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 56604ef3e..9c388a06e 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,21 +10,26 @@
 
 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
+#include "common/virtual_buffer.h"
 
 namespace VideoCore {
 class RasterizerInterface;
 }
 
 namespace Core {
+class DeviceMemory;
+namespace Memory {
+class Memory;
+} // namespace Memory
 class System;
-}
+} // namespace Core
 
 namespace Tegra {
 
 class MemoryManager final {
 public:
     explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
-                           u64 page_bits_ = 16);
+                           u64 big_page_bits_ = 16, u64 page_bits_ = 12);
     ~MemoryManager();
 
     size_t GetID() const {
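The constructor now takes the big-page shift ahead of the small-page shift, and the defaults move from a single 64 KiB page size to 64 KiB big pages plus 4 KiB small pages. A self-contained check of the table sizing those defaults imply for the 40-bit address space (this mirrors the arithmetic in the constructor in memory_manager.cpp; the names are local to the sketch):

    // Table sizing implied by the new defaults: 40-bit AS, 16-bit big pages, 12-bit pages.
    #include <cstdint>

    constexpr std::uint64_t address_space_bits = 40;
    constexpr std::uint64_t big_page_bits = 16; // 64 KiB
    constexpr std::uint64_t page_bits = 12;     // 4 KiB

    constexpr std::uint64_t big_page_table_size = 1ULL << (address_space_bits - big_page_bits);
    constexpr std::uint64_t page_table_size = 1ULL << (address_space_bits - page_bits);

    static_assert(big_page_table_size == (1ULL << 24)); // ~16.8M big-page entries
    static_assert(page_table_size == (1ULL << 28));     // ~268M small-page entries
    static_assert(big_page_table_size * sizeof(std::uint32_t) == 64ULL * 1024 * 1024); // 64 MiB of u32 entries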
@@ -93,12 +98,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;
 
-    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
-    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
-    [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
-    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
     void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
@@ -107,26 +108,42 @@ private:
     [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
                                                         bool start_32bit_address = false) const;
 
-    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                       bool is_safe) const;
-    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                        bool is_safe);
+    template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+    inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
+                                FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
+
+    template <bool is_safe>
+    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+
+    template <bool is_safe>
+    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
+    template <bool is_big_page>
     [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
-        return (gpu_addr >> page_bits) & page_table_mask;
+        if constexpr (is_big_page) {
+            return (gpu_addr >> big_page_bits) & big_page_table_mask;
+        } else {
+            return (gpu_addr >> page_bits) & page_table_mask;
+        }
     }
 
     Core::System& system;
+    Core::Memory::Memory& memory;
+    Core::DeviceMemory& device_memory;
 
     const u64 address_space_bits;
     const u64 page_bits;
     u64 address_space_size;
-    u64 allocate_start;
     u64 page_size;
     u64 page_mask;
    u64 page_table_mask;
     static constexpr u64 cpu_page_bits{12};
 
+    const u64 big_page_bits;
+    u64 big_page_size;
+    u64 big_page_mask;
+    u64 big_page_table_mask;
+
     VideoCore::RasterizerInterface* rasterizer = nullptr;
 
     enum class EntryType : u64 {
@@ -136,15 +153,23 @@ private:
     };
 
     std::vector<u64> entries;
+    std::vector<u64> big_entries;
 
     template <EntryType entry_type>
     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
 
-    EntryType GetEntry(size_t position) const;
+    template <EntryType entry_type>
+    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+
+    template <bool is_big_page>
+    inline EntryType GetEntry(size_t position) const;
 
-    void SetEntry(size_t position, EntryType entry);
+    template <bool is_big_page>
+    inline void SetEntry(size_t position, EntryType entry);
 
     Common::MultiLevelPageTable<u32> page_table;
+    Common::VirtualBuffer<u32> big_page_table_cpu;
+    Common::VirtualBuffer<u32> big_page_table_physical;
 
     const size_t unique_identifier;
 