Diffstat (limited to 'src')
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 202
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h   |  79
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp         |  33
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.h           |   6
-rw-r--r--  src/video_core/memory_manager.cpp                    | 532
-rw-r--r--  src/video_core/memory_manager.h                      | 172
6 files changed, 431 insertions, 593 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 195421cc0..d4ba88147 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
| @@ -16,11 +16,12 @@ | |||
| 16 | #include "video_core/renderer_base.h" | 16 | #include "video_core/renderer_base.h" |
| 17 | 17 | ||
| 18 | namespace Service::Nvidia::Devices { | 18 | namespace Service::Nvidia::Devices { |
| 19 | |||
| 19 | namespace NvErrCodes { | 20 | namespace NvErrCodes { |
| 20 | enum { | 21 | constexpr u32 Success{}; |
| 21 | InvalidNmapHandle = -22, | 22 | constexpr u32 OutOfMemory{static_cast<u32>(-12)}; |
| 22 | }; | 23 | constexpr u32 InvalidInput{static_cast<u32>(-22)}; |
| 23 | } | 24 | } // namespace NvErrCodes |
| 24 | 25 | ||
| 25 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | 26 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 26 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | 27 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} |
| @@ -49,8 +50,9 @@ u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std: | |||
| 49 | break; | 50 | break; |
| 50 | } | 51 | } |
| 51 | 52 | ||
| 52 | if (static_cast<IoctlCommand>(command.cmd.Value()) == IoctlCommand::IocRemapCommand) | 53 | if (static_cast<IoctlCommand>(command.cmd.Value()) == IoctlCommand::IocRemapCommand) { |
| 53 | return Remap(input, output); | 54 | return Remap(input, output); |
| 55 | } | ||
| 54 | 56 | ||
| 55 | UNIMPLEMENTED_MSG("Unimplemented ioctl command"); | 57 | UNIMPLEMENTED_MSG("Unimplemented ioctl command"); |
| 56 | return 0; | 58 | return 0; |
| @@ -59,6 +61,7 @@ u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std: | |||
| 59 | u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) { | 61 | u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output) { |
| 60 | IoctlInitalizeEx params{}; | 62 | IoctlInitalizeEx params{}; |
| 61 | std::memcpy(¶ms, input.data(), input.size()); | 63 | std::memcpy(¶ms, input.data(), input.size()); |
| 64 | |||
| 62 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); | 65 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); |
| 63 | 66 | ||
| 64 | return 0; | 67 | return 0; |
| @@ -67,53 +70,61 @@ u32 nvhost_as_gpu::InitalizeEx(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 67 | u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) { | 70 | u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output) { |
| 68 | IoctlAllocSpace params{}; | 71 | IoctlAllocSpace params{}; |
| 69 | std::memcpy(¶ms, input.data(), input.size()); | 72 | std::memcpy(¶ms, input.data(), input.size()); |
| 73 | |||
| 70 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, | 74 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, |
| 71 | params.page_size, params.flags); | 75 | params.page_size, params.flags); |
| 72 | 76 | ||
| 73 | auto& gpu = system.GPU(); | 77 | const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; |
| 74 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; | 78 | if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) { |
| 75 | if (params.flags & 1) { | 79 | params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size); |
| 76 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); | ||
| 77 | } else { | 80 | } else { |
| 78 | params.offset = gpu.MemoryManager().AllocateSpace(size, params.align); | 81 | params.offset = system.GPU().MemoryManager().Allocate(size, params.align); |
| 82 | } | ||
| 83 | |||
| 84 | auto result{NvErrCodes::Success}; | ||
| 85 | if (!params.offset) { | ||
| 86 | LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size); | ||
| 87 | result = NvErrCodes::OutOfMemory; | ||
| 79 | } | 88 | } |
| 80 | 89 | ||
| 81 | std::memcpy(output.data(), ¶ms, output.size()); | 90 | std::memcpy(output.data(), ¶ms, output.size()); |
| 82 | return 0; | 91 | return result; |
| 83 | } | 92 | } |
| 84 | 93 | ||
| 85 | u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) { | 94 | u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) { |
| 86 | std::size_t num_entries = input.size() / sizeof(IoctlRemapEntry); | 95 | const auto num_entries = input.size() / sizeof(IoctlRemapEntry); |
| 87 | 96 | ||
| 88 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, num_entries=0x{:X}", num_entries); | 97 | LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); |
| 89 | 98 | ||
| 99 | auto result{NvErrCodes::Success}; | ||
| 90 | std::vector<IoctlRemapEntry> entries(num_entries); | 100 | std::vector<IoctlRemapEntry> entries(num_entries); |
| 91 | std::memcpy(entries.data(), input.data(), input.size()); | 101 | std::memcpy(entries.data(), input.data(), input.size()); |
| 92 | 102 | ||
| 93 | auto& gpu = system.GPU(); | ||
| 94 | for (const auto& entry : entries) { | 103 | for (const auto& entry : entries) { |
| 95 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", | 104 | LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", |
| 96 | entry.offset, entry.nvmap_handle, entry.pages); | 105 | entry.offset, entry.nvmap_handle, entry.pages); |
| 97 | GPUVAddr offset = static_cast<GPUVAddr>(entry.offset) << 0x10; | 106 | |
| 98 | auto object = nvmap_dev->GetObject(entry.nvmap_handle); | 107 | const auto object{nvmap_dev->GetObject(entry.nvmap_handle)}; |
| 99 | if (!object) { | 108 | if (!object) { |
| 100 | LOG_CRITICAL(Service_NVDRV, "nvmap {} is an invalid handle!", entry.nvmap_handle); | 109 | LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); |
| 101 | std::memcpy(output.data(), entries.data(), output.size()); | 110 | result = NvErrCodes::InvalidInput; |
| 102 | return static_cast<u32>(NvErrCodes::InvalidNmapHandle); | 111 | break; |
| 103 | } | 112 | } |
| 104 | 113 | ||
| 105 | ASSERT(object->status == nvmap::Object::Status::Allocated); | 114 | const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; |
| 115 | const auto size{static_cast<u64>(entry.pages) << 0x10}; | ||
| 116 | const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10}; | ||
| 117 | const auto addr{system.GPU().MemoryManager().Map(object->addr + map_offset, offset, size)}; | ||
| 106 | 118 | ||
| 107 | const u64 size = static_cast<u64>(entry.pages) << 0x10; | 119 | if (!addr) { |
| 108 | ASSERT(size <= object->size); | 120 | LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); |
| 109 | const u64 map_offset = static_cast<u64>(entry.map_offset) << 0x10; | 121 | result = NvErrCodes::InvalidInput; |
| 110 | 122 | break; | |
| 111 | const GPUVAddr returned = | 123 | } |
| 112 | gpu.MemoryManager().MapBufferEx(object->addr + map_offset, offset, size); | ||
| 113 | ASSERT(returned == offset); | ||
| 114 | } | 124 | } |
| 125 | |||
| 115 | std::memcpy(output.data(), entries.data(), output.size()); | 126 | std::memcpy(output.data(), entries.data(), output.size()); |
| 116 | return 0; | 127 | return result; |
| 117 | } | 128 | } |
| 118 | 129 | ||
| 119 | u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { | 130 | u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { |
| @@ -126,44 +137,76 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 126 | params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, | 137 | params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, |
| 127 | params.offset); | 138 | params.offset); |
| 128 | 139 | ||
| 129 | if (!params.nvmap_handle) { | 140 | const auto object{nvmap_dev->GetObject(params.nvmap_handle)}; |
| 130 | return 0; | 141 | if (!object) { |
| 142 | LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); | ||
| 143 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 144 | return NvErrCodes::InvalidInput; | ||
| 131 | } | 145 | } |
| 132 | 146 | ||
| 133 | auto object = nvmap_dev->GetObject(params.nvmap_handle); | ||
| 134 | ASSERT(object); | ||
| 135 | |||
| 136 | // We can only map objects that have already been assigned a CPU address. | ||
| 137 | ASSERT(object->status == nvmap::Object::Status::Allocated); | ||
| 138 | |||
| 139 | ASSERT(params.buffer_offset == 0); | ||
| 140 | |||
| 141 | // The real nvservices doesn't make a distinction between handles and ids, and | 147 | // The real nvservices doesn't make a distinction between handles and ids, and |
| 142 | // object can only have one handle and it will be the same as its id. Assert that this is the | 148 | // object can only have one handle and it will be the same as its id. Assert that this is the |
| 143 | // case to prevent unexpected behavior. | 149 | // case to prevent unexpected behavior. |
| 144 | ASSERT(object->id == params.nvmap_handle); | 150 | ASSERT(object->id == params.nvmap_handle); |
| 145 | |||
| 146 | auto& gpu = system.GPU(); | 151 | auto& gpu = system.GPU(); |
| 147 | 152 | ||
| 148 | if (params.flags & 1) { | 153 | u64 page_size{params.page_size}; |
| 149 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); | 154 | if (!page_size) { |
| 150 | } else { | 155 | page_size = object->align; |
| 151 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, object->size); | 156 | } |
| 157 | |||
| 158 | if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) { | ||
| 159 | if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) { | ||
| 160 | const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)}; | ||
| 161 | const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)}; | ||
| 162 | |||
| 163 | if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) { | ||
| 164 | LOG_CRITICAL(Service_NVDRV, | ||
| 165 | "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " | ||
| 166 | "mapping_size = {}, offset={}", | ||
| 167 | params.flags, params.nvmap_handle, params.buffer_offset, | ||
| 168 | params.mapping_size, params.offset); | ||
| 169 | |||
| 170 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 171 | return NvErrCodes::InvalidInput; | ||
| 172 | } | ||
| 173 | |||
| 174 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 175 | return NvErrCodes::Success; | ||
| 176 | } else { | ||
| 177 | LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset); | ||
| 178 | |||
| 179 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 180 | return NvErrCodes::InvalidInput; | ||
| 181 | } | ||
| 152 | } | 182 | } |
| 153 | 183 | ||
| 154 | // Create a new mapping entry for this operation. | 184 | // We can only map objects that have already been assigned a CPU address. |
| 155 | ASSERT_MSG(buffer_mappings.find(params.offset) == buffer_mappings.end(), | 185 | ASSERT(object->status == nvmap::Object::Status::Allocated); |
| 156 | "Offset is already mapped"); | 186 | |
| 187 | const auto physical_address{object->addr + params.buffer_offset}; | ||
| 188 | u64 size{params.mapping_size}; | ||
| 189 | if (!size) { | ||
| 190 | size = object->size; | ||
| 191 | } | ||
| 157 | 192 | ||
| 158 | BufferMapping mapping{}; | 193 | const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; |
| 159 | mapping.nvmap_handle = params.nvmap_handle; | 194 | if (is_alloc) { |
| 160 | mapping.offset = params.offset; | 195 | params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size); |
| 161 | mapping.size = object->size; | 196 | } else { |
| 197 | params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size); | ||
| 198 | } | ||
| 162 | 199 | ||
| 163 | buffer_mappings[params.offset] = mapping; | 200 | auto result{NvErrCodes::Success}; |
| 201 | if (!params.offset) { | ||
| 202 | LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); | ||
| 203 | result = NvErrCodes::InvalidInput; | ||
| 204 | } else { | ||
| 205 | AddBufferMap(params.offset, size, physical_address, is_alloc); | ||
| 206 | } | ||
| 164 | 207 | ||
| 165 | std::memcpy(output.data(), ¶ms, output.size()); | 208 | std::memcpy(output.data(), ¶ms, output.size()); |
| 166 | return 0; | 209 | return result; |
| 167 | } | 210 | } |
| 168 | 211 | ||
| 169 | u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { | 212 | u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { |
| @@ -172,24 +215,20 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 172 | 215 | ||
| 173 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); | 216 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); |
| 174 | 217 | ||
| 175 | const auto itr = buffer_mappings.find(params.offset); | 218 | if (const auto size{RemoveBufferMap(params.offset)}; size) { |
| 176 | if (itr == buffer_mappings.end()) { | 219 | system.GPU().MemoryManager().Unmap(params.offset, *size); |
| 177 | LOG_WARNING(Service_NVDRV, "Tried to unmap an invalid offset 0x{:X}", params.offset); | 220 | } else { |
| 178 | // Hardware tests shows that unmapping an already unmapped buffer always returns successful | 221 | LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); |
| 179 | // and doesn't fail. | ||
| 180 | return 0; | ||
| 181 | } | 222 | } |
| 182 | 223 | ||
| 183 | params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size); | ||
| 184 | buffer_mappings.erase(itr->second.offset); | ||
| 185 | |||
| 186 | std::memcpy(output.data(), ¶ms, output.size()); | 224 | std::memcpy(output.data(), ¶ms, output.size()); |
| 187 | return 0; | 225 | return NvErrCodes::Success; |
| 188 | } | 226 | } |
| 189 | 227 | ||
| 190 | u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) { | 228 | u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) { |
| 191 | IoctlBindChannel params{}; | 229 | IoctlBindChannel params{}; |
| 192 | std::memcpy(¶ms, input.data(), input.size()); | 230 | std::memcpy(¶ms, input.data(), input.size()); |
| 231 | |||
| 193 | LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); | 232 | LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); |
| 194 | 233 | ||
| 195 | channel = params.fd; | 234 | channel = params.fd; |
| @@ -199,6 +238,7 @@ u32 nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 199 | u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { | 238 | u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { |
| 200 | IoctlGetVaRegions params{}; | 239 | IoctlGetVaRegions params{}; |
| 201 | std::memcpy(¶ms, input.data(), input.size()); | 240 | std::memcpy(¶ms, input.data(), input.size()); |
| 241 | |||
| 202 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, | 242 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, |
| 203 | params.buf_size); | 243 | params.buf_size); |
| 204 | 244 | ||
| @@ -210,9 +250,43 @@ u32 nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& o | |||
| 210 | params.regions[1].offset = 0x04000000; | 250 | params.regions[1].offset = 0x04000000; |
| 211 | params.regions[1].page_size = 0x10000; | 251 | params.regions[1].page_size = 0x10000; |
| 212 | params.regions[1].pages = 0x1bffff; | 252 | params.regions[1].pages = 0x1bffff; |
| 253 | |||
| 213 | // TODO(ogniK): This probably can stay stubbed but should add support way way later | 254 | // TODO(ogniK): This probably can stay stubbed but should add support way way later |
| 255 | |||
| 214 | std::memcpy(output.data(), ¶ms, output.size()); | 256 | std::memcpy(output.data(), ¶ms, output.size()); |
| 215 | return 0; | 257 | return 0; |
| 216 | } | 258 | } |
| 217 | 259 | ||
| 260 | std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const { | ||
| 261 | const auto end{buffer_mappings.upper_bound(gpu_addr)}; | ||
| 262 | for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) { | ||
| 263 | if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) { | ||
| 264 | return iter->second; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | return {}; | ||
| 269 | } | ||
| 270 | |||
| 271 | void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, | ||
| 272 | bool is_allocated) { | ||
| 273 | buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated}; | ||
| 274 | } | ||
| 275 | |||
| 276 | std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) { | ||
| 277 | if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) { | ||
| 278 | std::size_t size{}; | ||
| 279 | |||
| 280 | if (iter->second.IsAllocated()) { | ||
| 281 | size = iter->second.Size(); | ||
| 282 | } | ||
| 283 | |||
| 284 | buffer_mappings.erase(iter); | ||
| 285 | |||
| 286 | return size; | ||
| 287 | } | ||
| 288 | |||
| 289 | return {}; | ||
| 290 | } | ||
| 291 | |||
| 218 | } // namespace Service::Nvidia::Devices | 292 | } // namespace Service::Nvidia::Devices |
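Note on the bookkeeping added above: nvhost_as_gpu now tracks its mappings in an ordered std::map keyed by GPU virtual address, and FindBufferMap only has to walk entries up to upper_bound(gpu_addr), since any entry starting after the queried address cannot contain it. A minimal standalone sketch of that lookup; the Range type and FindRange name are illustrative stand-ins, not the patch's BufferMap API:

    #include <cstdint>
    #include <map>
    #include <optional>

    using GPUVAddr = std::uint64_t;

    // Illustrative stand-in for a tracked mapping: [start, end) in GPU VA space.
    struct Range {
        GPUVAddr start{};
        GPUVAddr end{};
    };

    // Entries are keyed by start address, so only keys <= gpu_addr (everything
    // before upper_bound) can possibly contain the queried address.
    std::optional<Range> FindRange(const std::map<GPUVAddr, Range>& ranges, GPUVAddr gpu_addr) {
        const auto bound = ranges.upper_bound(gpu_addr);
        for (auto it = ranges.begin(); it != bound; ++it) {
            if (gpu_addr >= it->second.start && gpu_addr < it->second.end) {
                return it->second;
            }
        }
        return std::nullopt;
    }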
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index f79fcc065..9a0cdff0c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
| @@ -4,9 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <map> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <unordered_map> | 9 | #include <optional> |
| 9 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 12 | #include "common/common_funcs.h" | ||
| 10 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 14 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 15 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| @@ -15,6 +18,13 @@ namespace Service::Nvidia::Devices { | |||
| 15 | 18 | ||
| 16 | class nvmap; | 19 | class nvmap; |
| 17 | 20 | ||
| 21 | enum class AddressSpaceFlags : u32 { | ||
| 22 | None = 0x0, | ||
| 23 | FixedOffset = 0x1, | ||
| 24 | Remap = 0x100, | ||
| 25 | }; | ||
| 26 | DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); | ||
| 27 | |||
| 18 | class nvhost_as_gpu final : public nvdevice { | 28 | class nvhost_as_gpu final : public nvdevice { |
| 19 | public: | 29 | public: |
| 20 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 30 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| @@ -25,6 +35,45 @@ public: | |||
| 25 | IoctlVersion version) override; | 35 | IoctlVersion version) override; |
| 26 | 36 | ||
| 27 | private: | 37 | private: |
| 38 | class BufferMap final { | ||
| 39 | public: | ||
| 40 | constexpr BufferMap() = default; | ||
| 41 | |||
| 42 | constexpr BufferMap(GPUVAddr start_addr, std::size_t size) | ||
| 43 | : start_addr{start_addr}, end_addr{start_addr + size} {} | ||
| 44 | |||
| 45 | constexpr BufferMap(GPUVAddr start_addr, std::size_t size, VAddr cpu_addr, | ||
| 46 | bool is_allocated) | ||
| 47 | : start_addr{start_addr}, end_addr{start_addr + size}, cpu_addr{cpu_addr}, | ||
| 48 | is_allocated{is_allocated} {} | ||
| 49 | |||
| 50 | constexpr VAddr StartAddr() const { | ||
| 51 | return start_addr; | ||
| 52 | } | ||
| 53 | |||
| 54 | constexpr VAddr EndAddr() const { | ||
| 55 | return end_addr; | ||
| 56 | } | ||
| 57 | |||
| 58 | constexpr std::size_t Size() const { | ||
| 59 | return end_addr - start_addr; | ||
| 60 | } | ||
| 61 | |||
| 62 | constexpr VAddr CpuAddr() const { | ||
| 63 | return cpu_addr; | ||
| 64 | } | ||
| 65 | |||
| 66 | constexpr bool IsAllocated() const { | ||
| 67 | return is_allocated; | ||
| 68 | } | ||
| 69 | |||
| 70 | private: | ||
| 71 | GPUVAddr start_addr{}; | ||
| 72 | GPUVAddr end_addr{}; | ||
| 73 | VAddr cpu_addr{}; | ||
| 74 | bool is_allocated{}; | ||
| 75 | }; | ||
| 76 | |||
| 28 | enum class IoctlCommand : u32_le { | 77 | enum class IoctlCommand : u32_le { |
| 29 | IocInitalizeExCommand = 0x40284109, | 78 | IocInitalizeExCommand = 0x40284109, |
| 30 | IocAllocateSpaceCommand = 0xC0184102, | 79 | IocAllocateSpaceCommand = 0xC0184102, |
| @@ -49,7 +98,7 @@ private: | |||
| 49 | struct IoctlAllocSpace { | 98 | struct IoctlAllocSpace { |
| 50 | u32_le pages; | 99 | u32_le pages; |
| 51 | u32_le page_size; | 100 | u32_le page_size; |
| 52 | u32_le flags; | 101 | AddressSpaceFlags flags; |
| 53 | INSERT_PADDING_WORDS(1); | 102 | INSERT_PADDING_WORDS(1); |
| 54 | union { | 103 | union { |
| 55 | u64_le offset; | 104 | u64_le offset; |
| @@ -69,18 +118,18 @@ private: | |||
| 69 | static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); | 118 | static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); |
| 70 | 119 | ||
| 71 | struct IoctlMapBufferEx { | 120 | struct IoctlMapBufferEx { |
| 72 | u32_le flags; // bit0: fixed_offset, bit2: cacheable | 121 | AddressSpaceFlags flags; // bit0: fixed_offset, bit2: cacheable |
| 73 | u32_le kind; // -1 is default | 122 | u32_le kind; // -1 is default |
| 74 | u32_le nvmap_handle; | 123 | u32_le nvmap_handle; |
| 75 | u32_le page_size; // 0 means don't care | 124 | u32_le page_size; // 0 means don't care |
| 76 | u64_le buffer_offset; | 125 | s64_le buffer_offset; |
| 77 | u64_le mapping_size; | 126 | u64_le mapping_size; |
| 78 | u64_le offset; | 127 | s64_le offset; |
| 79 | }; | 128 | }; |
| 80 | static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size"); | 129 | static_assert(sizeof(IoctlMapBufferEx) == 40, "IoctlMapBufferEx is incorrect size"); |
| 81 | 130 | ||
| 82 | struct IoctlUnmapBuffer { | 131 | struct IoctlUnmapBuffer { |
| 83 | u64_le offset; | 132 | s64_le offset; |
| 84 | }; | 133 | }; |
| 85 | static_assert(sizeof(IoctlUnmapBuffer) == 8, "IoctlUnmapBuffer is incorrect size"); | 134 | static_assert(sizeof(IoctlUnmapBuffer) == 8, "IoctlUnmapBuffer is incorrect size"); |
| 86 | 135 | ||
| @@ -106,15 +155,6 @@ private: | |||
| 106 | static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, | 155 | static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, |
| 107 | "IoctlGetVaRegions is incorrect size"); | 156 | "IoctlGetVaRegions is incorrect size"); |
| 108 | 157 | ||
| 109 | struct BufferMapping { | ||
| 110 | u64 offset; | ||
| 111 | u64 size; | ||
| 112 | u32 nvmap_handle; | ||
| 113 | }; | ||
| 114 | |||
| 115 | /// Map containing the nvmap object mappings in GPU memory. | ||
| 116 | std::unordered_map<u64, BufferMapping> buffer_mappings; | ||
| 117 | |||
| 118 | u32 channel{}; | 158 | u32 channel{}; |
| 119 | 159 | ||
| 120 | u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output); | 160 | u32 InitalizeEx(const std::vector<u8>& input, std::vector<u8>& output); |
| @@ -125,7 +165,14 @@ private: | |||
| 125 | u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output); | 165 | u32 BindChannel(const std::vector<u8>& input, std::vector<u8>& output); |
| 126 | u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); | 166 | u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); |
| 127 | 167 | ||
| 168 | std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; | ||
| 169 | void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); | ||
| 170 | std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); | ||
| 171 | |||
| 128 | std::shared_ptr<nvmap> nvmap_dev; | 172 | std::shared_ptr<nvmap> nvmap_dev; |
| 173 | |||
| 174 | // This is expected to be ordered, therefore we must use a map, not unordered_map | ||
| 175 | std::map<GPUVAddr, BufferMap> buffer_mappings; | ||
| 129 | }; | 176 | }; |
| 130 | 177 | ||
| 131 | } // namespace Service::Nvidia::Devices | 178 | } // namespace Service::Nvidia::Devices |
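The header also introduces AddressSpaceFlags with DECLARE_ENUM_FLAG_OPERATORS, which is what lets the .cpp test flags with expressions like (params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None. A rough hand-written equivalent of the operators such a helper has to provide (illustrative only, not the macro's actual expansion):

    #include <cstdint>

    enum class AddressSpaceFlags : std::uint32_t {
        None = 0x0,
        FixedOffset = 0x1,
        Remap = 0x100,
    };

    // Scoped enums have no implicit bitwise operators, so the flag type needs
    // them defined explicitly before it can be combined and tested as a bitmask.
    constexpr AddressSpaceFlags operator|(AddressSpaceFlags a, AddressSpaceFlags b) {
        return static_cast<AddressSpaceFlags>(static_cast<std::uint32_t>(a) |
                                              static_cast<std::uint32_t>(b));
    }
    constexpr AddressSpaceFlags operator&(AddressSpaceFlags a, AddressSpaceFlags b) {
        return static_cast<AddressSpaceFlags>(static_cast<std::uint32_t>(a) &
                                              static_cast<std::uint32_t>(b));
    }

    // Usage mirroring the checks in nvhost_as_gpu.cpp:
    constexpr bool IsFixedOffset(AddressSpaceFlags flags) {
        return (flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None;
    }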
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 8c742316c..9436e16ad 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
| @@ -18,7 +18,12 @@ enum { | |||
| 18 | }; | 18 | }; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | nvmap::nvmap(Core::System& system) : nvdevice(system) {} | 21 | nvmap::nvmap(Core::System& system) : nvdevice(system) { |
| 22 | // Handle 0 appears to be used when remapping, so we create a placeholder empty nvmap object to | ||
| 23 | // represent this. | ||
| 24 | CreateObject(0); | ||
| 25 | } | ||
| 26 | |||
| 22 | nvmap::~nvmap() = default; | 27 | nvmap::~nvmap() = default; |
| 23 | 28 | ||
| 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { | 29 | VAddr nvmap::GetObjectAddress(u32 handle) const { |
| @@ -50,6 +55,21 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector< | |||
| 50 | return 0; | 55 | return 0; |
| 51 | } | 56 | } |
| 52 | 57 | ||
| 58 | u32 nvmap::CreateObject(u32 size) { | ||
| 59 | // Create a new nvmap object and obtain a handle to it. | ||
| 60 | auto object = std::make_shared<Object>(); | ||
| 61 | object->id = next_id++; | ||
| 62 | object->size = size; | ||
| 63 | object->status = Object::Status::Created; | ||
| 64 | object->refcount = 1; | ||
| 65 | |||
| 66 | const u32 handle = next_handle++; | ||
| 67 | |||
| 68 | handles.insert_or_assign(handle, std::move(object)); | ||
| 69 | |||
| 70 | return handle; | ||
| 71 | } | ||
| 72 | |||
| 53 | u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { | 73 | u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { |
| 54 | IocCreateParams params; | 74 | IocCreateParams params; |
| 55 | std::memcpy(¶ms, input.data(), sizeof(params)); | 75 | std::memcpy(¶ms, input.data(), sizeof(params)); |
| @@ -59,17 +79,8 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { | |||
| 59 | LOG_ERROR(Service_NVDRV, "Size is 0"); | 79 | LOG_ERROR(Service_NVDRV, "Size is 0"); |
| 60 | return static_cast<u32>(NvErrCodes::InvalidValue); | 80 | return static_cast<u32>(NvErrCodes::InvalidValue); |
| 61 | } | 81 | } |
| 62 | // Create a new nvmap object and obtain a handle to it. | ||
| 63 | auto object = std::make_shared<Object>(); | ||
| 64 | object->id = next_id++; | ||
| 65 | object->size = params.size; | ||
| 66 | object->status = Object::Status::Created; | ||
| 67 | object->refcount = 1; | ||
| 68 | |||
| 69 | u32 handle = next_handle++; | ||
| 70 | handles[handle] = std::move(object); | ||
| 71 | 82 | ||
| 72 | params.handle = handle; | 83 | params.handle = CreateObject(params.size); |
| 73 | 84 | ||
| 74 | std::memcpy(output.data(), ¶ms, sizeof(params)); | 85 | std::memcpy(output.data(), ¶ms, sizeof(params)); |
| 75 | return 0; | 86 | return 0; |
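The nvmap change above is mostly a refactor: object creation moves into CreateObject() so the constructor can register an empty placeholder behind handle 0 (referenced by remap requests), which is also why next_handle and next_id now start at 0 in nvmap.h. A compressed sketch of that pattern, using a hypothetical HandleTable in place of the real nvmap class:

    #include <cstdint>
    #include <memory>
    #include <unordered_map>

    struct Object {
        std::uint32_t id{};
        std::uint32_t size{};
    };

    class HandleTable {
    public:
        HandleTable() {
            // The first object created becomes the empty placeholder behind handle 0.
            CreateObject(0);
        }

        std::uint32_t CreateObject(std::uint32_t size) {
            auto object = std::make_shared<Object>();
            object->id = next_id++;
            object->size = size;

            const std::uint32_t handle = next_handle++;
            handles.insert_or_assign(handle, std::move(object));
            return handle;
        }

    private:
        std::uint32_t next_handle = 0; // starts at 0 so the placeholder gets handle 0
        std::uint32_t next_id = 0;
        std::unordered_map<std::uint32_t, std::shared_ptr<Object>> handles;
    };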
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 73c2e8809..84624be00 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
| @@ -49,10 +49,10 @@ public: | |||
| 49 | 49 | ||
| 50 | private: | 50 | private: |
| 51 | /// Id to use for the next handle that is created. | 51 | /// Id to use for the next handle that is created. |
| 52 | u32 next_handle = 1; | 52 | u32 next_handle = 0; |
| 53 | 53 | ||
| 54 | /// Id to use for the next object that is created. | 54 | /// Id to use for the next object that is created. |
| 55 | u32 next_id = 1; | 55 | u32 next_id = 0; |
| 56 | 56 | ||
| 57 | /// Mapping of currently allocated handles to the objects they represent. | 57 | /// Mapping of currently allocated handles to the objects they represent. |
| 58 | std::unordered_map<u32, std::shared_ptr<Object>> handles; | 58 | std::unordered_map<u32, std::shared_ptr<Object>> handles; |
| @@ -119,6 +119,8 @@ private: | |||
| 119 | }; | 119 | }; |
| 120 | static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); | 120 | static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); |
| 121 | 121 | ||
| 122 | u32 CreateObject(u32 size); | ||
| 123 | |||
| 122 | u32 IocCreate(const std::vector<u8>& input, std::vector<u8>& output); | 124 | u32 IocCreate(const std::vector<u8>& input, std::vector<u8>& output); |
| 123 | u32 IocAlloc(const std::vector<u8>& input, std::vector<u8>& output); | 125 | u32 IocAlloc(const std::vector<u8>& input, std::vector<u8>& output); |
| 124 | u32 IocGetId(const std::vector<u8>& input, std::vector<u8>& output); | 126 | u32 IocGetId(const std::vector<u8>& input, std::vector<u8>& output); |
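In the memory_manager.cpp diff that follows, the VMA map is replaced by a flat page table, so free address space is found by a linear first-fit scan over page entries (FindFreeRange). A minimal standalone sketch of that scan under simplified assumptions: a plain vector<bool> stands in for the real page table, and the search starts at address 0 rather than at address_space_start.

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    using GPUVAddr = std::uint64_t;

    constexpr std::size_t page_size = 0x10000; // illustrative 64 KiB big page

    // First-fit search for `size` bytes of unmapped pages, aligned to `align`.
    // `mapped[i]` is a hypothetical stand-in for "page entry i is not unmapped".
    std::optional<GPUVAddr> FindFreeRange(const std::vector<bool>& mapped, std::size_t size,
                                          std::size_t align) {
        align = align ? align : page_size;
        GPUVAddr gpu_addr{};
        std::size_t available{};
        while ((gpu_addr + available) / page_size < mapped.size()) {
            if (!mapped[(gpu_addr + available) / page_size]) {
                available += page_size;
                if (available >= size) {
                    return gpu_addr;
                }
            } else {
                // Skip past the occupied page and realign the candidate base address.
                gpu_addr += available + page_size;
                available = 0;
                if (const auto rem = gpu_addr % align) {
                    gpu_addr += align - rem;
                }
            }
        }
        return std::nullopt;
    }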
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index ff5505d12..844164645 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | 7 | #include "core/core.h" |
| 9 | #include "core/hle/kernel/memory/page_table.h" | 8 | #include "core/hle/kernel/memory/page_table.h" |
| 10 | #include "core/hle/kernel/process.h" | 9 | #include "core/hle/kernel/process.h" |
| @@ -16,121 +15,137 @@ | |||
| 16 | namespace Tegra { | 15 | namespace Tegra { |
| 17 | 16 | ||
| 18 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 19 | : rasterizer{rasterizer}, system{system} { | 18 | : system{system}, rasterizer{rasterizer}, page_table(page_table_size) {} |
| 20 | page_table.Resize(address_space_width, page_bits, false); | ||
| 21 | |||
| 22 | // Initialize the map with a single free region covering the entire managed space. | ||
| 23 | VirtualMemoryArea initial_vma; | ||
| 24 | initial_vma.size = address_space_end; | ||
| 25 | vma_map.emplace(initial_vma.base, initial_vma); | ||
| 26 | |||
| 27 | UpdatePageTableForVMA(initial_vma); | ||
| 28 | } | ||
| 29 | 19 | ||
| 30 | MemoryManager::~MemoryManager() = default; | 20 | MemoryManager::~MemoryManager() = default; |
| 31 | 21 | ||
| 32 | GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { | 22 | GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { |
| 33 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 23 | u64 remaining_size{size}; |
| 34 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 24 | for (u64 offset{}; offset < size; offset += page_size) { |
| 35 | 25 | if (remaining_size < page_size) { | |
| 36 | AllocateMemory(gpu_addr, 0, aligned_size); | 26 | SetPageEntry(gpu_addr + offset, page_entry + offset, remaining_size); |
| 37 | 27 | } else { | |
| 28 | SetPageEntry(gpu_addr + offset, page_entry + offset); | ||
| 29 | } | ||
| 30 | remaining_size -= page_size; | ||
| 31 | } | ||
| 38 | return gpu_addr; | 32 | return gpu_addr; |
| 39 | } | 33 | } |
| 40 | 34 | ||
| 41 | GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) { | 35 | GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { |
| 42 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 36 | return UpdateRange(gpu_addr, cpu_addr, size); |
| 43 | 37 | } | |
| 44 | AllocateMemory(gpu_addr, 0, aligned_size); | ||
| 45 | 38 | ||
| 46 | return gpu_addr; | 39 | GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { |
| 40 | return Map(cpu_addr, *FindFreeRange(size, align), size); | ||
| 47 | } | 41 | } |
| 48 | 42 | ||
| 49 | GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | 43 | void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { |
| 50 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 44 | if (!size) { |
| 51 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 45 | return; |
| 46 | } | ||
| 52 | 47 | ||
| 53 | MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); | 48 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. |
| 54 | ASSERT( | 49 | system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); |
| 55 | system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); | ||
| 56 | 50 | ||
| 57 | return gpu_addr; | 51 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); |
| 58 | } | 52 | } |
| 59 | 53 | ||
| 60 | GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) { | 54 | std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { |
| 61 | ASSERT((gpu_addr & page_mask) == 0); | 55 | for (u64 offset{}; offset < size; offset += page_size) { |
| 56 | if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) { | ||
| 57 | return {}; | ||
| 58 | } | ||
| 59 | } | ||
| 62 | 60 | ||
| 63 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 61 | return UpdateRange(gpu_addr, PageEntry::State::Allocated, size); |
| 62 | } | ||
| 64 | 63 | ||
| 65 | MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); | 64 | GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) { |
| 66 | ASSERT( | 65 | return *AllocateFixed(*FindFreeRange(size, align), size); |
| 67 | system.CurrentProcess()->PageTable().LockForDeviceAddressSpace(cpu_addr, size).IsSuccess()); | ||
| 68 | return gpu_addr; | ||
| 69 | } | 66 | } |
| 70 | 67 | ||
| 71 | GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | 68 | void MemoryManager::TryLockPage(PageEntry page_entry, std::size_t size) { |
| 72 | ASSERT((gpu_addr & page_mask) == 0); | 69 | if (!page_entry.IsValid()) { |
| 70 | return; | ||
| 71 | } | ||
| 73 | 72 | ||
| 74 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 73 | ASSERT(system.CurrentProcess() |
| 75 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | 74 | ->PageTable() |
| 76 | ASSERT(cpu_addr); | 75 | .LockForDeviceAddressSpace(page_entry.ToAddress(), size) |
| 76 | .IsSuccess()); | ||
| 77 | } | ||
| 77 | 78 | ||
| 78 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | 79 | void MemoryManager::TryUnlockPage(PageEntry page_entry, std::size_t size) { |
| 79 | system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); | 80 | if (!page_entry.IsValid()) { |
| 81 | return; | ||
| 82 | } | ||
| 80 | 83 | ||
| 81 | UnmapRange(gpu_addr, aligned_size); | ||
| 82 | ASSERT(system.CurrentProcess() | 84 | ASSERT(system.CurrentProcess() |
| 83 | ->PageTable() | 85 | ->PageTable() |
| 84 | .UnlockForDeviceAddressSpace(cpu_addr.value(), size) | 86 | .UnlockForDeviceAddressSpace(page_entry.ToAddress(), size) |
| 85 | .IsSuccess()); | 87 | .IsSuccess()); |
| 86 | |||
| 87 | return gpu_addr; | ||
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const { | 90 | PageEntry MemoryManager::GetPageEntry(GPUVAddr gpu_addr) const { |
| 91 | // Find the first Free VMA. | 91 | return page_table[PageEntryIndex(gpu_addr)]; |
| 92 | const VMAHandle vma_handle{ | 92 | } |
| 93 | std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) { | ||
| 94 | if (vma.second.type != VirtualMemoryArea::Type::Unmapped) { | ||
| 95 | return false; | ||
| 96 | } | ||
| 97 | 93 | ||
| 98 | const VAddr vma_end{vma.second.base + vma.second.size}; | 94 | void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { |
| 99 | return vma_end > region_start && vma_end >= region_start + size; | 95 | // TODO(bunnei): We should lock/unlock device regions. This currently causes issues due to |
| 100 | })}; | 96 | // improper tracking, but should be fixed in the future. |
| 101 | 97 | ||
| 102 | if (vma_handle == vma_map.end()) { | 98 | //// Unlock the old page |
| 103 | return {}; | 99 | // TryUnlockPage(page_table[PageEntryIndex(gpu_addr)], size); |
| 104 | } | ||
| 105 | 100 | ||
| 106 | return std::max(region_start, vma_handle->second.base); | 101 | //// Lock the new page |
| 107 | } | 102 | // TryLockPage(page_entry, size); |
| 108 | 103 | ||
| 109 | bool MemoryManager::IsAddressValid(GPUVAddr addr) const { | 104 | page_table[PageEntryIndex(gpu_addr)] = page_entry; |
| 110 | return (addr >> page_bits) < page_table.pointers.size(); | ||
| 111 | } | 105 | } |
| 112 | 106 | ||
| 113 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const { | 107 | std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align) const { |
| 114 | if (!IsAddressValid(addr)) { | 108 | if (!align) { |
| 115 | return {}; | 109 | align = page_size; |
| 110 | } else { | ||
| 111 | align = Common::AlignUp(align, page_size); | ||
| 116 | } | 112 | } |
| 117 | 113 | ||
| 118 | const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]}; | 114 | u64 available_size{}; |
| 119 | if (cpu_addr) { | 115 | GPUVAddr gpu_addr{address_space_start}; |
| 120 | return cpu_addr + (addr & page_mask); | 116 | while (gpu_addr + available_size < address_space_size) { |
| 117 | if (GetPageEntry(gpu_addr + available_size).IsUnmapped()) { | ||
| 118 | available_size += page_size; | ||
| 119 | |||
| 120 | if (available_size >= size) { | ||
| 121 | return gpu_addr; | ||
| 122 | } | ||
| 123 | } else { | ||
| 124 | gpu_addr += available_size + page_size; | ||
| 125 | available_size = 0; | ||
| 126 | |||
| 127 | const auto remainder{gpu_addr % align}; | ||
| 128 | if (remainder) { | ||
| 129 | gpu_addr = (gpu_addr - remainder) + align; | ||
| 130 | } | ||
| 131 | } | ||
| 121 | } | 132 | } |
| 122 | 133 | ||
| 123 | return {}; | 134 | return {}; |
| 124 | } | 135 | } |
| 125 | 136 | ||
| 126 | template <typename T> | 137 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { |
| 127 | T MemoryManager::Read(GPUVAddr addr) const { | 138 | const auto page_entry{GetPageEntry(gpu_addr)}; |
| 128 | if (!IsAddressValid(addr)) { | 139 | if (!page_entry.IsValid()) { |
| 129 | return {}; | 140 | return {}; |
| 130 | } | 141 | } |
| 131 | 142 | ||
| 132 | const u8* page_pointer{GetPointer(addr)}; | 143 | return page_entry.ToAddress() + (gpu_addr & page_mask); |
| 133 | if (page_pointer) { | 144 | } |
| 145 | |||
| 146 | template <typename T> | ||
| 147 | T MemoryManager::Read(GPUVAddr addr) const { | ||
| 148 | if (auto page_pointer{GetPointer(addr)}; page_pointer) { | ||
| 134 | // NOTE: Avoid adding any extra logic to this fast-path block | 149 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 135 | T value; | 150 | T value; |
| 136 | std::memcpy(&value, page_pointer, sizeof(T)); | 151 | std::memcpy(&value, page_pointer, sizeof(T)); |
| @@ -144,12 +159,7 @@ T MemoryManager::Read(GPUVAddr addr) const { | |||
| 144 | 159 | ||
| 145 | template <typename T> | 160 | template <typename T> |
| 146 | void MemoryManager::Write(GPUVAddr addr, T data) { | 161 | void MemoryManager::Write(GPUVAddr addr, T data) { |
| 147 | if (!IsAddressValid(addr)) { | 162 | if (auto page_pointer{GetPointer(addr)}; page_pointer) { |
| 148 | return; | ||
| 149 | } | ||
| 150 | |||
| 151 | u8* page_pointer{GetPointer(addr)}; | ||
| 152 | if (page_pointer) { | ||
| 153 | // NOTE: Avoid adding any extra logic to this fast-path block | 163 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 154 | std::memcpy(page_pointer, &data, sizeof(T)); | 164 | std::memcpy(page_pointer, &data, sizeof(T)); |
| 155 | return; | 165 | return; |
| @@ -167,66 +177,49 @@ template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data); | |||
| 167 | template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data); | 177 | template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data); |
| 168 | template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data); | 178 | template void MemoryManager::Write<u64>(GPUVAddr addr, u64 data); |
| 169 | 179 | ||
| 170 | u8* MemoryManager::GetPointer(GPUVAddr addr) { | 180 | u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { |
| 171 | if (!IsAddressValid(addr)) { | 181 | if (!GetPageEntry(gpu_addr).IsValid()) { |
| 172 | return {}; | 182 | return {}; |
| 173 | } | 183 | } |
| 174 | 184 | ||
| 175 | auto& memory = system.Memory(); | 185 | const auto address{GpuToCpuAddress(gpu_addr)}; |
| 176 | 186 | if (!address) { | |
| 177 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; | 187 | return {}; |
| 178 | |||
| 179 | if (page_addr != 0) { | ||
| 180 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 181 | } | 188 | } |
| 182 | 189 | ||
| 183 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 190 | return system.Memory().GetPointer(*address); |
| 184 | return {}; | ||
| 185 | } | 191 | } |
| 186 | 192 | ||
| 187 | const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | 193 | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { |
| 188 | if (!IsAddressValid(addr)) { | 194 | if (!GetPageEntry(gpu_addr).IsValid()) { |
| 189 | return {}; | 195 | return {}; |
| 190 | } | 196 | } |
| 191 | 197 | ||
| 192 | const auto& memory = system.Memory(); | 198 | const auto address{GpuToCpuAddress(gpu_addr)}; |
| 193 | 199 | if (!address) { | |
| 194 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; | 200 | return {}; |
| 195 | |||
| 196 | if (page_addr != 0) { | ||
| 197 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 198 | } | 201 | } |
| 199 | 202 | ||
| 200 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 203 | return system.Memory().GetPointer(*address); |
| 201 | return {}; | ||
| 202 | } | ||
| 203 | |||
| 204 | bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { | ||
| 205 | const std::size_t inner_size = size - 1; | ||
| 206 | const GPUVAddr end = start + inner_size; | ||
| 207 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); | ||
| 208 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); | ||
| 209 | const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); | ||
| 210 | return range == inner_size; | ||
| 211 | } | 204 | } |
| 212 | 205 | ||
| 213 | void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, | 206 | void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { |
| 214 | const std::size_t size) const { | ||
| 215 | std::size_t remaining_size{size}; | 207 | std::size_t remaining_size{size}; |
| 216 | std::size_t page_index{gpu_src_addr >> page_bits}; | 208 | std::size_t page_index{gpu_src_addr >> page_bits}; |
| 217 | std::size_t page_offset{gpu_src_addr & page_mask}; | 209 | std::size_t page_offset{gpu_src_addr & page_mask}; |
| 218 | 210 | ||
| 219 | auto& memory = system.Memory(); | ||
| 220 | |||
| 221 | while (remaining_size > 0) { | 211 | while (remaining_size > 0) { |
| 222 | const std::size_t copy_amount{ | 212 | const std::size_t copy_amount{ |
| 223 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 213 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 224 | 214 | ||
| 225 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; | 215 | if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { |
| 226 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | 216 | const auto src_addr{*page_addr + page_offset}; |
| 227 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. | 217 | |
| 228 | rasterizer.FlushRegion(src_addr, copy_amount); | 218 | // Flush must happen on the rasterizer interface, such that memory is always synchronous |
| 229 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); | 219 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. |
| 220 | rasterizer.FlushRegion(src_addr, copy_amount); | ||
| 221 | system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); | ||
| 222 | } | ||
| 230 | 223 | ||
| 231 | page_index++; | 224 | page_index++; |
| 232 | page_offset = 0; | 225 | page_offset = 0; |
| @@ -241,18 +234,17 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, | |||
| 241 | std::size_t page_index{gpu_src_addr >> page_bits}; | 234 | std::size_t page_index{gpu_src_addr >> page_bits}; |
| 242 | std::size_t page_offset{gpu_src_addr & page_mask}; | 235 | std::size_t page_offset{gpu_src_addr & page_mask}; |
| 243 | 236 | ||
| 244 | auto& memory = system.Memory(); | ||
| 245 | |||
| 246 | while (remaining_size > 0) { | 237 | while (remaining_size > 0) { |
| 247 | const std::size_t copy_amount{ | 238 | const std::size_t copy_amount{ |
| 248 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 239 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 249 | const u8* page_pointer = page_table.pointers[page_index]; | 240 | |
| 250 | if (page_pointer) { | 241 | if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { |
| 251 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; | 242 | const auto src_addr{*page_addr + page_offset}; |
| 252 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); | 243 | system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 253 | } else { | 244 | } else { |
| 254 | std::memset(dest_buffer, 0, copy_amount); | 245 | std::memset(dest_buffer, 0, copy_amount); |
| 255 | } | 246 | } |
| 247 | |||
| 256 | page_index++; | 248 | page_index++; |
| 257 | page_offset = 0; | 249 | page_offset = 0; |
| 258 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 250 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| @@ -260,23 +252,23 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, | |||
| 260 | } | 252 | } |
| 261 | } | 253 | } |
| 262 | 254 | ||
| 263 | void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, | 255 | void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { |
| 264 | const std::size_t size) { | ||
| 265 | std::size_t remaining_size{size}; | 256 | std::size_t remaining_size{size}; |
| 266 | std::size_t page_index{gpu_dest_addr >> page_bits}; | 257 | std::size_t page_index{gpu_dest_addr >> page_bits}; |
| 267 | std::size_t page_offset{gpu_dest_addr & page_mask}; | 258 | std::size_t page_offset{gpu_dest_addr & page_mask}; |
| 268 | 259 | ||
| 269 | auto& memory = system.Memory(); | ||
| 270 | |||
| 271 | while (remaining_size > 0) { | 260 | while (remaining_size > 0) { |
| 272 | const std::size_t copy_amount{ | 261 | const std::size_t copy_amount{ |
| 273 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 262 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 274 | 263 | ||
| 275 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; | 264 | if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { |
| 276 | // Invalidate must happen on the rasterizer interface, such that memory is always | 265 | const auto dest_addr{*page_addr + page_offset}; |
| 277 | // synchronous when it is written (even when in asynchronous GPU mode). | 266 | |
| 278 | rasterizer.InvalidateRegion(dest_addr, copy_amount); | 267 | // Invalidate must happen on the rasterizer interface, such that memory is always |
| 279 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); | 268 | // synchronous when it is written (even when in asynchronous GPU mode). |
| 269 | rasterizer.InvalidateRegion(dest_addr, copy_amount); | ||
| 270 | system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); | ||
| 271 | } | ||
| 280 | 272 | ||
| 281 | page_index++; | 273 | page_index++; |
| 282 | page_offset = 0; | 274 | page_offset = 0; |
| @@ -286,21 +278,20 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, | |||
| 286 | } | 278 | } |
| 287 | 279 | ||
| 288 | void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, | 280 | void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, |
| 289 | const std::size_t size) { | 281 | std::size_t size) { |
| 290 | std::size_t remaining_size{size}; | 282 | std::size_t remaining_size{size}; |
| 291 | std::size_t page_index{gpu_dest_addr >> page_bits}; | 283 | std::size_t page_index{gpu_dest_addr >> page_bits}; |
| 292 | std::size_t page_offset{gpu_dest_addr & page_mask}; | 284 | std::size_t page_offset{gpu_dest_addr & page_mask}; |
| 293 | 285 | ||
| 294 | auto& memory = system.Memory(); | ||
| 295 | |||
| 296 | while (remaining_size > 0) { | 286 | while (remaining_size > 0) { |
| 297 | const std::size_t copy_amount{ | 287 | const std::size_t copy_amount{ |
| 298 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 288 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 299 | u8* page_pointer = page_table.pointers[page_index]; | 289 | |
| 300 | if (page_pointer) { | 290 | if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) { |
| 301 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; | 291 | const auto dest_addr{*page_addr + page_offset}; |
| 302 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); | 292 | system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 303 | } | 293 | } |
| 294 | |||
| 304 | page_index++; | 295 | page_index++; |
| 305 | page_offset = 0; | 296 | page_offset = 0; |
| 306 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 297 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| @@ -308,273 +299,26 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf | |||
| 308 | } | 299 | } |
| 309 | } | 300 | } |
| 310 | 301 | ||
| 311 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, | 302 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { |
| 312 | const std::size_t size) { | ||
| 313 | std::vector<u8> tmp_buffer(size); | 303 | std::vector<u8> tmp_buffer(size); |
| 314 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size); | 304 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size); |
| 315 | WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); | 305 | WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); |
| 316 | } | 306 | } |
| 317 | 307 | ||
| 318 | void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, | 308 | void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, |
| 319 | const std::size_t size) { | 309 | std::size_t size) { |
| 320 | std::vector<u8> tmp_buffer(size); | 310 | std::vector<u8> tmp_buffer(size); |
| 321 | ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size); | 311 | ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size); |
| 322 | WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); | 312 | WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); |
| 323 | } | 313 | } |
| 324 | 314 | ||
| 325 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | 315 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { |
| 326 | const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; | 316 | const auto cpu_addr{GpuToCpuAddress(gpu_addr)}; |
| 327 | const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; | 317 | if (!cpu_addr) { |
| 328 | return page <= Core::Memory::PAGE_SIZE; | ||
| 329 | } | ||
| 330 | |||
| 331 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | ||
| 332 | VAddr backing_addr) { | ||
| 333 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | ||
| 334 | (base + size) * page_size); | ||
| 335 | |||
| 336 | const VAddr end{base + size}; | ||
| 337 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | ||
| 338 | base + page_table.pointers.size()); | ||
| 339 | |||
| 340 | if (memory == nullptr) { | ||
| 341 | while (base != end) { | ||
| 342 | page_table.pointers[base] = nullptr; | ||
| 343 | page_table.backing_addr[base] = 0; | ||
| 344 | |||
| 345 | base += 1; | ||
| 346 | } | ||
| 347 | } else { | ||
| 348 | while (base != end) { | ||
| 349 | page_table.pointers[base] = memory; | ||
| 350 | page_table.backing_addr[base] = backing_addr; | ||
| 351 | |||
| 352 | base += 1; | ||
| 353 | memory += page_size; | ||
| 354 | backing_addr += page_size; | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | void MemoryManager::MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr) { | ||
| 360 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); | ||
| 361 | ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); | ||
| 362 | MapPages(base / page_size, size / page_size, target, Common::PageType::Memory, backing_addr); | ||
| 363 | } | ||
| 364 | |||
| 365 | void MemoryManager::UnmapRegion(GPUVAddr base, u64 size) { | ||
| 366 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: {:016X}", size); | ||
| 367 | ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: {:016X}", base); | ||
| 368 | MapPages(base / page_size, size / page_size, nullptr, Common::PageType::Unmapped); | ||
| 369 | } | ||
| 370 | |||
| 371 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | ||
| 372 | ASSERT(base + size == next.base); | ||
| 373 | if (type != next.type) { | ||
| 374 | return {}; | ||
| 375 | } | ||
| 376 | if (type == VirtualMemoryArea::Type::Allocated && (offset + size != next.offset)) { | ||
| 377 | return {}; | ||
| 378 | } | ||
| 379 | if (type == VirtualMemoryArea::Type::Mapped && backing_memory + size != next.backing_memory) { | ||
| 380 | return {}; | ||
| 381 | } | ||
| 382 | return true; | ||
| 383 | } | ||
| 384 | |||
| 385 | MemoryManager::VMAHandle MemoryManager::FindVMA(GPUVAddr target) const { | ||
| 386 | if (target >= address_space_end) { | ||
| 387 | return vma_map.end(); | ||
| 388 | } else { | ||
| 389 | return std::prev(vma_map.upper_bound(target)); | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | MemoryManager::VMAIter MemoryManager::Allocate(VMAIter vma_handle) { | ||
| 394 | VirtualMemoryArea& vma{vma_handle->second}; | ||
| 395 | |||
| 396 | vma.type = VirtualMemoryArea::Type::Allocated; | ||
| 397 | vma.backing_addr = 0; | ||
| 398 | vma.backing_memory = {}; | ||
| 399 | UpdatePageTableForVMA(vma); | ||
| 400 | |||
| 401 | return MergeAdjacent(vma_handle); | ||
| 402 | } | ||
| 403 | |||
| 404 | MemoryManager::VMAHandle MemoryManager::AllocateMemory(GPUVAddr target, std::size_t offset, | ||
| 405 | u64 size) { | ||
| 406 | |||
| 407 | // This is the appropriately sized VMA that will turn into our allocation. | ||
| 408 | VMAIter vma_handle{CarveVMA(target, size)}; | ||
| 409 | VirtualMemoryArea& vma{vma_handle->second}; | ||
| 410 | |||
| 411 | ASSERT(vma.size == size); | ||
| 412 | |||
| 413 | vma.offset = offset; | ||
| 414 | |||
| 415 | return Allocate(vma_handle); | ||
| 416 | } | ||
| 417 | |||
| 418 | MemoryManager::VMAHandle MemoryManager::MapBackingMemory(GPUVAddr target, u8* memory, u64 size, | ||
| 419 | VAddr backing_addr) { | ||
| 420 | // This is the appropriately sized VMA that will turn into our mapping. | ||
| 421 | VMAIter vma_handle{CarveVMA(target, size)}; | ||
| 422 | VirtualMemoryArea& vma{vma_handle->second}; | ||
| 423 | |||
| 424 | ASSERT(vma.size == size); | ||
| 425 | |||
| 426 | vma.type = VirtualMemoryArea::Type::Mapped; | ||
| 427 | vma.backing_memory = memory; | ||
| 428 | vma.backing_addr = backing_addr; | ||
| 429 | UpdatePageTableForVMA(vma); | ||
| 430 | |||
| 431 | return MergeAdjacent(vma_handle); | ||
| 432 | } | ||
| 433 | |||
| 434 | void MemoryManager::UnmapRange(GPUVAddr target, u64 size) { | ||
| 435 | VMAIter vma{CarveVMARange(target, size)}; | ||
| 436 | const VAddr target_end{target + size}; | ||
| 437 | const VMAIter end{vma_map.end()}; | ||
| 438 | |||
| 439 | // The comparison against the end of the range must be done using addresses since VMAs can be | ||
| 440 | // merged during this process, causing invalidation of the iterators. | ||
| 441 | while (vma != end && vma->second.base < target_end) { | ||
| 442 | // Unmapped ranges return to allocated state and can be reused | ||
| 443 | // This behavior is used by Super Mario Odyssey, Sonic Forces, and likely other games | ||
| 444 | vma = std::next(Allocate(vma)); | ||
| 445 | } | ||
| 446 | |||
| 447 | ASSERT(FindVMA(target)->second.size >= size); | ||
| 448 | } | ||
| 449 | |||
| 450 | MemoryManager::VMAIter MemoryManager::StripIterConstness(const VMAHandle& iter) { | ||
| 451 | // This uses a neat C++ trick to convert a const_iterator to a regular iterator, given | ||
| 452 | // non-const access to its container. | ||
| 453 | return vma_map.erase(iter, iter); // Erases an empty range of elements | ||
| 454 | } | ||
| 455 | |||
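The trick in StripIterConstness works because, since C++11, the range overload of erase accepts const_iterators yet returns a plain iterator into the same container; erasing an empty range therefore converts the handle without removing anything. A stand-alone demonstration on std::map:

    #include <cassert>
    #include <map>

    int main() {
        std::map<int, int> m{{1, 10}, {2, 20}};

        std::map<int, int>::const_iterator cit = m.find(2);

        // erase(first, last) over an empty range removes no elements, but its
        // return value is a mutable iterator positioned at `last`.
        std::map<int, int>::iterator it = m.erase(cit, cit);

        it->second = 25; // legal now: we hold a non-const iterator
        assert(m.size() == 2 && m.at(2) == 25);
    }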
| 456 | MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) { | ||
| 457 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size); | ||
| 458 | ASSERT_MSG((base & page_mask) == 0, "non-page aligned base: 0x{:016X}", base); | ||
| 459 | |||
| 460 | VMAIter vma_handle{StripIterConstness(FindVMA(base))}; | ||
| 461 | if (vma_handle == vma_map.end()) { | ||
| 462 | // Target address is outside the managed range | ||
| 463 | return {}; | ||
| 464 | } | ||
| 465 | |||
| 466 | const VirtualMemoryArea& vma{vma_handle->second}; | ||
| 467 | if (vma.type == VirtualMemoryArea::Type::Mapped) { | ||
| 468 | // Region is already allocated | ||
| 469 | return vma_handle; | ||
| 470 | } | ||
| 471 | |||
| 472 | const VAddr start_in_vma{base - vma.base}; | ||
| 473 | const VAddr end_in_vma{start_in_vma + size}; | ||
| 474 | |||
| 475 | ASSERT_MSG(end_in_vma <= vma.size, "region size 0x{:016X} is less than required size 0x{:016X}", | ||
| 476 | vma.size, end_in_vma); | ||
| 477 | |||
| 478 | if (end_in_vma < vma.size) { | ||
| 479 | // Split VMA at the end of the allocated region | ||
| 480 | SplitVMA(vma_handle, end_in_vma); | ||
| 481 | } | ||
| 482 | if (start_in_vma != 0) { | ||
| 483 | // Split VMA at the start of the allocated region | ||
| 484 | vma_handle = SplitVMA(vma_handle, start_in_vma); | ||
| 485 | } | ||
| 486 | |||
| 487 | return vma_handle; | ||
| 488 | } | ||
| 489 | |||
| 490 | MemoryManager::VMAIter MemoryManager::CarveVMARange(GPUVAddr target, u64 size) { | ||
| 491 | ASSERT_MSG((size & page_mask) == 0, "non-page aligned size: 0x{:016X}", size); | ||
| 492 | ASSERT_MSG((target & page_mask) == 0, "non-page aligned base: 0x{:016X}", target); | ||
| 493 | |||
| 494 | const VAddr target_end{target + size}; | ||
| 495 | ASSERT(target_end >= target); | ||
| 496 | ASSERT(size > 0); | ||
| 497 | |||
| 498 | VMAIter begin_vma{StripIterConstness(FindVMA(target))}; | ||
| 499 | const VMAIter i_end{vma_map.lower_bound(target_end)}; | ||
| 500 | if (std::any_of(begin_vma, i_end, [](const auto& entry) { | ||
| 501 | return entry.second.type == VirtualMemoryArea::Type::Unmapped; | ||
| 502 | })) { | ||
| 503 | return {}; | 318 | return {}; |
| 504 | } | 319 | } |
| 505 | 320 | const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size}; | |
| 506 | if (target != begin_vma->second.base) { | 321 | return page <= Core::Memory::PAGE_SIZE; |
| 507 | begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); | ||
| 508 | } | ||
| 509 | |||
| 510 | VMAIter end_vma{StripIterConstness(FindVMA(target_end))}; | ||
| 511 | if (end_vma != vma_map.end() && target_end != end_vma->second.base) { | ||
| 512 | end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); | ||
| 513 | } | ||
| 514 | |||
| 515 | return begin_vma; | ||
| 516 | } | ||
| 517 | |||
| 518 | MemoryManager::VMAIter MemoryManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { | ||
| 519 | VirtualMemoryArea& old_vma{vma_handle->second}; | ||
| 520 | VirtualMemoryArea new_vma{old_vma}; // Make a copy of the VMA | ||
| 521 | |||
| 522 | // For now, don't allow no-op VMA splits (trying to split at a boundary) because it's probably | ||
| 523 | // a bug. This restriction might be removed later. | ||
| 524 | ASSERT(offset_in_vma < old_vma.size); | ||
| 525 | ASSERT(offset_in_vma > 0); | ||
| 526 | |||
| 527 | old_vma.size = offset_in_vma; | ||
| 528 | new_vma.base += offset_in_vma; | ||
| 529 | new_vma.size -= offset_in_vma; | ||
| 530 | |||
| 531 | switch (new_vma.type) { | ||
| 532 | case VirtualMemoryArea::Type::Unmapped: | ||
| 533 | break; | ||
| 534 | case VirtualMemoryArea::Type::Allocated: | ||
| 535 | new_vma.offset += offset_in_vma; | ||
| 536 | break; | ||
| 537 | case VirtualMemoryArea::Type::Mapped: | ||
| 538 | new_vma.backing_memory += offset_in_vma; | ||
| 539 | break; | ||
| 540 | } | ||
| 541 | |||
| 542 | ASSERT(old_vma.CanBeMergedWith(new_vma)); | ||
| 543 | |||
| 544 | return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma); | ||
| 545 | } | ||
| 546 | |||
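SplitVMA cuts one region into two contiguous halves and fixes up whichever field anchors the right half to its backing store (the offset for allocations, the host pointer for mappings), then inserts the new record directly after the old one with emplace_hint so the map stays ordered without a fresh lookup. A reduced sketch of the same bookkeeping for the mapped case only, hypothetical types throughout:

    #include <cassert>
    #include <cstdint>
    #include <iterator>
    #include <map>

    struct Mapping {
        std::uint64_t base;
        std::uint64_t size;
        unsigned char* backing; // host pointer backing this range
    };

    using MappingMap = std::map<std::uint64_t, Mapping>;

    // Splits *it at offset, returning an iterator to the right-hand half.
    MappingMap::iterator Split(MappingMap& map, MappingMap::iterator it,
                               std::uint64_t offset) {
        Mapping& left = it->second;
        assert(offset > 0 && offset < left.size);

        Mapping right = left;    // start from a copy of the original record
        left.size = offset;
        right.base += offset;
        right.size -= offset;
        right.backing += offset; // keep the host pointer in sync

        return map.emplace_hint(std::next(it), right.base, right);
    }

    int main() {
        static unsigned char host[0x400]{};
        MappingMap map{{0x1000, Mapping{0x1000, 0x400, host}}};

        const auto right = Split(map, map.begin(), 0x100);
        assert(map.size() == 2);
        assert(right->second.base == 0x1100 && right->second.backing == host + 0x100);
        assert(map.begin()->second.size == 0x100);
    }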
| 547 | MemoryManager::VMAIter MemoryManager::MergeAdjacent(VMAIter iter) { | ||
| 548 | const VMAIter next_vma{std::next(iter)}; | ||
| 549 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | ||
| 550 | iter->second.size += next_vma->second.size; | ||
| 551 | vma_map.erase(next_vma); | ||
| 552 | } | ||
| 553 | |||
| 554 | if (iter != vma_map.begin()) { | ||
| 555 | VMAIter prev_vma{std::prev(iter)}; | ||
| 556 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | ||
| 557 | prev_vma->second.size += iter->second.size; | ||
| 558 | vma_map.erase(iter); | ||
| 559 | iter = prev_vma; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | return iter; | ||
| 564 | } | ||
| 565 | |||
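MergeAdjacent is the inverse operation: it tries to absorb the following entry into the current one, then the current one into its predecessor, and returns whichever iterator survives so callers never hold an erased element. A compact sketch over plain (base, size) records, assuming mergeability means nothing more than contiguity:

    #include <cassert>
    #include <cstdint>
    #include <iterator>
    #include <map>

    using RangeMap = std::map<std::uint64_t, std::uint64_t>; // base -> size

    RangeMap::iterator MergeAdjacent(RangeMap& map, RangeMap::iterator it) {
        const auto next = std::next(it);
        if (next != map.end() && it->first + it->second == next->first) {
            it->second += next->second; // absorb the next range
            map.erase(next);
        }
        if (it != map.begin()) {
            const auto prev = std::prev(it);
            if (prev->first + prev->second == it->first) {
                prev->second += it->second; // absorbed by the previous range
                map.erase(it);
                it = prev;
            }
        }
        return it;
    }

    int main() {
        RangeMap map{{0x0000, 0x1000}, {0x1000, 0x1000}, {0x3000, 0x1000}};
        const auto merged = MergeAdjacent(map, map.find(0x1000));
        assert(map.size() == 2 && merged->first == 0x0000 && merged->second == 0x2000);
    }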
| 566 | void MemoryManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | ||
| 567 | switch (vma.type) { | ||
| 568 | case VirtualMemoryArea::Type::Unmapped: | ||
| 569 | UnmapRegion(vma.base, vma.size); | ||
| 570 | break; | ||
| 571 | case VirtualMemoryArea::Type::Allocated: | ||
| 572 | MapMemoryRegion(vma.base, vma.size, nullptr, vma.backing_addr); | ||
| 573 | break; | ||
| 574 | case VirtualMemoryArea::Type::Mapped: | ||
| 575 | MapMemoryRegion(vma.base, vma.size, vma.backing_memory, vma.backing_addr); | ||
| 576 | break; | ||
| 577 | } | ||
| 578 | } | 322 | } |
| 579 | 323 | ||
| 580 | } // namespace Tegra | 324 | } // namespace Tegra |
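The header diff that follows removes this VMA machinery entirely: the std::map of VirtualMemoryArea records and Common::PageTable give way to a flat std::vector of 4-byte PageEntry values, where each entry either stores a CPU address shifted right by 12 bits or one of two reserved states (Unmapped, Allocated). A reduced model of that encoding, reconstructed from the header below rather than copied from it:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Reduced model of the new 32-bit page entry: the two top values are
    // reserved for Unmapped and Allocated, everything else is a 4 KiB-aligned
    // CPU address stored as (addr >> 12).
    class PageEntry {
    public:
        static constexpr std::uint32_t Unmapped = static_cast<std::uint32_t>(-1);
        static constexpr std::uint32_t Allocated = static_cast<std::uint32_t>(-2);

        constexpr PageEntry() = default;
        constexpr explicit PageEntry(std::uint64_t cpu_addr)
            : state{static_cast<std::uint32_t>(cpu_addr >> ShiftBits)} {}

        constexpr bool IsValid() const {
            return state != Unmapped && state != Allocated;
        }
        constexpr std::uint64_t ToAddress() const {
            return IsValid() ? static_cast<std::uint64_t>(state) << ShiftBits : 0;
        }

    private:
        static constexpr std::size_t ShiftBits = 12; // stored addresses are 4 KiB aligned
        std::uint32_t state = Unmapped;
    };
    static_assert(sizeof(PageEntry) == 4);

    int main() {
        constexpr PageEntry entry{std::uint64_t{0x12'3456'7000}};
        static_assert(entry.ToAddress() == 0x12'3456'7000);
        assert(!PageEntry{}.IsValid());
    }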
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 87658e87a..681bd9588 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -6,9 +6,9 @@ | |||
| 6 | 6 | ||
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <vector> | ||
| 9 | 10 | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "common/page_table.h" | ||
| 12 | 12 | ||
| 13 | namespace VideoCore { | 13 | namespace VideoCore { |
| 14 | class RasterizerInterface; | 14 | class RasterizerInterface; |
| @@ -20,45 +20,57 @@ class System; | |||
| 20 | 20 | ||
| 21 | namespace Tegra { | 21 | namespace Tegra { |
| 22 | 22 | ||
| 23 | /** | 23 | class PageEntry final { |
| 24 | * Represents a VMA in an address space. A VMA is a contiguous region of virtual addressing space | 24 | public: |
| 25 | * with homogeneous attributes across its extents. In this particular implementation each VMA is | 25 | enum class State : u32 { |
| 26 | * also backed by a single host memory allocation. | 26 | Unmapped = static_cast<u32>(-1), |
| 27 | */ | 27 | Allocated = static_cast<u32>(-2), |
| 28 | struct VirtualMemoryArea { | ||
| 29 | enum class Type : u8 { | ||
| 30 | Unmapped, | ||
| 31 | Allocated, | ||
| 32 | Mapped, | ||
| 33 | }; | 28 | }; |
| 34 | 29 | ||
| 35 | /// Virtual base address of the region. | 30 | constexpr PageEntry() = default; |
| 36 | GPUVAddr base{}; | 31 | constexpr PageEntry(State state) : state{state} {} |
| 37 | /// Size of the region. | 32 | constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} |
| 38 | u64 size{}; | 33 | |
| 39 | /// Memory area mapping type. | 34 | constexpr bool IsUnmapped() const { |
| 40 | Type type{Type::Unmapped}; | 35 | return state == State::Unmapped; |
| 41 | /// CPU memory mapped address corresponding to this memory area. | 36 | } |
| 42 | VAddr backing_addr{}; | 37 | |
| 43 | /// Offset into the backing_memory the mapping starts from. | 38 | constexpr bool IsAllocated() const { |
| 44 | std::size_t offset{}; | 39 | return state == State::Allocated; |
| 45 | /// Pointer backing this VMA. | 40 | } |
| 46 | u8* backing_memory{}; | 41 | |
| 47 | 42 | constexpr bool IsValid() const { | |
| 48 | /// Tests if this area can be merged to the right with `next`. | 43 | return !IsUnmapped() && !IsAllocated(); |
| 49 | bool CanBeMergedWith(const VirtualMemoryArea& next) const; | 44 | } |
| 45 | |||
| 46 | constexpr VAddr ToAddress() const { | ||
| 47 | if (!IsValid()) { | ||
| 48 | return {}; | ||
| 49 | } | ||
| 50 | |||
| 51 | return static_cast<VAddr>(state) << ShiftBits; | ||
| 52 | } | ||
| 53 | |||
| 54 | constexpr PageEntry operator+(u64 offset) { | ||
| 55 | // If this is a reserved value, offsets do not apply | ||
| 56 | if (!IsValid()) { | ||
| 57 | return *this; | ||
| 58 | } | ||
| 59 | return PageEntry{(static_cast<VAddr>(state) << ShiftBits) + offset}; | ||
| 60 | } | ||
| 61 | |||
| 62 | private: | ||
| 63 | static constexpr std::size_t ShiftBits{12}; | ||
| 64 | |||
| 65 | State state{State::Unmapped}; | ||
| 50 | }; | 66 | }; |
| 67 | static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); | ||
| 51 | 68 | ||
| 52 | class MemoryManager final { | 69 | class MemoryManager final { |
| 53 | public: | 70 | public: |
| 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); | 71 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 55 | ~MemoryManager(); | 72 | ~MemoryManager(); |
| 56 | 73 | ||
| 57 | GPUVAddr AllocateSpace(u64 size, u64 align); | ||
| 58 | GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); | ||
| 59 | GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); | ||
| 60 | GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size); | ||
| 61 | GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size); | ||
| 62 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | 74 | std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; |
| 63 | 75 | ||
| 64 | template <typename T> | 76 | template <typename T> |
| @@ -70,9 +82,6 @@ public: | |||
| 70 | u8* GetPointer(GPUVAddr addr); | 82 | u8* GetPointer(GPUVAddr addr); |
| 71 | const u8* GetPointer(GPUVAddr addr) const; | 83 | const u8* GetPointer(GPUVAddr addr) const; |
| 72 | 84 | ||
| 73 | /// Returns true if the block is continuous in host memory, false otherwise | ||
| 74 | bool IsBlockContinuous(GPUVAddr start, std::size_t size) const; | ||
| 75 | |||
| 76 | /** | 85 | /** |
| 77 | * ReadBlock and WriteBlock are full read and write operations over virtual | 86 | * ReadBlock and WriteBlock are full read and write operations over virtual |
| 78 | * GPU Memory. It's important to use these when GPU memory may not be continuous | 87 | * GPU Memory. It's important to use these when GPU memory may not be continuous |
| @@ -98,92 +107,43 @@ public: | |||
| 98 | void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); | 107 | void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); |
| 99 | 108 | ||
| 100 | /** | 109 | /** |
| 101 | * IsGranularRange checks if a gpu region can be simply read with a pointer | 110 | * IsGranularRange checks if a gpu region can be simply read with a pointer. |
| 102 | */ | 111 | */ |
| 103 | bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); | 112 | bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); |
| 104 | 113 | ||
| 105 | private: | 114 | GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); |
| 106 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | 115 | GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); |
| 107 | using VMAHandle = VMAMap::const_iterator; | 116 | std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); |
| 108 | using VMAIter = VMAMap::iterator; | 117 | GPUVAddr Allocate(std::size_t size, std::size_t align); |
| 109 | 118 | void Unmap(GPUVAddr gpu_addr, std::size_t size); | |
| 110 | bool IsAddressValid(GPUVAddr addr) const; | ||
| 111 | void MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | ||
| 112 | VAddr backing_addr = 0); | ||
| 113 | void MapMemoryRegion(GPUVAddr base, u64 size, u8* target, VAddr backing_addr); | ||
| 114 | void UnmapRegion(GPUVAddr base, u64 size); | ||
| 115 | |||
| 116 | /// Finds the VMA in which the given address is included in, or `vma_map.end()`. | ||
| 117 | VMAHandle FindVMA(GPUVAddr target) const; | ||
| 118 | |||
| 119 | VMAHandle AllocateMemory(GPUVAddr target, std::size_t offset, u64 size); | ||
| 120 | |||
| 121 | /** | ||
| 122 | * Maps an unmanaged host memory pointer at a given address. | ||
| 123 | * | ||
| 124 | * @param target The guest address to start the mapping at. | ||
| 125 | * @param memory The memory to be mapped. | ||
| 126 | * @param size Size of the mapping in bytes. | ||
| 127 | * @param backing_addr The base address of the range to back this mapping. | ||
| 128 | */ | ||
| 129 | VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr); | ||
| 130 | |||
| 131 | /// Unmaps a range of addresses, splitting VMAs as necessary. | ||
| 132 | void UnmapRange(GPUVAddr target, u64 size); | ||
| 133 | |||
| 134 | /// Converts a VMAHandle to a mutable VMAIter. | ||
| 135 | VMAIter StripIterConstness(const VMAHandle& iter); | ||
| 136 | |||
| 137 | /// Marks as the specified VMA as allocated. | ||
| 138 | VMAIter Allocate(VMAIter vma); | ||
| 139 | |||
| 140 | /** | ||
| 141 | * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing | ||
| 142 | * the appropriate error checking. | ||
| 143 | */ | ||
| 144 | VMAIter CarveVMA(GPUVAddr base, u64 size); | ||
| 145 | |||
| 146 | /** | ||
| 147 | * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each | ||
| 148 | * end of the range. | ||
| 149 | */ | ||
| 150 | VMAIter CarveVMARange(GPUVAddr base, u64 size); | ||
| 151 | |||
| 152 | /** | ||
| 153 | * Splits a VMA in two, at the specified offset. | ||
| 154 | * @returns the right side of the split, with the original iterator becoming the left side. | ||
| 155 | */ | ||
| 156 | VMAIter SplitVMA(VMAIter vma, u64 offset_in_vma); | ||
| 157 | 119 | ||
| 158 | /** | 120 | private: |
| 159 | * Checks for and merges the specified VMA with adjacent ones if possible. | 121 | PageEntry GetPageEntry(GPUVAddr gpu_addr) const; |
| 160 | * @returns the merged VMA or the original if no merging was possible. | 122 | void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); |
| 161 | */ | 123 | GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); |
| 162 | VMAIter MergeAdjacent(VMAIter vma); | 124 | std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; |
| 163 | 125 | ||
| 164 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | 126 | void TryLockPage(PageEntry page_entry, std::size_t size); |
| 165 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | 127 | void TryUnlockPage(PageEntry page_entry, std::size_t size); |
| 166 | 128 | ||
| 167 | /// Finds a free (unmapped region) of the specified size starting at the specified address. | 129 | static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { |
| 168 | GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const; | 130 | return (gpu_addr >> page_bits) & page_table_mask; |
| 131 | } | ||
| 169 | 132 | ||
| 170 | private: | 133 | static constexpr u64 address_space_size = 1ULL << 40; |
| 134 | static constexpr u64 address_space_start = 1ULL << 32; | ||
| 171 | static constexpr u64 page_bits{16}; | 135 | static constexpr u64 page_bits{16}; |
| 172 | static constexpr u64 page_size{1 << page_bits}; | 136 | static constexpr u64 page_size{1 << page_bits}; |
| 173 | static constexpr u64 page_mask{page_size - 1}; | 137 | static constexpr u64 page_mask{page_size - 1}; |
| 138 | static constexpr u64 page_table_bits{24}; | ||
| 139 | static constexpr u64 page_table_size{1 << page_table_bits}; | ||
| 140 | static constexpr u64 page_table_mask{page_table_size - 1}; | ||
| 174 | 141 | ||
| 175 | /// Address space in bits, according to Tegra X1 TRM | 142 | Core::System& system; |
| 176 | static constexpr u32 address_space_width{40}; | ||
| 177 | /// Start address for mapping, this is fairly arbitrary but must be non-zero. | ||
| 178 | static constexpr GPUVAddr address_space_base{0x100000}; | ||
| 179 | /// End of address space, based on address space in bits. | ||
| 180 | static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; | ||
| 181 | 143 | ||
| 182 | Common::PageTable page_table; | ||
| 183 | VMAMap vma_map; | ||
| 184 | VideoCore::RasterizerInterface& rasterizer; | 144 | VideoCore::RasterizerInterface& rasterizer; |
| 185 | 145 | ||
| 186 | Core::System& system; | 146 | std::vector<PageEntry> page_table; |
| 187 | }; | 147 | }; |
| 188 | 148 | ||
| 189 | } // namespace Tegra | 149 | } // namespace Tegra |
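The constants at the bottom of the new header fit together neatly: a GPU address splits into a 16-bit offset within a 64 KiB page and a 24-bit index into the flat page table, which exactly covers the 40-bit address space. A few static checks make the relationship explicit; the names mirror the header, but this is an illustration rather than the class itself:

    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t page_bits = 16;        // 64 KiB pages
    constexpr std::uint64_t page_table_bits = 24;  // 16M page-table entries
    constexpr std::uint64_t page_table_size = std::uint64_t{1} << page_table_bits;
    constexpr std::uint64_t page_table_mask = page_table_size - 1;
    constexpr std::uint64_t address_space_size = std::uint64_t{1} << 40;

    constexpr std::size_t PageEntryIndex(std::uint64_t gpu_addr) {
        return (gpu_addr >> page_bits) & page_table_mask;
    }

    // The table indexes every page of the 40-bit address space...
    static_assert(page_table_size * (std::uint64_t{1} << page_bits) == address_space_size);
    // ...and with 4-byte entries it occupies 64 MiB of host memory.
    static_assert(page_table_size * 4 == 64 * 1024 * 1024);
    // The last page of the address space maps to the last table slot.
    static_assert(PageEntryIndex(address_space_size - 1) == page_table_size - 1);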