| author | 2021-11-14 20:55:52 +0100 | |
|---|---|---|
| committer | 2022-10-06 21:00:52 +0200 | |
| commit | feb49c822d9cabc5bc7be9eab1f2bf4ba460176a (patch) | |
| tree | 678c58e7fb8e41f5a04e309df9d196320d11de99 /src | |
| parent | NVDRV: Update copyright notices. (diff) | |
| download | yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.gz yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.tar.xz yuzu-feb49c822d9cabc5bc7be9eab1f2bf4ba460176a.zip | |
NVDRV: Remake ASGPU
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/address_space.cpp | 11 | ||||
| -rw-r--r-- | src/common/address_space.h | 134 | ||||
| -rw-r--r-- | src/common/address_space.inc | 338 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 460 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 163 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 3 |
8 files changed, 882 insertions, 239 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2db414819..a02696873 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -17,6 +17,8 @@ endif () | |||
| 17 | include(GenerateSCMRev) | 17 | include(GenerateSCMRev) |
| 18 | 18 | ||
| 19 | add_library(common STATIC | 19 | add_library(common STATIC |
| 20 | address_space.cpp | ||
| 21 | address_space.h | ||
| 20 | algorithm.h | 22 | algorithm.h |
| 21 | alignment.h | 23 | alignment.h |
| 22 | announce_multiplayer_room.h | 24 | announce_multiplayer_room.h |
diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp new file mode 100644 index 000000000..6db85be87 --- /dev/null +++ b/src/common/address_space.cpp | |||
| @@ -0,0 +1,11 @@ | |||
| 1 | // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/address_space.inc" | ||
| 6 | |||
| 7 | namespace Common { | ||
| 8 | |||
| 9 | template class Common::FlatAllocator<u32, 0, 32>; | ||
| 10 | |||
| 11 | } | ||
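The new address_space.cpp exists only to pull in the template definitions from address_space.inc and explicitly instantiate the one specialisation the driver needs (`Common::FlatAllocator<u32, 0, 32>`). Below is a minimal sketch of that explicit-instantiation idiom; the `Registry` class and file names are illustrative rather than part of this commit, and the three "files" are shown as one listing so the sketch compiles as a single translation unit.

```cpp
// registry.h -- template declaration only; definitions live in registry.inc
#pragma once
#include <cstdint>
#include <vector>

template <typename Key, Key Invalid>
class Registry {
public:
    void Add(Key key);
    bool Contains(Key key) const;

private:
    std::vector<Key> keys;
};

// registry.inc -- out-of-line definitions, included by each .cpp that
// instantiates the template (inlined below the declaration here)
#include <algorithm>

template <typename Key, Key Invalid>
void Registry<Key, Invalid>::Add(Key key) {
    if (key != Invalid) {
        keys.push_back(key);
    }
}

template <typename Key, Key Invalid>
bool Registry<Key, Invalid>::Contains(Key key) const {
    return std::find(keys.begin(), keys.end(), key) != keys.end();
}

// registry.cpp -- one explicit instantiation per specialisation the project
// uses, mirroring `template class Common::FlatAllocator<u32, 0, 32>;`
template class Registry<std::uint32_t, 0>;
```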
diff --git a/src/common/address_space.h b/src/common/address_space.h new file mode 100644 index 000000000..fd2f32b7d --- /dev/null +++ b/src/common/address_space.h | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <concepts> | ||
| 8 | #include <functional> | ||
| 9 | #include <mutex> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Common { | ||
| 15 | template <typename VaType, size_t AddressSpaceBits> | ||
| 16 | concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits; | ||
| 17 | |||
| 18 | struct EmptyStruct {}; | ||
| 19 | |||
| 20 | /** | ||
| 21 | * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector | ||
| 22 | */ | ||
| 23 | template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, | ||
| 24 | bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct> | ||
| 25 | requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap { | ||
| 26 | private: | ||
| 27 | std::function<void(VaType, VaType)> | ||
| 28 | unmapCallback{}; //!< Callback called when the mappings in a region have changed | ||
| 29 | |||
| 30 | protected: | ||
| 31 | /** | ||
| 32 | * @brief Represents a block of memory in the AS, the physical mapping is contiguous until | ||
| 33 | * another block with a different phys address is hit | ||
| 34 | */ | ||
| 35 | struct Block { | ||
| 36 | VaType virt{UnmappedVa}; //!< VA of the block | ||
| 37 | PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block | ||
| 38 | //!< is encountered | ||
| 39 | [[no_unique_address]] ExtraBlockInfo extraInfo; | ||
| 40 | |||
| 41 | Block() = default; | ||
| 42 | |||
| 43 | Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo) | ||
| 44 | : virt(virt), phys(phys), extraInfo(extraInfo) {} | ||
| 45 | |||
| 46 | constexpr bool Valid() { | ||
| 47 | return virt != UnmappedVa; | ||
| 48 | } | ||
| 49 | |||
| 50 | constexpr bool Mapped() { | ||
| 51 | return phys != UnmappedPa; | ||
| 52 | } | ||
| 53 | |||
| 54 | constexpr bool Unmapped() { | ||
| 55 | return phys == UnmappedPa; | ||
| 56 | } | ||
| 57 | |||
| 58 | bool operator<(const VaType& pVirt) const { | ||
| 59 | return virt < pVirt; | ||
| 60 | } | ||
| 61 | }; | ||
| 62 | |||
| 63 | std::mutex blockMutex; | ||
| 64 | std::vector<Block> blocks{Block{}}; | ||
| 65 | |||
| 66 | /** | ||
| 67 | * @brief Maps a PA range into the given AS region | ||
| 68 | * @note blockMutex MUST be locked when calling this | ||
| 69 | */ | ||
| 70 | void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo); | ||
| 71 | |||
| 72 | /** | ||
| 73 | * @brief Unmaps the given range and merges it with other unmapped regions | ||
| 74 | * @note blockMutex MUST be locked when calling this | ||
| 75 | */ | ||
| 76 | void UnmapLocked(VaType virt, VaType size); | ||
| 77 | |||
| 78 | public: | ||
| 79 | static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) + | ||
| 80 | ((1ULL << (AddressSpaceBits - 1)) - | ||
| 81 | 1)}; //!< The maximum VA that this AS can technically reach | ||
| 82 | |||
| 83 | VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS | ||
| 84 | |||
| 85 | FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {}); | ||
| 86 | |||
| 87 | FlatAddressSpaceMap() = default; | ||
| 88 | |||
| 89 | void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) { | ||
| 90 | std::scoped_lock lock(blockMutex); | ||
| 91 | MapLocked(virt, phys, size, extraInfo); | ||
| 92 | } | ||
| 93 | |||
| 94 | void Unmap(VaType virt, VaType size) { | ||
| 95 | std::scoped_lock lock(blockMutex); | ||
| 96 | UnmapLocked(virt, size); | ||
| 97 | } | ||
| 98 | }; | ||
| 99 | |||
| 100 | /** | ||
| 101 | * @brief FlatAllocator specialises FlatAddressSpaceMap to work as an allocator, with an | ||
| 102 | * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block | ||
| 103 | */ | ||
| 104 | template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> | ||
| 105 | requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator | ||
| 106 | : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> { | ||
| 107 | private: | ||
| 108 | using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>; | ||
| 109 | |||
| 110 | VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass; once | ||
| 111 | //!< this reaches the AS limit the slower allocation path will be | ||
| 112 | //!< used | ||
| 113 | |||
| 114 | public: | ||
| 115 | VaType vaStart; //!< The base VA of the allocator, no allocations will be below this | ||
| 116 | |||
| 117 | FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum); | ||
| 118 | |||
| 119 | /** | ||
| 120 | * @brief Allocates a region in the AS of the given size and returns its address | ||
| 121 | */ | ||
| 122 | VaType Allocate(VaType size); | ||
| 123 | |||
| 124 | /** | ||
| 125 | * @brief Marks the given region in the AS as allocated | ||
| 126 | */ | ||
| 127 | void AllocateFixed(VaType virt, VaType size); | ||
| 128 | |||
| 129 | /** | ||
| 130 | * @brief Frees an AS region so it can be used again | ||
| 131 | */ | ||
| 132 | void Free(VaType virt, VaType size); | ||
| 133 | }; | ||
| 134 | } // namespace Common | ||
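To make the intended use of the header above concrete, here is a minimal sketch of driving `Common::FlatAllocator` directly. It assumes compilation inside the yuzu tree, where `common/address_space.h` and the `<u32, 0, 32>` instantiation from address_space.cpp are available; the address values and the treatment of VAs as page indices mirror how nvhost_as_gpu uses its `VM::Allocator` further down, but are otherwise arbitrary.

```cpp
#include "common/address_space.h"
#include "common/common_types.h"

void AllocatorSketch() {
    // A 32-bit address space whose usable range starts at 0x100. The
    // allocator is unit-agnostic; nvhost_as_gpu feeds it page indices.
    Common::FlatAllocator<u32, 0, 32> allocator{0x100};

    // Fast path: the linear pass hands out the next free 0x10 units.
    const u32 linear{allocator.Allocate(0x10)};

    // A caller may also claim a specific range up front (fixed allocation).
    allocator.AllocateFixed(0x2000, 0x20);

    // Freed ranges become available to later Allocate/AllocateFixed calls.
    allocator.Free(linear, 0x10);
    allocator.Free(0x2000, 0x20);
}
```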
diff --git a/src/common/address_space.inc b/src/common/address_space.inc new file mode 100644 index 000000000..907c55d88 --- /dev/null +++ b/src/common/address_space.inc | |||
| @@ -0,0 +1,338 @@ | |||
| 1 | // SPDX-License-Identifier: GPLv3 or later | ||
| 2 | // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | ||
| 3 | |||
| 4 | #include "common/address_space.h" | ||
| 5 | #include "common/assert.h" | ||
| 6 | |||
| 7 | #define MAP_MEMBER(returnType) \ | ||
| 8 | template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \ | ||
| 9 | bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \ | ||
| 10 | requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap< \ | ||
| 11 | VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> | ||
| 12 | #define MAP_MEMBER_CONST() \ | ||
| 13 | template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \ | ||
| 14 | bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \ | ||
| 15 | requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap< \ | ||
| 16 | VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo> | ||
| 17 | |||
| 18 | #define MM_MEMBER(returnType) \ | ||
| 19 | template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ | ||
| 20 | requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \ | ||
| 21 | FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits> | ||
| 22 | |||
| 23 | #define ALLOC_MEMBER(returnType) \ | ||
| 24 | template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ | ||
| 25 | requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \ | ||
| 26 | FlatAllocator<VaType, UnmappedVa, AddressSpaceBits> | ||
| 27 | #define ALLOC_MEMBER_CONST() \ | ||
| 28 | template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \ | ||
| 29 | requires AddressSpaceValid<VaType, AddressSpaceBits> \ | ||
| 30 | FlatAllocator<VaType, UnmappedVa, AddressSpaceBits> | ||
| 31 | |||
| 32 | namespace Common { | ||
| 33 | MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit, | ||
| 34 | std::function<void(VaType, VaType)> unmapCallback) | ||
| 35 | : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) { | ||
| 36 | if (vaLimit > VaMaximum) | ||
| 37 | UNREACHABLE_MSG("Invalid VA limit!"); | ||
| 38 | } | ||
| 39 | |||
| 40 | MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) { | ||
| 41 | VaType virtEnd{virt + size}; | ||
| 42 | |||
| 43 | if (virtEnd > vaLimit) | ||
| 44 | UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", | ||
| 45 | virtEnd, vaLimit); | ||
| 46 | |||
| 47 | auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; | ||
| 48 | if (blockEndSuccessor == blocks.begin()) | ||
| 49 | UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd); | ||
| 50 | |||
| 51 | auto blockEndPredecessor{std::prev(blockEndSuccessor)}; | ||
| 52 | |||
| 53 | if (blockEndSuccessor != blocks.end()) { | ||
| 54 | // We have blocks in front of us; if one is directly in front then we don't have to add a | ||
| 55 | // tail | ||
| 56 | if (blockEndSuccessor->virt != virtEnd) { | ||
| 57 | PaType tailPhys{[&]() -> PaType { | ||
| 58 | if constexpr (!PaContigSplit) { | ||
| 59 | return blockEndPredecessor | ||
| 60 | ->phys; // Always propagate unmapped regions rather than calculating offset | ||
| 61 | } else { | ||
| 62 | if (blockEndPredecessor->Unmapped()) | ||
| 63 | return blockEndPredecessor->phys; // Always propagate unmapped regions | ||
| 64 | // rather than calculating offset | ||
| 65 | else | ||
| 66 | return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; | ||
| 67 | } | ||
| 68 | }()}; | ||
| 69 | |||
| 70 | if (blockEndPredecessor->virt >= virt) { | ||
| 71 | // If this block's start would be overlapped by the map then reuse it as a tail | ||
| 72 | // block | ||
| 73 | blockEndPredecessor->virt = virtEnd; | ||
| 74 | blockEndPredecessor->phys = tailPhys; | ||
| 75 | blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo; | ||
| 76 | |||
| 77 | // No longer predecessor anymore | ||
| 78 | blockEndSuccessor = blockEndPredecessor--; | ||
| 79 | } else { | ||
| 80 | // Else insert a new one and we're done | ||
| 81 | blocks.insert(blockEndSuccessor, | ||
| 82 | {Block(virt, phys, extraInfo), | ||
| 83 | Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); | ||
| 84 | if (unmapCallback) | ||
| 85 | unmapCallback(virt, size); | ||
| 86 | |||
| 87 | return; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | } else { | ||
| 91 | // blockEndPredecessor will always be unmapped as the block list has to be terminated by an unmapped | ||
| 92 | // chunk | ||
| 93 | if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) { | ||
| 94 | // Move the unmapped block start backwards | ||
| 95 | blockEndPredecessor->virt = virtEnd; | ||
| 96 | |||
| 97 | // No longer predecessor anymore | ||
| 98 | blockEndSuccessor = blockEndPredecessor--; | ||
| 99 | } else { | ||
| 100 | // Else insert a new one and we're done | ||
| 101 | blocks.insert(blockEndSuccessor, | ||
| 102 | {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})}); | ||
| 103 | if (unmapCallback) | ||
| 104 | unmapCallback(virt, size); | ||
| 105 | |||
| 106 | return; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | auto blockStartSuccessor{blockEndSuccessor}; | ||
| 111 | |||
| 112 | // Walk the block vector to find the start successor as this is more efficient than another | ||
| 113 | // binary search in most scenarios | ||
| 114 | while (std::prev(blockStartSuccessor)->virt >= virt) | ||
| 115 | blockStartSuccessor--; | ||
| 116 | |||
| 117 | // Check that the start successor is either the end block or something in between | ||
| 118 | if (blockStartSuccessor->virt > virtEnd) { | ||
| 119 | UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); | ||
| 120 | } else if (blockStartSuccessor->virt == virtEnd) { | ||
| 121 | // We need to create a new block as there are none spare that we would overwrite | ||
| 122 | blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo)); | ||
| 123 | } else { | ||
| 124 | // Erase overwritten blocks | ||
| 125 | if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) | ||
| 126 | blocks.erase(eraseStart, blockEndSuccessor); | ||
| 127 | |||
| 128 | // Reuse a block that would otherwise be overwritten as a start block | ||
| 129 | blockStartSuccessor->virt = virt; | ||
| 130 | blockStartSuccessor->phys = phys; | ||
| 131 | blockStartSuccessor->extraInfo = extraInfo; | ||
| 132 | } | ||
| 133 | |||
| 134 | if (unmapCallback) | ||
| 135 | unmapCallback(virt, size); | ||
| 136 | } | ||
| 137 | |||
| 138 | MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) { | ||
| 139 | VaType virtEnd{virt + size}; | ||
| 140 | |||
| 141 | if (virtEnd > vaLimit) | ||
| 142 | UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}", | ||
| 143 | virtEnd, vaLimit); | ||
| 144 | |||
| 145 | auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)}; | ||
| 146 | if (blockEndSuccessor == blocks.begin()) | ||
| 147 | UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd); | ||
| 148 | |||
| 149 | auto blockEndPredecessor{std::prev(blockEndSuccessor)}; | ||
| 150 | |||
| 151 | auto walkBackToPredecessor{[&](auto iter) { | ||
| 152 | while (iter->virt >= virt) | ||
| 153 | iter--; | ||
| 154 | |||
| 155 | return iter; | ||
| 156 | }}; | ||
| 157 | |||
| 158 | auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) { | ||
| 159 | auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)}; | ||
| 160 | auto blockStartSuccessor{std::next(blockStartPredecessor)}; | ||
| 161 | |||
| 162 | auto eraseEnd{[&]() { | ||
| 163 | if (blockStartPredecessor->Unmapped()) { | ||
| 164 | // If the start predecessor is unmapped then we can erase everything in our region | ||
| 165 | // and be done | ||
| 166 | return std::next(unmappedEnd); | ||
| 167 | } else { | ||
| 168 | // Else reuse the end predecessor as the start of our unmapped region then erase all | ||
| 169 | // up to it | ||
| 170 | unmappedEnd->virt = virt; | ||
| 171 | return unmappedEnd; | ||
| 172 | } | ||
| 173 | }()}; | ||
| 174 | |||
| 175 | // We can't have two unmapped regions after each other | ||
| 176 | if (eraseEnd != blocks.end() && | ||
| 177 | (eraseEnd == blockStartSuccessor || | ||
| 178 | (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped()))) | ||
| 179 | UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!"); | ||
| 180 | |||
| 181 | blocks.erase(blockStartSuccessor, eraseEnd); | ||
| 182 | }}; | ||
| 183 | |||
| 184 | // We can avoid any splitting logic if these are the case | ||
| 185 | if (blockEndPredecessor->Unmapped()) { | ||
| 186 | if (blockEndPredecessor->virt > virt) | ||
| 187 | eraseBlocksWithEndUnmapped(blockEndPredecessor); | ||
| 188 | |||
| 189 | if (unmapCallback) | ||
| 190 | unmapCallback(virt, size); | ||
| 191 | |||
| 192 | return; // The region is unmapped, bail out early | ||
| 193 | } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) { | ||
| 194 | eraseBlocksWithEndUnmapped(blockEndSuccessor); | ||
| 195 | |||
| 196 | if (unmapCallback) | ||
| 197 | unmapCallback(virt, size); | ||
| 198 | |||
| 199 | return; // The region is unmapped here and doesn't need splitting, bail out early | ||
| 200 | } else if (blockEndSuccessor == blocks.end()) { | ||
| 201 | // This should never happen as the end should always follow an unmapped block | ||
| 202 | UNREACHABLE_MSG("Unexpected Memory Manager state!"); | ||
| 203 | } else if (blockEndSuccessor->virt != virtEnd) { | ||
| 204 | // If one block is directly in front then we don't have to add a tail | ||
| 205 | |||
| 206 | // The previous block is mapped so we will need to add a tail with an offset | ||
| 207 | PaType tailPhys{[&]() { | ||
| 208 | if constexpr (PaContigSplit) | ||
| 209 | return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt; | ||
| 210 | else | ||
| 211 | return blockEndPredecessor->phys; | ||
| 212 | }()}; | ||
| 213 | |||
| 214 | if (blockEndPredecessor->virt >= virt) { | ||
| 215 | // If this block's start would be overlapped by the unmap then reuse it as a tail block | ||
| 216 | blockEndPredecessor->virt = virtEnd; | ||
| 217 | blockEndPredecessor->phys = tailPhys; | ||
| 218 | |||
| 219 | // No longer predecessor anymore | ||
| 220 | blockEndSuccessor = blockEndPredecessor--; | ||
| 221 | } else { | ||
| 222 | blocks.insert(blockEndSuccessor, | ||
| 223 | {Block(virt, UnmappedPa, {}), | ||
| 224 | Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)}); | ||
| 225 | if (unmapCallback) | ||
| 226 | unmapCallback(virt, size); | ||
| 227 | |||
| 228 | return; // The previous block is mapped and ends before the region being unmapped | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | // Walk the block vector to find the start predecessor as this is more efficient than another | ||
| 233 | // binary search in most scenarios | ||
| 234 | auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)}; | ||
| 235 | auto blockStartSuccessor{std::next(blockStartPredecessor)}; | ||
| 236 | |||
| 237 | if (blockStartSuccessor->virt > virtEnd) { | ||
| 238 | UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt); | ||
| 239 | } else if (blockStartSuccessor->virt == virtEnd) { | ||
| 240 | // There are no blocks between the start and the end that would let us skip inserting a new | ||
| 241 | // one for head | ||
| 242 | |||
| 243 | // The previous block may be unmapped; if so we don't need to insert any unmaps after it | ||
| 244 | if (blockStartPredecessor->Mapped()) | ||
| 245 | blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {})); | ||
| 246 | } else if (blockStartPredecessor->Unmapped()) { | ||
| 247 | // If the previous block is unmapped | ||
| 248 | blocks.erase(blockStartSuccessor, blockEndPredecessor); | ||
| 249 | } else { | ||
| 250 | // Erase overwritten blocks, skipping the first one as we have written the unmapped start | ||
| 251 | // block there | ||
| 252 | if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor) | ||
| 253 | blocks.erase(eraseStart, blockEndSuccessor); | ||
| 254 | |||
| 255 | // Add in the unmapped block header | ||
| 256 | blockStartSuccessor->virt = virt; | ||
| 257 | blockStartSuccessor->phys = UnmappedPa; | ||
| 258 | } | ||
| 259 | |||
| 260 | if (unmapCallback) | ||
| 261 | unmapCallback(virt, size); | ||
| 262 | } | ||
| 263 | |||
| 264 | ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit) | ||
| 265 | : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {} | ||
| 266 | |||
| 267 | ALLOC_MEMBER(VaType)::Allocate(VaType size) { | ||
| 268 | std::scoped_lock lock(this->blockMutex); | ||
| 269 | |||
| 270 | VaType allocStart{UnmappedVa}; | ||
| 271 | VaType allocEnd{currentLinearAllocEnd + size}; | ||
| 272 | |||
| 273 | // Avoid searching backwards in the address space if possible | ||
| 274 | if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) { | ||
| 275 | auto allocEndSuccessor{ | ||
| 276 | std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)}; | ||
| 277 | if (allocEndSuccessor == this->blocks.begin()) | ||
| 278 | UNREACHABLE_MSG("First block in AS map is invalid!"); | ||
| 279 | |||
| 280 | auto allocEndPredecessor{std::prev(allocEndSuccessor)}; | ||
| 281 | if (allocEndPredecessor->virt <= currentLinearAllocEnd) { | ||
| 282 | allocStart = currentLinearAllocEnd; | ||
| 283 | } else { | ||
| 284 | // Skip over any fixed mappings in front of us | ||
| 285 | while (allocEndSuccessor != this->blocks.end()) { | ||
| 286 | if (allocEndSuccessor->virt - allocEndPredecessor->virt < size || | ||
| 287 | allocEndPredecessor->Mapped()) { | ||
| 288 | allocStart = allocEndPredecessor->virt; | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | |||
| 292 | allocEndPredecessor = allocEndSuccessor++; | ||
| 293 | |||
| 294 | // Use the VA limit to calculate if we can fit in the final block since it has no | ||
| 295 | // successor | ||
| 296 | if (allocEndSuccessor == this->blocks.end()) { | ||
| 297 | allocEnd = allocEndPredecessor->virt + size; | ||
| 298 | |||
| 299 | if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit) | ||
| 300 | allocStart = allocEndPredecessor->virt; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | if (allocStart != UnmappedVa) { | ||
| 307 | currentLinearAllocEnd = allocStart + size; | ||
| 308 | } else { // If linear allocation overflows the AS then find a gap | ||
| 309 | if (this->blocks.size() <= 2) | ||
| 310 | UNREACHABLE_MSG("Unexpected allocator state!"); | ||
| 311 | |||
| 312 | auto searchPredecessor{this->blocks.begin()}; | ||
| 313 | auto searchSuccessor{std::next(searchPredecessor)}; | ||
| 314 | |||
| 315 | while (searchSuccessor != this->blocks.end() && | ||
| 316 | (searchSuccessor->virt - searchPredecessor->virt < size || | ||
| 317 | searchPredecessor->Mapped())) { | ||
| 318 | searchPredecessor = searchSuccessor++; | ||
| 319 | } | ||
| 320 | |||
| 321 | if (searchSuccessor != this->blocks.end()) | ||
| 322 | allocStart = searchPredecessor->virt; | ||
| 323 | else | ||
| 324 | return {}; // AS is full | ||
| 325 | } | ||
| 326 | |||
| 327 | this->MapLocked(allocStart, true, size, {}); | ||
| 328 | return allocStart; | ||
| 329 | } | ||
| 330 | |||
| 331 | ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) { | ||
| 332 | this->Map(virt, true, size); | ||
| 333 | } | ||
| 334 | |||
| 335 | ALLOC_MEMBER(void)::Free(VaType virt, VaType size) { | ||
| 336 | this->Unmap(virt, size); | ||
| 337 | } | ||
| 338 | } // namespace Common | ||
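The Map/Unmap logic above leans on one invariant: `blocks` is a sorted vector in which each entry marks where a new contiguous VA-to-PA run (or an unmapped gap) begins, and the heterogeneous `operator<` lets `std::lower_bound` search it by raw VA. The self-contained sketch below (not yuzu code; the `Translate` helper is illustrative) shows that representation and the predecessor lookup the map routines start from.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

constexpr std::uint64_t Unmapped{~0ULL};

struct Block {
    std::uint64_t virt; // VA where this run starts
    std::uint64_t phys; // PA of the run, or Unmapped for a gap

    // Heterogeneous comparison so std::lower_bound can take a raw VA
    bool operator<(std::uint64_t va) const {
        return virt < va;
    }
};

// Resolve a VA against the sorted block vector, the same search step
// MapLocked/UnmapLocked perform before deciding whether to split a block.
std::uint64_t Translate(const std::vector<Block>& blocks, std::uint64_t va) {
    auto it{std::lower_bound(blocks.begin(), blocks.end(), va)};
    if (it == blocks.end() || it->virt != va) {
        if (it == blocks.begin()) {
            return Unmapped; // va lies before the first block
        }
        it = std::prev(it); // the run covering va starts at the predecessor
    }
    if (it->phys == Unmapped) {
        return Unmapped;
    }
    return it->phys + (va - it->virt); // PA grows 1:1 with VA inside a run
}

int main() {
    // [0x1000, 0x3000) mapped contiguously to PA 0x8000; everything else is
    // a gap, and the vector is terminated by an unmapped block.
    const std::vector<Block> blocks{
        {0x0000, Unmapped},
        {0x1000, 0x8000},
        {0x3000, Unmapped},
    };

    std::cout << std::hex << Translate(blocks, 0x1800) << '\n'; // 0x8800
    std::cout << std::hex << Translate(blocks, 0x4000) << '\n'; // Unmapped
}
```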
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 5c70c9a57..344ddfc90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <utility> | 7 | #include <utility> |
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 10 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 11 | #include "core/core.h" | 12 | #include "core/core.h" |
| @@ -21,8 +22,8 @@ | |||
| 21 | namespace Service::Nvidia::Devices { | 22 | namespace Service::Nvidia::Devices { |
| 22 | 23 | ||
| 23 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) | 24 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) |
| 24 | : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, | 25 | : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{}, |
| 25 | gmmu{std::make_shared<Tegra::MemoryManager>(system)} {} | 26 | gmmu{} {} |
| 26 | nvhost_as_gpu::~nvhost_as_gpu() = default; | 27 | nvhost_as_gpu::~nvhost_as_gpu() = default; |
| 27 | 28 | ||
| 28 | NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 29 | NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
| @@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>& | |||
| 89 | IoctlAllocAsEx params{}; | 90 | IoctlAllocAsEx params{}; |
| 90 | std::memcpy(¶ms, input.data(), input.size()); | 91 | std::memcpy(¶ms, input.data(), input.size()); |
| 91 | 92 | ||
| 92 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); | 93 | LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); |
| 93 | if (params.big_page_size == 0) { | 94 | |
| 94 | params.big_page_size = DEFAULT_BIG_PAGE_SIZE; | 95 | std::scoped_lock lock(mutex); |
| 96 | |||
| 97 | if (vm.initialised) { | ||
| 98 | UNREACHABLE_MSG("Cannot initialise an address space twice!"); | ||
| 99 | return NvResult::InvalidState; | ||
| 95 | } | 100 | } |
| 96 | 101 | ||
| 97 | big_page_size = params.big_page_size; | 102 | if (params.big_page_size) { |
| 103 | if (!std::has_single_bit(params.big_page_size)) { | ||
| 104 | LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size); | ||
| 105 | return NvResult::BadValue; | ||
| 106 | } | ||
| 107 | |||
| 108 | if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) { | ||
| 109 | LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size); | ||
| 110 | return NvResult::BadValue; | ||
| 111 | } | ||
| 112 | |||
| 113 | vm.big_page_size = params.big_page_size; | ||
| 114 | vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size)); | ||
| 115 | |||
| 116 | vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT; | ||
| 117 | } | ||
| 118 | |||
| 119 | // If this is unspecified then default values should be used | ||
| 120 | if (params.va_range_start) { | ||
| 121 | vm.va_range_start = params.va_range_start; | ||
| 122 | vm.va_range_split = params.va_range_split; | ||
| 123 | vm.va_range_end = params.va_range_end; | ||
| 124 | } | ||
| 125 | |||
| 126 | const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS}; | ||
| 127 | const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS}; | ||
| 128 | vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages); | ||
| 129 | |||
| 130 | const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits}; | ||
| 131 | const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits}; | ||
| 132 | vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages); | ||
| 133 | |||
| 134 | gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS); | ||
| 135 | vm.initialised = true; | ||
| 98 | 136 | ||
| 99 | return NvResult::Success; | 137 | return NvResult::Success; |
| 100 | } | 138 | } |
| @@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector< | |||
| 106 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, | 144 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, |
| 107 | params.page_size, params.flags); | 145 | params.page_size, params.flags); |
| 108 | 146 | ||
| 109 | const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; | 147 | std::scoped_lock lock(mutex); |
| 110 | if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) { | 148 | |
| 111 | params.offset = *(gmmu->AllocateFixed(params.offset, size)); | 149 | if (!vm.initialised) { |
| 150 | return NvResult::BadValue; | ||
| 151 | } | ||
| 152 | |||
| 153 | if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) { | ||
| 154 | return NvResult::BadValue; | ||
| 155 | } | ||
| 156 | |||
| 157 | if (params.page_size != vm.big_page_size && | ||
| 158 | ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) { | ||
| 159 | UNIMPLEMENTED_MSG("Sparse small pages are not implemented!"); | ||
| 160 | return NvResult::NotImplemented; | ||
| 161 | } | ||
| 162 | |||
| 163 | const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS | ||
| 164 | : vm.big_page_size_bits}; | ||
| 165 | |||
| 166 | auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator | ||
| 167 | : *vm.big_page_allocator}; | ||
| 168 | |||
| 169 | if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { | ||
| 170 | allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages); | ||
| 112 | } else { | 171 | } else { |
| 113 | params.offset = gmmu->Allocate(size, params.align); | 172 | params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits; |
| 173 | if (!params.offset) { | ||
| 174 | UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); | ||
| 175 | return NvResult::InsufficientMemory; | ||
| 176 | } | ||
| 114 | } | 177 | } |
| 115 | 178 | ||
| 116 | auto result = NvResult::Success; | 179 | u64 size{static_cast<u64>(params.pages) * params.page_size}; |
| 117 | if (!params.offset) { | 180 | |
| 118 | LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size); | 181 | if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) { |
| 119 | result = NvResult::InsufficientMemory; | 182 | gmmu->MapSparse(params.offset, size); |
| 120 | } | 183 | } |
| 121 | 184 | ||
| 185 | allocation_map[params.offset] = { | ||
| 186 | .size = size, | ||
| 187 | .page_size = params.page_size, | ||
| 188 | .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None, | ||
| 189 | }; | ||
| 190 | |||
| 122 | std::memcpy(output.data(), ¶ms, output.size()); | 191 | std::memcpy(output.data(), ¶ms, output.size()); |
| 123 | return result; | 192 | return NvResult::Success; |
| 193 | } | ||
| 194 | |||
| 195 | void nvhost_as_gpu::FreeMappingLocked(u64 offset) { | ||
| 196 | auto mapping{mapping_map.at(offset)}; | ||
| 197 | |||
| 198 | if (!mapping->fixed) { | ||
| 199 | auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | ||
| 200 | u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | ||
| 201 | |||
| 202 | allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits), | ||
| 203 | static_cast<u32>(mapping->size >> page_size_bits)); | ||
| 204 | } | ||
| 205 | |||
| 206 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state | ||
| 207 | // Only FreeSpace can unmap them fully | ||
| 208 | if (mapping->sparse_alloc) | ||
| 209 | gmmu->MapSparse(offset, mapping->size); | ||
| 210 | else | ||
| 211 | gmmu->Unmap(offset, mapping->size); | ||
| 212 | |||
| 213 | mapping_map.erase(offset); | ||
| 124 | } | 214 | } |
| 125 | 215 | ||
| 126 | NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) { | 216 | NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) { |
| @@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& | |||
| 130 | LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, | 220 | LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, |
| 131 | params.pages, params.page_size); | 221 | params.pages, params.page_size); |
| 132 | 222 | ||
| 133 | gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size); | 223 | std::scoped_lock lock(mutex); |
| 224 | |||
| 225 | if (!vm.initialised) { | ||
| 226 | return NvResult::BadValue; | ||
| 227 | } | ||
| 228 | |||
| 229 | try { | ||
| 230 | auto allocation{allocation_map[params.offset]}; | ||
| 231 | |||
| 232 | if (allocation.page_size != params.page_size || | ||
| 233 | allocation.size != (static_cast<u64>(params.pages) * params.page_size)) { | ||
| 234 | return NvResult::BadValue; | ||
| 235 | } | ||
| 236 | |||
| 237 | for (const auto& mapping : allocation.mappings) { | ||
| 238 | FreeMappingLocked(mapping->offset); | ||
| 239 | } | ||
| 240 | |||
| 241 | // Unset sparse flag if required | ||
| 242 | if (allocation.sparse) { | ||
| 243 | gmmu->Unmap(params.offset, allocation.size); | ||
| 244 | } | ||
| 245 | |||
| 246 | auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator | ||
| 247 | : *vm.big_page_allocator}; | ||
| 248 | u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS | ||
| 249 | : vm.big_page_size_bits}; | ||
| 250 | |||
| 251 | allocator.Free(static_cast<u32>(params.offset >> page_size_bits), | ||
| 252 | static_cast<u32>(allocation.size >> page_size_bits)); | ||
| 253 | allocation_map.erase(params.offset); | ||
| 254 | } catch ([[maybe_unused]] const std::out_of_range& e) { | ||
| 255 | return NvResult::BadValue; | ||
| 256 | } | ||
| 134 | 257 | ||
| 135 | std::memcpy(output.data(), ¶ms, output.size()); | 258 | std::memcpy(output.data(), ¶ms, output.size()); |
| 136 | return NvResult::Success; | 259 | return NvResult::Success; |
| @@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out | |||
| 141 | 264 | ||
| 142 | LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); | 265 | LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); |
| 143 | 266 | ||
| 144 | auto result = NvResult::Success; | ||
| 145 | std::vector<IoctlRemapEntry> entries(num_entries); | 267 | std::vector<IoctlRemapEntry> entries(num_entries); |
| 146 | std::memcpy(entries.data(), input.data(), input.size()); | 268 | std::memcpy(entries.data(), input.data(), input.size()); |
| 147 | 269 | ||
| 270 | std::scoped_lock lock(mutex); | ||
| 271 | |||
| 272 | if (!vm.initialised) { | ||
| 273 | return NvResult::BadValue; | ||
| 274 | } | ||
| 275 | |||
| 148 | for (const auto& entry : entries) { | 276 | for (const auto& entry : entries) { |
| 149 | LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", | 277 | GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages) |
| 150 | entry.offset, entry.nvmap_handle, entry.pages); | 278 | << vm.big_page_size_bits}; |
| 151 | 279 | u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits}; | |
| 152 | if (entry.nvmap_handle == 0) { | 280 | |
| 153 | // If nvmap handle is null, we should unmap instead. | 281 | auto alloc{allocation_map.upper_bound(virtual_address)}; |
| 154 | const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; | 282 | |
| 155 | const auto size{static_cast<u64>(entry.pages) << 0x10}; | 283 | if (alloc-- == allocation_map.begin() || |
| 156 | gmmu->Unmap(offset, size); | 284 | (virtual_address - alloc->first) + size > alloc->second.size) { |
| 157 | continue; | 285 | LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!"); |
| 286 | return NvResult::BadValue; | ||
| 158 | } | 287 | } |
| 159 | 288 | ||
| 160 | const auto object{nvmap.GetHandle(entry.nvmap_handle)}; | 289 | if (!alloc->second.sparse) { |
| 161 | if (!object) { | 290 | LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!"); |
| 162 | LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); | 291 | return NvResult::BadValue; |
| 163 | result = NvResult::InvalidState; | ||
| 164 | break; | ||
| 165 | } | 292 | } |
| 166 | 293 | ||
| 167 | const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; | 294 | if (!entry.handle) { |
| 168 | const auto size{static_cast<u64>(entry.pages) << 0x10}; | 295 | gmmu->MapSparse(virtual_address, size); |
| 169 | const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10}; | 296 | } else { |
| 170 | const auto addr{gmmu->Map(object->address + map_offset, offset, size)}; | 297 | auto handle{nvmap.GetHandle(entry.handle)}; |
| 298 | if (!handle) { | ||
| 299 | return NvResult::BadValue; | ||
| 300 | } | ||
| 171 | 301 | ||
| 172 | if (!addr) { | 302 | VAddr cpu_address{static_cast<VAddr>( |
| 173 | LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); | 303 | handle->address + |
| 174 | result = NvResult::InvalidState; | 304 | (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; |
| 175 | break; | 305 | |
| 306 | gmmu->Map(virtual_address, cpu_address, size); | ||
| 176 | } | 307 | } |
| 177 | } | 308 | } |
| 178 | 309 | ||
| 179 | std::memcpy(output.data(), entries.data(), output.size()); | 310 | std::memcpy(output.data(), entries.data(), output.size()); |
| 180 | return result; | 311 | return NvResult::Success; |
| 181 | } | 312 | } |
| 182 | 313 | ||
| 183 | NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { | 314 | NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { |
| @@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8 | |||
| 187 | LOG_DEBUG(Service_NVDRV, | 318 | LOG_DEBUG(Service_NVDRV, |
| 188 | "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" | 319 | "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" |
| 189 | ", offset={}", | 320 | ", offset={}", |
| 190 | params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, | 321 | params.flags, params.handle, params.buffer_offset, params.mapping_size, |
| 191 | params.offset); | 322 | params.offset); |
| 192 | 323 | ||
| 193 | if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) { | 324 | std::scoped_lock lock(mutex); |
| 194 | if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) { | 325 | |
| 195 | const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)}; | 326 | if (!vm.initialised) { |
| 196 | const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)}; | 327 | return NvResult::BadValue; |
| 328 | } | ||
| 197 | 329 | ||
| 198 | if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) { | 330 | // Remaps a subregion of an existing mapping to a different PA |
| 199 | LOG_CRITICAL(Service_NVDRV, | 331 | if ((params.flags & MappingFlags::Remap) != MappingFlags::None) { |
| 200 | "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " | 332 | try { |
| 201 | "mapping_size = {}, offset={}", | 333 | auto mapping{mapping_map.at(params.offset)}; |
| 202 | params.flags, params.nvmap_handle, params.buffer_offset, | ||
| 203 | params.mapping_size, params.offset); | ||
| 204 | 334 | ||
| 205 | std::memcpy(output.data(), ¶ms, output.size()); | 335 | if (mapping->size < params.mapping_size) { |
| 206 | return NvResult::InvalidState; | 336 | LOG_WARNING(Service_NVDRV, |
| 337 | "Cannot remap a partially mapped GPU address space region: 0x{:X}", | ||
| 338 | params.offset); | ||
| 339 | return NvResult::BadValue; | ||
| 207 | } | 340 | } |
| 208 | 341 | ||
| 209 | std::memcpy(output.data(), ¶ms, output.size()); | 342 | u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; |
| 210 | return NvResult::Success; | 343 | VAddr cpu_address{mapping->ptr + params.buffer_offset}; |
| 211 | } else { | 344 | |
| 212 | LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset); | 345 | gmmu->Map(gpu_address, cpu_address, params.mapping_size); |
| 213 | 346 | ||
| 214 | std::memcpy(output.data(), ¶ms, output.size()); | 347 | return NvResult::Success; |
| 215 | return NvResult::InvalidState; | 348 | } catch ([[maybe_unused]] const std::out_of_range& e) { |
| 349 | LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}", | ||
| 350 | params.offset); | ||
| 351 | return NvResult::BadValue; | ||
| 216 | } | 352 | } |
| 217 | } | 353 | } |
| 218 | 354 | ||
| 219 | const auto object{nvmap.GetHandle(params.nvmap_handle)}; | 355 | auto handle{nvmap.GetHandle(params.handle)}; |
| 220 | if (!object) { | 356 | if (!handle) { |
| 221 | LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); | 357 | return NvResult::BadValue; |
| 222 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 223 | return NvResult::InvalidState; | ||
| 224 | } | 358 | } |
| 225 | 359 | ||
| 226 | // The real nvservices doesn't make a distinction between handles and ids, and | 360 | VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; |
| 227 | // object can only have one handle and it will be the same as its id. Assert that this is the | 361 | u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; |
| 228 | // case to prevent unexpected behavior. | ||
| 229 | ASSERT(object->id == params.nvmap_handle); | ||
| 230 | 362 | ||
| 231 | u64 page_size{params.page_size}; | 363 | if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) { |
| 232 | if (!page_size) { | 364 | auto alloc{allocation_map.upper_bound(params.offset)}; |
| 233 | page_size = object->align; | ||
| 234 | } | ||
| 235 | 365 | ||
| 236 | const auto physical_address{object->address + params.buffer_offset}; | 366 | if (alloc-- == allocation_map.begin() || |
| 237 | u64 size{params.mapping_size}; | 367 | (params.offset - alloc->first) + size > alloc->second.size) { |
| 238 | if (!size) { | 368 | UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!"); |
| 239 | size = object->size; | 369 | return NvResult::BadValue; |
| 240 | } | 370 | } |
| 241 | 371 | ||
| 242 | const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; | 372 | gmmu->Map(params.offset, cpu_address, size); |
| 243 | if (is_alloc) { | ||
| 244 | params.offset = gmmu->MapAllocate(physical_address, size, page_size); | ||
| 245 | } else { | ||
| 246 | params.offset = gmmu->Map(physical_address, params.offset, size); | ||
| 247 | } | ||
| 248 | 373 | ||
| 249 | auto result = NvResult::Success; | 374 | auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false, |
| 250 | if (!params.offset) { | 375 | alloc->second.sparse)}; |
| 251 | LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); | 376 | alloc->second.mappings.push_back(mapping); |
| 252 | result = NvResult::InvalidState; | 377 | mapping_map[params.offset] = mapping; |
| 253 | } else { | 378 | } else { |
| 254 | AddBufferMap(params.offset, size, physical_address, is_alloc); | 379 | bool big_page{[&]() { |
| 380 | if (Common::IsAligned(handle->align, vm.big_page_size)) | ||
| 381 | return true; | ||
| 382 | else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE)) | ||
| 383 | return false; | ||
| 384 | else { | ||
| 385 | UNREACHABLE(); | ||
| 386 | return false; | ||
| 387 | } | ||
| 388 | }()}; | ||
| 389 | |||
| 390 | auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | ||
| 391 | u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; | ||
| 392 | u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | ||
| 393 | |||
| 394 | params.offset = static_cast<u64>(allocator.Allocate( | ||
| 395 | static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits))) | ||
| 396 | << page_size_bits; | ||
| 397 | if (!params.offset) { | ||
| 398 | UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!"); | ||
| 399 | return NvResult::InsufficientMemory; | ||
| 400 | } | ||
| 401 | |||
| 402 | gmmu->Map(params.offset, cpu_address, size); | ||
| 403 | |||
| 404 | auto mapping{ | ||
| 405 | std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; | ||
| 406 | mapping_map[params.offset] = mapping; | ||
| 255 | } | 407 | } |
| 256 | 408 | ||
| 257 | std::memcpy(output.data(), ¶ms, output.size()); | 409 | std::memcpy(output.data(), ¶ms, output.size()); |
| 258 | return result; | 410 | return NvResult::Success; |
| 259 | } | 411 | } |
| 260 | 412 | ||
| 261 | NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { | 413 | NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { |
| @@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8 | |||
| 264 | 416 | ||
| 265 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); | 417 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); |
| 266 | 418 | ||
| 267 | if (const auto size{RemoveBufferMap(params.offset)}; size) { | 419 | std::scoped_lock lock(mutex); |
| 268 | gmmu->Unmap(params.offset, *size); | 420 | |
| 269 | } else { | 421 | if (!vm.initialised) { |
| 270 | LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); | 422 | return NvResult::BadValue; |
| 423 | } | ||
| 424 | |||
| 425 | try { | ||
| 426 | auto mapping{mapping_map.at(params.offset)}; | ||
| 427 | |||
| 428 | if (!mapping->fixed) { | ||
| 429 | auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | ||
| 430 | u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | ||
| 431 | |||
| 432 | allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits), | ||
| 433 | static_cast<u32>(mapping->size >> page_size_bits)); | ||
| 434 | } | ||
| 435 | |||
| 436 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state | ||
| 437 | // Only FreeSpace can unmap them fully | ||
| 438 | if (mapping->sparse_alloc) { | ||
| 439 | gmmu->MapSparse(params.offset, mapping->size); | ||
| 440 | } else { | ||
| 441 | gmmu->Unmap(params.offset, mapping->size); | ||
| 442 | } | ||
| 443 | |||
| 444 | mapping_map.erase(params.offset); | ||
| 445 | } catch ([[maybe_unused]] const std::out_of_range& e) { | ||
| 446 | LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); | ||
| 271 | } | 447 | } |
| 272 | 448 | ||
| 273 | std::memcpy(output.data(), ¶ms, output.size()); | ||
| 274 | return NvResult::Success; | 449 | return NvResult::Success; |
| 275 | } | 450 | } |
| 276 | 451 | ||
| @@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8 | |||
| 284 | return NvResult::Success; | 459 | return NvResult::Success; |
| 285 | } | 460 | } |
| 286 | 461 | ||
| 462 | void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { | ||
| 463 | params.buf_size = 2 * sizeof(VaRegion); | ||
| 464 | |||
| 465 | params.regions = std::array<VaRegion, 2>{ | ||
| 466 | VaRegion{ | ||
| 467 | .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS, | ||
| 468 | .page_size = VM::YUZU_PAGESIZE, | ||
| 469 | .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart, | ||
| 470 | }, | ||
| 471 | VaRegion{ | ||
| 472 | .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits, | ||
| 473 | .page_size = vm.big_page_size, | ||
| 474 | .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart, | ||
| 475 | }, | ||
| 476 | }; | ||
| 477 | } | ||
| 478 | |||
| 287 | NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { | 479 | NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { |
| 288 | IoctlGetVaRegions params{}; | 480 | IoctlGetVaRegions params{}; |
| 289 | std::memcpy(¶ms, input.data(), input.size()); | 481 | std::memcpy(¶ms, input.data(), input.size()); |
| 290 | 482 | ||
| 291 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, | 483 | LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, |
| 292 | params.buf_size); | 484 | params.buf_size); |
| 293 | |||
| 294 | params.buf_size = 0x30; | ||
| 295 | 485 | ||
| 296 | params.small = IoctlVaRegion{ | 486 | std::scoped_lock lock(mutex); |
| 297 | .offset = 0x04000000, | ||
| 298 | .page_size = DEFAULT_SMALL_PAGE_SIZE, | ||
| 299 | .pages = 0x3fbfff, | ||
| 300 | }; | ||
| 301 | 487 | ||
| 302 | params.big = IoctlVaRegion{ | 488 | if (!vm.initialised) { |
| 303 | .offset = 0x04000000, | 489 | return NvResult::BadValue; |
| 304 | .page_size = big_page_size, | 490 | } |
| 305 | .pages = 0x1bffff, | ||
| 306 | }; | ||
| 307 | 491 | ||
| 308 | // TODO(ogniK): This probably can stay stubbed but should add support way way later | 492 | GetVARegionsImpl(params); |
| 309 | 493 | ||
| 310 | std::memcpy(output.data(), ¶ms, output.size()); | 494 | std::memcpy(output.data(), ¶ms, output.size()); |
| 311 | return NvResult::Success; | 495 | return NvResult::Success; |
| @@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u | |||
| 316 | IoctlGetVaRegions params{}; | 500 | IoctlGetVaRegions params{}; |
| 317 | std::memcpy(¶ms, input.data(), input.size()); | 501 | std::memcpy(¶ms, input.data(), input.size()); |
| 318 | 502 | ||
| 319 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, | 503 | LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, |
| 320 | params.buf_size); | 504 | params.buf_size); |
| 321 | 505 | ||
| 322 | params.buf_size = 0x30; | 506 | std::scoped_lock lock(mutex); |
| 323 | |||
| 324 | params.small = IoctlVaRegion{ | ||
| 325 | .offset = 0x04000000, | ||
| 326 | .page_size = 0x1000, | ||
| 327 | .pages = 0x3fbfff, | ||
| 328 | }; | ||
| 329 | 507 | ||
| 330 | params.big = IoctlVaRegion{ | 508 | if (!vm.initialised) { |
| 331 | .offset = 0x04000000, | 509 | return NvResult::BadValue; |
| 332 | .page_size = big_page_size, | 510 | } |
| 333 | .pages = 0x1bffff, | ||
| 334 | }; | ||
| 335 | 511 | ||
| 336 | // TODO(ogniK): This probably can stay stubbed but should add support way way later | 512 | GetVARegionsImpl(params); |
| 337 | 513 | ||
| 338 | std::memcpy(output.data(), ¶ms, output.size()); | 514 | std::memcpy(output.data(), ¶ms, output.size()); |
| 339 | std::memcpy(inline_output.data(), ¶ms.small, sizeof(IoctlVaRegion)); | 515 | std::memcpy(inline_output.data(), ¶ms.regions[0], sizeof(VaRegion)); |
| 340 | std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), ¶ms.big, sizeof(IoctlVaRegion)); | 516 | std::memcpy(inline_output.data() + sizeof(VaRegion), ¶ms.regions[1], sizeof(VaRegion)); |
| 341 | 517 | ||
| 342 | return NvResult::Success; | 518 | return NvResult::Success; |
| 343 | } | 519 | } |
| 344 | 520 | ||
| 345 | std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const { | ||
| 346 | const auto end{buffer_mappings.upper_bound(gpu_addr)}; | ||
| 347 | for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) { | ||
| 348 | if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) { | ||
| 349 | return iter->second; | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | return std::nullopt; | ||
| 354 | } | ||
| 355 | |||
| 356 | void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, | ||
| 357 | bool is_allocated) { | ||
| 358 | buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated}; | ||
| 359 | } | ||
| 360 | |||
| 361 | std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) { | ||
| 362 | if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) { | ||
| 363 | std::size_t size{}; | ||
| 364 | |||
| 365 | if (iter->second.IsAllocated()) { | ||
| 366 | size = iter->second.Size(); | ||
| 367 | } | ||
| 368 | |||
| 369 | buffer_mappings.erase(iter); | ||
| 370 | |||
| 371 | return size; | ||
| 372 | } | ||
| 373 | |||
| 374 | return std::nullopt; | ||
| 375 | } | ||
| 376 | |||
| 377 | Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { | 521 | Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { |
| 378 | LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); | 522 | LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); |
| 379 | return nullptr; | 523 | return nullptr; |
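Throughout the rewritten ioctl handlers the allocators work in page units while the ioctl structures carry GPU virtual addresses, so each call site shifts by `PAGE_SIZE_BITS` or `big_page_size_bits`. The standalone sketch below illustrates that arithmetic; the 4 KiB and 64 KiB sizes are taken from the old `DEFAULT_SMALL_PAGE_SIZE`/`DEFAULT_BIG_PAGE_SIZE` constants this commit removes and stand in for `VM::YUZU_PAGESIZE` and the default big page size.

```cpp
#include <bit>
#include <cstdint>
#include <iostream>

int main() {
    constexpr std::uint32_t small_page_size{1u << 12}; // old DEFAULT_SMALL_PAGE_SIZE
    constexpr std::uint32_t big_page_size{1u << 16};   // old DEFAULT_BIG_PAGE_SIZE

    // AllocAsEx derives the shift the same way: countr_zero of a
    // power-of-two page size (validated with std::has_single_bit).
    const auto small_page_bits{static_cast<std::uint32_t>(std::countr_zero(small_page_size))};
    const auto big_page_bits{static_cast<std::uint32_t>(std::countr_zero(big_page_size))};

    // AllocateSpace: the allocator returns a page index, the ioctl reports a VA.
    const std::uint32_t page_index{0x400};
    const std::uint64_t offset{static_cast<std::uint64_t>(page_index) << big_page_bits};

    // FreeSpace/UnmapBuffer: convert the VA and byte size back into page units.
    const std::uint64_t size{0x20ull * big_page_size};
    const auto first_page{static_cast<std::uint32_t>(offset >> big_page_bits)};
    const auto page_count{static_cast<std::uint32_t>(size >> big_page_bits)};

    std::cout << std::hex << "offset=0x" << offset << " first_page=0x" << first_page
              << " pages=0x" << page_count << " small_bits=" << std::dec << small_page_bits
              << '\n';
}
```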
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index f5fb33ba7..1d27739e2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -5,14 +5,19 @@ | |||
| 5 | 5 | ||
| 6 | #pragma once | 6 | #pragma once |
| 7 | 7 | ||
| 8 | #include <bit> | ||
| 9 | #include <list> | ||
| 8 | #include <map> | 10 | #include <map> |
| 9 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 10 | #include <optional> | 13 | #include <optional> |
| 11 | #include <vector> | 14 | #include <vector> |
| 12 | 15 | ||
| 16 | #include "common/address_space.h" | ||
| 13 | #include "common/common_funcs.h" | 17 | #include "common/common_funcs.h" |
| 14 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 15 | #include "common/swap.h" | 19 | #include "common/swap.h" |
| 20 | #include "core/hle/service/nvdrv/core/nvmap.h" | ||
| 16 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 21 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 17 | 22 | ||
| 18 | namespace Tegra { | 23 | namespace Tegra { |
| @@ -30,17 +35,13 @@ class NvMap; | |||
| 30 | 35 | ||
| 31 | namespace Service::Nvidia::Devices { | 36 | namespace Service::Nvidia::Devices { |
| 32 | 37 | ||
| 33 | constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16; | 38 | enum class MappingFlags : u32 { |
| 34 | constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12; | 39 | None = 0, |
| 35 | 40 | Fixed = 1 << 0, | |
| 36 | class nvmap; | 41 | Sparse = 1 << 1, |
| 37 | 42 | Remap = 1 << 8, | |
| 38 | enum class AddressSpaceFlags : u32 { | ||
| 39 | None = 0x0, | ||
| 40 | FixedOffset = 0x1, | ||
| 41 | Remap = 0x100, | ||
| 42 | }; | 43 | }; |
| 43 | DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); | 44 | DECLARE_ENUM_FLAG_OPERATORS(MappingFlags); |
| 44 | 45 | ||
| 45 | class nvhost_as_gpu final : public nvdevice { | 46 | class nvhost_as_gpu final : public nvdevice { |
| 46 | public: | 47 | public: |
| @@ -59,46 +60,15 @@ public: | |||
| 59 | 60 | ||
| 60 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 61 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
| 61 | 62 | ||
| 62 | private: | 63 | struct VaRegion { |
| 63 | class BufferMap final { | 64 | u64 offset; |
| 64 | public: | 65 | u32 page_size; |
| 65 | constexpr BufferMap() = default; | 66 | u32 _pad0_; |
| 66 | 67 | u64 pages; | |
| 67 | constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_) | ||
| 68 | : start_addr{start_addr_}, end_addr{start_addr_ + size_} {} | ||
| 69 | |||
| 70 | constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_, | ||
| 71 | bool is_allocated_) | ||
| 72 | : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_}, | ||
| 73 | is_allocated{is_allocated_} {} | ||
| 74 | |||
| 75 | constexpr VAddr StartAddr() const { | ||
| 76 | return start_addr; | ||
| 77 | } | ||
| 78 | |||
| 79 | constexpr VAddr EndAddr() const { | ||
| 80 | return end_addr; | ||
| 81 | } | ||
| 82 | |||
| 83 | constexpr std::size_t Size() const { | ||
| 84 | return end_addr - start_addr; | ||
| 85 | } | ||
| 86 | |||
| 87 | constexpr VAddr CpuAddr() const { | ||
| 88 | return cpu_addr; | ||
| 89 | } | ||
| 90 | |||
| 91 | constexpr bool IsAllocated() const { | ||
| 92 | return is_allocated; | ||
| 93 | } | ||
| 94 | |||
| 95 | private: | ||
| 96 | GPUVAddr start_addr{}; | ||
| 97 | GPUVAddr end_addr{}; | ||
| 98 | VAddr cpu_addr{}; | ||
| 99 | bool is_allocated{}; | ||
| 100 | }; | 68 | }; |
| 69 | static_assert(sizeof(VaRegion) == 0x18); | ||
| 101 | 70 | ||
| 71 | private: | ||
| 102 | struct IoctlAllocAsEx { | 72 | struct IoctlAllocAsEx { |
| 103 | u32_le flags{}; // usually passes 1 | 73 | u32_le flags{}; // usually passes 1 |
| 104 | s32_le as_fd{}; // ignored; passes 0 | 74 | s32_le as_fd{}; // ignored; passes 0 |
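The new public VaRegion struct replaces the removed BufferMap helper and the later IoctlVaRegion; its explicit `_pad0_` keeps `pages` 8-byte aligned so the struct stays exactly 0x18 bytes, matching the static_assert in the diff. A standalone layout check under standard alignment rules on common 64-bit ABIs (field names copied from the diff, offsets inferred):

```cpp
#include <cstddef>
#include <cstdint>

struct VaRegion {
    std::uint64_t offset;
    std::uint32_t page_size;
    std::uint32_t _pad0_; // without this, the compiler would insert the same padding implicitly
    std::uint64_t pages;
};

static_assert(sizeof(VaRegion) == 0x18);
static_assert(offsetof(VaRegion, page_size) == 0x08);
static_assert(offsetof(VaRegion, pages) == 0x10);

int main() {} // compiles only if the layout matches the expected ioctl ABI
```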
| @@ -113,7 +83,7 @@ private: | |||
| 113 | struct IoctlAllocSpace { | 83 | struct IoctlAllocSpace { |
| 114 | u32_le pages{}; | 84 | u32_le pages{}; |
| 115 | u32_le page_size{}; | 85 | u32_le page_size{}; |
| 116 | AddressSpaceFlags flags{}; | 86 | MappingFlags flags{}; |
| 117 | INSERT_PADDING_WORDS(1); | 87 | INSERT_PADDING_WORDS(1); |
| 118 | union { | 88 | union { |
| 119 | u64_le offset; | 89 | u64_le offset; |
| @@ -130,19 +100,19 @@ private: | |||
| 130 | static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); | 100 | static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); |
| 131 | 101 | ||
| 132 | struct IoctlRemapEntry { | 102 | struct IoctlRemapEntry { |
| 133 | u16_le flags{}; | 103 | u16 flags; |
| 134 | u16_le kind{}; | 104 | u16 kind; |
| 135 | u32_le nvmap_handle{}; | 105 | NvCore::NvMap::Handle::Id handle; |
| 136 | u32_le map_offset{}; | 106 | u32 handle_offset_big_pages; |
| 137 | u32_le offset{}; | 107 | u32 as_offset_big_pages; |
| 138 | u32_le pages{}; | 108 | u32 big_pages; |
| 139 | }; | 109 | }; |
| 140 | static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); | 110 | static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); |
| 141 | 111 | ||
| 142 | struct IoctlMapBufferEx { | 112 | struct IoctlMapBufferEx { |
| 143 | AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable | 113 | MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable |
| 144 | u32_le kind{}; // -1 is default | 114 | u32_le kind{}; // -1 is default |
| 145 | u32_le nvmap_handle{}; | 115 | NvCore::NvMap::Handle::Id handle; |
| 146 | u32_le page_size{}; // 0 means don't care | 116 | u32_le page_size{}; // 0 means don't care |
| 147 | s64_le buffer_offset{}; | 117 | s64_le buffer_offset{}; |
| 148 | u64_le mapping_size{}; | 118 | u64_le mapping_size{}; |
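In the reworked IoctlRemapEntry, `handle_offset_big_pages`, `as_offset_big_pages` and `big_pages` are all expressed in big-page units rather than bytes, so consuming an entry reduces to shifting by the big-page-size bit count. A hedged sketch of that conversion only; the `ToBytes` helper, field grouping, and the 64 KiB big page are illustrative assumptions, not code from this commit:

```cpp
#include <bit>
#include <cstdint>
#include <iostream>

struct RemapEntryUnits {
    std::uint32_t handle_offset_big_pages;
    std::uint32_t as_offset_big_pages;
    std::uint32_t big_pages;
};

struct RemapEntryBytes {
    std::uint64_t handle_offset;
    std::uint64_t gpu_va;
    std::uint64_t size;
};

// Hypothetical helper: turn big-page units into byte quantities.
RemapEntryBytes ToBytes(const RemapEntryUnits& e, std::uint32_t big_page_size) {
    const std::uint32_t shift = std::countr_zero(big_page_size); // big page sizes are powers of two
    return {
        .handle_offset = static_cast<std::uint64_t>(e.handle_offset_big_pages) << shift,
        .gpu_va = static_cast<std::uint64_t>(e.as_offset_big_pages) << shift,
        .size = static_cast<std::uint64_t>(e.big_pages) << shift,
    };
}

int main() {
    const RemapEntryUnits entry{.handle_offset_big_pages = 2, .as_offset_big_pages = 0x100, .big_pages = 4};
    const RemapEntryBytes b = ToBytes(entry, 0x10000); // assuming a 64 KiB big page
    std::cout << std::hex << b.handle_offset << ' ' << b.gpu_va << ' ' << b.size << '\n';
    // prints: 20000 1000000 40000
}
```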
| @@ -160,27 +130,15 @@ private: | |||
| 160 | }; | 130 | }; |
| 161 | static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); | 131 | static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); |
| 162 | 132 | ||
| 163 | struct IoctlVaRegion { | ||
| 164 | u64_le offset{}; | ||
| 165 | u32_le page_size{}; | ||
| 166 | INSERT_PADDING_WORDS(1); | ||
| 167 | u64_le pages{}; | ||
| 168 | }; | ||
| 169 | static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size"); | ||
| 170 | |||
| 171 | struct IoctlGetVaRegions { | 133 | struct IoctlGetVaRegions { |
| 172 | u64_le buf_addr{}; // (contained output user ptr on linux, ignored) | 134 | u64_le buf_addr{}; // (contained output user ptr on linux, ignored) |
| 173 | u32_le buf_size{}; // forced to 2*sizeof(struct va_region) | 135 | u32_le buf_size{}; // forced to 2*sizeof(struct va_region) |
| 174 | u32_le reserved{}; | 136 | u32_le reserved{}; |
| 175 | IoctlVaRegion small{}; | 137 | std::array<VaRegion, 2> regions{}; |
| 176 | IoctlVaRegion big{}; | ||
| 177 | }; | 138 | }; |
| 178 | static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, | 139 | static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, |
| 179 | "IoctlGetVaRegions is incorrect size"); | 140 | "IoctlGetVaRegions is incorrect size"); |
| 180 | 141 | ||
| 181 | s32 channel{}; | ||
| 182 | u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; | ||
| 183 | |||
| 184 | NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output); | 142 | NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output); |
| 185 | NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); | 143 | NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); |
| 186 | NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output); | 144 | NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output); |
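With IoctlVaRegion folded into VaRegion, IoctlGetVaRegions now carries both regions (small pages, then big pages) as a flat std::array, so serializing the reply comes down to a single memcpy of the parameter block. Below is a minimal sketch of how such a reply could be assembled; the `Fill` helper and the concrete values are assumptions for illustration, not the commit's GetVARegionsImpl:

```cpp
#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

struct VaRegion {
    std::uint64_t offset;
    std::uint32_t page_size;
    std::uint32_t _pad0_;
    std::uint64_t pages;
};
static_assert(sizeof(VaRegion) == 0x18);

struct IoctlGetVaRegions {
    std::uint64_t buf_addr; // user pointer on Linux, ignored here
    std::uint32_t buf_size; // forced to 2 * sizeof(VaRegion)
    std::uint32_t reserved;
    std::array<VaRegion, 2> regions;
};
static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2);

// Illustrative: one small-page region followed by one big-page region.
void Fill(IoctlGetVaRegions& params, std::uint64_t va_start, std::uint64_t va_split,
          std::uint64_t va_end, std::uint32_t small_page, std::uint32_t big_page) {
    params.buf_size = 2 * sizeof(VaRegion);
    params.regions[0] = {va_start, small_page, 0, (va_split - va_start) / small_page};
    params.regions[1] = {va_split, big_page, 0, (va_end - va_split) / big_page};
}

int main() {
    IoctlGetVaRegions params{};
    Fill(params, 0x400000, 1ULL << 34, 1ULL << 37, 0x1000, 0x20000);

    std::vector<std::uint8_t> output(sizeof(IoctlGetVaRegions));
    std::memcpy(output.data(), &params, output.size()); // what writing the ioctl reply boils down to
}
```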
| @@ -189,23 +147,74 @@ private: | |||
| 189 | NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output); | 147 | NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output); |
| 190 | NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output); | 148 | NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output); |
| 191 | 149 | ||
| 150 | void GetVARegionsImpl(IoctlGetVaRegions& params); | ||
| 192 | NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); | 151 | NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); |
| 193 | NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output, | 152 | NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output, |
| 194 | std::vector<u8>& inline_output); | 153 | std::vector<u8>& inline_output); |
| 195 | 154 | ||
| 196 | std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; | 155 | void FreeMappingLocked(u64 offset); |
| 197 | void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated); | ||
| 198 | std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr); | ||
| 199 | 156 | ||
| 200 | Module& module; | 157 | Module& module; |
| 201 | 158 | ||
| 202 | NvCore::Container& container; | 159 | NvCore::Container& container; |
| 203 | NvCore::NvMap& nvmap; | 160 | NvCore::NvMap& nvmap; |
| 204 | 161 | ||
| 162 | struct Mapping { | ||
| 163 | VAddr ptr; | ||
| 164 | u64 offset; | ||
| 165 | u64 size; | ||
| 166 | bool fixed; | ||
| 167 | bool big_page; // Only valid if fixed == false | ||
| 168 | bool sparse_alloc; | ||
| 169 | |||
| 170 | Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) | ||
| 171 | : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), | ||
| 172 | sparse_alloc(sparse_alloc_) {} | ||
| 173 | }; | ||
| 174 | |||
| 175 | struct Allocation { | ||
| 176 | u64 size; | ||
| 177 | std::list<std::shared_ptr<Mapping>> mappings; | ||
| 178 | u32 page_size; | ||
| 179 | bool sparse; | ||
| 180 | }; | ||
| 181 | |||
| 182 | std::map<u64, std::shared_ptr<Mapping>> | ||
| 183 | mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and | ||
| 184 | //!< mapping type; this is needed as what was originally a single buffer may | ||
| 185 | //!< have been split into multiple GPU side buffers with the remap flag. | ||
| 186 | std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from | ||
| 187 | //!< which fixed buffers can be mapped into | ||
| 188 | std::mutex mutex; //!< Locks all AS operations | ||
| 189 | |||
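The two maps above track different things: allocation_map records the address-space reservations created by AllocateSpace (keyed by base VA), while mapping_map records every buffer piece actually mapped, including the fragments a Remap call splits a buffer into; an Allocation keeps shared_ptrs to the Mappings placed inside it so freeing the reservation can tear them down. A simplified, lock-free sketch of that bookkeeping with plain structs and made-up values, not the class's real members:

```cpp
#include <cstdint>
#include <list>
#include <map>
#include <memory>

struct Mapping {
    std::uint64_t ptr;    // CPU address backing the mapping
    std::uint64_t offset; // GPU VA
    std::uint64_t size;
    bool fixed;
    bool big_page;
    bool sparse_alloc;
};

struct Allocation {
    std::uint64_t size;
    std::list<std::shared_ptr<Mapping>> mappings;
    std::uint32_t page_size;
    bool sparse;
};

int main() {
    std::map<std::uint64_t, Allocation> allocation_map;            // AllocateSpace reservations
    std::map<std::uint64_t, std::shared_ptr<Mapping>> mapping_map; // every mapped buffer piece

    // A sparse reservation of 4 big pages at GPU VA 0x100000 (illustrative numbers).
    constexpr std::uint64_t alloc_va = 0x100000;
    allocation_map[alloc_va] = {4 * 0x20000, {}, 0x20000, true};

    // Two fixed mappings placed inside it, e.g. the result of a remap splitting a buffer.
    auto& alloc = allocation_map[alloc_va];
    for (std::uint64_t piece = 0; piece < 2; ++piece) {
        const std::uint64_t va = alloc_va + piece * 0x20000;
        auto mapping = std::make_shared<Mapping>(Mapping{0, va, 0x20000, true, true, alloc.sparse});
        alloc.mappings.push_back(mapping);
        mapping_map[va] = std::move(mapping);
    }

    // Freeing the reservation later walks alloc.mappings and erases each VA from mapping_map.
}
```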
| 190 | struct VM { | ||
| 191 | static constexpr u32 YUZU_PAGESIZE{0x1000}; | ||
| 192 | static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)}; | ||
| 193 | |||
| 194 | static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; | ||
| 195 | static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000}; | ||
| 196 | u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; | ||
| 197 | u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)}; | ||
| 198 | |||
| 199 | static constexpr u32 VA_START_SHIFT{10}; | ||
| 200 | static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34}; | ||
| 201 | static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37}; | ||
| 202 | u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT}; | ||
| 203 | u64 va_range_split{DEFAULT_VA_SPLIT}; | ||
| 204 | u64 va_range_end{DEFAULT_VA_RANGE}; | ||
| 205 | |||
| 206 | using Allocator = Common::FlatAllocator<u32, 0, 32>; | ||
| 207 | |||
| 208 | std::unique_ptr<Allocator> big_page_allocator; | ||
| 209 | std::shared_ptr<Allocator> | ||
| 210 | small_page_allocator; //!< Shared as this is also used by nvhost::GpuChannel | ||
| 211 | |||
| 212 | bool initialised{}; | ||
| 213 | } vm; | ||
| 205 | std::shared_ptr<Tegra::MemoryManager> gmmu; | 214 | std::shared_ptr<Tegra::MemoryManager> gmmu; |
| 206 | 215 | ||
| 207 | // This is expected to be ordered, therefore we must use a map, not unordered_map | 216 | // s32 channel{}; |
| 208 | std::map<GPUVAddr, BufferMap> buffer_mappings; | 217 | // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; |
| 209 | }; | 218 | }; |
| 210 | 219 | ||
| 211 | } // namespace Service::Nvidia::Devices | 220 | } // namespace Service::Nvidia::Devices |
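The VM block replaces the loose `channel`/`big_page_size` members with per-address-space state: page-size bit counts derived with std::countr_zero and a VA range whose start is the big page size shifted by VA_START_SHIFT. A standalone check of that arithmetic using the defaults from the diff (the printout is illustrative; the real values are held as members of `vm`):

```cpp
#include <bit>
#include <cstdint>
#include <iostream>

int main() {
    constexpr std::uint32_t page_size = 0x1000;      // YUZU_PAGESIZE
    constexpr std::uint32_t big_page_size = 0x20000; // DEFAULT_BIG_PAGE_SIZE (128 KiB)
    constexpr std::uint32_t va_start_shift = 10;     // VA_START_SHIFT

    constexpr std::uint32_t page_size_bits = std::countr_zero(page_size);         // 12
    constexpr std::uint32_t big_page_size_bits = std::countr_zero(big_page_size); // 17

    // DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT puts the start of the VA range at 128 MiB.
    constexpr std::uint64_t va_range_start =
        static_cast<std::uint64_t>(big_page_size) << va_start_shift;

    static_assert(page_size_bits == 12);
    static_assert(big_page_size_bits == 17);
    static_assert(va_range_start == 0x8000000);

    std::cout << std::hex << "va_range_start = 0x" << va_range_start << '\n'
              << std::dec << "big_page_size_bits = " << big_page_size_bits << '\n';
}
```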
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 9e946d448..fc68bcc73 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | |||
| 71 | rasterizer = rasterizer_; | 71 | rasterizer = rasterizer_; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { | 74 | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { |
| 75 | return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); | 75 | return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) { | ||
| 79 | return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size); | ||
| 80 | } | ||
| 81 | |||
| 78 | GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { | 82 | GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { |
| 79 | return Map(cpu_addr, *FindFreeRange(size, align), size); | 83 | return Map(*FindFreeRange(size, align), cpu_addr, size); |
| 80 | } | 84 | } |
| 81 | 85 | ||
| 82 | GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { | 86 | GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { |
| 83 | const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); | 87 | const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); |
| 84 | ASSERT(gpu_addr); | 88 | ASSERT(gpu_addr); |
| 85 | return Map(cpu_addr, *gpu_addr, size); | 89 | return Map(*gpu_addr, cpu_addr, size); |
| 86 | } | 90 | } |
| 87 | 91 | ||
| 88 | void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | 92 | void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { |
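MemoryManager::Map now takes the GPU VA first and the CPU address second, and the new MapSparse reserves a range with no CPU backing; because both parameters are plain integer types, the compiler cannot catch a swapped call, which is why the MapAllocate/MapAllocate32 call sites above are updated by hand. A caller-side sketch with a stubbed stand-in class (not Tegra::MemoryManager), just to show the intended argument order:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

using GPUVAddr = std::uint64_t;
using VAddr = std::uint64_t;

// Stand-in with the same parameter order as the reworked interface.
class FakeMemoryManager {
public:
    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
        std::cout << "map    gpu=0x" << std::hex << gpu_addr << " cpu=0x" << cpu_addr
                  << " size=0x" << size << '\n';
        return gpu_addr;
    }
    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size) {
        std::cout << "sparse gpu=0x" << std::hex << gpu_addr << " size=0x" << size << '\n';
        return gpu_addr; // reserved, no CPU backing
    }
};

int main() {
    FakeMemoryManager gmmu;
    // GPU VA first, CPU address second -- the reverse of the old Map(cpu_addr, gpu_addr, size).
    gmmu.Map(GPUVAddr{0x100000}, VAddr{0x7f00000000}, 0x10000);
    gmmu.MapSparse(GPUVAddr{0x200000}, 0x40000);
}
```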
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0a763fd19..b8878476a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -88,7 +88,8 @@ public: | |||
| 88 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, | 88 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, |
| 89 | std::size_t size) const; | 89 | std::size_t size) const; |
| 90 | 90 | ||
| 91 | [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); | 91 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size); |
| 92 | GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size); | ||
| 92 | [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); | 93 | [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); |
| 93 | [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); | 94 | [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); |
| 94 | [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); | 95 | [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); |