-rw-r--r--  src/common/CMakeLists.txt                                2
-rw-r--r--  src/common/address_space.cpp                            11
-rw-r--r--  src/common/address_space.h                             134
-rw-r--r--  src/common/address_space.inc                           338
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp   460
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h     163
-rw-r--r--  src/video_core/memory_manager.cpp                       10
-rw-r--r--  src/video_core/memory_manager.h                          3
8 files changed, 882 insertions, 239 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2db414819..a02696873 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -17,6 +17,8 @@ endif ()
17include(GenerateSCMRev) 17include(GenerateSCMRev)
18 18
19add_library(common STATIC 19add_library(common STATIC
20 address_space.cpp
21 address_space.h
20 algorithm.h 22 algorithm.h
21 alignment.h 23 alignment.h
22 announce_multiplayer_room.h 24 announce_multiplayer_room.h
diff --git a/src/common/address_space.cpp b/src/common/address_space.cpp
new file mode 100644
index 000000000..6db85be87
--- /dev/null
+++ b/src/common/address_space.cpp
@@ -0,0 +1,11 @@
1// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#include "common/address_space.inc"
6
7namespace Common {
8
9template class Common::FlatAllocator<u32, 0, 32>;
10
11}
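
The new address_space.cpp only pulls in the template definitions from address_space.inc and emits one explicit instantiation. A minimal sketch of how this pattern would be extended if another address-space width were ever needed (the 40-bit variant below is hypothetical; this change only provides FlatAllocator<u32, 0, 32>):

// address_space.cpp (sketch): every template-argument set used elsewhere in
// the codebase needs exactly one explicit instantiation in this file.
#include "common/address_space.inc"

namespace Common {

template class FlatAllocator<u32, 0, 32>; // 32-bit allocator added by this commit
template class FlatAllocator<u64, 0, 40>; // hypothetical 40-bit allocator

} // namespace Common

Translation units that merely use these types include only address_space.h and link against the instantiations emitted here.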
diff --git a/src/common/address_space.h b/src/common/address_space.h
new file mode 100644
index 000000000..fd2f32b7d
--- /dev/null
+++ b/src/common/address_space.h
@@ -0,0 +1,134 @@
1// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <concepts>
8#include <functional>
9#include <mutex>
10#include <vector>
11
12#include "common/common_types.h"
13
14namespace Common {
15template <typename VaType, size_t AddressSpaceBits>
16concept AddressSpaceValid = std::is_unsigned_v<VaType> && sizeof(VaType) * 8 >= AddressSpaceBits;
17
18struct EmptyStruct {};
19
20/**
21 * @brief FlatAddressSpaceMap provides a generic VA->PA mapping implementation using a sorted vector
22 */
23template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa,
24 bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo = EmptyStruct>
25requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAddressSpaceMap {
26private:
27 std::function<void(VaType, VaType)>
28        unmapCallback{}; //!< Callback called when the mappings in a region have changed
29
30protected:
31 /**
32 * @brief Represents a block of memory in the AS, the physical mapping is contiguous until
33 * another block with a different phys address is hit
34 */
35 struct Block {
36 VaType virt{UnmappedVa}; //!< VA of the block
37 PaType phys{UnmappedPa}; //!< PA of the block, will increase 1-1 with VA until a new block
38 //!< is encountered
39 [[no_unique_address]] ExtraBlockInfo extraInfo;
40
41 Block() = default;
42
43 Block(VaType virt, PaType phys, ExtraBlockInfo extraInfo)
44 : virt(virt), phys(phys), extraInfo(extraInfo) {}
45
46 constexpr bool Valid() {
47 return virt != UnmappedVa;
48 }
49
50 constexpr bool Mapped() {
51 return phys != UnmappedPa;
52 }
53
54 constexpr bool Unmapped() {
55 return phys == UnmappedPa;
56 }
57
58 bool operator<(const VaType& pVirt) const {
59 return virt < pVirt;
60 }
61 };
62
63 std::mutex blockMutex;
64 std::vector<Block> blocks{Block{}};
65
66 /**
67 * @brief Maps a PA range into the given AS region
68 * @note blockMutex MUST be locked when calling this
69 */
70 void MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo);
71
72 /**
73 * @brief Unmaps the given range and merges it with other unmapped regions
74 * @note blockMutex MUST be locked when calling this
75 */
76 void UnmapLocked(VaType virt, VaType size);
77
78public:
79 static constexpr VaType VaMaximum{(1ULL << (AddressSpaceBits - 1)) +
80 ((1ULL << (AddressSpaceBits - 1)) -
81 1)}; //!< The maximum VA that this AS can technically reach
82
83 VaType vaLimit{VaMaximum}; //!< A soft limit on the maximum VA of the AS
84
85 FlatAddressSpaceMap(VaType vaLimit, std::function<void(VaType, VaType)> unmapCallback = {});
86
87 FlatAddressSpaceMap() = default;
88
89 void Map(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo = {}) {
90 std::scoped_lock lock(blockMutex);
91 MapLocked(virt, phys, size, extraInfo);
92 }
93
94 void Unmap(VaType virt, VaType size) {
95 std::scoped_lock lock(blockMutex);
96 UnmapLocked(virt, size);
97 }
98};
99
100/**
101 * @brief FlatMemoryManager specialises FlatAddressSpaceMap to work as an allocator, with an
102 * initial, fast linear pass and a subsequent slower pass that iterates until it finds a free block
103 */
104template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits>
105requires AddressSpaceValid<VaType, AddressSpaceBits> class FlatAllocator
106 : public FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits> {
107private:
108 using Base = FlatAddressSpaceMap<VaType, UnmappedVa, bool, false, false, AddressSpaceBits>;
109
110 VaType currentLinearAllocEnd; //!< The end address for the initial linear allocation pass, once
111 //!< this reaches the AS limit the slower allocation path will be
112 //!< used
113
114public:
115    VaType vaStart; //!< The base VA of the allocator; no allocations will be below this
116
117 FlatAllocator(VaType vaStart, VaType vaLimit = Base::VaMaximum);
118
119 /**
120 * @brief Allocates a region in the AS of the given size and returns its address
121 */
122 VaType Allocate(VaType size);
123
124 /**
125 * @brief Marks the given region in the AS as allocated
126 */
127 void AllocateFixed(VaType virt, VaType size);
128
129 /**
130 * @brief Frees an AS region so it can be used again
131 */
132 void Free(VaType virt, VaType size);
133};
134} // namespace Common
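
A hedged usage sketch of the interface declared above; the ranges and sizes are arbitrary and only the FlatAllocator<u32, 0, 32> instantiation provided by address_space.cpp is assumed to exist:

#include "common/address_space.h"

void AllocatorSketch() {
    // Allocator managing the index range [0x100, 0x10000); Allocate() first
    // tries a linear bump from vaStart and falls back to a slower gap search.
    Common::FlatAllocator<u32, 0, 32> allocator{0x100, 0x10000};

    const u32 first = allocator.Allocate(0x20); // e.g. 0x100 on an empty allocator
    allocator.AllocateFixed(0x8000, 0x10);      // mark a fixed range as allocated
    allocator.Free(first, 0x20);                // release the linear allocation again
}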
diff --git a/src/common/address_space.inc b/src/common/address_space.inc
new file mode 100644
index 000000000..907c55d88
--- /dev/null
+++ b/src/common/address_space.inc
@@ -0,0 +1,338 @@
1// SPDX-License-Identifier: GPLv3 or later
2// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
3
4#include "common/address_space.h"
5#include "common/assert.h"
6
7#define MAP_MEMBER(returnType) \
8 template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
9 bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
10 requires AddressSpaceValid<VaType, AddressSpaceBits> returnType FlatAddressSpaceMap< \
11 VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
12#define MAP_MEMBER_CONST() \
13 template <typename VaType, VaType UnmappedVa, typename PaType, PaType UnmappedPa, \
14 bool PaContigSplit, size_t AddressSpaceBits, typename ExtraBlockInfo> \
15 requires AddressSpaceValid<VaType, AddressSpaceBits> FlatAddressSpaceMap< \
16 VaType, UnmappedVa, PaType, UnmappedPa, PaContigSplit, AddressSpaceBits, ExtraBlockInfo>
17
18#define MM_MEMBER(returnType) \
19 template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
20 requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
21 FlatMemoryManager<VaType, UnmappedVa, AddressSpaceBits>
22
23#define ALLOC_MEMBER(returnType) \
24 template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
25 requires AddressSpaceValid<VaType, AddressSpaceBits> returnType \
26 FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
27#define ALLOC_MEMBER_CONST() \
28 template <typename VaType, VaType UnmappedVa, size_t AddressSpaceBits> \
29 requires AddressSpaceValid<VaType, AddressSpaceBits> \
30 FlatAllocator<VaType, UnmappedVa, AddressSpaceBits>
31
32namespace Common {
33MAP_MEMBER_CONST()::FlatAddressSpaceMap(VaType vaLimit,
34 std::function<void(VaType, VaType)> unmapCallback)
35 : unmapCallback(std::move(unmapCallback)), vaLimit(vaLimit) {
36 if (vaLimit > VaMaximum)
37 UNREACHABLE_MSG("Invalid VA limit!");
38}
39
40MAP_MEMBER(void)::MapLocked(VaType virt, PaType phys, VaType size, ExtraBlockInfo extraInfo) {
41 VaType virtEnd{virt + size};
42
43 if (virtEnd > vaLimit)
44 UNREACHABLE_MSG("Trying to map a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
45 virtEnd, vaLimit);
46
47 auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
48 if (blockEndSuccessor == blocks.begin())
49 UNREACHABLE_MSG("Trying to map a block before the VA start: virtEnd: 0x{:X}", virtEnd);
50
51 auto blockEndPredecessor{std::prev(blockEndSuccessor)};
52
53 if (blockEndSuccessor != blocks.end()) {
54 // We have blocks in front of us, if one is directly in front then we don't have to add a
55 // tail
56 if (blockEndSuccessor->virt != virtEnd) {
57 PaType tailPhys{[&]() -> PaType {
58 if constexpr (!PaContigSplit) {
59 return blockEndPredecessor
60 ->phys; // Always propagate unmapped regions rather than calculating offset
61 } else {
62 if (blockEndPredecessor->Unmapped())
63 return blockEndPredecessor->phys; // Always propagate unmapped regions
64 // rather than calculating offset
65 else
66 return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
67 }
68 }()};
69
70 if (blockEndPredecessor->virt >= virt) {
71 // If this block's start would be overlapped by the map then reuse it as a tail
72 // block
73 blockEndPredecessor->virt = virtEnd;
74 blockEndPredecessor->phys = tailPhys;
75 blockEndPredecessor->extraInfo = blockEndPredecessor->extraInfo;
76
77 // No longer predecessor anymore
78 blockEndSuccessor = blockEndPredecessor--;
79 } else {
80 // Else insert a new one and we're done
81 blocks.insert(blockEndSuccessor,
82 {Block(virt, phys, extraInfo),
83 Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
84 if (unmapCallback)
85 unmapCallback(virt, size);
86
87 return;
88 }
89 }
90 } else {
91 // blockEndPredecessor will always be unmapped as blocks has to be terminated by an unmapped
92 // chunk
93 if (blockEndPredecessor != blocks.begin() && blockEndPredecessor->virt >= virt) {
94 // Move the unmapped block start backwards
95 blockEndPredecessor->virt = virtEnd;
96
97 // No longer predecessor anymore
98 blockEndSuccessor = blockEndPredecessor--;
99 } else {
100 // Else insert a new one and we're done
101 blocks.insert(blockEndSuccessor,
102 {Block(virt, phys, extraInfo), Block(virtEnd, UnmappedPa, {})});
103 if (unmapCallback)
104 unmapCallback(virt, size);
105
106 return;
107 }
108 }
109
110 auto blockStartSuccessor{blockEndSuccessor};
111
112 // Walk the block vector to find the start successor as this is more efficient than another
113 // binary search in most scenarios
114 while (std::prev(blockStartSuccessor)->virt >= virt)
115 blockStartSuccessor--;
116
117 // Check that the start successor is either the end block or something in between
118 if (blockStartSuccessor->virt > virtEnd) {
119 UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
120 } else if (blockStartSuccessor->virt == virtEnd) {
121 // We need to create a new block as there are none spare that we would overwrite
122 blocks.insert(blockStartSuccessor, Block(virt, phys, extraInfo));
123 } else {
124 // Erase overwritten blocks
125 if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
126 blocks.erase(eraseStart, blockEndSuccessor);
127
128 // Reuse a block that would otherwise be overwritten as a start block
129 blockStartSuccessor->virt = virt;
130 blockStartSuccessor->phys = phys;
131 blockStartSuccessor->extraInfo = extraInfo;
132 }
133
134 if (unmapCallback)
135 unmapCallback(virt, size);
136}
137
138MAP_MEMBER(void)::UnmapLocked(VaType virt, VaType size) {
139 VaType virtEnd{virt + size};
140
141 if (virtEnd > vaLimit)
142        UNREACHABLE_MSG("Trying to unmap a block past the VA limit: virtEnd: 0x{:X}, vaLimit: 0x{:X}",
143 virtEnd, vaLimit);
144
145 auto blockEndSuccessor{std::lower_bound(blocks.begin(), blocks.end(), virtEnd)};
146 if (blockEndSuccessor == blocks.begin())
147 UNREACHABLE_MSG("Trying to unmap a block before the VA start: virtEnd: 0x{:X}", virtEnd);
148
149 auto blockEndPredecessor{std::prev(blockEndSuccessor)};
150
151 auto walkBackToPredecessor{[&](auto iter) {
152 while (iter->virt >= virt)
153 iter--;
154
155 return iter;
156 }};
157
158 auto eraseBlocksWithEndUnmapped{[&](auto unmappedEnd) {
159 auto blockStartPredecessor{walkBackToPredecessor(unmappedEnd)};
160 auto blockStartSuccessor{std::next(blockStartPredecessor)};
161
162 auto eraseEnd{[&]() {
163 if (blockStartPredecessor->Unmapped()) {
164 // If the start predecessor is unmapped then we can erase everything in our region
165 // and be done
166 return std::next(unmappedEnd);
167 } else {
168 // Else reuse the end predecessor as the start of our unmapped region then erase all
169 // up to it
170 unmappedEnd->virt = virt;
171 return unmappedEnd;
172 }
173 }()};
174
175 // We can't have two unmapped regions after each other
176 if (eraseEnd != blocks.end() &&
177 (eraseEnd == blockStartSuccessor ||
178 (blockStartPredecessor->Unmapped() && eraseEnd->Unmapped())))
179 UNREACHABLE_MSG("Multiple contiguous unmapped regions are unsupported!");
180
181 blocks.erase(blockStartSuccessor, eraseEnd);
182 }};
183
184 // We can avoid any splitting logic if these are the case
185 if (blockEndPredecessor->Unmapped()) {
186 if (blockEndPredecessor->virt > virt)
187 eraseBlocksWithEndUnmapped(blockEndPredecessor);
188
189 if (unmapCallback)
190 unmapCallback(virt, size);
191
192 return; // The region is unmapped, bail out early
193 } else if (blockEndSuccessor->virt == virtEnd && blockEndSuccessor->Unmapped()) {
194 eraseBlocksWithEndUnmapped(blockEndSuccessor);
195
196 if (unmapCallback)
197 unmapCallback(virt, size);
198
199 return; // The region is unmapped here and doesn't need splitting, bail out early
200 } else if (blockEndSuccessor == blocks.end()) {
201 // This should never happen as the end should always follow an unmapped block
202 UNREACHABLE_MSG("Unexpected Memory Manager state!");
203 } else if (blockEndSuccessor->virt != virtEnd) {
204 // If one block is directly in front then we don't have to add a tail
205
206 // The previous block is mapped so we will need to add a tail with an offset
207 PaType tailPhys{[&]() {
208 if constexpr (PaContigSplit)
209 return blockEndPredecessor->phys + virtEnd - blockEndPredecessor->virt;
210 else
211 return blockEndPredecessor->phys;
212 }()};
213
214 if (blockEndPredecessor->virt >= virt) {
215 // If this block's start would be overlapped by the unmap then reuse it as a tail block
216 blockEndPredecessor->virt = virtEnd;
217 blockEndPredecessor->phys = tailPhys;
218
219 // No longer predecessor anymore
220 blockEndSuccessor = blockEndPredecessor--;
221 } else {
222 blocks.insert(blockEndSuccessor,
223 {Block(virt, UnmappedPa, {}),
224 Block(virtEnd, tailPhys, blockEndPredecessor->extraInfo)});
225 if (unmapCallback)
226 unmapCallback(virt, size);
227
228 return; // The previous block is mapped and ends before
229 }
230 }
231
232 // Walk the block vector to find the start predecessor as this is more efficient than another
233 // binary search in most scenarios
234 auto blockStartPredecessor{walkBackToPredecessor(blockEndSuccessor)};
235 auto blockStartSuccessor{std::next(blockStartPredecessor)};
236
237 if (blockStartSuccessor->virt > virtEnd) {
238 UNREACHABLE_MSG("Unsorted block in AS map: virt: 0x{:X}", blockStartSuccessor->virt);
239 } else if (blockStartSuccessor->virt == virtEnd) {
240 // There are no blocks between the start and the end that would let us skip inserting a new
241 // one for head
242
243        // The previous block may be unmapped; if so, we don't need to insert any unmaps after it
244 if (blockStartPredecessor->Mapped())
245 blocks.insert(blockStartSuccessor, Block(virt, UnmappedPa, {}));
246 } else if (blockStartPredecessor->Unmapped()) {
247 // If the previous block is unmapped
248 blocks.erase(blockStartSuccessor, blockEndPredecessor);
249 } else {
250 // Erase overwritten blocks, skipping the first one as we have written the unmapped start
251 // block there
252 if (auto eraseStart{std::next(blockStartSuccessor)}; eraseStart != blockEndSuccessor)
253 blocks.erase(eraseStart, blockEndSuccessor);
254
255 // Add in the unmapped block header
256 blockStartSuccessor->virt = virt;
257 blockStartSuccessor->phys = UnmappedPa;
258 }
259
260 if (unmapCallback)
261 unmapCallback(virt, size);
262}
263
264ALLOC_MEMBER_CONST()::FlatAllocator(VaType vaStart, VaType vaLimit)
265 : Base(vaLimit), currentLinearAllocEnd(vaStart), vaStart(vaStart) {}
266
267ALLOC_MEMBER(VaType)::Allocate(VaType size) {
268 std::scoped_lock lock(this->blockMutex);
269
270 VaType allocStart{UnmappedVa};
271 VaType allocEnd{currentLinearAllocEnd + size};
272
273 // Avoid searching backwards in the address space if possible
274 if (allocEnd >= currentLinearAllocEnd && allocEnd <= this->vaLimit) {
275 auto allocEndSuccessor{
276 std::lower_bound(this->blocks.begin(), this->blocks.end(), allocEnd)};
277 if (allocEndSuccessor == this->blocks.begin())
278 UNREACHABLE_MSG("First block in AS map is invalid!");
279
280 auto allocEndPredecessor{std::prev(allocEndSuccessor)};
281 if (allocEndPredecessor->virt <= currentLinearAllocEnd) {
282 allocStart = currentLinearAllocEnd;
283 } else {
284            // Skip over any fixed mappings in front of us
285 while (allocEndSuccessor != this->blocks.end()) {
286 if (allocEndSuccessor->virt - allocEndPredecessor->virt < size ||
287 allocEndPredecessor->Mapped()) {
288 allocStart = allocEndPredecessor->virt;
289 break;
290 }
291
292 allocEndPredecessor = allocEndSuccessor++;
293
294 // Use the VA limit to calculate if we can fit in the final block since it has no
295 // successor
296 if (allocEndSuccessor == this->blocks.end()) {
297 allocEnd = allocEndPredecessor->virt + size;
298
299 if (allocEnd >= allocEndPredecessor->virt && allocEnd <= this->vaLimit)
300 allocStart = allocEndPredecessor->virt;
301 }
302 }
303 }
304 }
305
306 if (allocStart != UnmappedVa) {
307 currentLinearAllocEnd = allocStart + size;
308 } else { // If linear allocation overflows the AS then find a gap
309 if (this->blocks.size() <= 2)
310 UNREACHABLE_MSG("Unexpected allocator state!");
311
312 auto searchPredecessor{this->blocks.begin()};
313 auto searchSuccessor{std::next(searchPredecessor)};
314
315 while (searchSuccessor != this->blocks.end() &&
316 (searchSuccessor->virt - searchPredecessor->virt < size ||
317 searchPredecessor->Mapped())) {
318 searchPredecessor = searchSuccessor++;
319 }
320
321 if (searchSuccessor != this->blocks.end())
322 allocStart = searchPredecessor->virt;
323 else
324 return {}; // AS is full
325 }
326
327 this->MapLocked(allocStart, true, size, {});
328 return allocStart;
329}
330
331ALLOC_MEMBER(void)::AllocateFixed(VaType virt, VaType size) {
332 this->Map(virt, true, size);
333}
334
335ALLOC_MEMBER(void)::Free(VaType virt, VaType size) {
336 this->Unmap(virt, size);
337}
338} // namespace Common
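
A rough trace of how the sorted block vector evolves under MapLocked/UnmapLocked; "map" is an assumed FlatAddressSpaceMap<u64, 0, u64, 0, true, 40> instance (UnmappedVa == UnmappedPa == 0, PaContigSplit enabled, no unmap callback) and the addresses are arbitrary:

// blocks starts as a single unmapped terminator: { {virt=0x0, unmapped} }
map.Map(0x1000, 0x8000, 0x2000);
// -> { {0x0, unmapped}, {0x1000 -> 0x8000}, {0x3000, unmapped} }
map.Map(0x2000, 0xA000, 0x1000);
// The new block is inserted before the existing unmapped tail, so [0x1000, 0x2000)
// keeps phys 0x8000 while [0x2000, 0x3000) now points at 0xA000:
// -> { {0x0, unmapped}, {0x1000 -> 0x8000}, {0x2000 -> 0xA000}, {0x3000, unmapped} }
map.Unmap(0x1000, 0x2000);
// Both mapped blocks and the old tail are erased and merged back into the
// surrounding unmapped region:
// -> { {0x0, unmapped} }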
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 5c70c9a57..344ddfc90 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -6,6 +6,7 @@
6#include <cstring> 6#include <cstring>
7#include <utility> 7#include <utility>
8 8
9#include "common/alignment.h"
9#include "common/assert.h" 10#include "common/assert.h"
10#include "common/logging/log.h" 11#include "common/logging/log.h"
11#include "core/core.h" 12#include "core/core.h"
@@ -21,8 +22,8 @@
21namespace Service::Nvidia::Devices { 22namespace Service::Nvidia::Devices {
22 23
23nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core) 24nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
24 : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, 25 : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
25 gmmu{std::make_shared<Tegra::MemoryManager>(system)} {} 26 gmmu{} {}
26nvhost_as_gpu::~nvhost_as_gpu() = default; 27nvhost_as_gpu::~nvhost_as_gpu() = default;
27 28
28NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 29NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -89,12 +90,49 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
89 IoctlAllocAsEx params{}; 90 IoctlAllocAsEx params{};
90 std::memcpy(&params, input.data(), input.size()); 91 std::memcpy(&params, input.data(), input.size());
91 92
92 LOG_WARNING(Service_NVDRV, "(STUBBED) called, big_page_size=0x{:X}", params.big_page_size); 93 LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
93 if (params.big_page_size == 0) { 94
94 params.big_page_size = DEFAULT_BIG_PAGE_SIZE; 95 std::scoped_lock lock(mutex);
96
97 if (vm.initialised) {
98 UNREACHABLE_MSG("Cannot initialise an address space twice!");
99 return NvResult::InvalidState;
95 } 100 }
96 101
97 big_page_size = params.big_page_size; 102 if (params.big_page_size) {
103 if (!std::has_single_bit(params.big_page_size)) {
104 LOG_ERROR(Service_NVDRV, "Non power-of-2 big page size: 0x{:X}!", params.big_page_size);
105 return NvResult::BadValue;
106 }
107
108 if (!(params.big_page_size & VM::SUPPORTED_BIG_PAGE_SIZES)) {
109 LOG_ERROR(Service_NVDRV, "Unsupported big page size: 0x{:X}!", params.big_page_size);
110 return NvResult::BadValue;
111 }
112
113 vm.big_page_size = params.big_page_size;
114 vm.big_page_size_bits = static_cast<u32>(std::countr_zero(params.big_page_size));
115
116 vm.va_range_start = params.big_page_size << VM::VA_START_SHIFT;
117 }
118
119 // If this is unspecified then default values should be used
120 if (params.va_range_start) {
121 vm.va_range_start = params.va_range_start;
122 vm.va_range_split = params.va_range_split;
123 vm.va_range_end = params.va_range_end;
124 }
125
126 const u64 start_pages{vm.va_range_start >> VM::PAGE_SIZE_BITS};
127 const u64 end_pages{vm.va_range_split >> VM::PAGE_SIZE_BITS};
128 vm.small_page_allocator = std::make_shared<VM::Allocator>(start_pages, end_pages);
129
130 const u64 start_big_pages{vm.va_range_split >> vm.big_page_size_bits};
131 const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
132 vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
133
134 gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
135 vm.initialised = true;
98 136
99 return NvResult::Success; 137 return NvResult::Success;
100} 138}
@@ -106,21 +144,73 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
106 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, 144 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
107 params.page_size, params.flags); 145 params.page_size, params.flags);
108 146
109 const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; 147 std::scoped_lock lock(mutex);
110 if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) { 148
111 params.offset = *(gmmu->AllocateFixed(params.offset, size)); 149 if (!vm.initialised) {
150 return NvResult::BadValue;
151 }
152
153 if (params.page_size != VM::YUZU_PAGESIZE && params.page_size != vm.big_page_size) {
154 return NvResult::BadValue;
155 }
156
157 if (params.page_size != vm.big_page_size &&
158 ((params.flags & MappingFlags::Sparse) != MappingFlags::None)) {
159 UNIMPLEMENTED_MSG("Sparse small pages are not implemented!");
160 return NvResult::NotImplemented;
161 }
162
163 const u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
164 : vm.big_page_size_bits};
165
166 auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
167 : *vm.big_page_allocator};
168
169 if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
170 allocator.AllocateFixed(static_cast<u32>(params.offset >> page_size_bits), params.pages);
112 } else { 171 } else {
113 params.offset = gmmu->Allocate(size, params.align); 172 params.offset = static_cast<u64>(allocator.Allocate(params.pages)) << page_size_bits;
173 if (!params.offset) {
174 UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
175 return NvResult::InsufficientMemory;
176 }
114 } 177 }
115 178
116 auto result = NvResult::Success; 179 u64 size{static_cast<u64>(params.pages) * params.page_size};
117 if (!params.offset) { 180
118 LOG_CRITICAL(Service_NVDRV, "allocation failed for size {}", size); 181 if ((params.flags & MappingFlags::Sparse) != MappingFlags::None) {
119 result = NvResult::InsufficientMemory; 182 gmmu->MapSparse(params.offset, size);
120 } 183 }
121 184
185 allocation_map[params.offset] = {
186 .size = size,
187 .page_size = params.page_size,
188 .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
189 };
190
122 std::memcpy(output.data(), &params, output.size()); 191 std::memcpy(output.data(), &params, output.size());
123 return result; 192 return NvResult::Success;
193}
194
195void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
196 auto mapping{mapping_map.at(offset)};
197
198 if (!mapping->fixed) {
199 auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
200 u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
201
202 allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
203 static_cast<u32>(mapping->size >> page_size_bits));
204 }
205
206 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
207 // Only FreeSpace can unmap them fully
208 if (mapping->sparse_alloc)
209 gmmu->MapSparse(offset, mapping->size);
210 else
211 gmmu->Unmap(offset, mapping->size);
212
213 mapping_map.erase(offset);
124} 214}
125 215
126NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) { 216NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -130,7 +220,40 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
130 LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset, 220 LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
131 params.pages, params.page_size); 221 params.pages, params.page_size);
132 222
133 gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size); 223 std::scoped_lock lock(mutex);
224
225 if (!vm.initialised) {
226 return NvResult::BadValue;
227 }
228
229 try {
230 auto allocation{allocation_map[params.offset]};
231
232 if (allocation.page_size != params.page_size ||
233 allocation.size != (static_cast<u64>(params.pages) * params.page_size)) {
234 return NvResult::BadValue;
235 }
236
237 for (const auto& mapping : allocation.mappings) {
238 FreeMappingLocked(mapping->offset);
239 }
240
241 // Unset sparse flag if required
242 if (allocation.sparse) {
243 gmmu->Unmap(params.offset, allocation.size);
244 }
245
246 auto& allocator{params.page_size == VM::YUZU_PAGESIZE ? *vm.small_page_allocator
247 : *vm.big_page_allocator};
248 u32 page_size_bits{params.page_size == VM::YUZU_PAGESIZE ? VM::PAGE_SIZE_BITS
249 : vm.big_page_size_bits};
250
251 allocator.Free(static_cast<u32>(params.offset >> page_size_bits),
252 static_cast<u32>(allocation.size >> page_size_bits));
253 allocation_map.erase(params.offset);
254 } catch ([[maybe_unused]] const std::out_of_range& e) {
255 return NvResult::BadValue;
256 }
134 257
135 std::memcpy(output.data(), &params, output.size()); 258 std::memcpy(output.data(), &params, output.size());
136 return NvResult::Success; 259 return NvResult::Success;
@@ -141,43 +264,51 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
141 264
142 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); 265 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
143 266
144 auto result = NvResult::Success;
145 std::vector<IoctlRemapEntry> entries(num_entries); 267 std::vector<IoctlRemapEntry> entries(num_entries);
146 std::memcpy(entries.data(), input.data(), input.size()); 268 std::memcpy(entries.data(), input.data(), input.size());
147 269
270 std::scoped_lock lock(mutex);
271
272 if (!vm.initialised) {
273 return NvResult::BadValue;
274 }
275
148 for (const auto& entry : entries) { 276 for (const auto& entry : entries) {
149 LOG_DEBUG(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 277 GPUVAddr virtual_address{static_cast<u64>(entry.as_offset_big_pages)
150 entry.offset, entry.nvmap_handle, entry.pages); 278 << vm.big_page_size_bits};
151 279 u64 size{static_cast<u64>(entry.big_pages) << vm.big_page_size_bits};
152 if (entry.nvmap_handle == 0) { 280
153 // If nvmap handle is null, we should unmap instead. 281 auto alloc{allocation_map.upper_bound(virtual_address)};
154 const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; 282
155 const auto size{static_cast<u64>(entry.pages) << 0x10}; 283 if (alloc-- == allocation_map.begin() ||
156 gmmu->Unmap(offset, size); 284 (virtual_address - alloc->first) + size > alloc->second.size) {
157 continue; 285 LOG_WARNING(Service_NVDRV, "Cannot remap into an unallocated region!");
286 return NvResult::BadValue;
158 } 287 }
159 288
160 const auto object{nvmap.GetHandle(entry.nvmap_handle)}; 289 if (!alloc->second.sparse) {
161 if (!object) { 290 LOG_WARNING(Service_NVDRV, "Cannot remap a non-sparse mapping!");
162 LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", entry.nvmap_handle); 291 return NvResult::BadValue;
163 result = NvResult::InvalidState;
164 break;
165 } 292 }
166 293
167 const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10}; 294 if (!entry.handle) {
168 const auto size{static_cast<u64>(entry.pages) << 0x10}; 295 gmmu->MapSparse(virtual_address, size);
169 const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10}; 296 } else {
170 const auto addr{gmmu->Map(object->address + map_offset, offset, size)}; 297 auto handle{nvmap.GetHandle(entry.handle)};
298 if (!handle) {
299 return NvResult::BadValue;
300 }
171 301
172 if (!addr) { 302 VAddr cpu_address{static_cast<VAddr>(
173 LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!"); 303 handle->address +
174 result = NvResult::InvalidState; 304 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
175 break; 305
306 gmmu->Map(virtual_address, cpu_address, size);
176 } 307 }
177 } 308 }
178 309
179 std::memcpy(output.data(), entries.data(), output.size()); 310 std::memcpy(output.data(), entries.data(), output.size());
180 return result; 311 return NvResult::Success;
181} 312}
182 313
183NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) { 314NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -187,75 +318,96 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
187 LOG_DEBUG(Service_NVDRV, 318 LOG_DEBUG(Service_NVDRV,
188 "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" 319 "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
189 ", offset={}", 320 ", offset={}",
190 params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size, 321 params.flags, params.handle, params.buffer_offset, params.mapping_size,
191 params.offset); 322 params.offset);
192 323
193 if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) { 324 std::scoped_lock lock(mutex);
194 if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) { 325
195 const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)}; 326 if (!vm.initialised) {
196 const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)}; 327 return NvResult::BadValue;
328 }
197 329
198 if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) { 330 // Remaps a subregion of an existing mapping to a different PA
199 LOG_CRITICAL(Service_NVDRV, 331 if ((params.flags & MappingFlags::Remap) != MappingFlags::None) {
200 "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, " 332 try {
201 "mapping_size = {}, offset={}", 333 auto mapping{mapping_map.at(params.offset)};
202 params.flags, params.nvmap_handle, params.buffer_offset,
203 params.mapping_size, params.offset);
204 334
205 std::memcpy(output.data(), &params, output.size()); 335 if (mapping->size < params.mapping_size) {
206 return NvResult::InvalidState; 336 LOG_WARNING(Service_NVDRV,
337 "Cannot remap a partially mapped GPU address space region: 0x{:X}",
338 params.offset);
339 return NvResult::BadValue;
207 } 340 }
208 341
209 std::memcpy(output.data(), &params, output.size()); 342 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
210 return NvResult::Success; 343 VAddr cpu_address{mapping->ptr + params.buffer_offset};
211 } else { 344
212 LOG_CRITICAL(Service_NVDRV, "address not mapped offset={}", params.offset); 345 gmmu->Map(gpu_address, cpu_address, params.mapping_size);
213 346
214 std::memcpy(output.data(), &params, output.size()); 347 return NvResult::Success;
215 return NvResult::InvalidState; 348 } catch ([[maybe_unused]] const std::out_of_range& e) {
349 LOG_WARNING(Service_NVDRV, "Cannot remap an unmapped GPU address space region: 0x{:X}",
350 params.offset);
351 return NvResult::BadValue;
216 } 352 }
217 } 353 }
218 354
219 const auto object{nvmap.GetHandle(params.nvmap_handle)}; 355 auto handle{nvmap.GetHandle(params.handle)};
220 if (!object) { 356 if (!handle) {
221 LOG_CRITICAL(Service_NVDRV, "invalid nvmap_handle={:X}", params.nvmap_handle); 357 return NvResult::BadValue;
222 std::memcpy(output.data(), &params, output.size());
223 return NvResult::InvalidState;
224 } 358 }
225 359
226 // The real nvservices doesn't make a distinction between handles and ids, and 360 VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
227 // object can only have one handle and it will be the same as its id. Assert that this is the 361 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
228 // case to prevent unexpected behavior.
229 ASSERT(object->id == params.nvmap_handle);
230 362
231 u64 page_size{params.page_size}; 363 if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
232 if (!page_size) { 364 auto alloc{allocation_map.upper_bound(params.offset)};
233 page_size = object->align;
234 }
235 365
236 const auto physical_address{object->address + params.buffer_offset}; 366 if (alloc-- == allocation_map.begin() ||
237 u64 size{params.mapping_size}; 367 (params.offset - alloc->first) + size > alloc->second.size) {
238 if (!size) { 368 UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
239 size = object->size; 369 return NvResult::BadValue;
240 } 370 }
241 371
242 const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None}; 372 gmmu->Map(params.offset, cpu_address, size);
243 if (is_alloc) {
244 params.offset = gmmu->MapAllocate(physical_address, size, page_size);
245 } else {
246 params.offset = gmmu->Map(physical_address, params.offset, size);
247 }
248 373
249 auto result = NvResult::Success; 374 auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
250 if (!params.offset) { 375 alloc->second.sparse)};
251 LOG_CRITICAL(Service_NVDRV, "failed to map size={}", size); 376 alloc->second.mappings.push_back(mapping);
252 result = NvResult::InvalidState; 377 mapping_map[params.offset] = mapping;
253 } else { 378 } else {
254 AddBufferMap(params.offset, size, physical_address, is_alloc); 379 bool big_page{[&]() {
380 if (Common::IsAligned(handle->align, vm.big_page_size))
381 return true;
382 else if (Common::IsAligned(handle->align, VM::YUZU_PAGESIZE))
383 return false;
384 else {
385 UNREACHABLE();
386 return false;
387 }
388 }()};
389
390 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
391 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
392 u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
393
394 params.offset = static_cast<u64>(allocator.Allocate(
395 static_cast<u32>(Common::AlignUp(size, page_size) >> page_size_bits)))
396 << page_size_bits;
397 if (!params.offset) {
398 UNREACHABLE_MSG("Failed to allocate free space in the GPU AS!");
399 return NvResult::InsufficientMemory;
400 }
401
402 gmmu->Map(params.offset, cpu_address, size);
403
404 auto mapping{
405 std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
406 mapping_map[params.offset] = mapping;
255 } 407 }
256 408
257 std::memcpy(output.data(), &params, output.size()); 409 std::memcpy(output.data(), &params, output.size());
258 return result; 410 return NvResult::Success;
259} 411}
260 412
261NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) { 413NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& output) {
@@ -264,13 +416,36 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
264 416
265 LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); 417 LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
266 418
267 if (const auto size{RemoveBufferMap(params.offset)}; size) { 419 std::scoped_lock lock(mutex);
268 gmmu->Unmap(params.offset, *size); 420
269 } else { 421 if (!vm.initialised) {
270 LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset); 422 return NvResult::BadValue;
423 }
424
425 try {
426 auto mapping{mapping_map.at(params.offset)};
427
428 if (!mapping->fixed) {
429 auto& allocator{mapping->big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
430 u32 page_size_bits{mapping->big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
431
432 allocator.Free(static_cast<u32>(mapping->offset >> page_size_bits),
433 static_cast<u32>(mapping->size >> page_size_bits));
434 }
435
436 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
437 // Only FreeSpace can unmap them fully
438 if (mapping->sparse_alloc) {
439 gmmu->MapSparse(params.offset, mapping->size);
440 } else {
441 gmmu->Unmap(params.offset, mapping->size);
442 }
443
444 mapping_map.erase(params.offset);
445 } catch ([[maybe_unused]] const std::out_of_range& e) {
446 LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
271 } 447 }
272 448
273 std::memcpy(output.data(), &params, output.size());
274 return NvResult::Success; 449 return NvResult::Success;
275} 450}
276 451
@@ -284,28 +459,37 @@ NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8
284 return NvResult::Success; 459 return NvResult::Success;
285} 460}
286 461
462void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
463 params.buf_size = 2 * sizeof(VaRegion);
464
465 params.regions = std::array<VaRegion, 2>{
466 VaRegion{
467 .offset = vm.small_page_allocator->vaStart << VM::PAGE_SIZE_BITS,
468 .page_size = VM::YUZU_PAGESIZE,
469 .pages = vm.small_page_allocator->vaLimit - vm.small_page_allocator->vaStart,
470 },
471 VaRegion{
472 .offset = vm.big_page_allocator->vaStart << vm.big_page_size_bits,
473 .page_size = vm.big_page_size,
474 .pages = vm.big_page_allocator->vaLimit - vm.big_page_allocator->vaStart,
475 },
476 };
477}
478
287NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) { 479NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u8>& output) {
288 IoctlGetVaRegions params{}; 480 IoctlGetVaRegions params{};
289 std::memcpy(&params, input.data(), input.size()); 481 std::memcpy(&params, input.data(), input.size());
290 482
291 LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, 483 LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
292 params.buf_size); 484 params.buf_size);
293
294 params.buf_size = 0x30;
295 485
296 params.small = IoctlVaRegion{ 486 std::scoped_lock lock(mutex);
297 .offset = 0x04000000,
298 .page_size = DEFAULT_SMALL_PAGE_SIZE,
299 .pages = 0x3fbfff,
300 };
301 487
302 params.big = IoctlVaRegion{ 488 if (!vm.initialised) {
303 .offset = 0x04000000, 489 return NvResult::BadValue;
304 .page_size = big_page_size, 490 }
305 .pages = 0x1bffff,
306 };
307 491
308 // TODO(ogniK): This probably can stay stubbed but should add support way way later 492 GetVARegionsImpl(params);
309 493
310 std::memcpy(output.data(), &params, output.size()); 494 std::memcpy(output.data(), &params, output.size());
311 return NvResult::Success; 495 return NvResult::Success;
@@ -316,64 +500,24 @@ NvResult nvhost_as_gpu::GetVARegions(const std::vector<u8>& input, std::vector<u
316 IoctlGetVaRegions params{}; 500 IoctlGetVaRegions params{};
317 std::memcpy(&params, input.data(), input.size()); 501 std::memcpy(&params, input.data(), input.size());
318 502
319 LOG_WARNING(Service_NVDRV, "(STUBBED) called, buf_addr={:X}, buf_size={:X}", params.buf_addr, 503 LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr,
320 params.buf_size); 504 params.buf_size);
321 505
322 params.buf_size = 0x30; 506 std::scoped_lock lock(mutex);
323
324 params.small = IoctlVaRegion{
325 .offset = 0x04000000,
326 .page_size = 0x1000,
327 .pages = 0x3fbfff,
328 };
329 507
330 params.big = IoctlVaRegion{ 508 if (!vm.initialised) {
331 .offset = 0x04000000, 509 return NvResult::BadValue;
332 .page_size = big_page_size, 510 }
333 .pages = 0x1bffff,
334 };
335 511
336 // TODO(ogniK): This probably can stay stubbed but should add support way way later 512 GetVARegionsImpl(params);
337 513
338 std::memcpy(output.data(), &params, output.size()); 514 std::memcpy(output.data(), &params, output.size());
339 std::memcpy(inline_output.data(), &params.small, sizeof(IoctlVaRegion)); 515 std::memcpy(inline_output.data(), &params.regions[0], sizeof(VaRegion));
340 std::memcpy(inline_output.data() + sizeof(IoctlVaRegion), &params.big, sizeof(IoctlVaRegion)); 516 std::memcpy(inline_output.data() + sizeof(VaRegion), &params.regions[1], sizeof(VaRegion));
341 517
342 return NvResult::Success; 518 return NvResult::Success;
343} 519}
344 520
345std::optional<nvhost_as_gpu::BufferMap> nvhost_as_gpu::FindBufferMap(GPUVAddr gpu_addr) const {
346 const auto end{buffer_mappings.upper_bound(gpu_addr)};
347 for (auto iter{buffer_mappings.begin()}; iter != end; ++iter) {
348 if (gpu_addr >= iter->second.StartAddr() && gpu_addr < iter->second.EndAddr()) {
349 return iter->second;
350 }
351 }
352
353 return std::nullopt;
354}
355
356void nvhost_as_gpu::AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr,
357 bool is_allocated) {
358 buffer_mappings[gpu_addr] = {gpu_addr, size, cpu_addr, is_allocated};
359}
360
361std::optional<std::size_t> nvhost_as_gpu::RemoveBufferMap(GPUVAddr gpu_addr) {
362 if (const auto iter{buffer_mappings.find(gpu_addr)}; iter != buffer_mappings.end()) {
363 std::size_t size{};
364
365 if (iter->second.IsAllocated()) {
366 size = iter->second.Size();
367 }
368
369 buffer_mappings.erase(iter);
370
371 return size;
372 }
373
374 return std::nullopt;
375}
376
377Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) { 521Kernel::KEvent* nvhost_as_gpu::QueryEvent(u32 event_id) {
378 LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id); 522 LOG_CRITICAL(Service_NVDRV, "Unknown AS GPU Event {}", event_id);
379 return nullptr; 523 return nullptr;
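
A standalone sketch of the page-size arithmetic AllocAsEx performs above; the values assume the default 0x20000 big page size and the VM constants declared in nvhost_as_gpu.h:

#include <bit>
#include <cstdint>

void VmLayoutSketch() {
    const std::uint32_t big_page_size = 0x20000;
    const std::uint32_t big_page_size_bits =
        static_cast<std::uint32_t>(std::countr_zero(big_page_size));        // 17

    constexpr std::uint32_t VA_START_SHIFT = 10;
    constexpr std::uint32_t PAGE_SIZE_BITS = 12;                            // 4 KiB small pages
    const std::uint64_t va_range_start =
        static_cast<std::uint64_t>(big_page_size) << VA_START_SHIFT;        // 0x800'0000
    const std::uint64_t va_range_split = 1ULL << 34;
    const std::uint64_t va_range_end = 1ULL << 37;

    // Values handed to the two VM::Allocator instances, in page units:
    const std::uint64_t start_pages = va_range_start >> PAGE_SIZE_BITS;     // small pages below the split
    const std::uint64_t end_pages = va_range_split >> PAGE_SIZE_BITS;
    const std::uint64_t start_big_pages = va_range_split >> big_page_size_bits;
    const std::uint64_t end_big_pages =
        (va_range_end - va_range_split) >> big_page_size_bits;              // big-page span above the split
    (void)start_pages; (void)end_pages; (void)start_big_pages; (void)end_big_pages;
}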
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index f5fb33ba7..1d27739e2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -5,14 +5,19 @@
5 5
6#pragma once 6#pragma once
7 7
8#include <bit>
9#include <list>
8#include <map> 10#include <map>
9#include <memory> 11#include <memory>
12#include <mutex>
10#include <optional> 13#include <optional>
11#include <vector> 14#include <vector>
12 15
16#include "common/address_space.h"
13#include "common/common_funcs.h" 17#include "common/common_funcs.h"
14#include "common/common_types.h" 18#include "common/common_types.h"
15#include "common/swap.h" 19#include "common/swap.h"
20#include "core/hle/service/nvdrv/core/nvmap.h"
16#include "core/hle/service/nvdrv/devices/nvdevice.h" 21#include "core/hle/service/nvdrv/devices/nvdevice.h"
17 22
18namespace Tegra { 23namespace Tegra {
@@ -30,17 +35,13 @@ class NvMap;
30 35
31namespace Service::Nvidia::Devices { 36namespace Service::Nvidia::Devices {
32 37
33constexpr u32 DEFAULT_BIG_PAGE_SIZE = 1 << 16; 38enum class MappingFlags : u32 {
34constexpr u32 DEFAULT_SMALL_PAGE_SIZE = 1 << 12; 39 None = 0,
35 40 Fixed = 1 << 0,
36class nvmap; 41 Sparse = 1 << 1,
37 42 Remap = 1 << 8,
38enum class AddressSpaceFlags : u32 {
39 None = 0x0,
40 FixedOffset = 0x1,
41 Remap = 0x100,
42}; 43};
43DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags); 44DECLARE_ENUM_FLAG_OPERATORS(MappingFlags);
44 45
45class nvhost_as_gpu final : public nvdevice { 46class nvhost_as_gpu final : public nvdevice {
46public: 47public:
@@ -59,46 +60,15 @@ public:
59 60
60 Kernel::KEvent* QueryEvent(u32 event_id) override; 61 Kernel::KEvent* QueryEvent(u32 event_id) override;
61 62
62private: 63 struct VaRegion {
63 class BufferMap final { 64 u64 offset;
64 public: 65 u32 page_size;
65 constexpr BufferMap() = default; 66 u32 _pad0_;
66 67 u64 pages;
67 constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_)
68 : start_addr{start_addr_}, end_addr{start_addr_ + size_} {}
69
70 constexpr BufferMap(GPUVAddr start_addr_, std::size_t size_, VAddr cpu_addr_,
71 bool is_allocated_)
72 : start_addr{start_addr_}, end_addr{start_addr_ + size_}, cpu_addr{cpu_addr_},
73 is_allocated{is_allocated_} {}
74
75 constexpr VAddr StartAddr() const {
76 return start_addr;
77 }
78
79 constexpr VAddr EndAddr() const {
80 return end_addr;
81 }
82
83 constexpr std::size_t Size() const {
84 return end_addr - start_addr;
85 }
86
87 constexpr VAddr CpuAddr() const {
88 return cpu_addr;
89 }
90
91 constexpr bool IsAllocated() const {
92 return is_allocated;
93 }
94
95 private:
96 GPUVAddr start_addr{};
97 GPUVAddr end_addr{};
98 VAddr cpu_addr{};
99 bool is_allocated{};
100 }; 68 };
69 static_assert(sizeof(VaRegion) == 0x18);
101 70
71private:
102 struct IoctlAllocAsEx { 72 struct IoctlAllocAsEx {
103 u32_le flags{}; // usually passes 1 73 u32_le flags{}; // usually passes 1
104 s32_le as_fd{}; // ignored; passes 0 74 s32_le as_fd{}; // ignored; passes 0
@@ -113,7 +83,7 @@ private:
113 struct IoctlAllocSpace { 83 struct IoctlAllocSpace {
114 u32_le pages{}; 84 u32_le pages{};
115 u32_le page_size{}; 85 u32_le page_size{};
116 AddressSpaceFlags flags{}; 86 MappingFlags flags{};
117 INSERT_PADDING_WORDS(1); 87 INSERT_PADDING_WORDS(1);
118 union { 88 union {
119 u64_le offset; 89 u64_le offset;
@@ -130,19 +100,19 @@ private:
130 static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size"); 100 static_assert(sizeof(IoctlFreeSpace) == 16, "IoctlFreeSpace is incorrect size");
131 101
132 struct IoctlRemapEntry { 102 struct IoctlRemapEntry {
133 u16_le flags{}; 103 u16 flags;
134 u16_le kind{}; 104 u16 kind;
135 u32_le nvmap_handle{}; 105 NvCore::NvMap::Handle::Id handle;
136 u32_le map_offset{}; 106 u32 handle_offset_big_pages;
137 u32_le offset{}; 107 u32 as_offset_big_pages;
138 u32_le pages{}; 108 u32 big_pages;
139 }; 109 };
140 static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size"); 110 static_assert(sizeof(IoctlRemapEntry) == 20, "IoctlRemapEntry is incorrect size");
141 111
142 struct IoctlMapBufferEx { 112 struct IoctlMapBufferEx {
143 AddressSpaceFlags flags{}; // bit0: fixed_offset, bit2: cacheable 113 MappingFlags flags{}; // bit0: fixed_offset, bit2: cacheable
144 u32_le kind{}; // -1 is default 114 u32_le kind{}; // -1 is default
145 u32_le nvmap_handle{}; 115 NvCore::NvMap::Handle::Id handle;
146 u32_le page_size{}; // 0 means don't care 116 u32_le page_size{}; // 0 means don't care
147 s64_le buffer_offset{}; 117 s64_le buffer_offset{};
148 u64_le mapping_size{}; 118 u64_le mapping_size{};
@@ -160,27 +130,15 @@ private:
160 }; 130 };
161 static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size"); 131 static_assert(sizeof(IoctlBindChannel) == 4, "IoctlBindChannel is incorrect size");
162 132
163 struct IoctlVaRegion {
164 u64_le offset{};
165 u32_le page_size{};
166 INSERT_PADDING_WORDS(1);
167 u64_le pages{};
168 };
169 static_assert(sizeof(IoctlVaRegion) == 24, "IoctlVaRegion is incorrect size");
170
171 struct IoctlGetVaRegions { 133 struct IoctlGetVaRegions {
172 u64_le buf_addr{}; // (contained output user ptr on linux, ignored) 134 u64_le buf_addr{}; // (contained output user ptr on linux, ignored)
173 u32_le buf_size{}; // forced to 2*sizeof(struct va_region) 135 u32_le buf_size{}; // forced to 2*sizeof(struct va_region)
174 u32_le reserved{}; 136 u32_le reserved{};
175 IoctlVaRegion small{}; 137 std::array<VaRegion, 2> regions{};
176 IoctlVaRegion big{};
177 }; 138 };
178 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(IoctlVaRegion) * 2, 139 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
179 "IoctlGetVaRegions is incorrect size"); 140 "IoctlGetVaRegions is incorrect size");
180 141
181 s32 channel{};
182 u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
183
184 NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output); 142 NvResult AllocAsEx(const std::vector<u8>& input, std::vector<u8>& output);
185 NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output); 143 NvResult AllocateSpace(const std::vector<u8>& input, std::vector<u8>& output);
186 NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output); 144 NvResult Remap(const std::vector<u8>& input, std::vector<u8>& output);
@@ -189,23 +147,74 @@ private:
189 NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output); 147 NvResult FreeSpace(const std::vector<u8>& input, std::vector<u8>& output);
190 NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output); 148 NvResult BindChannel(const std::vector<u8>& input, std::vector<u8>& output);
191 149
150 void GetVARegionsImpl(IoctlGetVaRegions& params);
192 NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output); 151 NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);
193 NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output, 152 NvResult GetVARegions(const std::vector<u8>& input, std::vector<u8>& output,
194 std::vector<u8>& inline_output); 153 std::vector<u8>& inline_output);
195 154
196 std::optional<BufferMap> FindBufferMap(GPUVAddr gpu_addr) const; 155 void FreeMappingLocked(u64 offset);
197 void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
198 std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
199 156
200 Module& module; 157 Module& module;
201 158
202 NvCore::Container& container; 159 NvCore::Container& container;
203 NvCore::NvMap& nvmap; 160 NvCore::NvMap& nvmap;
204 161
162 struct Mapping {
163 VAddr ptr;
164 u64 offset;
165 u64 size;
166 bool fixed;
167 bool big_page; // Only valid if fixed == false
168 bool sparse_alloc;
169
170 Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
171 : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
172 sparse_alloc(sparse_alloc_) {}
173 };
174
175 struct Allocation {
176 u64 size;
177 std::list<std::shared_ptr<Mapping>> mappings;
178 u32 page_size;
179 bool sparse;
180 };
181
182 std::map<u64, std::shared_ptr<Mapping>>
183 mapping_map; //!< This maps the base addresses of mapped buffers to their total sizes and
184                     //!< mapping type; this is needed as what was originally a single buffer may
185 //!< have been split into multiple GPU side buffers with the remap flag.
186 std::map<u64, Allocation> allocation_map; //!< Holds allocations created by AllocSpace from
187 //!< which fixed buffers can be mapped into
188 std::mutex mutex; //!< Locks all AS operations
189
190 struct VM {
191 static constexpr u32 YUZU_PAGESIZE{0x1000};
192 static constexpr u32 PAGE_SIZE_BITS{std::countr_zero(YUZU_PAGESIZE)};
193
194 static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000};
195 static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000};
196 u32 big_page_size{DEFAULT_BIG_PAGE_SIZE};
197 u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)};
198
199 static constexpr u32 VA_START_SHIFT{10};
200 static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34};
201 static constexpr u64 DEFAULT_VA_RANGE{1ULL << 37};
202 u64 va_range_start{DEFAULT_BIG_PAGE_SIZE << VA_START_SHIFT};
203 u64 va_range_split{DEFAULT_VA_SPLIT};
204 u64 va_range_end{DEFAULT_VA_RANGE};
205
206 using Allocator = Common::FlatAllocator<u32, 0, 32>;
207
208 std::unique_ptr<Allocator> big_page_allocator;
209 std::shared_ptr<Allocator>
210 small_page_allocator; //! Shared as this is also used by nvhost::GpuChannel
211
212 bool initialised{};
213 } vm;
205 std::shared_ptr<Tegra::MemoryManager> gmmu; 214 std::shared_ptr<Tegra::MemoryManager> gmmu;
206 215
207 // This is expected to be ordered, therefore we must use a map, not unordered_map 216 // s32 channel{};
208 std::map<GPUVAddr, BufferMap> buffer_mappings; 217 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
209}; 218};
210 219
211} // namespace Service::Nvidia::Devices 220} // namespace Service::Nvidia::Devices
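
The remap entries above express offsets and sizes in big-page units; a small sketch of the unit conversion the Remap handler applies (the struct mirrors IoctlRemapEntry, and the sample values are arbitrary):

#include <cstdint>

struct RemapEntrySketch {
    std::uint16_t flags;
    std::uint16_t kind;
    std::uint32_t handle;
    std::uint32_t handle_offset_big_pages;
    std::uint32_t as_offset_big_pages;
    std::uint32_t big_pages;
};

void RemapUnitsSketch() {
    const std::uint32_t big_page_size_bits = 16; // a 0x10000 big page in this example
    const RemapEntrySketch entry{0, 0, 1, 2, 0x100, 4};

    const std::uint64_t gpu_va =
        static_cast<std::uint64_t>(entry.as_offset_big_pages) << big_page_size_bits;     // 0x100'0000
    const std::uint64_t size =
        static_cast<std::uint64_t>(entry.big_pages) << big_page_size_bits;               // 0x4'0000
    const std::uint64_t handle_offset =
        static_cast<std::uint64_t>(entry.handle_offset_big_pages) << big_page_size_bits; // 0x2'0000
    (void)gpu_va; (void)size; (void)handle_offset;
}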
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 9e946d448..fc68bcc73 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -71,18 +71,22 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
71 rasterizer = rasterizer_; 71 rasterizer = rasterizer_;
72} 72}
73 73
74GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { 74GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
75 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size); 75 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
76} 76}
77 77
78GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
79 return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
80}
81
78GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) { 82GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
79 return Map(cpu_addr, *FindFreeRange(size, align), size); 83 return Map(*FindFreeRange(size, align), cpu_addr, size);
80} 84}
81 85
82GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) { 86GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
83 const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true); 87 const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
84 ASSERT(gpu_addr); 88 ASSERT(gpu_addr);
85 return Map(cpu_addr, *gpu_addr, size); 89 return Map(*gpu_addr, cpu_addr, size);
86} 90}
87 91
88void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { 92void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0a763fd19..b8878476a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -88,7 +88,8 @@ public:
88 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, 88 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
89 std::size_t size) const; 89 std::size_t size) const;
90 90
91 [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); 91 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
92 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
92 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); 93 [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
93 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); 94 [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
94 [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); 95 [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
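
With the reordered Map signature above, callers now pass the GPU VA first and can reserve sparse regions that have no CPU backing; a hedged usage sketch (addresses are arbitrary and memory_manager is assumed to be an initialised Tegra::MemoryManager):

const GPUVAddr gpu_addr = 0x100000;
const VAddr cpu_addr = 0x8000000;

memory_manager.Map(gpu_addr, cpu_addr, 0x10000);       // GPU VA now comes first
memory_manager.MapSparse(gpu_addr + 0x10000, 0x10000); // reserved, no CPU backing
memory_manager.Unmap(gpu_addr, 0x20000);               // drops both ranges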