author     liamwhite 2024-01-22 10:55:39 -0500
committer  GitHub 2024-01-22 10:55:39 -0500
commit     8bd10473d60503c7acddc399604a51b9c9947541 (patch)
tree       f713f84942681321fca27ba028e31d6c74a09013 /src/core
parent     Merge pull request #12747 from t895/homescreen-widget (diff)
parent     device_memory_manager: use unique_lock for update (diff)
Merge pull request #12579 from FernandoS27/smmu
Core: Implement Device Mapping & GPU SMMU
Diffstat (limited to 'src/core')
-rw-r--r--  src/core/CMakeLists.txt | 4
-rw-r--r--  src/core/core.cpp | 14
-rw-r--r--  src/core/core.h | 6
-rw-r--r--  src/core/device_memory.h | 16
-rw-r--r--  src/core/device_memory_manager.h | 211
-rw-r--r--  src/core/device_memory_manager.inc | 582
-rw-r--r--  src/core/gpu_dirty_memory_manager.h | 14
-rw-r--r--  src/core/guest_memory.h | 214
-rw-r--r--  src/core/hle/kernel/k_process.cpp | 14
-rw-r--r--  src/core/hle/kernel/k_process.h | 4
-rw-r--r--  src/core/hle/service/hle_ipc.cpp | 61
-rw-r--r--  src/core/hle/service/hle_ipc.h | 5
-rw-r--r--  src/core/hle/service/nvdrv/core/container.cpp | 114
-rw-r--r--  src/core/hle/service/nvdrv/core/container.h | 32
-rw-r--r--  src/core/hle/service/nvdrv/core/heap_mapper.cpp | 175
-rw-r--r--  src/core/hle/service/nvdrv/core/heap_mapper.h | 49
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.cpp | 120
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.h | 25
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdevice.h | 3
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 36
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 15
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 13
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 5
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 7
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp | 31
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.h | 7
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp | 27
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.h | 6
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv_interface.cpp | 34
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv_interface.h | 1
-rw-r--r--  src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp | 25
-rw-r--r--  src/core/hle/service/nvnflinger/fb_share_buffer_manager.h | 5
-rw-r--r--  src/core/hle/service/nvnflinger/nvnflinger.cpp | 2
-rw-r--r--  src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp | 2
-rw-r--r--  src/core/memory.cpp | 108
-rw-r--r--  src/core/memory.h | 211
49 files changed, 1774 insertions, 459 deletions
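
The centerpiece of the merge is the new DeviceMemoryManager<Traits> template (device_memory_manager.h/.inc below). Nothing in src/core defines the Traits argument; it is supplied by the GPU side of the PR. The following sketch is therefore only an illustration of the contract the template appears to expect, inferred from how Traits is used in this diff: ExampleDeviceTraits and its member signatures are hypothetical stand-ins, not code from the merge.

#include <cstddef>
#include <cstdint>

namespace Core::Memory {
class Memory; // stand-in forward declaration for yuzu's CPU memory class
}

// Hypothetical traits type (illustration only, inferred from usage in this diff).
struct ExampleDeviceTraits {
    // Consumed as Traits::device_virtual_bits: width of the device address space.
    static constexpr size_t device_virtual_bits = 34;

    // Bound with BindInterface(); ReadBlock/WriteBlock call FlushRegion and
    // InvalidateRegion on it around device memory accesses.
    struct DeviceInterface {
        void FlushRegion(uint64_t daddr, size_t size);
        void InvalidateRegion(uint64_t daddr, size_t size);
    };

    // Static hook invoked by UpdatePagesCachedCount when guest pages flip
    // between cached and uncached states.
    struct DeviceMethods {
        static void MarkRegionCaching(Core::Memory::Memory* memory, uint64_t vaddr,
                                      size_t size, bool cache);
    };
};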
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 16ddb5e90..4ff2c1bb7 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(core STATIC
     debugger/gdbstub_arch.h
     debugger/gdbstub.cpp
     debugger/gdbstub.h
+    device_memory_manager.h
+    device_memory_manager.inc
     device_memory.cpp
     device_memory.h
     file_sys/fssystem/fs_i_storage.h
@@ -609,6 +611,8 @@ add_library(core STATIC
     hle/service/ns/pdm_qry.h
     hle/service/nvdrv/core/container.cpp
     hle/service/nvdrv/core/container.h
+    hle/service/nvdrv/core/heap_mapper.cpp
+    hle/service/nvdrv/core/heap_mapper.h
     hle/service/nvdrv/core/nvmap.cpp
     hle/service/nvdrv/core/nvmap.h
     hle/service/nvdrv/core/syncpoint_manager.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 461eea9c8..2392fe136 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -28,6 +28,7 @@
28#include "core/file_sys/savedata_factory.h" 28#include "core/file_sys/savedata_factory.h"
29#include "core/file_sys/vfs_concat.h" 29#include "core/file_sys/vfs_concat.h"
30#include "core/file_sys/vfs_real.h" 30#include "core/file_sys/vfs_real.h"
31#include "core/gpu_dirty_memory_manager.h"
31#include "core/hle/kernel/k_memory_manager.h" 32#include "core/hle/kernel/k_memory_manager.h"
32#include "core/hle/kernel/k_process.h" 33#include "core/hle/kernel/k_process.h"
33#include "core/hle/kernel/k_resource_limit.h" 34#include "core/hle/kernel/k_resource_limit.h"
@@ -565,6 +566,9 @@ struct System::Impl {
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
 
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+        gpu_dirty_memory_managers;
+
     std::deque<std::vector<u8>> user_channel;
 };
 
@@ -651,8 +655,14 @@ size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
-void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
+std::span<GPUDirtyMemoryManager> System::GetGPUDirtyMemoryManager() {
+    return impl->gpu_dirty_memory_managers;
+}
+
+void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) {
+    for (auto& manager : impl->gpu_dirty_memory_managers) {
+        manager.Gather(callback);
+    }
 }
 
 PerfStatsResults System::GetAndResetPerfStats() {
diff --git a/src/core/core.h b/src/core/core.h
index ba5add0dc..80446f385 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -8,6 +8,7 @@
 #include <functional>
 #include <memory>
 #include <mutex>
+#include <span>
 #include <string>
 #include <vector>
 
@@ -116,6 +117,7 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
+class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
 class SpeedLimiter;
@@ -224,7 +226,9 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+    std::span<GPUDirtyMemoryManager> GetGPUDirtyMemoryManager();
+
+    void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
 
diff --git a/src/core/device_memory.h b/src/core/device_memory.h
index 13388b73e..11bf0e326 100644
--- a/src/core/device_memory.h
+++ b/src/core/device_memory.h
@@ -32,6 +32,12 @@ public:
     }
 
     template <typename T>
+    PAddr GetRawPhysicalAddr(const T* ptr) const {
+        return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) -
+                                  reinterpret_cast<uintptr_t>(buffer.BackingBasePointer()));
+    }
+
+    template <typename T>
     T* GetPointer(Common::PhysicalAddress addr) {
         return reinterpret_cast<T*>(buffer.BackingBasePointer() +
                                     (GetInteger(addr) - DramMemoryMap::Base));
@@ -43,6 +49,16 @@ public:
                                     (GetInteger(addr) - DramMemoryMap::Base));
     }
 
+    template <typename T>
+    T* GetPointerFromRaw(PAddr addr) {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
+    template <typename T>
+    const T* GetPointerFromRaw(PAddr addr) const {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
     Common::HostMemory buffer;
 };
 
48 64
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
new file mode 100644
index 000000000..ffeed46cc
--- /dev/null
+++ b/src/core/device_memory_manager.h
@@ -0,0 +1,211 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <deque>
+#include <memory>
+#include <mutex>
+
+#include "common/common_types.h"
+#include "common/scratch_buffer.h"
+#include "common/virtual_buffer.h"
+
+namespace Core {
+
+constexpr size_t DEVICE_PAGEBITS = 12ULL;
+constexpr size_t DEVICE_PAGESIZE = 1ULL << DEVICE_PAGEBITS;
+constexpr size_t DEVICE_PAGEMASK = DEVICE_PAGESIZE - 1ULL;
+
+class DeviceMemory;
+
+namespace Memory {
+class Memory;
+}
+
+template <typename DTraits>
+struct DeviceMemoryManagerAllocator;
+
+struct Asid {
+    size_t id;
+};
+
+template <typename Traits>
+class DeviceMemoryManager {
+    using DeviceInterface = typename Traits::DeviceInterface;
+    using DeviceMethods = typename Traits::DeviceMethods;
+
+public:
+    DeviceMemoryManager(const DeviceMemory& device_memory);
+    ~DeviceMemoryManager();
+
+    void BindInterface(DeviceInterface* device_inter);
+
+    DAddr Allocate(size_t size);
+    void AllocateFixed(DAddr start, size_t size);
+    void Free(DAddr start, size_t size);
+
+    void Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, bool track = false);
+
+    void Unmap(DAddr address, size_t size);
+
+    void TrackContinuityImpl(DAddr address, VAddr virtual_address, size_t size, Asid asid);
+    void TrackContinuity(DAddr address, VAddr virtual_address, size_t size, Asid asid) {
+        std::scoped_lock lk(mapping_guard);
+        TrackContinuityImpl(address, virtual_address, size, asid);
+    }
+
+    // Write / Read
+    template <typename T>
+    T* GetPointer(DAddr address);
+
+    template <typename T>
+    const T* GetPointer(DAddr address) const;
+
+    template <typename Func>
+    void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        DAddr subbits = static_cast<DAddr>(address & page_mask);
+        const u32 base = compressed_device_addr[(address >> page_bits)];
+        if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
+            const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
+            operation(d_address);
+            return;
+        }
+        InnerGatherDeviceAddresses(buffer, address);
+        for (u32 value : buffer) {
+            operation((static_cast<DAddr>(value) << page_bits) + subbits);
+        }
+    }
+
+    template <typename Func>
+    void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        PAddr address = GetRawPhysicalAddr<u8>(p);
+        ApplyOpOnPAddr(address, buffer, operation);
+    }
+
+    PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
+        PAddr subbits = static_cast<PAddr>(address & page_mask);
+        auto paddr = compressed_physical_ptr[(address >> page_bits)];
+        if (paddr == 0) {
+            return 0;
+        }
+        return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
+    }
+
+    template <typename T>
+    void Write(DAddr address, T value);
+
+    template <typename T>
+    T Read(DAddr address) const;
+
+    u8* GetSpan(const DAddr src_addr, const std::size_t size);
+    const u8* GetSpan(const DAddr src_addr, const std::size_t size) const;
+
+    void ReadBlock(DAddr address, void* dest_pointer, size_t size);
+    void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
+    void WriteBlock(DAddr address, const void* src_pointer, size_t size);
+    void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
+
+    Asid RegisterProcess(Memory::Memory* memory);
+    void UnregisterProcess(Asid id);
+
+    void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
+
+    static constexpr size_t AS_BITS = Traits::device_virtual_bits;
+
+private:
+    static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
+    static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
+    static constexpr size_t physical_min_bits = 32;
+    static constexpr size_t physical_max_bits = 33;
+    static constexpr size_t page_bits = 12;
+    static constexpr size_t page_size = 1ULL << page_bits;
+    static constexpr size_t page_mask = page_size - 1ULL;
+    static constexpr u32 physical_address_base = 1U << page_bits;
+    static constexpr u32 MULTI_FLAG_BITS = 31;
+    static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
+    static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
+
+    template <typename T>
+    T* GetPointerFromRaw(PAddr addr) {
+        return reinterpret_cast<T*>(physical_base + addr);
+    }
+
+    template <typename T>
+    const T* GetPointerFromRaw(PAddr addr) const {
+        return reinterpret_cast<T*>(physical_base + addr);
+    }
+
+    template <typename T>
+    PAddr GetRawPhysicalAddr(const T* ptr) const {
+        return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - physical_base);
+    }
+
+    void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
+                   auto increment);
+
+    void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
+
+    std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
+
+    const uintptr_t physical_base;
+    DeviceInterface* device_inter;
+    Common::VirtualBuffer<u32> compressed_physical_ptr;
+    Common::VirtualBuffer<u32> compressed_device_addr;
+    Common::VirtualBuffer<u32> continuity_tracker;
+
+    // Process memory interfaces
+
+    std::deque<size_t> id_pool;
+    std::deque<Memory::Memory*> registered_processes;
+
+    // Memory protection management
+
+    static constexpr size_t guest_max_as_bits = 39;
+    static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
+    static constexpr size_t guest_mask = guest_as_size - 1ULL;
+    static constexpr size_t asid_start_bit = guest_max_as_bits;
+
+    std::pair<Asid, VAddr> ExtractCPUBacking(size_t page_index) {
+        auto content = cpu_backing_address[page_index];
+        const VAddr address = content & guest_mask;
+        const Asid asid{static_cast<size_t>(content >> asid_start_bit)};
+        return std::make_pair(asid, address);
+    }
+
+    void InsertCPUBacking(size_t page_index, VAddr address, Asid asid) {
+        cpu_backing_address[page_index] = address | (asid.id << asid_start_bit);
+    }
+
+    Common::VirtualBuffer<VAddr> cpu_backing_address;
+    static constexpr size_t subentries = 8 / sizeof(u8);
+    static constexpr size_t subentries_mask = subentries - 1;
+    class CounterEntry final {
+    public:
+        CounterEntry() = default;
+
+        std::atomic_uint8_t& Count(std::size_t page) {
+            return values[page & subentries_mask];
+        }
+
+        const std::atomic_uint8_t& Count(std::size_t page) const {
+            return values[page & subentries_mask];
+        }
+
+    private:
+        std::array<std::atomic_uint8_t, subentries> values{};
+    };
+    static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
+                  "CounterEntry should be 8 bytes!");
+
+    static constexpr size_t num_counter_entries =
+        (1ULL << (device_virtual_bits - page_bits)) / subentries;
+    using CachedPages = std::array<CounterEntry, num_counter_entries>;
+    std::unique_ptr<CachedPages> cached_pages;
+    std::mutex counter_guard;
+    std::mutex mapping_guard;
+};
+
+} // namespace Core
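
Both page tables above hold 32-bit entries instead of pointers. The forward table (compressed_physical_ptr) stores the backing physical page number plus one, so zero can stand for "unmapped"; the reverse table (compressed_device_addr) stores a single device page, or MULTI_FLAG plus a pool index once one physical page is aliased by several device pages. A minimal standalone model of the round trip, with illustrative table sizes:

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr size_t page_bits = 12;
constexpr uint32_t MULTI_FLAG = 1U << 31;

int main() {
    std::vector<uint32_t> compressed_physical_ptr(1 << 10, 0); // device page -> phys page + 1
    std::vector<uint32_t> compressed_device_addr(1 << 10, 0);  // phys page -> device page

    // Map device page 5 to physical page 3: store phys_page + 1 so that 0 can
    // mean "unmapped" (GetPointer returns nullptr for zero entries).
    const uint32_t phys_page = 3;
    compressed_physical_ptr[5] = phys_page + 1;
    compressed_device_addr[phys_page] = 5;

    // Forward translation, as in GetPointer / GetPhysicalRawAddressFromDAddr:
    const uint64_t daddr = (5ULL << page_bits) + 0x123;
    const uint32_t entry = compressed_physical_ptr[daddr >> page_bits];
    if (entry != 0) {
        const uint64_t paddr =
            (uint64_t(entry - 1) << page_bits) + (daddr & ((1 << page_bits) - 1));
        std::printf("daddr 0x%llx -> paddr 0x%llx\n", (unsigned long long)daddr,
                    (unsigned long long)paddr);
    }

    // If a second device page later aliases the same physical page, the reverse
    // entry becomes MULTI_FLAG | index into a linked-list pool (see
    // MultiAddressContainer in the .inc file); this sketch only shows the flag.
    compressed_device_addr[phys_page] = MULTI_FLAG | 1;
    return 0;
}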
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
new file mode 100644
index 000000000..8ce122872
--- /dev/null
+++ b/src/core/device_memory_manager.inc
@@ -0,0 +1,582 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <atomic>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "common/address_space.h"
+#include "common/address_space.inc"
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/div_ceil.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "core/device_memory.h"
+#include "core/device_memory_manager.h"
+#include "core/memory.h"
+
+namespace Core {
+
+namespace {
+
+class MultiAddressContainer {
+public:
+    MultiAddressContainer() = default;
+    ~MultiAddressContainer() = default;
+
+    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
+        buffer.resize(8);
+        buffer.resize(0);
+        size_t index = 0;
+        const auto add_value = [&](u32 value) {
+            buffer[index] = value;
+            index++;
+            buffer.resize(index);
+        };
+
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        add_value(current->value);
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+            add_value(current->value);
+        }
+    }
+
+    u32 Register(u32 value) {
+        return RegisterImplementation(value);
+    }
+
+    void Register(u32 value, u32 start_entry) {
+        auto entry_id = RegisterImplementation(value);
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        current->next_entry = entry_id;
+    }
+
+    std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
+        u32 iter_entry = start_entry;
+        Entry* previous{};
+        Entry* current = &storage[iter_entry - 1];
+        Entry* next{};
+        bool more_than_one_remaining = false;
+        u32 result_start{start_entry};
+        size_t count = 0;
+        while (current->value != value) {
+            count++;
+            previous = current;
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        // Find next
+        u32 next_entry = current->next_entry;
+        if (next_entry != 0) {
+            next = &storage[next_entry - 1];
+            more_than_one_remaining = next->next_entry != 0 || previous != nullptr;
+        }
+        if (previous) {
+            previous->next_entry = next_entry;
+        } else {
+            result_start = next_entry;
+        }
+        free_entries.emplace_back(iter_entry);
+        return std::make_pair(more_than_one_remaining || count > 1, result_start);
+    }
+
+    u32 ReleaseEntry(u32 start_entry) {
+        Entry* current = &storage[start_entry - 1];
+        free_entries.emplace_back(start_entry);
+        return current->value;
+    }
+
+private:
+    u32 RegisterImplementation(u32 value) {
+        auto entry_id = GetNewEntry();
+        auto& entry = storage[entry_id - 1];
+        entry.next_entry = 0;
+        entry.value = value;
+        return entry_id;
+    }
+    u32 GetNewEntry() {
+        if (!free_entries.empty()) {
+            u32 result = free_entries.front();
+            free_entries.pop_front();
+            return result;
+        }
+        storage.emplace_back();
+        u32 new_entry = static_cast<u32>(storage.size());
+        return new_entry;
+    }
+
+    struct Entry {
+        u32 next_entry{};
+        u32 value{};
+    };
+
+    std::deque<Entry> storage;
+    std::deque<u32> free_entries;
+};
+
+struct EmptyAllocator {
+    EmptyAllocator([[maybe_unused]] DAddr address) {}
+};
+
+} // namespace
+
+template <typename DTraits>
+struct DeviceMemoryManagerAllocator {
+    static constexpr size_t device_virtual_bits = DTraits::device_virtual_bits;
+    static constexpr DAddr first_address = 1ULL << Memory::YUZU_PAGEBITS;
+    static constexpr DAddr max_device_area = 1ULL << device_virtual_bits;
+
+    DeviceMemoryManagerAllocator() : main_allocator(first_address) {}
+
+    Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
+    MultiAddressContainer multi_dev_address;
+
+    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
+    template <bool pin_area>
+    [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
+        return addr >= 0 && addr + size <= max_device_area;
+    }
+
+    DAddr Allocate(size_t size) {
+        return main_allocator.Allocate(size);
+    }
+
+    void AllocateFixed(DAddr b_address, size_t b_size) {
+        main_allocator.AllocateFixed(b_address, b_size);
+    }
+
+    void Free(DAddr b_address, size_t b_size) {
+        main_allocator.Free(b_address, b_size);
+    }
+};
+
+template <typename Traits>
+DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
+    : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
+      device_inter{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
+      compressed_device_addr(1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
+                                               Settings::MemoryLayout::Memory_4Gb
+                                           ? physical_min_bits
+                                           : physical_max_bits) -
+                                      Memory::YUZU_PAGEBITS)),
+      continuity_tracker(device_as_size >> Memory::YUZU_PAGEBITS),
+      cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
+    impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
+    cached_pages = std::make_unique<CachedPages>();
+
+    const size_t total_virtual = device_as_size >> Memory::YUZU_PAGEBITS;
+    for (size_t i = 0; i < total_virtual; i++) {
+        compressed_physical_ptr[i] = 0;
+        continuity_tracker[i] = 1;
+        cpu_backing_address[i] = 0;
+    }
+    const size_t total_phys = 1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
+                                                Settings::MemoryLayout::Memory_4Gb
+                                            ? physical_min_bits
+                                            : physical_max_bits) -
+                                       Memory::YUZU_PAGEBITS);
+    for (size_t i = 0; i < total_phys; i++) {
+        compressed_device_addr[i] = 0;
+    }
+}
+
+template <typename Traits>
+DeviceMemoryManager<Traits>::~DeviceMemoryManager() = default;
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::BindInterface(DeviceInterface* device_inter_) {
+    device_inter = device_inter_;
+}
+
+template <typename Traits>
+DAddr DeviceMemoryManager<Traits>::Allocate(size_t size) {
+    return impl->Allocate(size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::AllocateFixed(DAddr start, size_t size) {
+    return impl->AllocateFixed(start, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
+    impl->Free(start, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
+                                      Asid asid, bool track) {
+    Core::Memory::Memory* process_memory = registered_processes[asid.id];
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    std::scoped_lock lk(mapping_guard);
+    for (size_t i = 0; i < num_pages; i++) {
+        const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
+        auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
+        if (ptr == nullptr) [[unlikely]] {
+            compressed_physical_ptr[start_page_d + i] = 0;
+            continue;
+        }
+        auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
+        compressed_physical_ptr[start_page_d + i] = phys_addr;
+        InsertCPUBacking(start_page_d + i, new_vaddress, asid);
+        const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+        const u32 new_dev = static_cast<u32>(start_page_d + i);
+        if (base_dev == 0) [[likely]] {
+            compressed_device_addr[phys_addr - 1U] = new_dev;
+            continue;
+        }
+        u32 start_id = base_dev & MULTI_MASK;
+        if ((base_dev >> MULTI_FLAG_BITS) == 0) {
+            start_id = impl->multi_dev_address.Register(base_dev);
+            compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
+        }
+        impl->multi_dev_address.Register(new_dev, start_id);
+    }
+    if (track) {
+        TrackContinuityImpl(address, virtual_address, size, asid);
+    }
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    device_inter->InvalidateRegion(address, size);
+    std::scoped_lock lk(mapping_guard);
+    for (size_t i = 0; i < num_pages; i++) {
+        auto phys_addr = compressed_physical_ptr[start_page_d + i];
+        compressed_physical_ptr[start_page_d + i] = 0;
+        cpu_backing_address[start_page_d + i] = 0;
+        if (phys_addr != 0) [[likely]] {
+            const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+            if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
+                compressed_device_addr[phys_addr - 1] = 0;
+                continue;
+            }
+            const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
+                static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
+            if (!more_entries) {
+                compressed_device_addr[phys_addr - 1] =
+                    impl->multi_dev_address.ReleaseEntry(new_start);
+                continue;
+            }
+            compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
+        }
+    }
+}
+template <typename Traits>
+void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
+                                                      size_t size, Asid asid) {
+    Core::Memory::Memory* process_memory = registered_processes[asid.id];
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    uintptr_t last_ptr = 0;
+    size_t page_count = 1;
+    for (size_t i = num_pages; i > 0; i--) {
+        size_t index = i - 1;
+        const VAddr new_vaddress = virtual_address + index * Memory::YUZU_PAGESIZE;
+        const uintptr_t new_ptr = reinterpret_cast<uintptr_t>(
+            process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)));
+        if (new_ptr + page_size == last_ptr) {
+            page_count++;
+        } else {
+            page_count = 1;
+        }
+        last_ptr = new_ptr;
+        continuity_tracker[start_page_d + index] = static_cast<u32>(page_count);
+    }
+}
+template <typename Traits>
+u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) {
+    size_t page_index = src_addr >> page_bits;
+    size_t subbits = src_addr & page_mask;
+    if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
+        return GetPointer<u8>(src_addr);
+    }
+    return nullptr;
+}
+
+template <typename Traits>
+const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) const {
+    size_t page_index = src_addr >> page_bits;
+    size_t subbits = src_addr & page_mask;
+    if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
+        return GetPointer<u8>(src_addr);
+    }
+    return nullptr;
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
+                                                             PAddr address) {
+    size_t phys_addr = address >> page_bits;
+    std::scoped_lock lk(mapping_guard);
+    u32 backing = compressed_device_addr[phys_addr];
+    if ((backing >> MULTI_FLAG_BITS) != 0) {
+        impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
+        return;
+    }
+    buffer.resize(1);
+    buffer[0] = backing;
+}
+
+template <typename Traits>
+template <typename T>
+T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
+    const size_t index = address >> Memory::YUZU_PAGEBITS;
+    const size_t offset = address & Memory::YUZU_PAGEMASK;
+    auto phys_addr = compressed_physical_ptr[index];
+    if (phys_addr == 0) [[unlikely]] {
+        return nullptr;
+    }
+    return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
+                                offset);
+}
+
+template <typename Traits>
+template <typename T>
+const T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) const {
+    const size_t index = address >> Memory::YUZU_PAGEBITS;
+    const size_t offset = address & Memory::YUZU_PAGEMASK;
+    auto phys_addr = compressed_physical_ptr[index];
+    if (phys_addr == 0) [[unlikely]] {
+        return nullptr;
+    }
+    return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
+                                offset);
+}
+
+template <typename Traits>
+template <typename T>
+void DeviceMemoryManager<Traits>::Write(DAddr address, T value) {
+    T* ptr = GetPointer<T>(address);
+    if (!ptr) [[unlikely]] {
+        return;
+    }
+    std::memcpy(ptr, &value, sizeof(T));
+}
+
+template <typename Traits>
+template <typename T>
+T DeviceMemoryManager<Traits>::Read(DAddr address) const {
+    const T* ptr = GetPointer<T>(address);
+    T result{};
+    if (!ptr) [[unlikely]] {
+        return result;
+    }
+    std::memcpy(&result, ptr, sizeof(T));
+    return result;
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped,
+                                            auto on_memory, auto increment) {
+    std::size_t remaining_size = size;
+    std::size_t page_index = addr >> Memory::YUZU_PAGEBITS;
+    std::size_t page_offset = addr & Memory::YUZU_PAGEMASK;
+
+    while (remaining_size) {
+        const size_t next_pages = static_cast<std::size_t>(continuity_tracker[page_index]);
+        const std::size_t copy_amount =
+            std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size);
+        const auto current_vaddr =
+            static_cast<u64>((page_index << Memory::YUZU_PAGEBITS) + page_offset);
+        SCOPE_EXIT({
+            page_index += next_pages;
+            page_offset = 0;
+            increment(copy_amount);
+            remaining_size -= copy_amount;
+        });
+
+        auto phys_addr = compressed_physical_ptr[page_index];
+        if (phys_addr == 0) {
+            on_unmapped(copy_amount, current_vaddr);
+            continue;
+        }
+        auto* mem_ptr = GetPointerFromRaw<u8>(
+            (static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + page_offset);
+        on_memory(copy_amount, mem_ptr);
+    }
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
+    device_inter->FlushRegion(address, size);
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+            std::memset(dest_pointer, 0, copy_amount);
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+        },
+        [&](size_t copy_amount, u8* const dst_ptr) {
+            std::memcpy(dst_ptr, src_pointer, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
+        });
+    device_inter->InvalidateRegion(address, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+            std::memset(dest_pointer, 0, copy_amount);
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
+                                                   size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+        },
+        [&](size_t copy_amount, u8* const dst_ptr) {
+            std::memcpy(dst_ptr, src_pointer, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+Asid DeviceMemoryManager<Traits>::RegisterProcess(Memory::Memory* memory_device_inter) {
+    size_t new_id{};
+    if (!id_pool.empty()) {
+        new_id = id_pool.front();
+        id_pool.pop_front();
+        registered_processes[new_id] = memory_device_inter;
+    } else {
+        registered_processes.emplace_back(memory_device_inter);
+        new_id = registered_processes.size() - 1U;
+    }
+    return Asid{new_id};
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
+    registered_processes[asid.id] = nullptr;
+    id_pool.push_front(asid.id);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
+    std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
+    const auto Lock = [&] {
+        if (!lk) {
+            lk.lock();
+        }
+    };
+    u64 uncache_begin = 0;
+    u64 cache_begin = 0;
+    u64 uncache_bytes = 0;
+    u64 cache_bytes = 0;
+    const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
+
+    std::atomic_thread_fence(std::memory_order_acquire);
+    const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
+    size_t page = addr >> Memory::YUZU_PAGEBITS;
+    auto [asid, base_vaddress] = ExtractCPUBacking(page);
+    size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
+    auto* memory_device_inter = registered_processes[asid.id];
+    for (; page != page_end; ++page) {
+        std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
+
+        if (delta > 0) {
+            ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
+                       "Count may overflow!");
+        } else if (delta < 0) {
+            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
+        } else {
+            ASSERT_MSG(false, "Delta must be non-zero!");
+        }
+
+        // Adds or subtracts 1, as count is a unsigned 8-bit value
+        count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
+
+        // Assume delta is either -1 or 1
+        if (count.load(std::memory_order::relaxed) == 0) {
+            if (uncache_bytes == 0) {
+                uncache_begin = vpage;
+            }
+            uncache_bytes += Memory::YUZU_PAGESIZE;
+        } else if (uncache_bytes > 0) {
+            Lock();
+            MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+                              uncache_bytes, false);
+            uncache_bytes = 0;
+        }
+        if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
+            if (cache_bytes == 0) {
+                cache_begin = vpage;
+            }
+            cache_bytes += Memory::YUZU_PAGESIZE;
+        } else if (cache_bytes > 0) {
+            Lock();
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+                              cache_bytes, true);
+            cache_bytes = 0;
+        }
+        vpage++;
+    }
+    if (uncache_bytes > 0) {
+        Lock();
+        MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+                          uncache_bytes, false);
+    }
+    if (cache_bytes > 0) {
+        Lock();
+        MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
+                          true);
+    }
+}
+
+} // namespace Core
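
TrackContinuityImpl walks the mapped range backwards so each page's entry can be derived from its successor's: continuity_tracker[page] ends up holding the number of host-contiguous pages that start at that page, which is what lets GetSpan and WalkBlock satisfy a multi-page access with a single memcpy. A standalone rerun of that loop over fabricated host pointers:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    constexpr uint64_t page_size = 0x1000;
    // Host backing of 6 device pages: pages 0-2 contiguous, pages 3-5 contiguous.
    std::vector<uint64_t> host_ptr = {0x10000, 0x11000, 0x12000, 0x80000, 0x81000, 0x82000};
    std::vector<uint32_t> continuity(host_ptr.size(), 1);

    uint64_t last_ptr = 0;
    uint32_t page_count = 1;
    for (size_t i = host_ptr.size(); i > 0; i--) {
        const size_t index = i - 1;
        // A page extends the run iff it sits exactly one page before its successor.
        if (host_ptr[index] + page_size == last_ptr) {
            page_count++;
        } else {
            page_count = 1;
        }
        last_ptr = host_ptr[index];
        continuity[index] = page_count;
    }
    // Prints "3 2 1 3 2 1": a lookup at any page tells how far one copy may run.
    for (uint32_t c : continuity) {
        std::printf("%u ", c);
    }
    std::printf("\n");
    return 0;
}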
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9687531e8..cc8fc176f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -10,7 +10,7 @@
 #include <utility>
 #include <vector>
 
-#include "core/memory.h"
+#include "core/device_memory_manager.h"
 
 namespace Core {
 
@@ -23,7 +23,7 @@ public:
 
     ~GPUDirtyMemoryManager() = default;
 
-    void Collect(VAddr address, size_t size) {
+    void Collect(PAddr address, size_t size) {
         TransformAddress t = BuildTransform(address, size);
         TransformAddress tmp, original;
         do {
@@ -47,7 +47,7 @@ public:
                                                 std::memory_order_relaxed));
     }
 
-    void Gather(std::function<void(VAddr, size_t)>& callback) {
+    void Gather(std::function<void(PAddr, size_t)>& callback) {
         {
             std::scoped_lock lk(guard);
             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
@@ -65,7 +65,7 @@ public:
                 mask = mask >> empty_bits;
 
                 const size_t continuous_bits = std::countr_one(mask);
-                callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
+                callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
                          continuous_bits << align_bits);
                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
                 offset += continuous_bits << align_bits;
@@ -80,7 +80,7 @@ private:
         u32 mask;
     };
 
-    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
+    constexpr static size_t page_bits = DEVICE_PAGEBITS - 1;
     constexpr static size_t page_size = 1ULL << page_bits;
     constexpr static size_t page_mask = page_size - 1;
 
@@ -89,7 +89,7 @@ private:
     constexpr static size_t align_mask = align_size - 1;
     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
 
-    bool IsValid(VAddr address) {
+    bool IsValid(PAddr address) {
         return address < (1ULL << 39);
     }
 
@@ -103,7 +103,7 @@ private:
         return mask;
     }
 
-    TransformAddress BuildTransform(VAddr address, size_t size) {
+    TransformAddress BuildTransform(PAddr address, size_t size) {
         const size_t minor_address = address & page_mask;
         const size_t minor_bit = minor_address >> align_bits;
         const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
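
With this change the dirty tracker keys on physical addresses, but the decode logic is unchanged: Gather turns each TransformAddress bitmask back into (address, size) ranges with std::countr_zero and std::countr_one. A standalone decode of one such mask; the align_bits value and the mask here are illustrative, not the class's actual constants:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr size_t align_bits = 6;       // one mask bit covers 64 bytes in this demo
    uint32_t mask = 0b0000'1110'0000'0111; // two dirty runs of 3 bits each
    const uint64_t base = 0x1000;          // page base address
    size_t offset = 0;
    while (mask != 0) {
        const size_t empty_bits = std::countr_zero(mask); // skip the clean stretch
        offset += empty_bits << align_bits;
        mask >>= empty_bits;
        const size_t continuous_bits = std::countr_one(mask); // measure the dirty run
        std::printf("dirty range: 0x%llx + 0x%zx bytes\n",
                    (unsigned long long)(base + offset), continuous_bits << align_bits);
        mask = continuous_bits < 32 ? (mask >> continuous_bits) : 0;
        offset += continuous_bits << align_bits;
    }
    return 0;
}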
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h
new file mode 100644
index 000000000..7ee18c126
--- /dev/null
+++ b/src/core/guest_memory.h
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <span>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/scratch_buffer.h"
+
+namespace Core::Memory {
+
+enum GuestMemoryFlags : u32 {
+    Read = 1 << 0,
+    Write = 1 << 1,
+    Safe = 1 << 2,
+    Cached = 1 << 3,
+
+    SafeRead = Read | Safe,
+    SafeWrite = Write | Safe,
+    SafeReadWrite = SafeRead | SafeWrite,
+    SafeReadCachedWrite = SafeReadWrite | Cached,
+
+    UnsafeRead = Read,
+    UnsafeWrite = Write,
+    UnsafeReadWrite = UnsafeRead | UnsafeWrite,
+    UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
+};
+
+namespace {
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemory {
+    using iterator = T*;
+    using const_iterator = const T*;
+    using value_type = T;
+    using element_type = T;
+    using iterator_category = std::contiguous_iterator_tag;
+
+public:
+    GuestMemory() = delete;
+    explicit GuestMemory(M& memory, u64 addr, std::size_t size,
+                         Common::ScratchBuffer<T>* backup = nullptr)
+        : m_memory{memory}, m_addr{addr}, m_size{size} {
+        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
+        if constexpr (FLAGS & GuestMemoryFlags::Read) {
+            Read(addr, size, backup);
+        }
+    }
+
+    ~GuestMemory() = default;
+
+    T* data() noexcept {
+        return m_data_span.data();
+    }
+
+    const T* data() const noexcept {
+        return m_data_span.data();
+    }
+
+    size_t size() const noexcept {
+        return m_size;
+    }
+
+    size_t size_bytes() const noexcept {
+        return this->size() * sizeof(T);
+    }
+
+    [[nodiscard]] T* begin() noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] const T* begin() const noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] T* end() noexcept {
+        return this->data() + this->size();
+    }
+
+    [[nodiscard]] const T* end() const noexcept {
+        return this->data() + this->size();
+    }
+
+    T& operator[](size_t index) noexcept {
+        return m_data_span[index];
+    }
+
+    const T& operator[](size_t index) const noexcept {
+        return m_data_span[index];
+    }
+
+    void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
+        m_addr = addr;
+        m_size = size;
+        m_addr_changed = true;
+    }
+
+    std::span<T> Read(u64 addr, std::size_t size,
+                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
+        m_addr = addr;
+        m_size = size;
+        if (m_size == 0) {
+            m_is_data_copy = true;
+            return {};
+        }
+
+        if (this->TrySetSpan()) {
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.FlushRegion(m_addr, this->size_bytes());
+            }
+        } else {
+            if (backup) {
+                backup->resize_destructive(this->size());
+                m_data_span = *backup;
+            } else {
+                m_data_copy.resize(this->size());
+                m_data_span = std::span(m_data_copy);
+            }
+            m_is_data_copy = true;
+            m_span_valid = true;
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
+            } else {
+                m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
+            }
+        }
+        return m_data_span;
+    }
+
+    void Write(std::span<T> write_data) noexcept {
+        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+            m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
+        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+            m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
+        } else {
+            m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
+        }
+    }
+
+    bool TrySetSpan() noexcept {
+        if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
+            m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
+            m_span_valid = true;
+            return true;
+        }
+        return false;
+    }
+
+protected:
+    bool IsDataCopy() const noexcept {
+        return m_is_data_copy;
+    }
+
+    bool AddressChanged() const noexcept {
+        return m_addr_changed;
+    }
+
+    M& m_memory;
+    u64 m_addr{};
+    size_t m_size{};
+    std::span<T> m_data_span{};
+    std::vector<T> m_data_copy{};
+    bool m_span_valid{false};
+    bool m_is_data_copy{false};
+    bool m_addr_changed{false};
+};
+
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
+public:
+    GuestMemoryScoped() = delete;
+    explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
+                               Common::ScratchBuffer<T>* backup = nullptr)
+        : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
+        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
+            if (!this->TrySetSpan()) {
+                if (backup) {
+                    this->m_data_span = *backup;
+                    this->m_span_valid = true;
+                    this->m_is_data_copy = true;
+                }
+            }
+        }
+    }
+
+    ~GuestMemoryScoped() {
+        if constexpr (FLAGS & GuestMemoryFlags::Write) {
+            if (this->size() == 0) [[unlikely]] {
+                return;
+            }
+
+            if (this->AddressChanged() || this->IsDataCopy()) {
+                ASSERT(this->m_span_valid);
+                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+                    this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
+                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                    this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
+                } else {
+                    this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
+                }
+            } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
+                                 (FLAGS & GuestMemoryFlags::Cached)) {
+                this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
+            }
+        }
+    }
+};
+} // namespace
+
+} // namespace Core::Memory
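
GuestMemoryScoped gives callers a read-modify-write view of guest memory: the constructor pulls the range in (or aliases it directly when GetSpan finds contiguous backing) and the destructor writes it back or invalidates caches according to FLAGS. Below is a deliberately stripped-down standalone model of that RAII shape; FakeMemory and ScopedGuestSpan are inventions for the demo and elide the span-aliasing and flag machinery:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

struct FakeMemory { // stand-in for the M parameter of GuestMemory<M, T, FLAGS>
    std::vector<uint8_t> backing = std::vector<uint8_t>(0x100, 0);
    void ReadBlock(uint64_t addr, void* dest, size_t size) {
        std::memcpy(dest, backing.data() + addr, size);
    }
    void WriteBlock(uint64_t addr, const void* src, size_t size) {
        std::memcpy(backing.data() + addr, src, size);
    }
};

template <typename M>
class ScopedGuestSpan { // models GuestMemoryScoped<M, u8, SafeReadWrite>
public:
    ScopedGuestSpan(M& memory, uint64_t addr, size_t size)
        : m_memory{memory}, m_addr{addr}, m_copy(size) {
        m_memory.ReadBlock(m_addr, m_copy.data(), m_copy.size()); // Read flag behavior
    }
    ~ScopedGuestSpan() {
        m_memory.WriteBlock(m_addr, m_copy.data(), m_copy.size()); // Write flag behavior
    }
    uint8_t& operator[](size_t i) { return m_copy[i]; }

private:
    M& m_memory;
    uint64_t m_addr;
    std::vector<uint8_t> m_copy;
};

int main() {
    FakeMemory mem;
    {
        ScopedGuestSpan<FakeMemory> span(mem, 0x10, 4);
        span[0] = 0xAA; // mutate through the view; written back at scope exit
    }
    std::printf("0x%02X\n", mem.backing[0x10]); // prints 0xAA
    return 0;
}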
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 53735a225..0b08e877e 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -5,6 +5,7 @@
5#include "common/scope_exit.h" 5#include "common/scope_exit.h"
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/gpu_dirty_memory_manager.h"
8#include "core/hle/kernel/k_process.h" 9#include "core/hle/kernel/k_process.h"
9#include "core/hle/kernel/k_scoped_resource_reservation.h" 10#include "core/hle/kernel/k_scoped_resource_reservation.h"
10#include "core/hle/kernel/k_shared_memory.h" 11#include "core/hle/kernel/k_shared_memory.h"
@@ -320,7 +321,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
 
     // Ensure our memory is initialized.
     m_memory.SetCurrentPageTable(*this);
-    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+    m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager());
 
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize,
@@ -417,7 +418,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
 
     // Ensure our memory is initialized.
     m_memory.SetCurrentPageTable(*this);
-    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+    m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager());
 
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code),
@@ -1141,8 +1142,7 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {}
 KProcess::KProcess(KernelCore& kernel)
     : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel},
       m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()},
-      m_handle_table{kernel}, m_dirty_memory_managers{},
-      m_exclusive_monitor{}, m_memory{kernel.System()} {}
+      m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {}
 KProcess::~KProcess() = default;
 
 Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
@@ -1324,10 +1324,4 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT
     return true;
 }
 
-void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    for (auto& manager : m_dirty_memory_managers) {
-        manager.Gather(callback);
-    }
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index 53c0e3316..ab1358a12 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -7,7 +7,6 @@
 
 #include "core/arm/arm_interface.h"
 #include "core/file_sys/program_metadata.h"
-#include "core/gpu_dirty_memory_manager.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_capabilities.h"
@@ -128,7 +127,6 @@ private:
 #ifdef HAS_NCE
     std::unordered_map<u64, u64> m_post_handlers{};
 #endif
-    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers;
     std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor;
     Core::Memory::Memory m_memory;
 
@@ -511,8 +509,6 @@ public:
         return m_memory;
     }
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
-
     Core::ExclusiveMonitor& GetExclusiveMonitor() const {
         return *m_exclusive_monitor;
     }
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp
index 3f38ceb03..e491dd260 100644
--- a/src/core/hle/service/hle_ipc.cpp
+++ b/src/core/hle/service/hle_ipc.cpp
@@ -12,6 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "common/scratch_buffer.h" 14#include "common/scratch_buffer.h"
15#include "core/guest_memory.h"
15#include "core/hle/kernel/k_auto_object.h" 16#include "core/hle/kernel/k_auto_object.h"
16#include "core/hle/kernel/k_handle_table.h" 17#include "core/hle/kernel/k_handle_table.h"
17#include "core/hle/kernel/k_process.h" 18#include "core/hle/kernel/k_process.h"
@@ -23,19 +24,6 @@
23#include "core/hle/service/ipc_helpers.h" 24#include "core/hle/service/ipc_helpers.h"
24#include "core/memory.h" 25#include "core/memory.h"
25 26
26namespace {
27static thread_local std::array read_buffer_data_a{
28 Common::ScratchBuffer<u8>(),
29 Common::ScratchBuffer<u8>(),
30 Common::ScratchBuffer<u8>(),
31};
32static thread_local std::array read_buffer_data_x{
33 Common::ScratchBuffer<u8>(),
34 Common::ScratchBuffer<u8>(),
35 Common::ScratchBuffer<u8>(),
36};
37} // Anonymous namespace
38
39namespace Service { 27namespace Service {
40 28
41SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) 29SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_)
@@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) const
 }
 
 std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_a{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
 
     ASSERT_OR_EXECUTE_MSG(
         BufferDescriptorA().size() > buffer_index, { return {}; },
         "BufferDescriptorA invalid buffer_index {}", buffer_index);
-    auto& read_buffer = read_buffer_a[buffer_index];
-    return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(),
-                            BufferDescriptorA()[buffer_index].Size(),
-                            &read_buffer_data_a[buffer_index]);
+    return gm.Read(BufferDescriptorA()[buffer_index].Address(),
+                   BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
 }
 
 std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_x{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
 
     ASSERT_OR_EXECUTE_MSG(
         BufferDescriptorX().size() > buffer_index, { return {}; },
         "BufferDescriptorX invalid buffer_index {}", buffer_index);
-    auto& read_buffer = read_buffer_x[buffer_index];
-    return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(),
-                            BufferDescriptorX()[buffer_index].Size(),
-                            &read_buffer_data_x[buffer_index]);
+    return gm.Read(BufferDescriptorX()[buffer_index].Address(),
+                   BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
 }
 
 std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
-    static thread_local std::array read_buffer_a{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
-    static thread_local std::array read_buffer_x{
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-        Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
-    };
+    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
 
     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
                            BufferDescriptorA()[buffer_index].Size()};
@@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const
401 ASSERT_OR_EXECUTE_MSG( 368 ASSERT_OR_EXECUTE_MSG(
402 BufferDescriptorA().size() > buffer_index, { return {}; }, 369 BufferDescriptorA().size() > buffer_index, { return {}; },
403 "BufferDescriptorA invalid buffer_index {}", buffer_index); 370 "BufferDescriptorA invalid buffer_index {}", buffer_index);
404 auto& read_buffer = read_buffer_a[buffer_index]; 371 return gm.Read(BufferDescriptorA()[buffer_index].Address(),
405 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), 372 BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
406 BufferDescriptorA()[buffer_index].Size(),
407 &read_buffer_data_a[buffer_index]);
408 } else { 373 } else {
409 ASSERT_OR_EXECUTE_MSG( 374 ASSERT_OR_EXECUTE_MSG(
410 BufferDescriptorX().size() > buffer_index, { return {}; }, 375 BufferDescriptorX().size() > buffer_index, { return {}; },
411 "BufferDescriptorX invalid buffer_index {}", buffer_index); 376 "BufferDescriptorX invalid buffer_index {}", buffer_index);
412 auto& read_buffer = read_buffer_x[buffer_index]; 377 return gm.Read(BufferDescriptorX()[buffer_index].Address(),
413 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), 378 BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
414 BufferDescriptorX()[buffer_index].Size(),
415 &read_buffer_data_x[buffer_index]);
416 } 379 }
417} 380}
418 381
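
The hunks above replace the per-thread CpuGuestMemory caches with a throwaway reader that copies into ScratchBuffer members owned by the request context. A minimal sketch of that read-into-caller-storage pattern, using stand-in types rather than yuzu's real CpuGuestMemory/ScratchBuffer classes:

// Illustrative only: a reader fills caller-owned scratch storage and hands
// back a span, as the new gm.Read(...) calls do. No bounds checking here.
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

struct GuestMemory {
    std::vector<std::uint8_t> backing; // pretend guest RAM
    std::span<const std::uint8_t> Read(std::size_t addr, std::size_t size,
                                       std::vector<std::uint8_t>* scratch) {
        scratch->resize(size); // scratch outlives the call, so the span stays valid
        std::memcpy(scratch->data(), backing.data() + addr, size);
        return {scratch->data(), size};
    }
};
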
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h
index 440737db5..8329d7265 100644
--- a/src/core/hle/service/hle_ipc.h
+++ b/src/core/hle/service/hle_ipc.h
@@ -41,6 +41,8 @@ class KernelCore;
 class KHandleTable;
 class KProcess;
 class KServerSession;
+template <typename T>
+class KScopedAutoObject;
 class KThread;
 } // namespace Kernel
 
@@ -424,6 +426,9 @@ private:
 
     Kernel::KernelCore& kernel;
     Core::Memory::Memory& memory;
+
+    mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{};
+    mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{};
 };
 
 } // namespace Service
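
Because ReadBuffer and friends are const member functions, the new scratch arrays are declared mutable so the reads can still fill them. A tiny illustration of that idiom (stand-in types, not yuzu code):

// Sketch: a const accessor may legally write into mutable scratch storage,
// which is what lets a const method own the memory behind the span it returns.
#include <cstdint>
#include <span>
#include <vector>

struct Context {
    std::span<const std::uint8_t> View(std::size_t n) const {
        scratch.resize(n); // allowed: scratch is mutable
        return {scratch.data(), n};
    }
    mutable std::vector<std::uint8_t> scratch;
};
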
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 37ca24f5d..21ef57d27 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -2,27 +2,135 @@
 // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 // SPDX-License-Identifier: GPL-3.0-or-later
 
+#include <atomic>
+#include <deque>
+#include <mutex>
+
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
+#include "core/memory.h"
 #include "video_core/host1x/host1x.h"
 
 namespace Service::Nvidia::NvCore {
 
+Session::Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_)
+    : id{id_}, process{process_}, asid{asid_}, has_preallocated_area{}, mapper{}, is_active{} {}
+
+Session::~Session() = default;
+
 struct ContainerImpl {
-    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
-        : file{host1x_}, manager{host1x_}, device_file_data{} {}
+    explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
+        : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
+    Tegra::Host1x::Host1x& host1x;
     NvMap file;
     SyncpointManager manager;
     Container::Host1xDeviceFileData device_file_data;
+    std::deque<Session> sessions;
+    size_t new_ids{};
+    std::deque<size_t> id_pool;
+    std::mutex session_guard;
 };
 
 Container::Container(Tegra::Host1x::Host1x& host1x_) {
-    impl = std::make_unique<ContainerImpl>(host1x_);
+    impl = std::make_unique<ContainerImpl>(*this, host1x_);
 }
 
 Container::~Container() = default;
 
+SessionId Container::OpenSession(Kernel::KProcess* process) {
+    using namespace Common::Literals;
+
+    std::scoped_lock lk(impl->session_guard);
+    for (auto& session : impl->sessions) {
+        if (!session.is_active) {
+            continue;
+        }
+        if (session.process == process) {
+            return session.id;
+        }
+    }
+    size_t new_id{};
+    auto* memory_interface = &process->GetMemory();
+    auto& smmu = impl->host1x.MemoryManager();
+    auto asid = smmu.RegisterProcess(memory_interface);
+    if (!impl->id_pool.empty()) {
+        new_id = impl->id_pool.front();
+        impl->id_pool.pop_front();
+        impl->sessions[new_id] = Session{SessionId{new_id}, process, asid};
+    } else {
+        new_id = impl->new_ids++;
+        impl->sessions.emplace_back(SessionId{new_id}, process, asid);
+    }
+    auto& session = impl->sessions[new_id];
+    session.is_active = true;
+    // Optimization
+    if (process->IsApplication()) {
+        auto& page_table = process->GetPageTable().GetBasePageTable();
+        auto heap_start = page_table.GetHeapRegionStart();
+
+        Kernel::KProcessAddress cur_addr = heap_start;
+        size_t region_size = 0;
+        VAddr region_start = 0;
+        while (true) {
+            Kernel::KMemoryInfo mem_info{};
+            Kernel::Svc::PageInfo page_info{};
+            R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
+                                          cur_addr));
+            auto svc_mem_info = mem_info.GetSvcMemoryInfo();
+
+            // Check if this memory block is heap.
+            if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
+                if (svc_mem_info.size > region_size) {
+                    region_size = svc_mem_info.size;
+                    region_start = svc_mem_info.base_address;
+                }
+            }
+
+            // Check if we're done.
+            const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
+            if (next_address <= GetInteger(cur_addr)) {
+                break;
+            }
+
+            cur_addr = next_address;
+        }
+        session.has_preallocated_area = false;
+        auto start_region = region_size >= 32_MiB ? smmu.Allocate(region_size) : 0;
+        if (start_region != 0) {
+            session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
+                                                          asid, impl->host1x);
+            smmu.TrackContinuity(start_region, region_start, region_size, asid);
+            session.has_preallocated_area = true;
+            LOG_DEBUG(Debug, "Preallocation created!");
+        }
+    }
+    return SessionId{new_id};
+}
+
+void Container::CloseSession(SessionId session_id) {
+    std::scoped_lock lk(impl->session_guard);
+    auto& session = impl->sessions[session_id.id];
+    auto& smmu = impl->host1x.MemoryManager();
+    if (session.has_preallocated_area) {
+        const DAddr region_start = session.mapper->GetRegionStart();
+        const size_t region_size = session.mapper->GetRegionSize();
+        session.mapper.reset();
+        smmu.Free(region_start, region_size);
+        session.has_preallocated_area = false;
+    }
+    session.is_active = false;
+    smmu.UnregisterProcess(impl->sessions[session_id.id].asid);
+    impl->id_pool.emplace_front(session_id.id);
+}
+
+Session* Container::GetSession(SessionId session_id) {
+    std::atomic_thread_fence(std::memory_order_acquire);
+    return &impl->sessions[session_id.id];
+}
+
 NvMap& Container::GetNvMapFile() {
     return impl->file;
 }
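
OpenSession recycles session ids through id_pool before minting new ones from new_ids, and CloseSession pushes the freed id back to the front of the pool. A self-contained sketch of just that id-recycling scheme (illustrative names, not yuzu code):

#include <cstddef>
#include <deque>

class IdPool {
public:
    std::size_t Acquire() {
        if (!free_ids.empty()) {
            const std::size_t id = free_ids.front();
            free_ids.pop_front();
            return id; // reuse a previously released slot
        }
        return next_id++; // otherwise mint a fresh id
    }
    void Release(std::size_t id) {
        free_ids.emplace_front(id); // most-recently-freed id is reused first
    }

private:
    std::size_t next_id{};
    std::deque<std::size_t> free_ids;
};
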
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index b4b63ac90..b4d3938a8 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -8,24 +8,56 @@
 #include <memory>
 #include <unordered_map>
 
+#include "core/device_memory_manager.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 
+namespace Kernel {
+class KProcess;
+}
+
 namespace Tegra::Host1x {
 class Host1x;
 } // namespace Tegra::Host1x
 
 namespace Service::Nvidia::NvCore {
 
+class HeapMapper;
 class NvMap;
 class SyncpointManager;
 
 struct ContainerImpl;
 
+struct SessionId {
+    size_t id;
+};
+
+struct Session {
+    Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_);
+    ~Session();
+
+    Session(const Session&) = delete;
+    Session& operator=(const Session&) = delete;
+    Session(Session&&) = default;
+    Session& operator=(Session&&) = default;
+
+    SessionId id;
+    Kernel::KProcess* process;
+    Core::Asid asid;
+    bool has_preallocated_area{};
+    std::unique_ptr<HeapMapper> mapper{};
+    bool is_active{};
+};
+
 class Container {
 public:
     explicit Container(Tegra::Host1x::Host1x& host1x);
     ~Container();
 
+    SessionId OpenSession(Kernel::KProcess* process);
+    void CloseSession(SessionId id);
+
+    Session* GetSession(SessionId id);
+
     NvMap& GetNvMapFile();
 
     const NvMap& GetNvMapFile() const;
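
A hypothetical caller-side flow for this API, sketched under the assumption of any type exposing the OpenSession/GetSession/CloseSession surface above (not actual yuzu code):

// ContainerLike is any type with the session surface declared above;
// ProcessPtr stands in for Kernel::KProcess*.
template <typename ContainerLike, typename ProcessPtr>
void WithSession(ContainerLike& container, ProcessPtr process) {
    const auto session_id = container.OpenSession(process);
    if (auto* session = container.GetSession(session_id)) {
        (void)session; // session->asid names the process inside the SMMU;
                       // session->mapper is only non-null when a large heap
                       // was found and preallocated.
    }
    container.CloseSession(session_id);
}
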
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
new file mode 100644
index 000000000..096dc5deb
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
@@ -0,0 +1,175 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <mutex>
+
+#include <boost/container/small_vector.hpp>
+#define BOOST_NO_MT
+#include <boost/pool/detail/mutex.hpp>
+#undef BOOST_NO_MT
+#include <boost/icl/interval.hpp>
+#include <boost/icl/interval_base_set.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/icl/split_interval_map.hpp>
+#include <boost/pool/pool.hpp>
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/pool/poolfwd.hpp>
+
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
+#include "video_core/host1x/host1x.h"
+
+namespace boost {
+template <typename T>
+class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
+}
+
+namespace Service::Nvidia::NvCore {
+
+using IntervalCompare = std::less<DAddr>;
+using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
+using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
+using IntervalSet = boost::icl::interval_set<DAddr>;
+using IntervalType = typename IntervalSet::interval_type;
+
+template <typename Type>
+struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
+    // types
+    typedef counter_add_functor<Type> type;
+    typedef boost::icl::identity_based_inplace_combine<Type> base_type;
+
+    // public member functions
+    void operator()(Type& current, const Type& added) const {
+        current += added;
+        if (current < base_type::identity_element()) {
+            current = base_type::identity_element();
+        }
+    }
+
+    // public static functions
+    static void version(Type&){};
+};
+
+using OverlapCombine = counter_add_functor<int>;
+using OverlapSection = boost::icl::inter_section<int>;
+using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
+
+struct HeapMapper::HeapMapperInternal {
+    HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
+    ~HeapMapperInternal() = default;
+
+    template <typename Func>
+    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
+                                 Func&& func) {
+        const DAddr start_address = cpu_addr;
+        const DAddr end_address = start_address + size;
+        const IntervalType search_interval{start_address, end_address};
+        auto it = current_range.lower_bound(search_interval);
+        if (it == current_range.end()) {
+            return;
+        }
+        auto end_it = current_range.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            auto& inter = it->first;
+            DAddr inter_addr_end = inter.upper();
+            DAddr inter_addr = inter.lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
+    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
+                                    const IntervalType search_interval, int subtract_value) {
+        bool any_removals = false;
+        current_range.add(std::make_pair(search_interval, subtract_value));
+        do {
+            any_removals = false;
+            auto it = current_range.lower_bound(search_interval);
+            if (it == current_range.end()) {
+                return;
+            }
+            auto end_it = current_range.upper_bound(search_interval);
+            for (; it != end_it; it++) {
+                if (it->second <= 0) {
+                    any_removals = true;
+                    current_range.erase(it);
+                    break;
+                }
+            }
+        } while (any_removals);
+    }
+
+    IntervalSet base_set;
+    OverlapCounter mapping_overlaps;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
+    std::mutex guard;
+};
+
+HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
+                       Tegra::Host1x::Host1x& host1x)
+    : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_asid{asid} {
+    m_internal = std::make_unique<HeapMapperInternal>(host1x);
+}
+
+HeapMapper::~HeapMapper() {
+    m_internal->device_memory.Unmap(m_daddress, m_size);
+}
+
+DAddr HeapMapper::Map(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    m_internal->base_set.clear();
+    const IntervalType interval{start, start + size};
+    m_internal->base_set.insert(interval);
+    m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
+                                        [this](VAddr start_addr, VAddr end_addr, int) {
+                                            const IntervalType other{start_addr, end_addr};
+                                            m_internal->base_set.subtract(other);
+                                        });
+    if (!m_internal->base_set.empty()) {
+        auto it = m_internal->base_set.begin();
+        auto end_it = m_internal->base_set.end();
+        for (; it != end_it; it++) {
+            const VAddr inter_addr_end = it->upper();
+            const VAddr inter_addr = it->lower();
+            const size_t offset = inter_addr - m_vaddress;
+            const size_t sub_size = inter_addr_end - inter_addr;
+            m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size,
+                                          m_asid);
+        }
+    }
+    m_internal->mapping_overlaps += std::make_pair(interval, 1);
+    m_internal->base_set.clear();
+    return m_daddress + (start - m_vaddress);
+}
+
+void HeapMapper::Unmap(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    m_internal->base_set.clear();
+    m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
+                                        [this](VAddr start_addr, VAddr end_addr, int value) {
+                                            if (value <= 1) {
+                                                const IntervalType other{start_addr, end_addr};
+                                                m_internal->base_set.insert(other);
+                                            }
+                                        });
+    if (!m_internal->base_set.empty()) {
+        auto it = m_internal->base_set.begin();
+        auto end_it = m_internal->base_set.end();
+        for (; it != end_it; it++) {
+            const VAddr inter_addr_end = it->upper();
+            const VAddr inter_addr = it->lower();
+            const size_t offset = inter_addr - m_vaddress;
+            const size_t sub_size = inter_addr_end - inter_addr;
+            m_internal->device_memory.Unmap(m_daddress + offset, sub_size);
+        }
+    }
+    const IntervalType to_remove{start, start + size};
+    m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
+    m_internal->base_set.clear();
+}
+
+} // namespace Service::Nvidia::NvCore
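
HeapMapper tracks how many times each heap sub-range has been pinned and only touches the device mapping when a range's count rises from zero or falls back to it. The boost::icl split_interval_map does that bookkeeping above; the same counting idea can be modeled with plain boundary deltas, as in this self-contained sketch:

#include <cstdint>
#include <map>

class OverlapCounter {
public:
    void Add(std::uint64_t begin, std::uint64_t end) {
        deltas[begin] += 1;
        deltas[end] -= 1;
    }
    void Remove(std::uint64_t begin, std::uint64_t end) {
        deltas[begin] -= 1;
        deltas[end] += 1;
    }
    // True when any byte of [begin, end) still carries a positive count,
    // i.e. when a real unmap must be deferred.
    bool AnyReferenced(std::uint64_t begin, std::uint64_t end) const {
        int count = 0;
        std::uint64_t prev = 0;
        for (const auto& [addr, delta] : deltas) {
            // [prev, addr) carries `count` references before applying delta.
            if (count > 0 && prev < end && addr > begin) {
                return true;
            }
            count += delta;
            prev = addr;
        }
        return false;
    }

private:
    std::map<std::uint64_t, int> deltas; // boundary -> reference delta
};
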
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h
new file mode 100644
index 000000000..491a12e4f
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.h
@@ -0,0 +1,49 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+#include "core/device_memory_manager.h"
+
+namespace Tegra::Host1x {
+class Host1x;
+} // namespace Tegra::Host1x
+
+namespace Service::Nvidia::NvCore {
+
+class HeapMapper {
+public:
+    HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
+               Tegra::Host1x::Host1x& host1x);
+    ~HeapMapper();
+
+    bool IsInBounds(VAddr start, size_t size) const {
+        VAddr end = start + size;
+        return start >= m_vaddress && end <= (m_vaddress + m_size);
+    }
+
+    DAddr Map(VAddr start, size_t size);
+
+    void Unmap(VAddr start, size_t size);
+
+    DAddr GetRegionStart() const {
+        return m_daddress;
+    }
+
+    size_t GetRegionSize() const {
+        return m_size;
+    }
+
+private:
+    struct HeapMapperInternal;
+    VAddr m_vaddress;
+    DAddr m_daddress;
+    size_t m_size;
+    Core::Asid m_asid;
+    std::unique_ptr<HeapMapperInternal> m_internal;
+};
+
+} // namespace Service::Nvidia::NvCore
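
Since the whole preallocated heap is one contiguous device region, Map can hand back the device address by pure offset arithmetic. A compile-time sketch of that translation (the constants are illustrative):

#include <cstdint>

constexpr std::uint64_t TranslateHeapAddress(std::uint64_t vaddr_base,
                                             std::uint64_t daddr_base,
                                             std::uint64_t vaddr) {
    // Mirrors HeapMapper::Map's return: m_daddress + (start - m_vaddress).
    return daddr_base + (vaddr - vaddr_base);
}

static_assert(TranslateHeapAddress(0x8000'0000, 0x2000'0000, 0x8000'1000) ==
              0x2000'1000);
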
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 0ca05257e..1b59c6b15 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -2,14 +2,19 @@
 // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 // SPDX-License-Identifier: GPL-3.0-or-later
 
+#include <functional>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/memory.h"
 #include "video_core/host1x/host1x.h"
 
 using Core::Memory::YUZU_PAGESIZE;
+constexpr size_t BIG_PAGE_SIZE = YUZU_PAGESIZE * 16;
 
 namespace Service::Nvidia::NvCore {
 NvMap::Handle::Handle(u64 size_, Id id_)
@@ -17,9 +22,9 @@ NvMap::Handle::Handle(u64 size_, Id id_)
     flags.raw = 0;
 }
 
-NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
+NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress,
+                              NvCore::SessionId pSessionId) {
     std::scoped_lock lock(mutex);
-
     // Handles cannot be allocated twice
     if (allocated) {
         return NvResult::AccessDenied;
@@ -28,6 +33,7 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress)
     flags = pFlags;
     kind = pKind;
     align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign;
+    session_id = pSessionId;
 
     // This flag is only applicable for handles with an address passed
     if (pAddress) {
@@ -63,7 +69,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
     return NvResult::Success;
 }
 
-NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
+NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {}
 
 void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
     std::scoped_lock lock(handles_lock);
@@ -78,12 +84,30 @@ void NvMap::UnmapHandle(Handle& handle_description) {
         handle_description.unmap_queue_entry.reset();
     }
 
+    // Free and unmap the handle from Host1x GMMU
+    if (handle_description.pin_virt_address) {
+        host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
+                            handle_description.aligned_size);
+        host1x.Allocator().Free(handle_description.pin_virt_address,
+                                static_cast<u32>(handle_description.aligned_size));
+        handle_description.pin_virt_address = 0;
+    }
+
     // Free and unmap the handle from the SMMU
-    host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
-                                 handle_description.aligned_size);
-    host1x.Allocator().Free(handle_description.pin_virt_address,
-                            static_cast<u32>(handle_description.aligned_size));
-    handle_description.pin_virt_address = 0;
+    const size_t map_size = handle_description.aligned_size;
+    if (!handle_description.in_heap) {
+        auto& smmu = host1x.MemoryManager();
+        size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE);
+        smmu.Unmap(handle_description.d_address, map_size);
+        smmu.Free(handle_description.d_address, static_cast<size_t>(aligned_up));
+        handle_description.d_address = 0;
+        return;
+    }
+    const VAddr vaddress = handle_description.address;
+    auto* session = core.GetSession(handle_description.session_id);
+    session->mapper->Unmap(vaddress, map_size);
+    handle_description.d_address = 0;
+    handle_description.in_heap = false;
 }
 
 bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -124,22 +148,33 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
     }
 }
 
-VAddr NvMap::GetHandleAddress(Handle::Id handle) {
+DAddr NvMap::GetHandleAddress(Handle::Id handle) {
     std::scoped_lock lock(handles_lock);
     try {
-        return handles.at(handle)->address;
+        return handles.at(handle)->d_address;
     } catch (std::out_of_range&) {
         return 0;
     }
 }
 
-u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
+DAddr NvMap::PinHandle(NvMap::Handle::Id handle, bool low_area_pin) {
     auto handle_description{GetHandle(handle)};
     if (!handle_description) [[unlikely]] {
         return 0;
     }
 
     std::scoped_lock lock(handle_description->mutex);
+    const auto map_low_area = [&] {
+        if (handle_description->pin_virt_address == 0) {
+            auto& gmmu_allocator = host1x.Allocator();
+            auto& gmmu = host1x.GMMU();
+            u32 address =
+                gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size));
+            gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address,
+                     handle_description->aligned_size);
+            handle_description->pin_virt_address = address;
+        }
+    };
     if (!handle_description->pins) {
         // If we're in the unmap queue we can just remove ourselves and return since we're already
         // mapped
@@ -151,37 +186,58 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
             unmap_queue.erase(*handle_description->unmap_queue_entry);
             handle_description->unmap_queue_entry.reset();
 
+            if (low_area_pin) {
+                map_low_area();
+                handle_description->pins++;
+                return static_cast<DAddr>(handle_description->pin_virt_address);
+            }
+
             handle_description->pins++;
-            return handle_description->pin_virt_address;
+            return handle_description->d_address;
         }
     }
 
+    using namespace std::placeholders;
     // If not then allocate some space and map it
-    u32 address{};
-    auto& smmu_allocator = host1x.Allocator();
-    auto& smmu_memory_manager = host1x.MemoryManager();
-    while ((address = smmu_allocator.Allocate(
-                static_cast<u32>(handle_description->aligned_size))) == 0) {
-        // Free handles until the allocation succeeds
-        std::scoped_lock queueLock(unmap_queue_lock);
-        if (auto freeHandleDesc{unmap_queue.front()}) {
-            // Handles in the unmap queue are guaranteed not to be pinned so don't bother
-            // checking if they are before unmapping
-            std::scoped_lock freeLock(freeHandleDesc->mutex);
-            if (handle_description->pin_virt_address)
-                UnmapHandle(*freeHandleDesc);
-        } else {
-            LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+    DAddr address{};
+    auto& smmu = host1x.MemoryManager();
+    auto* session = core.GetSession(handle_description->session_id);
+    const VAddr vaddress = handle_description->address;
+    const size_t map_size = handle_description->aligned_size;
+    if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
+        handle_description->d_address = session->mapper->Map(vaddress, map_size);
+        handle_description->in_heap = true;
+    } else {
+        size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE);
+        while ((address = smmu.Allocate(aligned_up)) == 0) {
+            // Free handles until the allocation succeeds
+            std::scoped_lock queueLock(unmap_queue_lock);
+            if (auto freeHandleDesc{unmap_queue.front()}) {
+                // Handles in the unmap queue are guaranteed not to be pinned so don't bother
+                // checking if they are before unmapping
+                std::scoped_lock freeLock(freeHandleDesc->mutex);
+                if (handle_description->d_address)
+                    UnmapHandle(*freeHandleDesc);
+            } else {
+                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+            }
         }
+
+        handle_description->d_address = address;
+        smmu.Map(address, vaddress, map_size, session->asid, true);
+        handle_description->in_heap = false;
     }
+    }
 
-    smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address,
-                            handle_description->aligned_size);
-    handle_description->pin_virt_address = address;
+    if (low_area_pin) {
+        map_low_area();
     }
 
     handle_description->pins++;
-    return handle_description->pin_virt_address;
+    if (low_area_pin) {
+        return static_cast<DAddr>(handle_description->pin_virt_address);
+    }
+    return handle_description->d_address;
 }
 
 void NvMap::UnpinHandle(Handle::Id handle) {
@@ -232,7 +288,7 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna
         LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
     } else if (handle_description->dupes == 0) {
         // Force unmap the handle
-        if (handle_description->pin_virt_address) {
+        if (handle_description->d_address) {
             std::scoped_lock queueLock(unmap_queue_lock);
             UnmapHandle(*handle_description);
         }
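
The pin path is reference counted: the first pin establishes the device mapping (via the session's HeapMapper when the address falls inside the preallocated heap, otherwise via a fresh SMMU allocation), later pins only bump the count, and UnmapHandle runs once the count returns to zero. A simplified model with stand-in types:

#include <cstdint>

struct PinState {
    std::uint64_t d_address{};
    std::int64_t pins{};
};

std::uint64_t Pin(PinState& s, std::uint64_t mapped_address) {
    if (s.pins == 0) {
        s.d_address = mapped_address; // first pin establishes the mapping
    }
    ++s.pins;
    return s.d_address;
}

bool Unpin(PinState& s) {
    if (--s.pins == 0) {
        s.d_address = 0; // last unpin releases the device mapping
        return true;     // the caller would queue the real unmap here
    }
    return false;
}
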
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index a8e573890..d7f695845 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -14,6 +14,7 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 
 namespace Tegra {
@@ -25,6 +26,8 @@ class Host1x;
 } // namespace Tegra
 
 namespace Service::Nvidia::NvCore {
+
+class Container;
 /**
  * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
  * handles
@@ -48,7 +51,7 @@ public:
         using Id = u32;
         Id id; //!< A globally unique identifier for this handle
 
-        s32 pins{};
+        s64 pins{};
         u32 pin_virt_address{};
         std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
 
@@ -61,15 +64,18 @@ public:
         } flags{};
         static_assert(sizeof(Flags) == sizeof(u32));
 
-        u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to,
+        VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to,
                        //!< this can also be in the nvdrv tmem
        bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
                                     //!< call
 
        u8 kind{};        //!< Used for memory compression
        bool allocated{}; //!< If the handle has been allocated with `Alloc`
+        bool in_heap{};
+        NvCore::SessionId session_id{};
 
-        u64 dma_map_addr{}; //! remove me after implementing pinning.
+        DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds
+                           //!< to, this can also be in the nvdrv tmem
 
         Handle(u64 size, Id id);
 
@@ -77,7 +83,8 @@ public:
          * @brief Sets up the handle with the given memory config, can allocate memory from the tmem
          * if a 0 address is passed
          */
-        [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress);
+        [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress,
+                                     NvCore::SessionId pSessionId);
 
         /**
          * @brief Increases the dupe counter of the handle for the given session
@@ -108,7 +115,7 @@ public:
         bool can_unlock; //!< If the address region is ready to be unlocked
     };
 
-    explicit NvMap(Tegra::Host1x::Host1x& host1x);
+    explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
 
     /**
      * @brief Creates an unallocated handle of the given size
@@ -117,7 +124,7 @@ public:
 
     std::shared_ptr<Handle> GetHandle(Handle::Id handle);
 
-    VAddr GetHandleAddress(Handle::Id handle);
+    DAddr GetHandleAddress(Handle::Id handle);
 
     /**
      * @brief Maps a handle into the SMMU address space
@@ -125,7 +132,7 @@ public:
      * number of calls to `UnpinHandle`
     * @return The SMMU virtual address that the handle has been mapped to
     */
-    u32 PinHandle(Handle::Id handle);
+    DAddr PinHandle(Handle::Id handle, bool low_area_pin);
 
    /**
     * @brief When this has been called an equal number of times to `PinHandle` for the supplied
@@ -172,5 +179,7 @@ private:
     * @return If the handle was removed from the map
     */
    bool TryRemoveHandle(const Handle& handle_description);
+
+    Container& core;
 };
 } // namespace Service::Nvidia::NvCore
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index a04538d5d..8adaddc60 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 
 namespace Core {
@@ -62,7 +63,7 @@ public:
      * Called once a device is opened
      * @param fd The device fd
      */
-    virtual void OnOpen(DeviceFD fd) = 0;
+    virtual void OnOpen(NvCore::SessionId session_id, DeviceFD fd) = 0;
 
     /**
      * Called once a device is closed
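
From a device's perspective, the widened OnOpen hook mainly exists so implementations can remember which session opened each fd. A hedged sketch of that bookkeeping (DeviceFD and SessionId here are stand-ins for the real types):

#include <cstdint>
#include <unordered_map>

using DeviceFD = std::int32_t;
struct SessionId { std::size_t id; };

struct ExampleDevice {
    void OnOpen(SessionId session_id, DeviceFD fd) {
        sessions[fd] = session_id; // remember which session opened this fd
    }
    void OnClose(DeviceFD fd) {
        sessions.erase(fd);
    }
    std::unordered_map<DeviceFD, SessionId> sessions;
};
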
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 05a43d8dc..c1ebbd62d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -35,14 +35,14 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
     return NvResult::NotImplemented;
 }
 
-void nvdisp_disp0::OnOpen(DeviceFD fd) {}
+void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 void nvdisp_disp0::OnClose(DeviceFD fd) {}
 
 void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
                         u32 height, u32 stride, android::BufferTransformFlags transform,
                         const Common::Rectangle<int>& crop_rect,
                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
-    const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
+    const DAddr addr = nvmap.GetHandleAddress(buffer_handle);
     LOG_TRACE(Service,
               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
               addr, offset, width, height, stride, format);
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index daee05fe8..5f13a50a2 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -32,7 +32,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     /// Performs a screen flip, drawing the buffer pointed to by the handle.
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 6b3639008..e6646ba04 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -86,7 +86,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
     return NvResult::NotImplemented;
 }
 
-void nvhost_as_gpu::OnOpen(DeviceFD fd) {}
+void nvhost_as_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 void nvhost_as_gpu::OnClose(DeviceFD fd) {}
 
 NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
@@ -206,6 +206,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
                             static_cast<u32>(aligned_size >> page_size_bits));
     }
 
+    nvmap.UnpinHandle(mapping->handle);
+
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
     if (mapping->sparse_alloc) {
@@ -293,12 +295,12 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) {
                 return NvResult::BadValue;
             }
 
-            VAddr cpu_address{static_cast<VAddr>(
-                handle->address +
-                (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
+            DAddr base = nvmap.PinHandle(entry.handle, false);
+            DAddr device_address{static_cast<DAddr>(
+                base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
 
-            gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind),
-                      use_big_pages);
+            gmmu->Map(virtual_address, device_address, size,
+                      static_cast<Tegra::PTEKind>(entry.kind), use_big_pages);
         }
     }
 
@@ -331,9 +333,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
         }
 
         u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
-        VAddr cpu_address{mapping->ptr + params.buffer_offset};
+        VAddr device_address{mapping->ptr + params.buffer_offset};
 
-        gmmu->Map(gpu_address, cpu_address, params.mapping_size,
+        gmmu->Map(gpu_address, device_address, params.mapping_size,
                   static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
 
         return NvResult::Success;
@@ -349,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
         return NvResult::BadValue;
     }
 
-    VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
+    DAddr device_address{
+        static_cast<DAddr>(nvmap.PinHandle(params.handle, false) + params.buffer_offset)};
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
 
     bool big_page{[&]() {
@@ -373,15 +376,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
         }
 
         const bool use_big_pages = alloc->second.big_pages && big_page;
-        gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind),
+        gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind),
                   use_big_pages);
 
-        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
-                                               use_big_pages, alloc->second.sparse)};
+        auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
+                                               true, use_big_pages, alloc->second.sparse)};
         alloc->second.mappings.push_back(mapping);
         mapping_map[params.offset] = mapping;
     } else {
-
         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
@@ -394,11 +396,11 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
             return NvResult::InsufficientMemory;
         }
 
-        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size),
+        gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size),
                   static_cast<Tegra::PTEKind>(params.kind), big_page);
 
-        auto mapping{
-            std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
+        auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
                                                false, big_page, false)};
         mapping_map[params.offset] = mapping;
     }
 
@@ -433,6 +435,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) {
             gmmu->Unmap(params.offset, mapping->size);
         }
 
+        nvmap.UnpinHandle(mapping->handle);
+
         mapping_map.erase(params.offset);
     } catch (const std::out_of_range&) {
         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
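
In the Remap path the device address is now derived from the freshly pinned base plus the entry's offset expressed in big pages. A minimal model of that address computation (the values are illustrative):

#include <cstdint>

constexpr std::uint64_t BigPageOffset(std::uint64_t pinned_base,
                                      std::uint32_t handle_offset_big_pages,
                                      std::uint32_t big_page_size_bits) {
    // Mirrors: base + (u64{handle_offset_big_pages} << vm.big_page_size_bits)
    return pinned_base +
           (static_cast<std::uint64_t>(handle_offset_big_pages) << big_page_size_bits);
}

static_assert(BigPageOffset(0x1000'0000, 2, 16) == 0x1002'0000);
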
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 79a21683d..7d0a99988 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -55,7 +55,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
@@ -159,16 +159,18 @@ private:
     NvCore::NvMap& nvmap;
 
     struct Mapping {
-        VAddr ptr;
+        NvCore::NvMap::Handle::Id handle;
+        DAddr ptr;
         u64 offset;
         u64 size;
         bool fixed;
         bool big_page; // Only valid if fixed == false
         bool sparse_alloc;
 
-        Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_)
-            : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_),
-              sparse_alloc(sparse_alloc_) {}
+        Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_,
+                bool big_page_, bool sparse_alloc_)
+            : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_),
+              big_page(big_page_), sparse_alloc(sparse_alloc_) {}
     };
 
     struct Allocation {
@@ -212,9 +214,6 @@ private:
         bool initialised{};
     } vm;
     std::shared_ptr<Tegra::MemoryManager> gmmu;
-
-    // s32 channel{};
-    // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
 };
 
 } // namespace Service::Nvidia::Devices
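
Mapping now records the nvmap handle that backs it, so FreeMappingLocked and UnmapBuffer can release the pin taken in MapBufferEx. A minimal RAII model of that ownership rule (stand-in types, not yuzu's):

#include <cstdint>
#include <functional>

class PinnedMapping {
public:
    PinnedMapping(std::uint32_t handle, std::function<void(std::uint32_t)> unpin)
        : handle_{handle}, unpin_{std::move(unpin)} {}
    ~PinnedMapping() {
        unpin_(handle_); // mirrors nvmap.UnpinHandle(mapping->handle)
    }
    PinnedMapping(const PinnedMapping&) = delete;
    PinnedMapping& operator=(const PinnedMapping&) = delete;

private:
    std::uint32_t handle_;
    std::function<void(std::uint32_t)> unpin_;
};
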
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b8dd34e24..250d01de3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -76,7 +76,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inp
     return NvResult::NotImplemented;
 }
 
-void nvhost_ctrl::OnOpen(DeviceFD fd) {}
+void nvhost_ctrl::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 
 void nvhost_ctrl::OnClose(DeviceFD fd) {}
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 992124b60..403f1a746 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -32,7 +32,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 3e0c96456..ddd85678b 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
     return NvResult::NotImplemented;
 }
 
-void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {}
+void nvhost_ctrl_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
 
 NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index d170299bd..d2ab05b21 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -28,7 +28,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index b0395c2f0..bf12d69a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -120,7 +120,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
     return NvResult::NotImplemented;
 }
 
-void nvhost_gpu::OnOpen(DeviceFD fd) {}
+void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 void nvhost_gpu::OnClose(DeviceFD fd) {}
 
 NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 88fd228ff..e34a978db 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -47,7 +47,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index f43914e1b..2c0ac2a46 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -35,7 +35,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
     case 0x7:
         return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output);
     case 0x9:
-        return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output);
+        return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output, fd);
     case 0xa:
         return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output);
     default:
@@ -68,9 +68,10 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
     return NvResult::NotImplemented;
 }
 
-void nvhost_nvdec::OnOpen(DeviceFD fd) {
+void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
     LOG_INFO(Service_NVDRV, "NVDEC video stream started");
     system.SetNVDECActive(true);
+    sessions[fd] = session_id;
 }
 
 void nvhost_nvdec::OnClose(DeviceFD fd) {
@@ -81,6 +82,10 @@ void nvhost_nvdec::OnClose(DeviceFD fd) {
         system.GPU().ClearCdmaInstance(iter->second);
     }
     system.SetNVDECActive(false);
+    auto it = sessions.find(fd);
+    if (it != sessions.end()) {
+        sessions.erase(it);
+    }
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index ad2233c49..627686757 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -20,7 +20,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 };
26 26
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 74c701b95..a0a7bfa40 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -8,6 +8,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@@ -95,6 +96,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
 
     auto& gpu = system.GPU();
+    auto* session = core.GetSession(sessions[fd]);
+
     if (gpu.UseNvdec()) {
         for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
             const SyncptIncr& syncpt_incr = syncpt_increments[i];
@@ -106,8 +109,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
         const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
-        system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
-                                             cmdlist.size() * sizeof(u32));
+        session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
+                                                cmdlist.size() * sizeof(u32));
         gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
     }
     // Some games expect command_buffers to be written back
@@ -133,10 +136,12 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
     return NvResult::Success;
 }
 
-NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) {
+NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
+                                        DeviceFD fd) {
     const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
     for (size_t i = 0; i < num_entries; i++) {
-        entries[i].map_address = nvmap.PinHandle(entries[i].map_handle);
+        DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, true);
+        entries[i].map_address = static_cast<u32>(pin_address);
     }
 
     return NvResult::Success;
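
MapBuffer pins through the low 32-bit GMMU window so the returned address still fits the ioctl's u32 map_address field. A sketch of the narrowing step and the invariant it relies on (illustrative only):

#include <cassert>
#include <cstdint>

std::uint32_t ToMapAddress(std::uint64_t pin_address) {
    assert(pin_address <= 0xFFFF'FFFFull); // guaranteed by the low-area pin
    return static_cast<std::uint32_t>(pin_address);
}
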
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 7ce748e18..900db81d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -4,7 +4,9 @@
 #pragma once
 
 #include <deque>
+#include <unordered_map>
 #include <vector>
+
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@@ -111,7 +113,7 @@ protected:
     NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd);
     NvResult GetSyncpoint(IoctlGetSyncpoint& params);
     NvResult GetWaitbase(IoctlGetWaitbase& params);
-    NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
+    NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd);
     NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
     NvResult SetSubmitTimeout(u32 timeout);
 
@@ -125,6 +127,7 @@ protected:
     NvCore::NvMap& nvmap;
     NvCore::ChannelType channel_type;
     std::array<u32, MaxSyncPoints> device_syncpoints{};
+    std::unordered_map<DeviceFD, NvCore::SessionId> sessions;
 };
 }; // namespace Devices
 } // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 9e6b86458..f87d53f12 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -44,7 +44,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
     return NvResult::NotImplemented;
 }
 
-void nvhost_nvjpg::OnOpen(DeviceFD fd) {}
+void nvhost_nvjpg::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
 void nvhost_nvjpg::OnClose(DeviceFD fd) {}
 
 NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 790c97f6a..def9c254d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -22,7 +22,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
 private:
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 87f8d7c22..bf090f5eb 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -33,7 +33,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
     case 0x3:
         return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output);
     case 0x9:
-        return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output);
+        return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output, fd);
     case 0xa:
         return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output);
     default:
@@ -68,7 +68,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
     return NvResult::NotImplemented;
 }
 
-void nvhost_vic::OnOpen(DeviceFD fd) {}
+void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
+    sessions[fd] = session_id;
+}
 
 void nvhost_vic::OnClose(DeviceFD fd) {
     auto& host1x_file = core.Host1xDeviceFile();
@@ -76,6 +78,7 @@ void nvhost_vic::OnClose(DeviceFD fd) {
     if (iter != host1x_file.fd_to_id.end()) {
         system.GPU().ClearCdmaInstance(iter->second);
     }
+    sessions.erase(fd);
 }
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index cadbcb0a5..0cc04354a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -19,7 +19,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 };
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 71b2e62ec..da61a3bfe 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -36,9 +36,9 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
     case 0x3:
         return WrapFixed(this, &nvmap::IocFromId, input, output);
     case 0x4:
-        return WrapFixed(this, &nvmap::IocAlloc, input, output);
+        return WrapFixed(this, &nvmap::IocAlloc, input, output, fd);
     case 0x5:
-        return WrapFixed(this, &nvmap::IocFree, input, output);
+        return WrapFixed(this, &nvmap::IocFree, input, output, fd);
     case 0x9:
         return WrapFixed(this, &nvmap::IocParam, input, output);
     case 0xe:
@@ -67,8 +67,15 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, st
     return NvResult::NotImplemented;
 }
 
-void nvmap::OnOpen(DeviceFD fd) {}
-void nvmap::OnClose(DeviceFD fd) {}
+void nvmap::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
+    sessions[fd] = session_id;
+}
+void nvmap::OnClose(DeviceFD fd) {
+    auto it = sessions.find(fd);
+    if (it != sessions.end()) {
+        sessions.erase(it);
+    }
+}
 
 NvResult nvmap::IocCreate(IocCreateParams& params) {
     LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@@ -87,7 +94,7 @@ NvResult nvmap::IocCreate(IocCreateParams& params) {
     return NvResult::Success;
 }
 
-NvResult nvmap::IocAlloc(IocAllocParams& params) {
+NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) {
     LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
 
     if (!params.handle) {
@@ -116,15 +123,15 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) {
         return NvResult::InsufficientMemory;
     }
 
-    const auto result =
-        handle_description->Alloc(params.flags, params.align, params.kind, params.address);
+    const auto result = handle_description->Alloc(params.flags, params.align, params.kind,
+                                                  params.address, sessions[fd]);
     if (result != NvResult::Success) {
         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
         return result;
     }
     bool is_out_io{};
-    ASSERT(system.ApplicationProcess()
-               ->GetPageTable()
+    auto process = container.GetSession(sessions[fd])->process;
+    ASSERT(process->GetPageTable()
               .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address,
                                             handle_description->size,
                                             Kernel::KMemoryPermission::None, true, false)
@@ -224,7 +231,7 @@ NvResult nvmap::IocParam(IocParamParams& params) {
     return NvResult::Success;
 }
 
-NvResult nvmap::IocFree(IocFreeParams& params) {
+NvResult nvmap::IocFree(IocFreeParams& params, DeviceFD fd) {
     LOG_DEBUG(Service_NVDRV, "called");
 
     if (!params.handle) {
@@ -233,9 +240,9 @@
     }
 
     if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
+        auto process = container.GetSession(sessions[fd])->process;
         if (freeInfo->can_unlock) {
-            ASSERT(system.ApplicationProcess()
-                       ->GetPageTable()
+            ASSERT(process->GetPageTable()
                        .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
                        .IsSuccess());
         }
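With per-fd sessions in place, IocAlloc and IocFree lock and unlock the device address space of the process that owns the handle rather than the global application process. Condensed from the hunks above (error handling elided):

    // Alloc/free pairing sketch; names as in the diff.
    auto process = container.GetSession(sessions[fd])->process;
    bool is_out_io{};
    ASSERT(process->GetPageTable()
               .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address,
                                             handle_description->size,
                                             Kernel::KMemoryPermission::None, true, false)
               .IsSuccess());
    // ...and on IocFree, once the handle may be unlocked:
    ASSERT(process->GetPageTable()
               .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
               .IsSuccess());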
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 049c11028..d07d85f88 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -33,7 +33,7 @@ public:
     NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
                     std::span<u8> inline_output) override;
 
-    void OnOpen(DeviceFD fd) override;
+    void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
     void OnClose(DeviceFD fd) override;
 
     enum class HandleParameterType : u32_le {
@@ -100,11 +100,11 @@ public:
     static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
 
     NvResult IocCreate(IocCreateParams& params);
-    NvResult IocAlloc(IocAllocParams& params);
+    NvResult IocAlloc(IocAllocParams& params, DeviceFD fd);
     NvResult IocGetId(IocGetIdParams& params);
     NvResult IocFromId(IocFromIdParams& params);
     NvResult IocParam(IocParamParams& params);
-    NvResult IocFree(IocFreeParams& params);
+    NvResult IocFree(IocFreeParams& params, DeviceFD fd);
 
 private:
     /// Id to use for the next handle that is created.
@@ -115,6 +115,7 @@ private:
 
     NvCore::Container& container;
     NvCore::NvMap& file;
+    std::unordered_map<DeviceFD, NvCore::SessionId> sessions;
 };
 
 } // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 9e46ee8dd..cb256e5b4 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -45,13 +45,22 @@ void EventInterface::FreeEvent(Kernel::KEvent* event) {
 void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) {
     auto server_manager = std::make_unique<ServerManager>(system);
     auto module = std::make_shared<Module>(system);
-    server_manager->RegisterNamedService("nvdrv", std::make_shared<NVDRV>(system, module, "nvdrv"));
-    server_manager->RegisterNamedService("nvdrv:a",
-                                         std::make_shared<NVDRV>(system, module, "nvdrv:a"));
-    server_manager->RegisterNamedService("nvdrv:s",
-                                         std::make_shared<NVDRV>(system, module, "nvdrv:s"));
-    server_manager->RegisterNamedService("nvdrv:t",
-                                         std::make_shared<NVDRV>(system, module, "nvdrv:t"));
+    const auto NvdrvInterfaceFactoryForApplication = [&, module] {
+        return std::make_shared<NVDRV>(system, module, "nvdrv");
+    };
+    const auto NvdrvInterfaceFactoryForApplets = [&, module] {
+        return std::make_shared<NVDRV>(system, module, "nvdrv:a");
+    };
+    const auto NvdrvInterfaceFactoryForSysmodules = [&, module] {
+        return std::make_shared<NVDRV>(system, module, "nvdrv:s");
+    };
+    const auto NvdrvInterfaceFactoryForTesting = [&, module] {
+        return std::make_shared<NVDRV>(system, module, "nvdrv:t");
+    };
+    server_manager->RegisterNamedService("nvdrv", NvdrvInterfaceFactoryForApplication);
+    server_manager->RegisterNamedService("nvdrv:a", NvdrvInterfaceFactoryForApplets);
+    server_manager->RegisterNamedService("nvdrv:s", NvdrvInterfaceFactoryForSysmodules);
+    server_manager->RegisterNamedService("nvdrv:t", NvdrvInterfaceFactoryForTesting);
     server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system));
     nvnflinger.SetNVDrvInstance(module);
     ServerManager::RunServer(std::move(server_manager));
@@ -113,7 +122,7 @@ NvResult Module::VerifyFD(DeviceFD fd) const {
     return NvResult::Success;
 }
 
-DeviceFD Module::Open(const std::string& device_name) {
+DeviceFD Module::Open(const std::string& device_name, NvCore::SessionId session_id) {
     auto it = builders.find(device_name);
     if (it == builders.end()) {
         LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name);
@@ -124,7 +133,7 @@ DeviceFD Module::Open(const std::string& device_name, NvCore::SessionId session
     auto& builder = it->second;
     auto device = builder(fd)->second;
 
-    device->OnOpen(fd);
+    device->OnOpen(session_id, fd);
 
     return fd;
 }
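RegisterNamedService is now given a factory instead of one shared object, so each client that connects to nvdrv constructs its own NVDRV interface, and with it its own container session once it calls Initialize. The shape of the change, reduced to a single service:

    // Reduced sketch; the diff registers all four nvdrv variants this way.
    const auto factory = [&, module] { return std::make_shared<NVDRV>(system, module, "nvdrv"); };
    server_manager->RegisterNamedService("nvdrv", factory);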
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index d8622b3ca..c594f0e5e 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -77,7 +77,7 @@ public:
     NvResult VerifyFD(DeviceFD fd) const;
 
     /// Opens a device node and returns a file descriptor to it.
-    DeviceFD Open(const std::string& device_name);
+    DeviceFD Open(const std::string& device_name, NvCore::SessionId session_id);
 
     /// Sends an ioctl command to the specified file descriptor.
     NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
@@ -93,6 +93,10 @@ public:
 
     NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event);
 
+    NvCore::Container& GetContainer() {
+        return container;
+    }
+
 private:
     friend class EventInterface;
     friend class Service::Nvnflinger::Nvnflinger;
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index c8a880e84..6e4825313 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -3,8 +3,10 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/logging/log.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_event.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/service/ipc_helpers.h"
 #include "core/hle/service/nvdrv/nvdata.h"
@@ -37,7 +39,7 @@ void NVDRV::Open(HLERequestContext& ctx) {
         return;
     }
 
-    DeviceFD fd = nvdrv->Open(device_name);
+    DeviceFD fd = nvdrv->Open(device_name, session_id);
 
     rb.Push<DeviceFD>(fd);
     rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed);
@@ -150,12 +152,29 @@ void NVDRV::Close(HLERequestContext& ctx) {
 
 void NVDRV::Initialize(HLERequestContext& ctx) {
     LOG_WARNING(Service_NVDRV, "(STUBBED) called");
+    IPC::ResponseBuilder rb{ctx, 3};
+    SCOPE_EXIT({
+        rb.Push(ResultSuccess);
+        rb.PushEnum(NvResult::Success);
+    });
 
-    is_initialized = true;
+    if (is_initialized) {
+        // No need to initialize again
+        return;
+    }
 
-    IPC::ResponseBuilder rb{ctx, 3};
-    rb.Push(ResultSuccess);
-    rb.PushEnum(NvResult::Success);
+    IPC::RequestParser rp{ctx};
+    const auto process_handle{ctx.GetCopyHandle(0)};
+    // The transfer memory is lent to nvdrv as a work buffer since nvdrv is
+    // unable to allocate as much memory on its own. For HLE it's unnecessary to handle it
+    [[maybe_unused]] const auto transfer_memory_handle{ctx.GetCopyHandle(1)};
+    [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>();
+
+    auto& container = nvdrv->GetContainer();
+    auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle);
+    session_id = container.OpenSession(process.GetPointerUnsafe());
+
+    is_initialized = true;
 }
 
 void NVDRV::QueryEvent(HLERequestContext& ctx) {
@@ -242,6 +261,9 @@ NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char*
     RegisterHandlers(functions);
 }
 
-NVDRV::~NVDRV() = default;
+NVDRV::~NVDRV() {
+    auto& container = nvdrv->GetContainer();
+    container.CloseSession(session_id);
+}
 
 } // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 6e98115dc..f2195ae1e 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -35,6 +35,7 @@ private:
 
     u64 pid{};
     bool is_initialized{};
+    NvCore::SessionId session_id{};
     Common::ScratchBuffer<u8> output_buffer;
     Common::ScratchBuffer<u8> inline_output_buffer;
 };
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
index 2fef6cc1a..86e272b41 100644
--- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
+++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
@@ -87,19 +87,20 @@ Result CreateNvMapHandle(u32* out_nv_map_handle, Nvidia::Devices::nvmap& nvmap,
     R_SUCCEED();
 }
 
-Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle) {
+Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Nvidia::DeviceFD nvmap_fd) {
     // Free the handle.
     Nvidia::Devices::nvmap::IocFreeParams free_params{
         .handle = handle,
     };
-    R_UNLESS(nvmap.IocFree(free_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed);
+    R_UNLESS(nvmap.IocFree(free_params, nvmap_fd) == Nvidia::NvResult::Success,
+             VI::ResultOperationFailed);
 
     // We succeeded.
     R_SUCCEED();
 }
 
 Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer,
-                        u32 size) {
+                        u32 size, Nvidia::DeviceFD nvmap_fd) {
     // Assign the allocated memory to the handle.
     Nvidia::Devices::nvmap::IocAllocParams alloc_params{
         .handle = handle,
@@ -109,16 +110,16 @@ Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::Proce
         .kind = 0,
         .address = GetInteger(buffer),
     };
-    R_UNLESS(nvmap.IocAlloc(alloc_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed);
+    R_UNLESS(nvmap.IocAlloc(alloc_params, nvmap_fd) == Nvidia::NvResult::Success,
+             VI::ResultOperationFailed);
 
     // We succeeded.
     R_SUCCEED();
 }
 
-Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv,
+Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd,
                                Common::ProcessAddress buffer, u32 size) {
     // Get the nvmap device.
-    auto nvmap_fd = nvdrv.Open("/dev/nvmap");
     auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd);
     ASSERT(nvmap != nullptr);
 
@@ -127,11 +128,11 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv,
 
     // Ensure we maintain a clean state on failure.
     ON_RESULT_FAILURE {
-        ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle)));
+        ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle, nvmap_fd)));
     };
 
     // Assign the allocated memory to the handle.
-    R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size));
+    R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd));
 }
 
 constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888;
@@ -197,9 +198,13 @@ Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u
                                              std::addressof(m_buffer_page_group), m_system,
                                              SharedBufferSize));
 
+    auto& container = m_nvdrv->GetContainer();
+    m_session_id = container.OpenSession(m_system.ApplicationProcess());
+    m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id);
+
     // Create an nvmap handle for the buffer and assign the memory to it.
-    R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, map_address,
-                                  SharedBufferSize));
+    R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd,
+                                  map_address, SharedBufferSize));
 
     // Record the display id.
     m_display_id = display_id;
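The share-buffer manager previously opened a throwaway /dev/nvmap fd inside AllocateHandleForBuffer; it now opens one fd under its own container session up front and threads that fd through the alloc/free helpers so nvmap can attribute the handles to the right session. Condensed from the Initialize hunk above:

    auto& container = m_nvdrv->GetContainer();
    m_session_id = container.OpenSession(m_system.ApplicationProcess());
    m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id);
    // m_nvmap_fd is then passed down to IocAlloc/IocFree through the helpers.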
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
index c809c01b4..033bf4bbe 100644
--- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
+++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
@@ -4,6 +4,8 @@
 #pragma once
 
 #include "common/math_util.h"
+#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvnflinger/nvnflinger.h"
 #include "core/hle/service/nvnflinger/ui/fence.h"
 
@@ -53,7 +55,7 @@ private:
     u64 m_layer_id = 0;
     u32 m_buffer_nvmap_handle = 0;
     SharedMemoryPoolLayout m_pool_layout = {};
-
+    Nvidia::DeviceFD m_nvmap_fd = {};
+    Nvidia::NvCore::SessionId m_session_id = {};
     std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group;
 
     std::mutex m_guard;
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp
index af6591370..71d6fdb0c 100644
--- a/src/core/hle/service/nvnflinger/nvnflinger.cpp
+++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp
@@ -124,7 +124,7 @@ void Nvnflinger::ShutdownLayers() {
 
 void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
     nvdrv = std::move(instance);
-    disp_fd = nvdrv->Open("/dev/nvdisp_disp0");
+    disp_fd = nvdrv->Open("/dev/nvdisp_disp0", {});
 }
 
 std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) {
diff --git a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
index ce70946ec..ede2a1193 100644
--- a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
+++ b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
@@ -22,11 +22,13 @@ GraphicBuffer::GraphicBuffer(Service::Nvidia::NvCore::NvMap& nvmap,
     : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) {
     if (this->BufferId() > 0) {
         m_nvmap->DuplicateHandle(this->BufferId(), true);
+        m_nvmap->PinHandle(this->BufferId(), false);
     }
 }
 
 GraphicBuffer::~GraphicBuffer() {
     if (m_nvmap != nullptr && this->BufferId() > 0) {
+        m_nvmap->UnpinHandle(this->BufferId());
         m_nvmap->FreeHandle(this->BufferId(), true);
     }
 }
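GraphicBuffer now pins its nvmap handle for the lifetime of the wrapper, so the backing memory keeps a stable device (SMMU) address while the buffer is in use; the unpin precedes the free in the destructor. In outline (calls as in the diff):

    // Constructor:  DuplicateHandle(id, true);  PinHandle(id, false);
    // Destructor:   UnpinHandle(id);            FreeHandle(id, true);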
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8176a41be..1c218566f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -24,6 +24,8 @@
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/rasterizer_download_area.h"
 
 namespace Core::Memory {
@@ -637,17 +639,6 @@ struct Memory::Impl {
         LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
                   base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
 
-        // During boot, current_page_table might not be set yet, in which case we need not flush
-        if (system.IsPoweredOn()) {
-            auto& gpu = system.GPU();
-            for (u64 i = 0; i < size; i++) {
-                const auto page = base + i;
-                if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
-                    gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
-                }
-            }
-        }
-
         const auto end = base + size;
         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
                    base + page_table.pointers.size());
@@ -811,21 +802,33 @@
         return true;
     }
 
-    void HandleRasterizerDownload(VAddr address, size_t size) {
+    void HandleRasterizerDownload(VAddr v_address, size_t size) {
+        const auto* p = GetPointerImpl(
+            v_address, []() {}, []() {});
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         const size_t core = system.GetCurrentHostThreadID();
         auto& current_area = rasterizer_read_areas[core];
-        const VAddr end_address = address + size;
-        if (current_area.start_address <= address && end_address <= current_area.end_address)
-            [[likely]] {
-            return;
-        }
-        current_area = system.GPU().OnCPURead(address, size);
+        gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
+            const DAddr end_address = address + size;
+            if (current_area.start_address <= address && end_address <= current_area.end_address)
+                [[likely]] {
+                return;
+            }
+            current_area = system.GPU().OnCPURead(address, size);
+        });
     }
 
-    void HandleRasterizerWrite(VAddr address, size_t size) {
+    void HandleRasterizerWrite(VAddr v_address, size_t size) {
+        const auto* p = GetPointerImpl(
+            v_address, []() {}, []() {});
         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
         const size_t core = std::min(system.GetCurrentHostThreadID(),
                                      sys_core); // any other calls threads go to syscore.
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         // Guard on sys_core;
         if (core == sys_core) [[unlikely]] {
             sys_core_guard.lock();
@@ -835,36 +838,53 @@
                 sys_core_guard.unlock();
             }
         });
-        auto& current_area = rasterizer_write_areas[core];
-        VAddr subaddress = address >> YUZU_PAGEBITS;
-        bool do_collection = current_area.last_address == subaddress;
-        if (!do_collection) [[unlikely]] {
-            do_collection = system.GPU().OnCPUWrite(address, size);
-            if (!do_collection) {
-                return;
-            }
-            current_area.last_address = subaddress;
-        }
-        gpu_dirty_managers[core].Collect(address, size);
+        gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
+            auto& current_area = rasterizer_write_areas[core];
+            PAddr subaddress = address >> YUZU_PAGEBITS;
+            bool do_collection = current_area.last_address == subaddress;
+            if (!do_collection) [[unlikely]] {
+                do_collection = system.GPU().OnCPUWrite(address, size);
+                if (!do_collection) {
+                    return;
+                }
+                current_area.last_address = subaddress;
+            }
+            gpu_dirty_managers[core].Collect(address, size);
+        });
     }
 
     struct GPUDirtyState {
-        VAddr last_address;
+        PAddr last_address;
     };
 
-    void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
-        system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
-    }
-
-    void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
-        system.GPU().FlushRegion(GetInteger(dest_addr), size);
+    void InvalidateGPUMemory(u8* p, size_t size) {
+        constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
+        const size_t core = std::min(system.GetCurrentHostThreadID(),
+                                     sys_core); // any other calls threads go to syscore.
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
+        // Guard on sys_core;
+        if (core == sys_core) [[unlikely]] {
+            sys_core_guard.lock();
+        }
+        SCOPE_EXIT({
+            if (core == sys_core) [[unlikely]] {
+                sys_core_guard.unlock();
+            }
+        });
+        auto& gpu = system.GPU();
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
     }
 
     Core::System& system;
+    Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
     Common::PageTable* current_page_table = nullptr;
     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
         rasterizer_read_areas{};
     std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+    std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
     std::mutex sys_core_guard;
 
@@ -1059,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
     impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
 }
 
-void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
-    impl->InvalidateRegion(dest_addr, size);
-}
-
-void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
-    impl->FlushRegion(dest_addr, size);
-}
-
 bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
     [[maybe_unused]] bool mapped = true;
     [[maybe_unused]] bool rasterizer = false;
@@ -1078,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
                       GetInteger(vaddr));
             mapped = false;
         },
-        [&] {
-            impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size);
-            rasterizer = true;
-        });
+        [&] { rasterizer = true; });
+    if (rasterizer) {
+        impl->InvalidateGPUMemory(ptr, size);
+    }
 
 #ifdef __linux__
     if (!rasterizer && mapped) {
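The rasterizer notification paths no longer hand guest virtual addresses straight to the GPU. Each path first resolves the VAddr to a host pointer, then asks the new device memory manager to translate that pointer back into the device address (DAddr) ranges the GPU caches are keyed on, applying the operation per range. The common shape, condensed from the hunks above:

    const auto* p = GetPointerImpl(v_address, []() {}, []() {});
    gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
        system.GPU().InvalidateRegion(address, size); // or OnCPURead/OnCPUWrite + Collect
    });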
diff --git a/src/core/memory.h b/src/core/memory.h
index dddfaf4a4..f7e6b297f 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -12,6 +12,7 @@
 
 #include "common/scratch_buffer.h"
 #include "common/typed_address.h"
+#include "core/guest_memory.h"
 #include "core/hle/result.h"
 
 namespace Common {
@@ -486,10 +487,10 @@ public:
     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
 
     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
-    void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
+
     bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
+
     bool InvalidateSeparateHeap(void* fault_address);
-    void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
 
 private:
     Core::System& system;
@@ -498,209 +499,9 @@ private:
     std::unique_ptr<Impl> impl;
 };
 
-enum GuestMemoryFlags : u32 {
-    Read = 1 << 0,
-    Write = 1 << 1,
-    Safe = 1 << 2,
-    Cached = 1 << 3,
-
-    SafeRead = Read | Safe,
-    SafeWrite = Write | Safe,
-    SafeReadWrite = SafeRead | SafeWrite,
-    SafeReadCachedWrite = SafeReadWrite | Cached,
-
-    UnsafeRead = Read,
-    UnsafeWrite = Write,
-    UnsafeReadWrite = UnsafeRead | UnsafeWrite,
-    UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
-};
-
-namespace {
-template <typename M, typename T, GuestMemoryFlags FLAGS>
-class GuestMemory {
-    using iterator = T*;
-    using const_iterator = const T*;
-    using value_type = T;
-    using element_type = T;
-    using iterator_category = std::contiguous_iterator_tag;
-
-public:
-    GuestMemory() = delete;
-    explicit GuestMemory(M& memory, u64 addr, std::size_t size,
-                         Common::ScratchBuffer<T>* backup = nullptr)
-        : m_memory{memory}, m_addr{addr}, m_size{size} {
-        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
-        if constexpr (FLAGS & GuestMemoryFlags::Read) {
-            Read(addr, size, backup);
-        }
-    }
-
-    ~GuestMemory() = default;
-
-    T* data() noexcept {
-        return m_data_span.data();
-    }
-
-    const T* data() const noexcept {
-        return m_data_span.data();
-    }
-
-    size_t size() const noexcept {
-        return m_size;
-    }
-
-    size_t size_bytes() const noexcept {
-        return this->size() * sizeof(T);
-    }
-
-    [[nodiscard]] T* begin() noexcept {
-        return this->data();
-    }
-
-    [[nodiscard]] const T* begin() const noexcept {
-        return this->data();
-    }
-
-    [[nodiscard]] T* end() noexcept {
-        return this->data() + this->size();
-    }
-
-    [[nodiscard]] const T* end() const noexcept {
-        return this->data() + this->size();
-    }
-
-    T& operator[](size_t index) noexcept {
-        return m_data_span[index];
-    }
-
-    const T& operator[](size_t index) const noexcept {
-        return m_data_span[index];
-    }
-
-    void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
-        m_addr = addr;
-        m_size = size;
-        m_addr_changed = true;
-    }
-
-    std::span<T> Read(u64 addr, std::size_t size,
-                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
-        m_addr = addr;
-        m_size = size;
-        if (m_size == 0) {
-            m_is_data_copy = true;
-            return {};
-        }
-
-        if (this->TrySetSpan()) {
-            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                m_memory.FlushRegion(m_addr, this->size_bytes());
-            }
-        } else {
-            if (backup) {
-                backup->resize_destructive(this->size());
-                m_data_span = *backup;
-            } else {
-                m_data_copy.resize(this->size());
-                m_data_span = std::span(m_data_copy);
-            }
-            m_is_data_copy = true;
-            m_span_valid = true;
-            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
-            } else {
-                m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
-            }
-        }
-        return m_data_span;
-    }
-
-    void Write(std::span<T> write_data) noexcept {
-        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
-            m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
-        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-            m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
-        } else {
-            m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
-        }
-    }
-
-    bool TrySetSpan() noexcept {
-        if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
-            m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
-            m_span_valid = true;
-            return true;
-        }
-        return false;
-    }
-
-protected:
-    bool IsDataCopy() const noexcept {
-        return m_is_data_copy;
-    }
-
-    bool AddressChanged() const noexcept {
-        return m_addr_changed;
-    }
-
-    M& m_memory;
-    u64 m_addr{};
-    size_t m_size{};
-    std::span<T> m_data_span{};
-    std::vector<T> m_data_copy{};
-    bool m_span_valid{false};
-    bool m_is_data_copy{false};
-    bool m_addr_changed{false};
-};
-
-template <typename M, typename T, GuestMemoryFlags FLAGS>
-class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
-public:
-    GuestMemoryScoped() = delete;
-    explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
-                               Common::ScratchBuffer<T>* backup = nullptr)
-        : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
-        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
-            if (!this->TrySetSpan()) {
-                if (backup) {
-                    this->m_data_span = *backup;
-                    this->m_span_valid = true;
-                    this->m_is_data_copy = true;
-                }
-            }
-        }
-    }
-
-    ~GuestMemoryScoped() {
-        if constexpr (FLAGS & GuestMemoryFlags::Write) {
-            if (this->size() == 0) [[unlikely]] {
-                return;
-            }
-
-            if (this->AddressChanged() || this->IsDataCopy()) {
-                ASSERT(this->m_span_valid);
-                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
-                    this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
-                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
-                    this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
-                } else {
-                    this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
-                }
-            } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
-                                 (FLAGS & GuestMemoryFlags::Cached)) {
-                this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
-            }
-        }
-    }
-};
-} // namespace
-
 template <typename T, GuestMemoryFlags FLAGS>
-using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
+using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>;
 template <typename T, GuestMemoryFlags FLAGS>
-using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
-template <typename T, GuestMemoryFlags FLAGS>
-using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
-template <typename T, GuestMemoryFlags FLAGS>
-using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
+using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>;
+
 } // namespace Core::Memory
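The GuestMemory/GuestMemoryScoped templates move out of this header into the new core/guest_memory.h (included above), so memory.h keeps only the CPU-side aliases; the Tegra::MemoryManager-based aliases are dropped from this header. A hedged usage sketch of the scoped alias (the caller-side names here are hypothetical; the alias and flags come from the diff):

    // Scoped, safe read-write view of guest memory; written back on scope exit.
    {
        Core::Memory::CpuGuestMemoryScoped<u32, Core::Memory::GuestMemoryFlags::SafeReadWrite>
            words(memory, addr, word_count);
        words[0] ^= 1; // mutate through the span; flushed back in the destructor
    }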