author     2023-12-29 07:53:52 +0100
committer  2024-01-18 21:12:30 -0500
commit     34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718 (patch)
tree       afa899bb63e97df9c80e5de49395495143799dbd
parent     SMMU: Initial adaptation to video_core. (diff)
SMMU: Implement physical memory mirroring
 src/core/device_memory_manager.h                 |  27
 src/core/device_memory_manager.inc               | 154
 src/core/hle/service/nvdrv/core/container.cpp    |   6
 src/core/hle/service/nvdrv/core/nvmap.cpp        |   7
 src/core/hle/service/nvdrv/core/nvmap.h          |   6
 src/core/memory.cpp                              |  53
 src/video_core/renderer_opengl/gl_rasterizer.cpp |   7
 src/video_core/renderer_vulkan/vk_rasterizer.cpp |   6
 8 files changed, 226 insertions(+), 40 deletions(-)
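
The core of the change: compressed_device_addr previously mapped each physical page to exactly one device page, which silently lost aliased mappings. After this commit the top bit of each entry distinguishes a direct mapping from a chain of mirrors. A minimal sketch of the decode step, reusing the constants the header below introduces; everything else here is illustrative, not part of the diff:

#include <cstdint>

constexpr uint32_t MULTI_FLAG_BITS = 31;
constexpr uint32_t MULTI_FLAG = 1u << MULTI_FLAG_BITS;
constexpr uint32_t MULTI_MASK = ~MULTI_FLAG;

// entry == 0             -> the physical page has no device mapping.
// top bit clear (likely) -> entry is the single device page number itself.
// top bit set            -> (entry & MULTI_MASK) is a 1-based head index into
//                           PhysicalAddressContainer, a linked list of all
//                           device pages that mirror this physical page.
bool IsMirrored(uint32_t entry) {
    return (entry & MULTI_FLAG) != 0;
}
uint32_t MirrorChainHead(uint32_t entry) {
    return entry & MULTI_MASK;
}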
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 1a63cbd09..7c7726348 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -10,8 +10,10 @@
 #include <mutex>
 
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 
+
 namespace Core {
 
 class DeviceMemory;
@@ -49,9 +51,25 @@ public:
     template <typename T>
     const T* GetPointer(DAddr address) const;
 
-    DAddr GetAddressFromPAddr(PAddr address) const {
+    template <typename Func>
+    void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
         DAddr subbits = static_cast<DAddr>(address & page_mask);
-        return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
+        const u32 base = compressed_device_addr[(address >> page_bits)];
+        if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
+            const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
+            operation(d_address);
+            return;
+        }
+        InnerGatherDeviceAddresses(buffer, address);
+        for (u32 value : buffer) {
+            operation((static_cast<DAddr>(value) << page_bits) + subbits);
+        }
+    }
+
+    template <typename Func>
+    void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        PAddr address = GetRawPhysicalAddr<u8>(p);
+        ApplyOpOnPAddr(address, buffer, operation);
     }
 
     PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
@@ -98,6 +116,9 @@ private:
     static constexpr size_t page_size = 1ULL << page_bits;
     static constexpr size_t page_mask = page_size - 1ULL;
     static constexpr u32 physical_address_base = 1U << page_bits;
+    static constexpr u32 MULTI_FLAG_BITS = 31;
+    static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
+    static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
 
     template <typename T>
     T* GetPointerFromRaw(PAddr addr) {
@@ -117,6 +138,8 @@ private:
     void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
                    auto increment);
 
+    void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
+
     std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
 
     const uintptr_t physical_base;
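
Callers now fan a single physical-page operation out across every mirror instead of asking for the one device address. A runnable miniature of the ApplyOpOnPAddr dispatch, with plain types standing in for the manager (page_bits = 12 matches the real code; the table contents, GatherMirrors, and the callback are illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

constexpr uint32_t page_bits = 12;
constexpr uint64_t page_mask = (1ULL << page_bits) - 1;
constexpr uint32_t MULTI_FLAG_BITS = 31;

// Stand-ins for compressed_device_addr and InnerGatherDeviceAddresses.
std::vector<uint32_t> compressed_device_addr(16, 0);
std::vector<uint32_t> GatherMirrors(uint32_t /*head*/) {
    return {3, 7}; // pretend device pages 3 and 7 mirror this physical page
}

template <typename Func>
void ApplyOpOnPAddr(uint64_t address, Func&& operation) {
    const uint64_t subbits = address & page_mask;
    const uint32_t base = compressed_device_addr[address >> page_bits];
    if ((base >> MULTI_FLAG_BITS) == 0) { // single mapping: one callback
        operation((static_cast<uint64_t>(base) << page_bits) + subbits);
        return;
    }
    for (uint32_t value : GatherMirrors(base)) { // mirrored: one per alias
        operation((static_cast<uint64_t>(value) << page_bits) + subbits);
    }
}

int main() {
    compressed_device_addr[1] = (1u << MULTI_FLAG_BITS) | 1; // mark as mirrored
    ApplyOpOnPAddr((1ULL << page_bits) | 0x10, [](uint64_t d_address) {
        std::cout << std::hex << d_address << '\n'; // prints 3010, then 7010
    });
}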
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 8c5f82d31..4fb3ad3ab 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -18,10 +18,117 @@
 
 namespace Core {
 
+namespace {
+
+class PhysicalAddressContainer {
+public:
+    PhysicalAddressContainer() = default;
+    ~PhysicalAddressContainer() = default;
+
+    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
+        buffer.resize(8);
+        buffer.resize(0);
+        size_t index = 0;
+        const auto add_value = [&](u32 value) {
+            buffer[index] = value;
+            index++;
+            buffer.resize(index);
+        };
+
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        add_value(current->value);
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+            add_value(current->value);
+        }
+    }
+
+    u32 Register(u32 value) {
+        return RegisterImplementation(value);
+    }
+
+    void Register(u32 value, u32 start_entry) {
+        auto entry_id = RegisterImplementation(value);
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        current->next_entry = entry_id;
+    }
+
+    std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
+        u32 iter_entry = start_entry;
+        Entry* previous{};
+        Entry* current = &storage[iter_entry - 1];
+        Entry* next{};
+        bool more_than_one_remaining = false;
+        u32 result_start{start_entry};
+        size_t count = 0;
+        while (current->value != value) {
+            count++;
+            previous = current;
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        // Find next
+        u32 next_entry = current->next_entry;
+        if (next_entry != 0) {
+            next = &storage[next_entry - 1];
+            more_than_one_remaining = next->next_entry != 0;
+        }
+        if (previous) {
+            previous->next_entry = next_entry;
+        } else {
+            result_start = next_entry;
+        }
+        free_entries.emplace_back(iter_entry);
+        return std::make_pair(more_than_one_remaining || count > 1, result_start);
+    }
+
+    u32 ReleaseEntry(u32 start_entry) {
+        Entry* current = &storage[start_entry - 1];
+        free_entries.emplace_back(start_entry);
+        return current->value;
+    }
+
+private:
+    u32 RegisterImplementation(u32 value) {
+        auto entry_id = GetNewEntry();
+        auto& entry = storage[entry_id - 1];
+        entry.next_entry = 0;
+        entry.value = value;
+        return entry_id;
+    }
+    u32 GetNewEntry() {
+        if (!free_entries.empty()) {
+            u32 result = free_entries.front();
+            free_entries.pop_front();
+            return result;
+        }
+        storage.emplace_back();
+        u32 new_entry = static_cast<u32>(storage.size());
+        return new_entry;
+    }
+
+    struct Entry {
+        u32 next_entry{};
+        u32 value{};
+    };
+
+    std::deque<Entry> storage;
+    std::deque<u32> free_entries;
+};
+
 struct EmptyAllocator {
     EmptyAllocator([[maybe_unused]] DAddr address) {}
 };
 
+} // namespace
+
 template <typename DTraits>
 struct DeviceMemoryManagerAllocator {
     static constexpr bool supports_pinning = DTraits::supports_pinning;
@@ -38,6 +145,7 @@ struct DeviceMemoryManagerAllocator {
     std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
         pin_allocator;
     Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
+    PhysicalAddressContainer multi_dev_address;
 
     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     template <bool pin_area>
@@ -109,6 +217,9 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
       cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
     cached_pages = std::make_unique<CachedPages>();
+    for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
+        compressed_device_addr[i] = 0;
+    }
 }
 
 template <typename Traits>
@@ -155,8 +266,19 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
         }
         auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
         compressed_physical_ptr[start_page_d + i] = phys_addr;
-        compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
         InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
+        const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+        const u32 new_dev = static_cast<u32>(start_page_d + i);
+        if (base_dev == 0) [[likely]] {
+            compressed_device_addr[phys_addr - 1U] = new_dev;
+            continue;
+        }
+        u32 start_id = base_dev & MULTI_MASK;
+        if ((base_dev >> MULTI_FLAG_BITS) == 0) {
+            start_id = impl->multi_dev_address.Register(base_dev);
+            compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
+        }
+        impl->multi_dev_address.Register(new_dev, start_id);
     }
 }
 
@@ -170,13 +292,39 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
         auto phys_addr = compressed_physical_ptr[start_page_d + i];
         compressed_physical_ptr[start_page_d + i] = 0;
         cpu_backing_address[start_page_d + i] = 0;
-        if (phys_addr != 0) {
-            compressed_device_addr[phys_addr - 1] = 0;
+        if (phys_addr != 0) [[likely]] {
+            const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+            if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
+                compressed_device_addr[phys_addr - 1] = 0;
+                continue;
+            }
+            const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
+                static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
+            if (!more_entries) {
+                compressed_device_addr[phys_addr - 1] =
+                    impl->multi_dev_address.ReleaseEntry(new_start);
+                continue;
+            }
+            compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
         }
     }
 }
 
 template <typename Traits>
+void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
+                                                             PAddr address) {
+    size_t phys_addr = address >> page_bits;
+    std::scoped_lock lk(mapping_guard);
+    u32 backing = compressed_device_addr[phys_addr];
+    if ((backing >> MULTI_FLAG_BITS) != 0) {
+        impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
+        return;
+    }
+    buffer.resize(1);
+    buffer[0] = backing;
+}
+
+template <typename Traits>
 template <typename T>
 T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
     const size_t index = address >> Memory::YUZU_PAGEBITS;
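
The chains behind MULTI_FLAG live in PhysicalAddressContainer: a std::deque<Entry> arena addressed by 1-based ids (0 doubles as the end-of-chain sentinel, which is why every access is storage[id - 1]) plus a free list of recycled slots; std::deque is used so existing entries never move when the arena grows. A self-contained toy version of just that arena convention (the names and demo values are illustrative, not from the diff):

#include <cstdint>
#include <deque>
#include <iostream>

struct Entry {
    uint32_t next_entry{}; // 0 terminates the chain
    uint32_t value{};
};

std::deque<Entry> storage;
std::deque<uint32_t> free_entries;

uint32_t NewEntry(uint32_t value) {
    uint32_t id;
    if (!free_entries.empty()) { // recycle a freed slot first
        id = free_entries.front();
        free_entries.pop_front();
    } else {
        storage.emplace_back();
        id = static_cast<uint32_t>(storage.size()); // ids are 1-based
    }
    storage[id - 1] = Entry{0, value};
    return id;
}

int main() {
    // Two device pages mirroring one physical page: chain head -> second.
    uint32_t head = NewEntry(0x100);
    storage[head - 1].next_entry = NewEntry(0x200);
    for (uint32_t it = head; it != 0; it = storage[it - 1].next_entry) {
        std::cout << std::hex << storage[it - 1].value << '\n';
    }
}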
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 7c2231fe6..e12ce05c1 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -16,8 +16,8 @@
 namespace Service::Nvidia::NvCore {
 
 struct ContainerImpl {
-    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
-        : host1x{host1x_}, file{host1x_}, manager{host1x_}, device_file_data{} {}
+    explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
+        : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
     Tegra::Host1x::Host1x& host1x;
     NvMap file;
     SyncpointManager manager;
@@ -29,7 +29,7 @@ struct ContainerImpl {
 };
 
 Container::Container(Tegra::Host1x::Host1x& host1x_) {
-    impl = std::make_unique<ContainerImpl>(host1x_);
+    impl = std::make_unique<ContainerImpl>(*this, host1x_);
 }
 
 Container::~Container() = default;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 7879c6f04..e4168a37c 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -7,6 +7,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/memory.h"
 #include "video_core/host1x/host1x.h"
@@ -64,7 +65,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
     return NvResult::Success;
 }
 
-NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
+NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {}
 
 void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
     std::scoped_lock lock(handles_lock);
@@ -160,6 +161,8 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
         // If not then allocate some space and map it
         DAddr address{};
         auto& smmu = host1x.MemoryManager();
+        auto* session = core.GetSession(session_id);
+
         auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
         //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
         while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
@@ -179,7 +182,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
         handle_description->d_address = address;
 
         smmu.Map(address, handle_description->address, handle_description->aligned_size,
-                 session_id);
+                 session->smmu_id);
     }
 
     handle_description->pins++;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index e9e9e8b5b..7dd6d26c3 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -25,6 +25,8 @@ class Host1x;
 } // namespace Tegra
 
 namespace Service::Nvidia::NvCore {
+
+class Container;
 /**
  * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
  * handles
@@ -109,7 +111,7 @@ public:
         bool can_unlock; //!< If the address region is ready to be unlocked
     };
 
-    explicit NvMap(Tegra::Host1x::Host1x& host1x);
+    explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
 
     /**
      * @brief Creates an unallocated handle of the given size
@@ -173,5 +175,7 @@ private:
      * @return If the handle was removed from the map
      */
    bool TryRemoveHandle(const Handle& handle_description);
+
+    Container& core;
 };
 } // namespace Service::Nvidia::NvCore
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 609e775ae..f126840cb 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -44,7 +44,8 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
 // from outside classes. This also allows modification to the internals of the memory
 // subsystem without needing to rebuild all files that make use of the memory interface.
 struct Memory::Impl {
-    explicit Impl(Core::System& system_) : system{system_} {}
+    explicit Impl(Core::System& system_)
+        : system{system_} {}
 
     void SetCurrentPageTable(Kernel::KProcess& process) {
         current_page_table = &process.GetPageTable().GetImpl();
@@ -817,26 +818,31 @@
     void HandleRasterizerDownload(VAddr v_address, size_t size) {
         const auto* p = GetPointerImpl(
             v_address, []() {}, []() {});
-        auto& gpu_device_memory = system.Host1x().MemoryManager();
-        DAddr address =
-            gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p));
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         const size_t core = system.GetCurrentHostThreadID();
         auto& current_area = rasterizer_read_areas[core];
-        const DAddr end_address = address + size;
-        if (current_area.start_address <= address && end_address <= current_area.end_address)
-            [[likely]] {
-            return;
-        }
-        current_area = system.GPU().OnCPURead(address, size);
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) {
+                const DAddr end_address = address + size;
+                if (current_area.start_address <= address && end_address <= current_area.end_address)
+                    [[likely]] {
+                    return;
+                }
+                current_area = system.GPU().OnCPURead(address, size);
+            });
     }
 
     void HandleRasterizerWrite(VAddr v_address, size_t size) {
         const auto* p = GetPointerImpl(
             v_address, []() {}, []() {});
-        PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
         const size_t core = std::min(system.GetCurrentHostThreadID(),
                                      sys_core); // any other calls threads go to syscore.
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         // Guard on sys_core;
         if (core == sys_core) [[unlikely]] {
             sys_core_guard.lock();
@@ -846,17 +852,20 @@
                 sys_core_guard.unlock();
             }
         });
-        auto& current_area = rasterizer_write_areas[core];
-        PAddr subaddress = address >> YUZU_PAGEBITS;
-        bool do_collection = current_area.last_address == subaddress;
-        if (!do_collection) [[unlikely]] {
-            do_collection = system.GPU().OnCPUWrite(address, size);
-            if (!do_collection) {
-                return;
-            }
-            current_area.last_address = subaddress;
-        }
-        gpu_dirty_managers[core].Collect(address, size);
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) {
+                auto& current_area = rasterizer_write_areas[core];
+                PAddr subaddress = address >> YUZU_PAGEBITS;
+                bool do_collection = current_area.last_address == subaddress;
+                if (!do_collection) [[unlikely]] {
+                    do_collection = system.GPU().OnCPUWrite(address, size);
+                    if (!do_collection) {
+                        return;
+                    }
+                    current_area.last_address = subaddress;
+                }
+                gpu_dirty_managers[core].Collect(address, size);
+            });
     }
 
     struct GPUDirtyState {
@@ -872,10 +881,12 @@
     }
 
     Core::System& system;
+    Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
     Common::PageTable* current_page_table = nullptr;
     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
        rasterizer_read_areas{};
     std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+    std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
    std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
     std::mutex sys_core_guard;
 
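
Both handlers change shape the same way: the physical-to-device translation moves inside a lambda passed to ApplyOpOnPointer, so download-area checks and dirty collection run once per mirrored device address, and each host core owns a Common::ScratchBuffer<u32> so the gather step neither allocates on the hot path nor needs a lock around the buffer. A toy illustration of that per-core pattern (std::vector stands in for Common::ScratchBuffer; NUM_CORES and the gather step are placeholders):

#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::size_t NUM_CORES = 4; // stands in for Core::Hardware::NUM_CPU_CORES
std::array<std::vector<uint32_t>, NUM_CORES> scratch_buffers;

void ForEachMirror(std::size_t core, uint32_t device_page) {
    auto& buffer = scratch_buffers[core]; // never shared across threads
    buffer.clear();
    buffer.push_back(device_page); // a real gather would append all mirrors here
    for (uint32_t page : buffer) {
        (void)page; // per-mirror work: dirty collection, cache notification
    }
}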
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ca31e2fbd..71b748c74 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -554,9 +554,8 @@ void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::Cache
     }
 }
 
-bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
+bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
     if (addr == 0 || size == 0) {
         return false;
     }
@@ -577,9 +576,9 @@ bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
     return false;
 }
 
-void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) {
+void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+
     if (addr == 0 || size == 0) {
         return;
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index efcc349a0..7db131985 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -602,8 +602,7 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::s
     }
 }
 
-bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return false;
     }
@@ -624,8 +623,7 @@ bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
     return false;
 }
 
-void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) {
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }