9 files changed, 329 insertions, 27 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index ca54eb6c6..0f713ead1 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -611,6 +611,8 @@ add_library(core STATIC
    hle/service/ns/pdm_qry.h
    hle/service/nvdrv/core/container.cpp
    hle/service/nvdrv/core/container.h
+    hle/service/nvdrv/core/heap_mapper.cpp
+    hle/service/nvdrv/core/heap_mapper.h
    hle/service/nvdrv/core/nvmap.cpp
    hle/service/nvdrv/core/nvmap.h
    hle/service/nvdrv/core/syncpoint_manager.cpp
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index b3a5f3d8b..138eb5017 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -20,10 +20,10 @@ namespace Core {
 namespace {
-class PhysicalAddressContainer {
+class MultiAddressContainer {
 public:
-    PhysicalAddressContainer() = default;
+    MultiAddressContainer() = default;
-    ~PhysicalAddressContainer() = default;
+    ~MultiAddressContainer() = default;
    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
        buffer.resize(8);
@@ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator {
    std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
        pin_allocator;
    Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
-    PhysicalAddressContainer multi_dev_address;
+    MultiAddressContainer multi_dev_address;
    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
    template <bool pin_area>
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index e12ce05c1..ba7eb9e24 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -8,6 +8,7 @@
 #include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/core/syncpoint_manager.h"
 #include "core/memory.h"
@@ -36,6 +37,14 @@ Container::~Container() = default;
 size_t Container::OpenSession(Kernel::KProcess* process) {
    std::scoped_lock lk(impl->session_guard);
+    for (auto& session : impl->sessions) {
+        if (!session.is_active) {
+            continue;
+        }
+        if (session.process == process) {
+            return session.id;
+        }
+    }
    size_t new_id{};
    auto* memory_interface = &process->GetMemory();
    auto& smmu = impl->host1x.MemoryManager();
@@ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) {
        impl->sessions.emplace_back(new_id, process, smmu_id);
        new_id = impl->new_ids++;
    }
-    LOG_CRITICAL(Debug, "Created Session {}", new_id);
+    auto& session = impl->sessions[new_id];
+    session.is_active = true;
+    // Optimization
+    if (process->IsApplication()) {
+        auto& page_table = process->GetPageTable().GetBasePageTable();
+        auto heap_start = page_table.GetHeapRegionStart();
+        Kernel::KProcessAddress cur_addr = heap_start;
+        size_t region_size = 0;
+        VAddr region_start = 0;
+        while (true) {
+            Kernel::KMemoryInfo mem_info{};
+            Kernel::Svc::PageInfo page_info{};
+            R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
+                                          cur_addr));
+            auto svc_mem_info = mem_info.GetSvcMemoryInfo();
+            // check if this memory block is heap
+            if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
+                if (svc_mem_info.size > region_size) {
+                    region_size = svc_mem_info.size;
+                    region_start = svc_mem_info.base_address;
+                }
+            }
+            // Check if we're done.
+            const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
+            if (next_address <= GetInteger(cur_addr)) {
+                break;
+            }
+            cur_addr = next_address;
+        }
+        session.has_preallocated_area = false;
+        auto start_region = (region_size >> 15) >= 1024 ? smmu.Allocate(region_size) : 0;
+        if (start_region != 0) {
+            session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
+                                                          smmu_id, impl->host1x);
+            session.has_preallocated_area = true;
+            LOG_CRITICAL(Debug, "Preallocation created!");
+        }
+    }
    return new_id;
 }
 void Container::CloseSession(size_t id) {
    std::scoped_lock lk(impl->session_guard);
+    auto& session = impl->sessions[id];
    auto& smmu = impl->host1x.MemoryManager();
+    if (session.has_preallocated_area) {
+        const DAddr region_start = session.mapper->GetRegionStart();
+        const size_t region_size = session.mapper->GetRegionSize();
+        session.mapper.reset();
+        smmu.Free(region_start, region_size);
+        session.has_preallocated_area = false;
+    }
+    session.is_active = false;
    smmu.UnregisterProcess(impl->sessions[id].smmu_id);
    impl->id_pool.emplace_front(id);
-    LOG_CRITICAL(Debug, "Closed Session {}", id);
 }
 Session* Container::GetSession(size_t id) {
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index a1fd20199..86705cbc8 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -20,6 +20,7 @@ class Host1x;
 namespace Service::Nvidia::NvCore {
+class HeapMapper;
 class NvMap;
 class SyncpointManager;
@@ -29,6 +30,9 @@ struct Session {
    size_t id;
    Kernel::KProcess* process;
    size_t smmu_id;
+    bool has_preallocated_area{};
+    std::unique_ptr<HeapMapper> mapper{};
+    bool is_active{};
 };
 class Container {
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
new file mode 100644
index 000000000..59d993bc6
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
@@ -0,0 +1,172 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+#include <mutex>
+#include <boost/container/small_vector.hpp>
+#define BOOST_NO_MT
+#include <boost/pool/detail/mutex.hpp>
+#undef BOOST_NO_MT
+#include <boost/icl/interval.hpp>
+#include <boost/icl/interval_base_set.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/icl/split_interval_map.hpp>
+#include <boost/pool/pool.hpp>
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/pool/poolfwd.hpp>
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
+#include "video_core/host1x/host1x.h"
+namespace boost {
+template <typename T>
+class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
+}
+namespace Service::Nvidia::NvCore {
+// Boost.ICL helper aliases keyed by DAddr.
+// NOTE(review): IntervalCompare, IntervalInstance and IntervalAllocator are not referenced by
+// any other line of this file as shown in this patch — confirm whether they are still needed.
+using IntervalCompare = std::less<DAddr>;
+using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
+using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
+// Set of intervals; HeapMapperInternal::base_set has this type.
+using IntervalSet = boost::icl::interval_set<DAddr>;
+// Interval type of IntervalSet; every range constructed in this file uses it.
+using IntervalType = typename IntervalSet::interval_type;
+/// Combiner that adds `added` onto `current` and then lifts any result that ended up below the
+/// identity element back to the identity element.
+template <typename Type>
+struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
+    using type = counter_add_functor<Type>;
+    using base_type = boost::icl::identity_based_inplace_combine<Type>;
+    /// Accumulates `added` into `current`, clamping from below at the identity element.
+    void operator()(Type& current, const Type& added) const {
+        current += added;
+        const bool below_identity = current < base_type::identity_element();
+        if (below_identity) {
+            current = base_type::identity_element();
+        }
+    }
+    /// Intentionally empty.
+    static void version(Type&) {}
+};
+using OverlapCombine = counter_add_functor<int>;
+using OverlapSection = boost::icl::inter_section<int>;
+// Per-interval integer counter; declared with only its key and value types, so OverlapCombine
+// and OverlapSection above are not part of this alias.
+using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
+/// Private state of HeapMapper: the scratch interval set, the per-interval mapping counter, the
+/// device memory manager reference and the mutex locked by HeapMapper::Map / HeapMapper::Unmap.
+struct HeapMapper::HeapMapperInternal {
+    HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
+    ~HeapMapperInternal() = default;
+    /// Invokes `func(begin, end, count)` for every entry of `current_range` located between
+    /// lower_bound and upper_bound of [cpu_addr, cpu_addr + size). The entry bounds handed to
+    /// `func` are clipped so they never leave that range.
+    template <typename Func>
+    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
+                                 Func&& func) {
+        const DAddr range_begin = cpu_addr;
+        const DAddr range_end = range_begin + size;
+        const IntervalType query{range_begin, range_end};
+        auto entry = current_range.lower_bound(query);
+        if (entry == current_range.end()) {
+            return;
+        }
+        const auto stop = current_range.upper_bound(query);
+        while (entry != stop) {
+            const auto& span = entry->first;
+            DAddr clipped_end = span.upper() > range_end ? range_end : span.upper();
+            DAddr clipped_begin = span.lower() < range_begin ? range_begin : span.lower();
+            func(clipped_begin, clipped_end, entry->second);
+            ++entry;
+        }
+    }
+    /// Adds `subtract_value` to the counter over `search_interval`, then erases, one at a time,
+    /// every entry in that interval's lower_bound/upper_bound window whose counter is <= 0.
+    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
+                                    const IntervalType search_interval, int subtract_value) {
+        current_range.add(std::make_pair(search_interval, subtract_value));
+        // Each pass erases at most one entry and then looks the window up again, so no iterator
+        // is used after an erase.
+        for (;;) {
+            auto cursor = current_range.lower_bound(search_interval);
+            if (cursor == current_range.end()) {
+                return;
+            }
+            const auto stop = current_range.upper_bound(search_interval);
+            while (cursor != stop && cursor->second > 0) {
+                ++cursor;
+            }
+            if (cursor == stop) {
+                return;
+            }
+            current_range.erase(cursor);
+        }
+    }
+    IntervalSet base_set;             ///< Scratch set, cleared at the start and end of Map/Unmap
+    OverlapCounter mapping_overlaps;  ///< Counter incremented by Map and decremented by Unmap
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
+    std::mutex guard;
+};
+/// Stores the window/region description and creates the private bookkeeping state bound to the
+/// memory manager of `host1x`. Nothing is mapped here.
+HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
+                       Tegra::Host1x::Host1x& host1x)
+    : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id},
+      m_internal{std::make_unique<HeapMapperInternal>(host1x)} {}
+/// Unmaps the complete device region [m_daddress, m_daddress + m_size) in a single call.
+HeapMapper::~HeapMapper() {
+    auto& device_memory = m_internal->device_memory;
+    device_memory.Unmap(m_daddress, m_size);
+}
+/// Maps [start, start + size) into the device region at the same offset it has inside the
+/// virtual window and bumps the mapping counter for that range by one. Only the parts of the
+/// range that have no counter entry yet are handed to the device memory manager.
+/// No bounds check is performed here.
+/// @returns The device address that corresponds to `start`.
+DAddr HeapMapper::Map(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    auto& pending = m_internal->base_set;
+    pending.clear();
+    const IntervalType requested{start, start + size};
+    pending.insert(requested);
+    // Strip every piece that already has a counter entry; what remains still needs mapping.
+    m_internal->ForEachInOverlapCounter(
+        m_internal->mapping_overlaps, start, size,
+        [&pending](VAddr covered_begin, VAddr covered_end, int) {
+            pending.subtract(IntervalType{covered_begin, covered_end});
+        });
+    for (const auto& gap : pending) {
+        const VAddr gap_begin = gap.lower();
+        const VAddr gap_end = gap.upper();
+        const size_t offset = gap_begin - m_vaddress;
+        const size_t length = gap_end - gap_begin;
+        m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, length, m_smmu_id);
+    }
+    m_internal->mapping_overlaps += std::make_pair(requested, 1);
+    pending.clear();
+    return m_daddress + (start - m_vaddress);
+}
+/// Releases one mapping of [start, start + size): the pieces whose counter is at most one are
+/// unmapped from the device, then the counter over the whole range is lowered by one.
+void HeapMapper::Unmap(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    auto& releasable = m_internal->base_set;
+    releasable.clear();
+    // Collect the pieces for which this is the last remaining mapping.
+    m_internal->ForEachInOverlapCounter(
+        m_internal->mapping_overlaps, start, size,
+        [&releasable](VAddr counted_begin, VAddr counted_end, int count) {
+            if (count > 1) {
+                return;
+            }
+            releasable.insert(IntervalType{counted_begin, counted_end});
+        });
+    for (const auto& piece : releasable) {
+        const VAddr piece_begin = piece.lower();
+        const VAddr piece_end = piece.upper();
+        const size_t offset = piece_begin - m_vaddress;
+        const size_t length = piece_end - piece_begin;
+        m_internal->device_memory.Unmap(m_daddress + offset, length);
+    }
+    const IntervalType to_remove{start, start + size};
+    m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
+    releasable.clear();
+}
+} // namespace Service::Nvidia::NvCore
+\ No newline at end of file
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h
new file mode 100644
index 000000000..8b23638b8
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.h
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+#pragma once
+#include <memory>
+#include "common/common_types.h"
+namespace Tegra::Host1x {
+class Host1x;
+} // namespace Tegra::Host1x
+namespace Service::Nvidia::NvCore {
+/// Maps sub-ranges of one virtual-address window into a device-address region of the same size
+/// supplied by the caller, keeping the same offset on both sides.
+class HeapMapper {
+public:
+    /// @param start_vaddress First virtual address of the window.
+    /// @param start_daddress First device address of the region.
+    /// @param size           Length in bytes shared by the window and the region.
+    /// @param smmu_id        Identifier passed to the device memory manager on every mapping.
+    /// @param host1x         Host1x instance whose memory manager performs the mappings.
+    HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
+               Tegra::Host1x::Host1x& host1x);
+    /// Unmaps the whole device region.
+    ~HeapMapper();
+    /// Returns true when [start, start + size) lies entirely inside the window.
+    /// The check uses subtractions only, so an oversized `size` cannot make `start + size` wrap
+    /// around and let an out-of-window range pass.
+    bool IsInBounds(VAddr start, size_t size) const {
+        if (start < m_vaddress || size > m_size) {
+            return false;
+        }
+        // Both subtractions are safe here: start >= m_vaddress and size <= m_size.
+        return (start - m_vaddress) <= (m_size - size);
+    }
+    /// Maps [start, start + size) and returns the device address corresponding to `start`.
+    /// Does not check bounds itself; see IsInBounds.
+    DAddr Map(VAddr start, size_t size);
+    /// Releases one mapping of [start, start + size).
+    void Unmap(VAddr start, size_t size);
+    /// First device address of the region.
+    DAddr GetRegionStart() const {
+        return m_daddress;
+    }
+    /// Size in bytes of the region (and of the virtual window).
+    size_t GetRegionSize() const {
+        return m_size;
+    }
+private:
+    struct HeapMapperInternal;
+    VAddr m_vaddress;
+    DAddr m_daddress;
+    size_t m_size;
+    size_t m_smmu_id;
+    std::unique_ptr<HeapMapperInternal> m_internal;
+};
+} // namespace Service::Nvidia::NvCore
+\ No newline at end of file
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 0b2ddd980..023c070d9 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -8,10 +8,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/memory.h"
 #include "video_core/host1x/host1x.h"
 using Core::Memory::YUZU_PAGESIZE;
 namespace Service::Nvidia::NvCore {
@@ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) {
    }
    // Free and unmap the handle from the SMMU
-    auto& smmu = host1x.MemoryManager();
+    const size_t map_size = handle_description.aligned_size;
-    smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
+    if (!handle_description.in_heap) {
-    smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));
+        auto& smmu = host1x.MemoryManager();
+        smmu.Unmap(handle_description.d_address, map_size);
+        smmu.Free(handle_description.d_address, static_cast<size_t>(map_size));
+        handle_description.d_address = 0;
+        return;
+    }
+    const VAddr vaddress = handle_description.address;
+    auto* session = core.GetSession(handle_description.session_id);
+    session->mapper->Unmap(vaddress, map_size);
    handle_description.d_address = 0;
+    handle_description.in_heap = false;
 }
 bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -188,24 +199,31 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
        DAddr address{};
        auto& smmu = host1x.MemoryManager();
        auto* session = core.GetSession(session_id);
-        while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {
+        const VAddr vaddress = handle_description->address;
-            // Free handles until the allocation succeeds
+        const size_t map_size = handle_description->aligned_size;
-            std::scoped_lock queueLock(unmap_queue_lock);
+        handle_description->session_id = session_id;
-            if (auto freeHandleDesc{unmap_queue.front()}) {
+        if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
-                // Handles in the unmap queue are guaranteed not to be pinned so don't bother
+            handle_description->d_address = session->mapper->Map(vaddress, map_size);
-                // checking if they are before unmapping
+            handle_description->in_heap = true;
-                std::scoped_lock freeLock(freeHandleDesc->mutex);
+        } else {
-                if (handle_description->d_address)
+            while ((address = smmu.Allocate(map_size)) == 0) {
-                    UnmapHandle(*freeHandleDesc);
+                // Free handles until the allocation succeeds
-            } else {
+                std::scoped_lock queueLock(unmap_queue_lock);
-                LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+                if (auto freeHandleDesc{unmap_queue.front()}) {
+                    // Handles in the unmap queue are guaranteed not to be pinned so don't bother
+                    // checking if they are before unmapping
+                    std::scoped_lock freeLock(freeHandleDesc->mutex);
+                    if (handle_description->d_address)
+                        UnmapHandle(*freeHandleDesc);
+                } else {
+                    LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+                }
            }
-        }
-        handle_description->d_address = address;
+            handle_description->d_address = address;
+            smmu.Map(address, vaddress, map_size, session->smmu_id);
-        smmu.Map(address, handle_description->address, handle_description->aligned_size,
+            handle_description->in_heap = false;
-                 session->smmu_id);
+        }
    }
    if (low_area_pin) {
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 7dd6d26c3..4af61289e 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -70,6 +70,8 @@ public:
        u8 kind{};        //!< Used for memory compression
        bool allocated{}; //!< If the handle has been allocated with `Alloc`
+        bool in_heap{};
+        size_t session_id{};
        DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
                           //!< this can also be in the nvdrv tmem
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ad3b94f8..609704b33 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -34,8 +34,6 @@
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
-#pragma optimize("", off)
 namespace Tegra {
 struct GPU::Impl {

diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index ca54eb6c6..0f713ead1 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt
@@ -611,6 +611,8 @@ add_library(core STATIC
611	hle/service/ns/pdm_qry.h	611	hle/service/ns/pdm_qry.h
612	hle/service/nvdrv/core/container.cpp	612	hle/service/nvdrv/core/container.cpp
613	hle/service/nvdrv/core/container.h	613	hle/service/nvdrv/core/container.h
		614	hle/service/nvdrv/core/heap_mapper.cpp
		615	hle/service/nvdrv/core/heap_mapper.h
614	hle/service/nvdrv/core/nvmap.cpp	616	hle/service/nvdrv/core/nvmap.cpp
615	hle/service/nvdrv/core/nvmap.h	617	hle/service/nvdrv/core/nvmap.h
616	hle/service/nvdrv/core/syncpoint_manager.cpp	618	hle/service/nvdrv/core/syncpoint_manager.cpp


diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index b3a5f3d8b..138eb5017 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc
@@ -20,10 +20,10 @@ namespace Core {
20		20
21	namespace {	21	namespace {
22		22
23	class PhysicalAddressContainer {	23	class MultiAddressContainer {
24	public:	24	public:
25	PhysicalAddressContainer() = default;	25	MultiAddressContainer() = default;
26	~PhysicalAddressContainer() = default;	26	~MultiAddressContainer() = default;
27		27
28	void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {	28	void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
29	buffer.resize(8);	29	buffer.resize(8);
@@ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator {
145	std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>	145	std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
146	pin_allocator;	146	pin_allocator;
147	Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;	147	Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
148	PhysicalAddressContainer multi_dev_address;	148	MultiAddressContainer multi_dev_address;
149		149
150	/// Returns true when vaddr -> vaddr+size is fully contained in the buffer	150	/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
151	template <bool pin_area>	151	template <bool pin_area>


diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index e12ce05c1..ba7eb9e24 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -8,6 +8,7 @@
8		8
9	#include "core/hle/kernel/k_process.h"	9	#include "core/hle/kernel/k_process.h"
10	#include "core/hle/service/nvdrv/core/container.h"	10	#include "core/hle/service/nvdrv/core/container.h"
		11	#include "core/hle/service/nvdrv/core/heap_mapper.h"
11	#include "core/hle/service/nvdrv/core/nvmap.h"	12	#include "core/hle/service/nvdrv/core/nvmap.h"
12	#include "core/hle/service/nvdrv/core/syncpoint_manager.h"	13	#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
13	#include "core/memory.h"	14	#include "core/memory.h"
@@ -36,6 +37,14 @@ Container::~Container() = default;
36		37
37	size_t Container::OpenSession(Kernel::KProcess* process) {	38	size_t Container::OpenSession(Kernel::KProcess* process) {
38	std::scoped_lock lk(impl->session_guard);	39	std::scoped_lock lk(impl->session_guard);
		40	for (auto& session : impl->sessions) {
		41	if (!session.is_active) {
		42	continue;
		43	}
		44	if (session.process == process) {
		45	return session.id;
		46	}
		47	}
39	size_t new_id{};	48	size_t new_id{};
40	auto* memory_interface = &process->GetMemory();	49	auto* memory_interface = &process->GetMemory();
41	auto& smmu = impl->host1x.MemoryManager();	50	auto& smmu = impl->host1x.MemoryManager();
@@ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) {
48	impl->sessions.emplace_back(new_id, process, smmu_id);	57	impl->sessions.emplace_back(new_id, process, smmu_id);
49	new_id = impl->new_ids++;	58	new_id = impl->new_ids++;
50	}	59	}
51	LOG_CRITICAL(Debug, "Created Session {}", new_id);	60	auto& session = impl->sessions[new_id];
		61	session.is_active = true;
		62	// Optimization
		63	if (process->IsApplication()) {
		64	auto& page_table = process->GetPageTable().GetBasePageTable();
		65	auto heap_start = page_table.GetHeapRegionStart();
		66
		67	Kernel::KProcessAddress cur_addr = heap_start;
		68	size_t region_size = 0;
		69	VAddr region_start = 0;
		70	while (true) {
		71	Kernel::KMemoryInfo mem_info{};
		72	Kernel::Svc::PageInfo page_info{};
		73	R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
		74	cur_addr));
		75	auto svc_mem_info = mem_info.GetSvcMemoryInfo();
		76
		77	// check if this memory block is heap
		78	if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
		79	if (svc_mem_info.size > region_size) {
		80	region_size = svc_mem_info.size;
		81	region_start = svc_mem_info.base_address;
		82	}
		83	}
		84
		85	// Check if we're done.
		86	const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
		87	if (next_address <= GetInteger(cur_addr)) {
		88	break;
		89	}
		90
		91	cur_addr = next_address;
		92	}
		93	session.has_preallocated_area = false;
		94	auto start_region = (region_size >> 15) >= 1024 ? smmu.Allocate(region_size) : 0;
		95	if (start_region != 0) {
		96	session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
		97	smmu_id, impl->host1x);
		98	session.has_preallocated_area = true;
		99	LOG_CRITICAL(Debug, "Preallocation created!");
		100	}
		101	}
52	return new_id;	102	return new_id;
53	}	103	}
54		104
55	void Container::CloseSession(size_t id) {	105	void Container::CloseSession(size_t id) {
56	std::scoped_lock lk(impl->session_guard);	106	std::scoped_lock lk(impl->session_guard);
		107	auto& session = impl->sessions[id];
57	auto& smmu = impl->host1x.MemoryManager();	108	auto& smmu = impl->host1x.MemoryManager();
		109	if (session.has_preallocated_area) {
		110	const DAddr region_start = session.mapper->GetRegionStart();
		111	const size_t region_size = session.mapper->GetRegionSize();
		112	session.mapper.reset();
		113	smmu.Free(region_start, region_size);
		114	session.has_preallocated_area = false;
		115	}
		116	session.is_active = false;
58	smmu.UnregisterProcess(impl->sessions[id].smmu_id);	117	smmu.UnregisterProcess(impl->sessions[id].smmu_id);
59	impl->id_pool.emplace_front(id);	118	impl->id_pool.emplace_front(id);
60	LOG_CRITICAL(Debug, "Closed Session {}", id);
61	}	119	}
62		120
63	Session* Container::GetSession(size_t id) {	121	Session* Container::GetSession(size_t id) {


diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index a1fd20199..86705cbc8 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h
@@ -20,6 +20,7 @@ class Host1x;
20		20
21	namespace Service::Nvidia::NvCore {	21	namespace Service::Nvidia::NvCore {
22		22
		23	class HeapMapper;
23	class NvMap;	24	class NvMap;
24	class SyncpointManager;	25	class SyncpointManager;
25		26
@@ -29,6 +30,9 @@ struct Session {
29	size_t id;	30	size_t id;
30	Kernel::KProcess* process;	31	Kernel::KProcess* process;
31	size_t smmu_id;	32	size_t smmu_id;
		33	bool has_preallocated_area{};
		34	std::unique_ptr<HeapMapper> mapper{};
		35	bool is_active{};
32	};	36	};
33		37
34	class Container {	38	class Container {


diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp new file mode 100644 index 000000000..59d993bc6 --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
@@ -0,0 +1,172 @@
		1	// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
		2	// SPDX-License-Identifier: GPL-3.0-or-later
		3
		4	#include <mutex>
		5
		6	#include <boost/container/small_vector.hpp>
		7	#define BOOST_NO_MT
		8	#include <boost/pool/detail/mutex.hpp>
		9	#undef BOOST_NO_MT
		10	#include <boost/icl/interval.hpp>
		11	#include <boost/icl/interval_base_set.hpp>
		12	#include <boost/icl/interval_set.hpp>
		13	#include <boost/icl/split_interval_map.hpp>
		14	#include <boost/pool/pool.hpp>
		15	#include <boost/pool/pool_alloc.hpp>
		16	#include <boost/pool/poolfwd.hpp>
		17
		18	#include "core/hle/service/nvdrv/core/heap_mapper.h"
		19	#include "video_core/host1x/host1x.h"
		20
		21	namespace boost {
		22	template <typename T>
		23	class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
		24	}
		25
		26	namespace Service::Nvidia::NvCore {
		27
		28	using IntervalCompare = std::less<DAddr>;
		29	using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
		30	using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
		31	using IntervalSet = boost::icl::interval_set<DAddr>;
		32	using IntervalType = typename IntervalSet::interval_type;
		33
		34	template <typename Type>
		35	struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
		36	// types
		37	typedef counter_add_functor<Type> type;
		38	typedef boost::icl::identity_based_inplace_combine<Type> base_type;
		39
		40	// public member functions
		41	void operator()(Type& current, const Type& added) const {
		42	current += added;
		43	if (current < base_type::identity_element()) {
		44	current = base_type::identity_element();
		45	}
		46	}
		47
		48	// public static functions
		49	static void version(Type&){};
		50	};
		51
		52	using OverlapCombine = counter_add_functor<int>;
		53	using OverlapSection = boost::icl::inter_section<int>;
		54	using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
		55
		56	struct HeapMapper::HeapMapperInternal {
		57	HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
		58	~HeapMapperInternal() = default;
		59
		60	template <typename Func>
		61	void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
		62	Func&& func) {
		63	const DAddr start_address = cpu_addr;
		64	const DAddr end_address = start_address + size;
		65	const IntervalType search_interval{start_address, end_address};
		66	auto it = current_range.lower_bound(search_interval);
		67	if (it == current_range.end()) {
		68	return;
		69	}
		70	auto end_it = current_range.upper_bound(search_interval);
		71	for (; it != end_it; it++) {
		72	auto& inter = it->first;
		73	DAddr inter_addr_end = inter.upper();
		74	DAddr inter_addr = inter.lower();
		75	if (inter_addr_end > end_address) {
		76	inter_addr_end = end_address;
		77	}
		78	if (inter_addr < start_address) {
		79	inter_addr = start_address;
		80	}
		81	func(inter_addr, inter_addr_end, it->second);
		82	}
		83	}
		84
		85	void RemoveEachInOverlapCounter(OverlapCounter& current_range,
		86	const IntervalType search_interval, int subtract_value) {
		87	bool any_removals = false;
		88	current_range.add(std::make_pair(search_interval, subtract_value));
		89	do {
		90	any_removals = false;
		91	auto it = current_range.lower_bound(search_interval);
		92	if (it == current_range.end()) {
		93	return;
		94	}
		95	auto end_it = current_range.upper_bound(search_interval);
		96	for (; it != end_it; it++) {
		97	if (it->second <= 0) {
		98	any_removals = true;
		99	current_range.erase(it);
		100	break;
		101	}
		102	}
		103	} while (any_removals);
		104	}
		105
		106	IntervalSet base_set;
		107	OverlapCounter mapping_overlaps;
		108	Tegra::MaxwellDeviceMemoryManager& device_memory;
		109	std::mutex guard;
		110	};
		111
		112	HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
		113	Tegra::Host1x::Host1x& host1x)
		114	: m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id} {
		115	m_internal = std::make_unique<HeapMapperInternal>(host1x);
		116	}
		117
		118	HeapMapper::~HeapMapper() {
		119	m_internal->device_memory.Unmap(m_daddress, m_size);
		120	}
		121
		122	DAddr HeapMapper::Map(VAddr start, size_t size) {
		123	std::scoped_lock lk(m_internal->guard);
		124	m_internal->base_set.clear();
		125	const IntervalType interval{start, start + size};
		126	m_internal->base_set.insert(interval);
		127	m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int){
		128	const IntervalType other{start_addr, end_addr};
		129	m_internal->base_set.subtract(other);
		130	});
		131	if (!m_internal->base_set.empty()) {
		132	auto it = m_internal->base_set.begin();
		133	auto end_it = m_internal->base_set.end();
		134	for (; it != end_it; it++) {
		135	const VAddr inter_addr_end = it->upper();
		136	const VAddr inter_addr = it->lower();
		137	const size_t offset = inter_addr - m_vaddress;
		138	const size_t sub_size = inter_addr_end - inter_addr;
		139	m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size, m_smmu_id);
		140	}
		141	}
		142	m_internal->mapping_overlaps += std::make_pair(interval, 1);
		143	m_internal->base_set.clear();
		144	return m_daddress + (start - m_vaddress);
		145	}
		146
		147	void HeapMapper::Unmap(VAddr start, size_t size) {
		148	std::scoped_lock lk(m_internal->guard);
		149	m_internal->base_set.clear();
		150	m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, [this](VAddr start_addr, VAddr end_addr, int value) {
		151	if (value <= 1) {
		152	const IntervalType other{start_addr, end_addr};
		153	m_internal->base_set.insert(other);
		154	}
		155	});
		156	if (!m_internal->base_set.empty()) {
		157	auto it = m_internal->base_set.begin();
		158	auto end_it = m_internal->base_set.end();
		159	for (; it != end_it; it++) {
		160	const VAddr inter_addr_end = it->upper();
		161	const VAddr inter_addr = it->lower();
		162	const size_t offset = inter_addr - m_vaddress;
		163	const size_t sub_size = inter_addr_end - inter_addr;
		164	m_internal->device_memory.Unmap(m_daddress + offset, sub_size);
		165	}
		166	}
		167	const IntervalType to_remove{start, start + size};
		168	m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
		169	m_internal->base_set.clear();
		170	}
		171
		172	} // namespace Service::Nvidia::NvCore \ No newline at end of file


diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h new file mode 100644 index 000000000..8b23638b8 --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.h
@@ -0,0 +1,48 @@
		1	// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
		2	// SPDX-License-Identifier: GPL-3.0-or-later
		3
		4	#pragma once
		5
		6	#include <memory>
		7
		8	#include "common/common_types.h"
		9
		10	namespace Tegra::Host1x {
		11	class Host1x;
		12	} // namespace Tegra::Host1x
		13
		14	namespace Service::Nvidia::NvCore {
		15
		16	class HeapMapper {
		17	public:
		18	HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
		19	Tegra::Host1x::Host1x& host1x);
		20	~HeapMapper();
		21
		22	bool IsInBounds(VAddr start, size_t size) const {
		23	VAddr end = start + size;
		24	return start >= m_vaddress && end <= (m_vaddress + m_size);
		25	}
		26
		27	DAddr Map(VAddr start, size_t size);
		28
		29	void Unmap(VAddr start, size_t size);
		30
		31	DAddr GetRegionStart() const {
		32	return m_daddress;
		33	}
		34
		35	size_t GetRegionSize() const {
		36	return m_size;
		37	}
		38
		39	private:
		40	struct HeapMapperInternal;
		41	VAddr m_vaddress;
		42	DAddr m_daddress;
		43	size_t m_size;
		44	size_t m_smmu_id;
		45	std::unique_ptr<HeapMapperInternal> m_internal;
		46	};
		47
		48	} // namespace Service::Nvidia::NvCore \ No newline at end of file


diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 0b2ddd980..023c070d9 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -8,10 +8,12 @@
8	#include "common/assert.h"	8	#include "common/assert.h"
9	#include "common/logging/log.h"	9	#include "common/logging/log.h"
10	#include "core/hle/service/nvdrv/core/container.h"	10	#include "core/hle/service/nvdrv/core/container.h"
		11	#include "core/hle/service/nvdrv/core/heap_mapper.h"
11	#include "core/hle/service/nvdrv/core/nvmap.h"	12	#include "core/hle/service/nvdrv/core/nvmap.h"
12	#include "core/memory.h"	13	#include "core/memory.h"
13	#include "video_core/host1x/host1x.h"	14	#include "video_core/host1x/host1x.h"
14		15
		16
15	using Core::Memory::YUZU_PAGESIZE;	17	using Core::Memory::YUZU_PAGESIZE;
16		18
17	namespace Service::Nvidia::NvCore {	19	namespace Service::Nvidia::NvCore {
@@ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) {
90	}	92	}
91		93
92	// Free and unmap the handle from the SMMU	94	// Free and unmap the handle from the SMMU
93	auto& smmu = host1x.MemoryManager();	95	const size_t map_size = handle_description.aligned_size;
94	smmu.Unmap(handle_description.d_address, handle_description.aligned_size);	96	if (!handle_description.in_heap) {
95	smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));	97	auto& smmu = host1x.MemoryManager();
		98	smmu.Unmap(handle_description.d_address, map_size);
		99	smmu.Free(handle_description.d_address, static_cast<size_t>(map_size));
		100	handle_description.d_address = 0;
		101	return;
		102	}
		103	const VAddr vaddress = handle_description.address;
		104	auto* session = core.GetSession(handle_description.session_id);
		105	session->mapper->Unmap(vaddress, map_size);
96	handle_description.d_address = 0;	106	handle_description.d_address = 0;
		107	handle_description.in_heap = false;
97	}	108	}
98		109
99	bool NvMap::TryRemoveHandle(const Handle& handle_description) {	110	bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -188,24 +199,31 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
188	DAddr address{};	199	DAddr address{};
189	auto& smmu = host1x.MemoryManager();	200	auto& smmu = host1x.MemoryManager();
190	auto* session = core.GetSession(session_id);	201	auto* session = core.GetSession(session_id);
191	while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {	202	const VAddr vaddress = handle_description->address;
192	// Free handles until the allocation succeeds	203	const size_t map_size = handle_description->aligned_size;
193	std::scoped_lock queueLock(unmap_queue_lock);	204	handle_description->session_id = session_id;
194	if (auto freeHandleDesc{unmap_queue.front()}) {	205	if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
195	// Handles in the unmap queue are guaranteed not to be pinned so don't bother	206	handle_description->d_address = session->mapper->Map(vaddress, map_size);
196	// checking if they are before unmapping	207	handle_description->in_heap = true;
197	std::scoped_lock freeLock(freeHandleDesc->mutex);	208	} else {
198	if (handle_description->d_address)	209	while ((address = smmu.Allocate(map_size)) == 0) {
199	UnmapHandle(*freeHandleDesc);	210	// Free handles until the allocation succeeds
200	} else {	211	std::scoped_lock queueLock(unmap_queue_lock);
201	LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");	212	if (auto freeHandleDesc{unmap_queue.front()}) {
		213	// Handles in the unmap queue are guaranteed not to be pinned so don't bother
		214	// checking if they are before unmapping
		215	std::scoped_lock freeLock(freeHandleDesc->mutex);
		216	if (handle_description->d_address)
		217	UnmapHandle(*freeHandleDesc);
		218	} else {
		219	LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
		220	}
202	}	221	}
203	}
204		222
205	handle_description->d_address = address;	223	handle_description->d_address = address;
206		224	smmu.Map(address, vaddress, map_size, session->smmu_id);
207	smmu.Map(address, handle_description->address, handle_description->aligned_size,	225	handle_description->in_heap = false;
208	session->smmu_id);	226	}
209	}	227	}
210		228
211	if (low_area_pin) {	229	if (low_area_pin) {


diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 7dd6d26c3..4af61289e 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -70,6 +70,8 @@ public:
70		70
71	u8 kind{}; //!< Used for memory compression	71	u8 kind{}; //!< Used for memory compression
72	bool allocated{}; //!< If the handle has been allocated with `Alloc`	72	bool allocated{}; //!< If the handle has been allocated with `Alloc`
		73	bool in_heap{};
		74	size_t session_id{};
73		75
74	DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,	76	DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
75	//!< this can also be in the nvdrv tmem	77	//!< this can also be in the nvdrv tmem


diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6ad3b94f8..609704b33 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp
@@ -34,8 +34,6 @@
34	#include "video_core/renderer_base.h"	34	#include "video_core/renderer_base.h"
35	#include "video_core/shader_notify.h"	35	#include "video_core/shader_notify.h"
36		36
37	#pragma optimize("", off)
38
39	namespace Tegra {	37	namespace Tegra {
40		38
41	struct GPU::Impl {	39	struct GPU::Impl {