From a7c1306e2d19f1270f6fa0603ba20043c90e9c05 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Tue, 30 Jan 2024 11:52:41 +0100
Subject: Texture Cache: make sparse texture table per channel
---
src/video_core/texture_cache/texture_cache.h | 87 +++++++++++++----------
src/video_core/texture_cache/texture_cache_base.h | 4 +-
2 files changed, 51 insertions(+), 40 deletions(-)
(limited to 'src')
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7398ed2ec..a7400adfa 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1431,7 +1431,8 @@ ImageId TextureCache
::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
}
}
};
- ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+ ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes,
+ region_check_gpu);
bool can_rescale = info.rescaleable;
bool any_rescaled = false;
@@ -1842,7 +1843,7 @@ void TextureCache
::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
if (!storage_id) {
return;
}
- auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+ auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2];
ForEachGPUPage(gpu_addr, size,
[this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
const auto it = gpu_page_table.find(page);
@@ -1882,41 +1883,48 @@ void TextureCache
::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
template
template
-void TextureCache::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache
::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size,
+ Func&& func) {
using FuncReturn = typename std::invoke_result::type;
static constexpr bool BOOL_BREAK = std::is_same_v;
boost::container::small_vector images;
- ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
- const auto it = sparse_page_table.find(page);
- if (it == sparse_page_table.end()) {
- if constexpr (BOOL_BREAK) {
- return false;
- } else {
- return;
- }
- }
- for (const ImageId image_id : it->second) {
- Image& image = slot_images[image_id];
- if (True(image.flags & ImageFlagBits::Picked)) {
- continue;
- }
- if (!image.OverlapsGPU(gpu_addr, size)) {
- continue;
- }
- image.flags |= ImageFlagBits::Picked;
- images.push_back(image_id);
- if constexpr (BOOL_BREAK) {
- if (func(image_id, image)) {
- return true;
- }
- } else {
- func(image_id, image);
- }
- }
- if constexpr (BOOL_BREAK) {
- return false;
- }
- });
+ auto storage_id = getStorageID(as_id);
+ if (!storage_id) {
+ return;
+ }
+ auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1];
+ ForEachGPUPage(gpu_addr, size,
+ [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) {
+ const auto it = sparse_page_table.find(page);
+ if (it == sparse_page_table.end()) {
+ if constexpr (BOOL_BREAK) {
+ return false;
+ } else {
+ return;
+ }
+ }
+ for (const ImageId image_id : it->second) {
+ Image& image = slot_images[image_id];
+ if (True(image.flags & ImageFlagBits::Picked)) {
+ continue;
+ }
+ if (!image.OverlapsGPU(gpu_addr, size)) {
+ continue;
+ }
+ image.flags |= ImageFlagBits::Picked;
+ images.push_back(image_id);
+ if constexpr (BOOL_BREAK) {
+ if (func(image_id, image)) {
+ return true;
+ }
+ } else {
+ func(image_id, image);
+ }
+ }
+ if constexpr (BOOL_BREAK) {
+ return false;
+ }
+ });
for (const ImageId image_id : images) {
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
}
@@ -1988,8 +1996,9 @@ void TextureCache::RegisterImage(ImageId image_id) {
sparse_maps.push_back(map_id);
});
sparse_views.emplace(image_id, std::move(sparse_maps));
- ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+ (*channel_state->sparse_page_table)[page].push_back(image_id);
+ });
}
template
@@ -2042,7 +2051,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
return;
}
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
- clear_page_table(page, sparse_page_table);
+ clear_page_table(page, (*channel_state->sparse_page_table));
});
auto it = sparse_views.find(image_id);
ASSERT(it != sparse_views.end());
@@ -2496,13 +2505,15 @@ void TextureCache
::CreateChannel(struct Tegra::Control::ChannelState& channel
const auto it = channel_map.find(channel.bind_id);
auto* this_state = &channel_storage[it->second];
const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
- this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
+ this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2];
+ this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1];
}
/// Bind a channel for execution.
template
void TextureCache::OnGPUASRegister([[maybe_unused]] size_t map_id) {
gpu_page_table_storage.emplace_back();
+ gpu_page_table_storage.emplace_back(); // Second table of the pair: CreateChannel uses index storage_id * 2 for gpu_page_table and storage_id * 2 + 1 for sparse_page_table.
}
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 8699d40d4..f9aebb293 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -86,6 +86,7 @@ public:
std::unordered_map samplers;
TextureCacheGPUMap* gpu_page_table;
+ TextureCacheGPUMap* sparse_page_table;
};
template
@@ -357,7 +358,7 @@ private:
void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
template
- void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+ void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template
@@ -431,7 +432,6 @@ private:
std::unordered_map framebuffers;
std::unordered_map, Common::IdentityHash> page_table;
- std::unordered_map, Common::IdentityHash> sparse_page_table;
std::unordered_map> sparse_views;
DAddr virtual_invalid_space{};
--
cgit v1.2.3
From 8f848f43e979ac4049237d3b6a161696dd85372b Mon Sep 17 00:00:00 2001
From: Liam
Date: Tue, 16 Jan 2024 23:35:48 -0500
Subject: smmu: use new range mutex construction for protecting counters
---
src/common/CMakeLists.txt | 1 +
src/common/range_mutex.h | 93 ++++++++++++++++++++++++++++++++++++++
src/core/device_memory_manager.h | 3 +-
src/core/device_memory_manager.inc | 11 +----
4 files changed, 97 insertions(+), 11 deletions(-)
create mode 100644 src/common/range_mutex.h
(limited to 'src')
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e30fea268..85926fc8f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,7 @@ add_library(common STATIC
precompiled_headers.h
quaternion.h
range_map.h
+ range_mutex.h
reader_writer_queue.h
ring_buffer.h
${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
diff --git a/src/common/range_mutex.h b/src/common/range_mutex.h
new file mode 100644
index 000000000..d6c949811
--- /dev/null
+++ b/src/common/range_mutex.h
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+#include
+
+#include "common/intrusive_list.h"
+
+namespace Common {
+
+class ScopedRangeLock;
+
+class RangeMutex { // Keeps the list of currently held ranges; all locking goes through the friend ScopedRangeLock.
+public:
+ explicit RangeMutex() = default;
+ ~RangeMutex() = default;
+
+private:
+ friend class ScopedRangeLock;
+
+ void Lock(ScopedRangeLock& l); // Waits until l's range intersects no held range, then records l as held.
+ void Unlock(ScopedRangeLock& l); // Removes l from the held list and notifies all waiters.
+ bool HasIntersectionLocked(ScopedRangeLock& l); // Takes no lock itself; Lock() calls it while holding m_mutex.
+
+private:
+ std::mutex m_mutex; // Held by Lock() and Unlock() around every access to m_list.
+ std::condition_variable m_cv; // Notified by Unlock() so waiters in Lock() re-check for overlap.
+
+ using LockList = Common::IntrusiveListBaseTraits::ListType;
+ LockList m_list; // The ScopedRangeLock objects that currently hold a range.
+};
+
+class ScopedRangeLock : public Common::IntrusiveListBaseNode { // Scope-bound lock on the range [address, address + size) of a RangeMutex.
+public:
+ explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size)
+ : m_mutex(mutex), m_address(address), m_size(size) {
+ if (m_size > 0) { // A zero-sized range is never locked, so it neither waits nor is recorded.
+ m_mutex.Lock(*this);
+ }
+ }
+ ~ScopedRangeLock() {
+ if (m_size > 0) { // Mirrors the constructor: only ranges that were locked get unlocked.
+ m_mutex.Unlock(*this);
+ }
+ }
+
+ u64 GetAddress() const { // First address of the range.
+ return m_address;
+ }
+
+ u64 GetSize() const { // Length of the range as passed to the constructor.
+ return m_size;
+ }
+
+private:
+ RangeMutex& m_mutex; // The RangeMutex this lock registers with.
+ const u64 m_address{};
+ const u64 m_size{};
+};
+
+inline void RangeMutex::Lock(ScopedRangeLock& l) { // Acquires l's range, waiting while it overlaps any held range.
+ std::unique_lock lk{m_mutex};
+ m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); }); // The predicate runs with m_mutex held.
+ m_list.push_back(l); // Record l as held while still under m_mutex.
+}
+
+inline void RangeMutex::Unlock(ScopedRangeLock& l) { // Releases l's range and wakes waiters.
+ {
+ std::scoped_lock lk{m_mutex};
+ m_list.erase(m_list.iterator_to(l));
+ } // m_mutex is released here, before the notification below.
+ m_cv.notify_all(); // Wake every waiter; each re-checks its own range in Lock().
+}
+
+inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) { // Returns true if l's range overlaps any range in m_list.
+ const auto cur_begin = l.GetAddress();
+ const auto cur_last = l.GetAddress() + l.GetSize() - 1; // Inclusive last address; ScopedRangeLock only calls Lock() when size > 0.
+
+ for (const auto& other : m_list) {
+ const auto other_begin = other.GetAddress();
+ const auto other_last = other.GetAddress() + other.GetSize() - 1; // Inclusive last address of the held range.
+
+ if (cur_begin <= other_last && other_begin <= cur_last) { // Overlap test on the two closed intervals.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // namespace Common
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index ffeed46cc..63823602c 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -10,6 +10,7 @@
#include
#include "common/common_types.h"
+#include "common/range_mutex.h"
#include "common/scratch_buffer.h"
#include "common/virtual_buffer.h"
@@ -204,7 +205,7 @@ private:
(1ULL << (device_virtual_bits - page_bits)) / subentries;
using CachedPages = std::array;
std::unique_ptr cached_pages;
- std::mutex counter_guard;
+ Common::RangeMutex counter_guard;
std::mutex mapping_guard;
};
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index eab8a2731..0a59000aa 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -508,12 +508,7 @@ void DeviceMemoryManager::UnregisterProcess(Asid asid) {
template
void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
- std::unique_lock lk(counter_guard, std::defer_lock);
- const auto Lock = [&] {
- if (!lk) {
- lk.lock();
- }
- };
+ Common::ScopedRangeLock lk(counter_guard, addr, size);
u64 uncache_begin = 0;
u64 cache_begin = 0;
u64 uncache_bytes = 0;
@@ -548,7 +543,6 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
}
uncache_bytes += Memory::YUZU_PAGESIZE;
} else if (uncache_bytes > 0) {
- Lock();
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
uncache_bytes, false);
uncache_bytes = 0;
@@ -559,7 +553,6 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
}
cache_bytes += Memory::YUZU_PAGESIZE;
} else if (cache_bytes > 0) {
- Lock();
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
cache_bytes = 0;
@@ -567,12 +560,10 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
vpage++;
}
if (uncache_bytes > 0) {
- Lock();
MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
false);
}
if (cache_bytes > 0) {
- Lock();
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
true);
}
--
cgit v1.2.3
From aaab11e36f55f6b06f08b10aeca4b14f112914ee Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Tue, 30 Jan 2024 13:46:01 +0100
Subject: NVDRV: Join the heaper optimization blocks
---
src/core/hle/service/nvdrv/core/container.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'src')
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index dc1b4d5be..e89cca6f2 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
// Check if this memory block is heap.
if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
- if (svc_mem_info.size > region_size) {
+ if (region_start + region_size == svc_mem_info.base_address) {
+ region_size += svc_mem_info.size;
+ } else if (svc_mem_info.size > region_size) {
region_size = svc_mem_info.size;
region_start = svc_mem_info.base_address;
}
--
cgit v1.2.3
From 738e9a79a06c9d3955488022a1d3afd67bc5a11d Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Tue, 30 Jan 2024 20:14:06 +0100
Subject: DeviceMemory: Make counter types configurable
---
src/core/device_memory_manager.h | 15 ++++++++++-----
src/core/device_memory_manager.inc | 18 +++++++++---------
2 files changed, 19 insertions(+), 14 deletions(-)
(limited to 'src')
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 63823602c..0568a821b 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -5,6 +5,7 @@
#include
#include
+#include
#include
#include
#include
@@ -181,24 +182,28 @@ private:
}
Common::VirtualBuffer cpu_backing_address;
- static constexpr size_t subentries = 8 / sizeof(u8);
+ using CounterType = u8;
+ using CounterAtomicType = std::atomic_uint8_t;
+ static constexpr size_t subentries = 8 / sizeof(CounterType);
static constexpr size_t subentries_mask = subentries - 1;
+ static constexpr size_t subentries_shift =
+ std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType));
class CounterEntry final {
public:
CounterEntry() = default;
- std::atomic_uint8_t& Count(std::size_t page) {
+ CounterAtomicType& Count(std::size_t page) { // Counter slot for page, selected by page & subentries_mask.
return values[page & subentries_mask];
}
- const std::atomic_uint8_t& Count(std::size_t page) const {
+ const CounterAtomicType& Count(std::size_t page) const { // Read-only overload with the same slot selection.
return values[page & subentries_mask];
}
private:
- std::array values{};
+ std::array values{}; // Storage for the counter slots that Count() indexes.
};
- static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
+ static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType),
"CounterEntry should be 8 bytes!");
static constexpr size_t num_counter_entries =
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 0a59000aa..7afe54949 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -213,8 +213,8 @@ void DeviceMemoryManager::Free(DAddr start, size_t size) {
}
template
-void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size_t size,
- Asid asid, bool track) {
+void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid,
+ bool track) {
Core::Memory::Memory* process_memory = registered_processes[asid.id];
size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
@@ -522,10 +522,10 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
auto* memory_device_inter = registered_processes[asid.id];
for (; page != page_end; ++page) {
- std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
+ CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page);
if (delta > 0) {
- ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(),
+ ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(),
"Count may overflow!");
} else if (delta < 0) {
ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
@@ -534,7 +534,7 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
}
// Adds or subtracts 1, as count is a unsigned 8-bit value
- count.fetch_add(static_cast(delta), std::memory_order_release);
+ count.fetch_add(static_cast(delta), std::memory_order_release);
// Assume delta is either -1 or 1
if (count.load(std::memory_order::relaxed) == 0) {
@@ -553,15 +553,15 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
}
cache_bytes += Memory::YUZU_PAGESIZE;
} else if (cache_bytes > 0) {
- MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
- true);
+ MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+ cache_bytes, true);
cache_bytes = 0;
}
vpage++;
}
if (uncache_bytes > 0) {
- MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
- false);
+ MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+ uncache_bytes, false);
}
if (cache_bytes > 0) {
MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
--
cgit v1.2.3
From d57165df450e8a2fa811706758fb8ab352f623ae Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Wed, 31 Jan 2024 14:31:13 +0100
Subject: Device Memory Manager: ensure raster protection only within mapped
device addresses.
---
src/core/device_memory_manager.inc | 40 ++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 17 deletions(-)
(limited to 'src')
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 7afe54949..b026f4220 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -519,18 +519,32 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
size_t page = addr >> Memory::YUZU_PAGEBITS;
auto [asid, base_vaddress] = ExtractCPUBacking(page);
- size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
auto* memory_device_inter = registered_processes[asid.id];
+ const auto release_pending = [&] {
+ if (uncache_bytes > 0) {
+ MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+ uncache_bytes, false);
+ uncache_bytes = 0;
+ }
+ if (cache_bytes > 0) {
+ MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+ cache_bytes, true);
+ cache_bytes = 0;
+ }
+ };
for (; page != page_end; ++page) {
CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page);
+ auto [asid_2, vpage] = ExtractCPUBacking(page);
+ vpage >>= Memory::YUZU_PAGEBITS;
- if (delta > 0) {
- ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits::max(),
- "Count may overflow!");
- } else if (delta < 0) {
- ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
- } else {
- ASSERT_MSG(false, "Delta must be non-zero!");
+ if (vpage == 0) [[unlikely]] {
+ release_pending();
+ continue;
+ }
+
+ if (asid.id != asid_2.id) [[unlikely]] {
+ release_pending();
+ memory_device_inter = registered_processes[asid_2.id];
}
// Adds or subtracts 1, as count is a unsigned 8-bit value
@@ -557,16 +571,8 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size
cache_bytes, true);
cache_bytes = 0;
}
- vpage++;
- }
- if (uncache_bytes > 0) {
- MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
- uncache_bytes, false);
- }
- if (cache_bytes > 0) {
- MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
- true);
}
+ release_pending();
}
} // namespace Core
--
cgit v1.2.3