author     2024-01-31 11:22:29 -0500
committer  2024-01-31 11:22:29 -0500
commit     22492b68b73b4e8c865c4907cc4609db8cc07afd
tree       75d356cf8a9d8e4ae93dda3d35647c80bd51c11c /src
parent     Merge pull request #12864 from Kelebek1/small_time_fixes
parent     Device Memory Manager: ensure raster protection only within mapped device add...
Merge pull request #12869 from FernandoS27/smmu-fixes
SMMU: A set of different fixes.
Diffstat
 src/common/CMakeLists.txt                          |  1
 src/common/range_mutex.h                           | 93
 src/core/device_memory_manager.h                   | 18
 src/core/device_memory_manager.inc                 | 63
 src/core/hle/service/nvdrv/core/container.cpp      |  4
 src/video_core/texture_cache/texture_cache.h       | 87
 src/video_core/texture_cache/texture_cache_base.h  |  4
 7 files changed, 190 insertions, 80 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e30fea268..85926fc8f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,7 @@ add_library(common STATIC
     precompiled_headers.h
     quaternion.h
    range_map.h
+    range_mutex.h
     reader_writer_queue.h
     ring_buffer.h
     ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
diff --git a/src/common/range_mutex.h b/src/common/range_mutex.h
new file mode 100644
index 000000000..d6c949811
--- /dev/null
+++ b/src/common/range_mutex.h
@@ -0,0 +1,93 @@
+// SPDX-FileCopyrightText: 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "common/intrusive_list.h"
+
+namespace Common {
+
+class ScopedRangeLock;
+
+class RangeMutex {
+public:
+    explicit RangeMutex() = default;
+    ~RangeMutex() = default;
+
+private:
+    friend class ScopedRangeLock;
+
+    void Lock(ScopedRangeLock& l);
+    void Unlock(ScopedRangeLock& l);
+    bool HasIntersectionLocked(ScopedRangeLock& l);
+
+private:
+    std::mutex m_mutex;
+    std::condition_variable m_cv;
+
+    using LockList = Common::IntrusiveListBaseTraits<ScopedRangeLock>::ListType;
+    LockList m_list;
+};
+
+class ScopedRangeLock : public Common::IntrusiveListBaseNode<ScopedRangeLock> {
+public:
+    explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size)
+        : m_mutex(mutex), m_address(address), m_size(size) {
+        if (m_size > 0) {
+            m_mutex.Lock(*this);
+        }
+    }
+    ~ScopedRangeLock() {
+        if (m_size > 0) {
+            m_mutex.Unlock(*this);
+        }
+    }
+
+    u64 GetAddress() const {
+        return m_address;
+    }
+
+    u64 GetSize() const {
+        return m_size;
+    }
+
+private:
+    RangeMutex& m_mutex;
+    const u64 m_address{};
+    const u64 m_size{};
+};
+
+inline void RangeMutex::Lock(ScopedRangeLock& l) {
+    std::unique_lock lk{m_mutex};
+    m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); });
+    m_list.push_back(l);
+}
+
+inline void RangeMutex::Unlock(ScopedRangeLock& l) {
+    {
+        std::scoped_lock lk{m_mutex};
+        m_list.erase(m_list.iterator_to(l));
+    }
+    m_cv.notify_all();
+}
+
+inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) {
+    const auto cur_begin = l.GetAddress();
+    const auto cur_last = l.GetAddress() + l.GetSize() - 1;
+
+    for (const auto& other : m_list) {
+        const auto other_begin = other.GetAddress();
+        const auto other_last = other.GetAddress() + other.GetSize() - 1;
+
+        if (cur_begin <= other_last && other_begin <= cur_last) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+} // namespace Common
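Note on the new primitive: RangeMutex only serializes callers whose [address, address + size) intervals intersect; HasIntersectionLocked is the standard closed-interval overlap test (cur_begin <= other_last && other_begin <= cur_last), and a zero-sized range never takes the lock at all, which is why both the constructor and destructor guard on m_size > 0. A minimal usage sketch, not part of this commit; it assumes compilation inside the yuzu tree so the common headers resolve, and TouchRange is an invented name for illustration:

#include <thread>

#include "common/common_types.h" // for u64
#include "common/range_mutex.h"

namespace {
Common::RangeMutex g_range_mutex;

// Hypothetical worker: holds the lock only for its own byte range.
void TouchRange(u64 address, u64 size) {
    Common::ScopedRangeLock lock(g_range_mutex, address, size);
    // ... update per-page state for [address, address + size) ...
}
} // namespace

int main() {
    std::thread a([] { TouchRange(0x0000, 0x1000); }); // pages 0x0000-0x0fff
    std::thread b([] { TouchRange(0x1000, 0x1000); }); // disjoint range: never blocks on a
    std::thread c([] { TouchRange(0x0800, 0x1000); }); // overlaps both: waits for each in turn
    a.join();
    b.join();
    c.join();
}

Threads a and b can run concurrently; only c serializes against them, which is exactly the contention reduction the old single counter_guard mutex could not offer.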
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index ffeed46cc..0568a821b 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -5,11 +5,13 @@
 
 #include <array>
 #include <atomic>
+#include <bit>
 #include <deque>
 #include <memory>
 #include <mutex>
 
 #include "common/common_types.h"
+#include "common/range_mutex.h"
 #include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 
@@ -180,31 +182,35 @@ private:
     }
 
     Common::VirtualBuffer<VAddr> cpu_backing_address;
-    static constexpr size_t subentries = 8 / sizeof(u8);
+    using CounterType = u8;
+    using CounterAtomicType = std::atomic_uint8_t;
+    static constexpr size_t subentries = 8 / sizeof(CounterType);
     static constexpr size_t subentries_mask = subentries - 1;
+    static constexpr size_t subentries_shift =
+        std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType));
     class CounterEntry final {
     public:
         CounterEntry() = default;
 
-        std::atomic_uint8_t& Count(std::size_t page) {
+        CounterAtomicType& Count(std::size_t page) {
             return values[page & subentries_mask];
         }
 
-        const std::atomic_uint8_t& Count(std::size_t page) const {
+        const CounterAtomicType& Count(std::size_t page) const {
             return values[page & subentries_mask];
         }
 
     private:
-        std::array<std::atomic_uint8_t, subentries> values{};
+        std::array<CounterAtomicType, subentries> values{};
     };
-    static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
+    static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType),
                   "CounterEntry should be 8 bytes!");
 
     static constexpr size_t num_counter_entries =
         (1ULL << (device_virtual_bits - page_bits)) / subentries;
     using CachedPages = std::array<CounterEntry, num_counter_entries>;
     std::unique_ptr<CachedPages> cached_pages;
-    std::mutex counter_guard;
+    Common::RangeMutex counter_guard;
     std::mutex mapping_guard;
 };
 
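The new subentries_shift spells out the arithmetic the old hard-coded `page >> 3` relied on: with CounterType = u8, eight counters pack into one 8-byte CounterEntry, so the entry index is the page number shifted down by log2(8) = 3 and the sub-slot is the low bits under subentries_mask. A standalone sketch of that arithmetic, with constants mirroring the header above (hypothetical, not part of the commit):

#include <bit>
#include <cstddef>
#include <cstdint>

using CounterType = std::uint8_t;
constexpr std::size_t subentries = 8 / sizeof(CounterType); // 8 counters per 8-byte entry
constexpr std::size_t subentries_mask = subentries - 1;
constexpr std::size_t subentries_shift =
    std::countr_zero(sizeof(std::uint64_t)) - std::countr_zero(sizeof(CounterType)); // 3 - 0

static_assert(subentries_shift == 3);
static_assert((std::size_t{1} << subentries_shift) == subentries); // shifting == dividing by 8

int main() {
    // Page 13 lives in entry 13 >> 3 == 1, at sub-slot 13 & 7 == 5.
    static_assert((13 >> subentries_shift) == 1);
    static_assert((13 & subentries_mask) == 5);
}

Deriving the shift from the types means that widening CounterType to, say, u16 would drop subentries to 4 and the shift to 2 without touching any call site.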
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index eab8a2731..b026f4220 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -213,8 +213,8 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
 }
 
 template <typename Traits>
-void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
-                                      Asid asid, bool track) {
+void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid,
+                                      bool track) {
     Core::Memory::Memory* process_memory = registered_processes[asid.id];
     size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
@@ -508,12 +508,7 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
 
 template <typename Traits>
 void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
-    std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
-    const auto Lock = [&] {
-        if (!lk) {
-            lk.lock();
-        }
-    };
+    Common::ScopedRangeLock lk(counter_guard, addr, size);
     u64 uncache_begin = 0;
     u64 cache_begin = 0;
     u64 uncache_bytes = 0;
@@ -524,22 +519,36 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
     const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
     size_t page = addr >> Memory::YUZU_PAGEBITS;
     auto [asid, base_vaddress] = ExtractCPUBacking(page);
-    size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
     auto* memory_device_inter = registered_processes[asid.id];
+    const auto release_pending = [&] {
+        if (uncache_bytes > 0) {
+            MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+                              uncache_bytes, false);
+            uncache_bytes = 0;
+        }
+        if (cache_bytes > 0) {
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+                              cache_bytes, true);
+            cache_bytes = 0;
+        }
+    };
     for (; page != page_end; ++page) {
-        std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
+        CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page);
+        auto [asid_2, vpage] = ExtractCPUBacking(page);
+        vpage >>= Memory::YUZU_PAGEBITS;
 
-        if (delta > 0) {
-            ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
-                       "Count may overflow!");
-        } else if (delta < 0) {
-            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
-        } else {
-            ASSERT_MSG(false, "Delta must be non-zero!");
+        if (vpage == 0) [[unlikely]] {
+            release_pending();
+            continue;
+        }
+
+        if (asid.id != asid_2.id) [[unlikely]] {
+            release_pending();
+            memory_device_inter = registered_processes[asid_2.id];
         }
 
         // Adds or subtracts 1, as count is a unsigned 8-bit value
-        count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
+        count.fetch_add(static_cast<CounterType>(delta), std::memory_order_release);
 
         // Assume delta is either -1 or 1
         if (count.load(std::memory_order::relaxed) == 0) {
@@ -548,7 +557,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             uncache_bytes += Memory::YUZU_PAGESIZE;
         } else if (uncache_bytes > 0) {
-            Lock();
             MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
                               uncache_bytes, false);
             uncache_bytes = 0;
@@ -559,23 +567,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
             }
             cache_bytes += Memory::YUZU_PAGESIZE;
         } else if (cache_bytes > 0) {
-            Lock();
-            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
-                              true);
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS,
+                              cache_bytes, true);
             cache_bytes = 0;
         }
-        vpage++;
-    }
-    if (uncache_bytes > 0) {
-        Lock();
-        MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
-                          false);
-    }
-    if (cache_bytes > 0) {
-        Lock();
-        MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
-                          true);
     }
+    release_pending();
 }
 
 } // namespace Core
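Replacing the deferred Lock() with a ScopedRangeLock taken up front works because release_pending() flushes any pending cached/uncached run whenever the loop crosses into a different process (asid mismatch) or an unmapped page (vpage == 0), and once more at loop exit. A hypothetical reduction of that flush-on-boundary pattern, with invented names and data, collapsing the two parallel runs of the real loop into one to show just the boundary handling:

#include <cstdint>
#include <cstdio>

constexpr std::uint64_t kPageSize = 0x1000; // stand-in for Memory::YUZU_PAGESIZE

// Stand-in for MarkRegionCaching: flushes one contiguous run of pages.
void FlushRun(std::uint64_t begin_page, std::uint64_t bytes, bool cached) {
    std::printf("mark [%#llx, +%#llx) cached=%d\n",
                static_cast<unsigned long long>(begin_page * kPageSize),
                static_cast<unsigned long long>(bytes), cached);
}

int main() {
    const bool cached_state[] = {true, true, false, true};
    std::uint64_t run_begin = 0;
    std::uint64_t run_bytes = 0;
    bool run_state = cached_state[0];
    for (std::uint64_t page = 0; page < 4; ++page) {
        if (cached_state[page] == run_state) {
            run_bytes += kPageSize; // extend the current run
            continue;
        }
        FlushRun(run_begin, run_bytes, run_state); // boundary: flush, like release_pending()
        run_begin = page;
        run_bytes = kPageSize;
        run_state = cached_state[page];
    }
    FlushRun(run_begin, run_bytes, run_state); // final flush at loop exit
}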
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index dc1b4d5be..e89cca6f2 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) {
 
         // Check if this memory block is heap.
         if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
-            if (svc_mem_info.size > region_size) {
+            if (region_start + region_size == svc_mem_info.base_address) {
+                region_size += svc_mem_info.size;
+            } else if (svc_mem_info.size > region_size) {
                 region_size = svc_mem_info.size;
                 region_start = svc_mem_info.base_address;
             }
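This fix changes how the session heap region is chosen while walking the process memory blocks: contiguous Normal blocks are now merged into one growing region instead of each block competing on size alone. A worked illustration with invented block addresses (a sketch, not the real walk):

#include <cstdint>

struct MemInfo {
    std::uint64_t base_address;
    std::uint64_t size;
};

int main() {
    // Two adjacent 16 KiB heap blocks followed by a separate 24 KiB block.
    const MemInfo blocks[] = {{0x10000, 0x4000}, {0x14000, 0x4000}, {0x30000, 0x6000}};
    std::uint64_t region_start = 0;
    std::uint64_t region_size = 0;
    for (const auto& info : blocks) {
        if (region_start + region_size == info.base_address) {
            region_size += info.size; // contiguous: extend the current region
        } else if (info.size > region_size) {
            region_size = info.size; // disjoint and larger: restart here
            region_start = info.base_address;
        }
    }
    // Old logic: adjacent blocks never merged, so the lone 24 KiB block would
    // win over two adjacent 16 KiB blocks. New logic: they coalesce to 32 KiB.
}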
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7398ed2ec..a7400adfa 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1431,7 +1431,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
             }
         }
     };
-    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
+    ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes,
+                               region_check_gpu);
 
     bool can_rescale = info.rescaleable;
     bool any_rescaled = false;
@@ -1842,7 +1843,7 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
     if (!storage_id) {
         return;
     }
-    auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+    auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2];
     ForEachGPUPage(gpu_addr, size,
                    [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) {
                        const auto it = gpu_page_table.find(page);
@@ -1882,41 +1883,48 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size,
+                                                 Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = sparse_page_table.find(page);
-        if (it == sparse_page_table.end()) {
-            if constexpr (BOOL_BREAK) {
-                return false;
-            } else {
-                return;
-            }
-        }
-        for (const ImageId image_id : it->second) {
-            Image& image = slot_images[image_id];
-            if (True(image.flags & ImageFlagBits::Picked)) {
-                continue;
-            }
-            if (!image.OverlapsGPU(gpu_addr, size)) {
-                continue;
-            }
-            image.flags |= ImageFlagBits::Picked;
-            images.push_back(image_id);
-            if constexpr (BOOL_BREAK) {
-                if (func(image_id, image)) {
-                    return true;
-                }
-            } else {
-                func(image_id, image);
-            }
-        }
-        if constexpr (BOOL_BREAK) {
-            return false;
-        }
-    });
+    auto storage_id = getStorageID(as_id);
+    if (!storage_id) {
+        return;
+    }
+    auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1];
+    ForEachGPUPage(gpu_addr, size,
+                   [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) {
+                       const auto it = sparse_page_table.find(page);
+                       if (it == sparse_page_table.end()) {
+                           if constexpr (BOOL_BREAK) {
+                               return false;
+                           } else {
+                               return;
+                           }
+                       }
+                       for (const ImageId image_id : it->second) {
+                           Image& image = slot_images[image_id];
+                           if (True(image.flags & ImageFlagBits::Picked)) {
+                               continue;
+                           }
+                           if (!image.OverlapsGPU(gpu_addr, size)) {
+                               continue;
+                           }
+                           image.flags |= ImageFlagBits::Picked;
+                           images.push_back(image_id);
+                           if constexpr (BOOL_BREAK) {
+                               if (func(image_id, image)) {
+                                   return true;
+                               }
+                           } else {
+                               func(image_id, image);
+                           }
+                       }
+                       if constexpr (BOOL_BREAK) {
+                           return false;
+                       }
+                   });
     for (const ImageId image_id : images) {
         slot_images[image_id].flags &= ~ImageFlagBits::Picked;
     }
@@ -1988,8 +1996,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
         sparse_maps.push_back(map_id);
     });
     sparse_views.emplace(image_id, std::move(sparse_maps));
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        (*channel_state->sparse_page_table)[page].push_back(image_id);
+    });
 }
 
 template <class P>
@@ -2042,7 +2051,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         return;
     }
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
-        clear_page_table(page, sparse_page_table);
+        clear_page_table(page, (*channel_state->sparse_page_table));
    });
     auto it = sparse_views.find(image_id);
     ASSERT(it != sparse_views.end());
@@ -2496,13 +2505,15 @@ void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel
     const auto it = channel_map.find(channel.bind_id);
     auto* this_state = &channel_storage[it->second];
     const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
-    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
+    this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2];
+    this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1];
 }
 
 /// Bind a channel for execution.
 template <class P>
 void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
     gpu_page_table_storage.emplace_back();
+    gpu_page_table_storage.emplace_back();
 }
 
 } // namespace VideoCommon
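With sparse page tables now owned per GPU address space rather than globally, each space takes two consecutive slots in gpu_page_table_storage: 2 * storage_id for the regular table and 2 * storage_id + 1 for the sparse one, which is why OnGPUASRegister emplaces twice. A hypothetical reduction of that layout (PageTable and register_address_space are invented stand-ins):

#include <cassert>
#include <cstddef>
#include <vector>

struct PageTable {}; // stand-in for TextureCacheGPUMap

int main() {
    std::vector<PageTable> storage;
    const auto register_address_space = [&storage] {
        const std::size_t storage_id = storage.size() / 2;
        storage.emplace_back(); // slot 2 * storage_id:     regular page table
        storage.emplace_back(); // slot 2 * storage_id + 1: sparse page table
        return storage_id;
    };
    const std::size_t as0 = register_address_space();
    const std::size_t as1 = register_address_space();
    assert(storage.size() == 4); // two tables per address space
    // Lookups mirror the texture cache's indexing:
    PageTable* gpu_table = &storage[as1 * 2];
    PageTable* sparse_table = &storage[as1 * 2 + 1];
    assert(gpu_table + 1 == sparse_table); // each space's pair is adjacent
    (void)as0;
}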
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 8699d40d4..f9aebb293 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -86,6 +86,7 @@ public:
     std::unordered_map<TSCEntry, SamplerId> samplers;
 
     TextureCacheGPUMap* gpu_page_table;
+    TextureCacheGPUMap* sparse_page_table;
 };
 
@@ -357,7 +358,7 @@ private:
     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     template <typename Func>
-    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+    void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     /// Iterates over all the images in a region calling func
     template <typename Func>
@@ -431,7 +432,6 @@ private:
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
     std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
 
     DAddr virtual_invalid_space{};