diff options
| author | 2023-12-12 17:15:52 -0500 | |
|---|---|---|
| committer | 2023-12-14 21:54:36 -0500 | |
| commit | 030e6b3980aa5ce6069041c339d49d21d68ca73b (patch) | |
| tree | 428a3d3a1e322a4cfad696bdcd756dd7332acefa | |
| parent | Merge pull request #12354 from liamwhite/mackage-panager (diff) | |
| download | yuzu-030e6b3980aa5ce6069041c339d49d21d68ca73b.tar.gz yuzu-030e6b3980aa5ce6069041c339d49d21d68ca73b.tar.xz yuzu-030e6b3980aa5ce6069041c339d49d21d68ca73b.zip | |
video_core: use interval map for page count tracking
| -rw-r--r-- | src/tests/video_core/memory_tracker.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/word_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/rasterizer_accelerated.cpp | 97 | ||||
| -rw-r--r-- | src/video_core/rasterizer_accelerated.h | 27 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 10 |
7 files changed, 69 insertions, 79 deletions
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 618793668..2dbff21af 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -23,13 +23,13 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE; | |||
| 23 | 23 | ||
| 24 | class RasterizerInterface { | 24 | class RasterizerInterface { |
| 25 | public: | 25 | public: |
| 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { |
| 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; |
| 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> |
| 29 | Core::Memory::YUZU_PAGEBITS}; | 29 | Core::Memory::YUZU_PAGEBITS}; |
| 30 | for (u64 page = page_start; page < page_end; ++page) { | 30 | for (u64 page = page_start; page < page_end; ++page) { |
| 31 | int& value = page_table[page]; | 31 | int& value = page_table[page]; |
| 32 | value += delta; | 32 | value += (cache ? 1 : -1); |
| 33 | if (value < 0) { | 33 | if (value < 0) { |
| 34 | throw std::logic_error{"negative page"}; | 34 | throw std::logic_error{"negative page"}; |
| 35 | } | 35 | } |
| @@ -546,4 +546,4 @@ TEST_CASE("MemoryTracker: Cached write downloads") { | |||
| 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); |
| 547 | memory_track->MarkRegionAsCpuModified(c, WORD); | 547 | memory_track->MarkRegionAsCpuModified(c, WORD); |
| 548 | REQUIRE(rasterizer.Count() == 0); | 548 | REQUIRE(rasterizer.Count() == 0); |
| 549 | } \ No newline at end of file | 549 | } |
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..95b752055 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -473,7 +473,7 @@ private: | |||
| 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { |
| 475 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | 475 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, |
| 476 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | 476 | size * BYTES_PER_PAGE, add_to_rasterizer); |
| 477 | }); | 477 | }); |
| 478 | } | 478 | } |
| 479 | 479 | ||
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index f200a650f..3abfd5ff3 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <atomic> | 4 | #include <atomic> |
| 5 | 5 | ||
| 6 | #include "common/alignment.h" | ||
| 6 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 7 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 8 | #include "common/div_ceil.h" | 9 | #include "common/div_ceil.h" |
| @@ -11,61 +12,63 @@ | |||
| 11 | 12 | ||
| 12 | namespace VideoCore { | 13 | namespace VideoCore { |
| 13 | 14 | ||
| 15 | static constexpr u16 IdentityValue = 1; | ||
| 16 | |||
| 14 | using namespace Core::Memory; | 17 | using namespace Core::Memory; |
| 15 | 18 | ||
| 16 | RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) | 19 | RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : map{}, cpu_memory{cpu_memory_} { |
| 17 | : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} | 20 | // We are tracking CPU memory, which cannot map more than 39 bits. |
| 21 | const VAddr start_address = 0; | ||
| 22 | const VAddr end_address = (1ULL << 39); | ||
| 23 | const IntervalType address_space_interval(start_address, end_address); | ||
| 24 | const auto value = std::make_pair(address_space_interval, IdentityValue); | ||
| 25 | |||
| 26 | map.add(value); | ||
| 27 | } | ||
| 18 | 28 | ||
| 19 | RasterizerAccelerated::~RasterizerAccelerated() = default; | 29 | RasterizerAccelerated::~RasterizerAccelerated() = default; |
| 20 | 30 | ||
| 21 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 31 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) { |
| 22 | u64 uncache_begin = 0; | 32 | // Align sizes. |
| 23 | u64 cache_begin = 0; | 33 | addr = Common::AlignDown(addr, YUZU_PAGESIZE); |
| 24 | u64 uncache_bytes = 0; | 34 | size = Common::AlignUp(size, YUZU_PAGESIZE); |
| 25 | u64 cache_bytes = 0; | ||
| 26 | |||
| 27 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 28 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | ||
| 29 | for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { | ||
| 30 | std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); | ||
| 31 | |||
| 32 | if (delta > 0) { | ||
| 33 | ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); | ||
| 34 | } else if (delta < 0) { | ||
| 35 | ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||
| 36 | } else { | ||
| 37 | ASSERT_MSG(false, "Delta must be non-zero!"); | ||
| 38 | } | ||
| 39 | 35 | ||
| 40 | // Adds or subtracts 1, as count is a unsigned 8-bit value | 36 | // Declare the overall interval we are going to operate on. |
| 41 | count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | 37 | const VAddr start_address = addr; |
| 42 | 38 | const VAddr end_address = addr + size; | |
| 43 | // Assume delta is either -1 or 1 | 39 | const IntervalType modification_range(start_address, end_address); |
| 44 | if (count.load(std::memory_order::relaxed) == 0) { | 40 | |
| 45 | if (uncache_bytes == 0) { | 41 | // Find the boundaries of where to iterate. |
| 46 | uncache_begin = page; | 42 | const auto lower = map.lower_bound(modification_range); |
| 47 | } | 43 | const auto upper = map.upper_bound(modification_range); |
| 48 | uncache_bytes += YUZU_PAGESIZE; | 44 | |
| 49 | } else if (uncache_bytes > 0) { | 45 | // Iterate over the contained intervals. |
| 50 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, | 46 | for (auto it = lower; it != upper; it++) { |
| 51 | false); | 47 | // Intersect interval range with modification range. |
| 52 | uncache_bytes = 0; | 48 | const auto current_range = modification_range & it->first; |
| 53 | } | 49 | |
| 54 | if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | 50 | // Calculate the address and size to operate over. |
| 55 | if (cache_bytes == 0) { | 51 | const auto current_addr = current_range.lower(); |
| 56 | cache_begin = page; | 52 | const auto current_size = current_range.upper() - current_addr; |
| 57 | } | 53 | |
| 58 | cache_bytes += YUZU_PAGESIZE; | 54 | // Get the current value of the range. |
| 59 | } else if (cache_bytes > 0) { | 55 | const auto value = it->second; |
| 60 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | 56 | |
| 61 | cache_bytes = 0; | 57 | if (cache && value == IdentityValue) { |
| 58 | // If we are going to cache, and the value is not yet referenced, then cache this range. | ||
| 59 | cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, true); | ||
| 60 | } else if (!cache && value == IdentityValue + 1) { | ||
| 61 | // If we are going to uncache, and this is the last reference, then uncache this range. | ||
| 62 | cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, false); | ||
| 62 | } | 63 | } |
| 63 | } | 64 | } |
| 64 | if (uncache_bytes > 0) { | 65 | |
| 65 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); | 66 | // Update the set. |
| 66 | } | 67 | const auto value = std::make_pair(modification_range, IdentityValue); |
| 67 | if (cache_bytes > 0) { | 68 | if (cache) { |
| 68 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | 69 | map.add(value); |
| 70 | } else { | ||
| 71 | map.subtract(value); | ||
| 69 | } | 72 | } |
| 70 | } | 73 | } |
| 71 | 74 | ||
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index e6c0ea87a..cd1c706de 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h | |||
| @@ -3,8 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <boost/icl/interval_map.hpp> |
| 7 | #include <atomic> | ||
| 8 | 7 | ||
| 9 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| @@ -21,28 +20,16 @@ public: | |||
| 21 | explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); | 20 | explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); |
| 22 | ~RasterizerAccelerated() override; | 21 | ~RasterizerAccelerated() override; |
| 23 | 22 | ||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | 23 | void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) override; |
| 25 | 24 | ||
| 26 | private: | 25 | private: |
| 27 | class CacheEntry final { | 26 | using PageIndex = VAddr; |
| 28 | public: | 27 | using PageReferenceCount = u16; |
| 29 | CacheEntry() = default; | ||
| 30 | 28 | ||
| 31 | std::atomic_uint16_t& Count(std::size_t page) { | 29 | using IntervalMap = boost::icl::interval_map<PageIndex, PageReferenceCount>; |
| 32 | return values[page & 3]; | 30 | using IntervalType = IntervalMap::interval_type; |
| 33 | } | ||
| 34 | 31 | ||
| 35 | const std::atomic_uint16_t& Count(std::size_t page) const { | 32 | IntervalMap map; |
| 36 | return values[page & 3]; | ||
| 37 | } | ||
| 38 | |||
| 39 | private: | ||
| 40 | std::array<std::atomic_uint16_t, 4> values{}; | ||
| 41 | }; | ||
| 42 | static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); | ||
| 43 | |||
| 44 | using CachedPages = std::array<CacheEntry, 0x2000000>; | ||
| 45 | std::unique_ptr<CachedPages> cached_pages; | ||
| 46 | Core::Memory::Memory& cpu_memory; | 33 | Core::Memory::Memory& cpu_memory; |
| 47 | }; | 34 | }; |
| 48 | 35 | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index af1469147..fd42d26b5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -162,7 +162,7 @@ public: | |||
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | /// Increase/decrease the number of object in pages touching the specified region | 164 | /// Increase/decrease the number of object in pages touching the specified region |
| 165 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | 165 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {} |
| 166 | 166 | ||
| 167 | /// Initialize disk cached resources for the game being emulated | 167 | /// Initialize disk cached resources for the game being emulated |
| 168 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 168 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index e81cd031b..a109f9cbe 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -132,7 +132,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t | |||
| 132 | 132 | ||
| 133 | storage.push_back(std::move(data)); | 133 | storage.push_back(std::move(data)); |
| 134 | 134 | ||
| 135 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | 135 | rasterizer.UpdatePagesCachedCount(addr, size, true); |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | 138 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { |
| @@ -209,7 +209,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { | |||
| 209 | 209 | ||
| 210 | const VAddr addr = entry->addr_start; | 210 | const VAddr addr = entry->addr_start; |
| 211 | const size_t size = entry->addr_end - addr; | 211 | const size_t size = entry->addr_end - addr; |
| 212 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | 212 | rasterizer.UpdatePagesCachedCount(addr, size, false); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | 215 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..d7941f6a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -2080,7 +2080,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2080 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 2080 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 2081 | image.flags |= ImageFlagBits::Tracked; | 2081 | image.flags |= ImageFlagBits::Tracked; |
| 2082 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2082 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2083 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 2083 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, true); |
| 2084 | return; | 2084 | return; |
| 2085 | } | 2085 | } |
| 2086 | if (True(image.flags & ImageFlagBits::Registered)) { | 2086 | if (True(image.flags & ImageFlagBits::Registered)) { |
| @@ -2091,13 +2091,13 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2091 | const auto& map = slot_map_views[map_view_id]; | 2091 | const auto& map = slot_map_views[map_view_id]; |
| 2092 | const VAddr cpu_addr = map.cpu_addr; | 2092 | const VAddr cpu_addr = map.cpu_addr; |
| 2093 | const std::size_t size = map.size; | 2093 | const std::size_t size = map.size; |
| 2094 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2094 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, true); |
| 2095 | } | 2095 | } |
| 2096 | return; | 2096 | return; |
| 2097 | } | 2097 | } |
| 2098 | ForEachSparseSegment(image, | 2098 | ForEachSparseSegment(image, |
| 2099 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 2099 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 2100 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2100 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, true); |
| 2101 | }); | 2101 | }); |
| 2102 | } | 2102 | } |
| 2103 | 2103 | ||
| @@ -2106,7 +2106,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2106 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | 2106 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 2107 | image.flags &= ~ImageFlagBits::Tracked; | 2107 | image.flags &= ~ImageFlagBits::Tracked; |
| 2108 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2108 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2109 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | 2109 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, false); |
| 2110 | return; | 2110 | return; |
| 2111 | } | 2111 | } |
| 2112 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | 2112 | ASSERT(True(image.flags & ImageFlagBits::Registered)); |
| @@ -2117,7 +2117,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2117 | const auto& map = slot_map_views[map_view_id]; | 2117 | const auto& map = slot_map_views[map_view_id]; |
| 2118 | const VAddr cpu_addr = map.cpu_addr; | 2118 | const VAddr cpu_addr = map.cpu_addr; |
| 2119 | const std::size_t size = map.size; | 2119 | const std::size_t size = map.size; |
| 2120 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 2120 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, false); |
| 2121 | } | 2121 | } |
| 2122 | } | 2122 | } |
| 2123 | 2123 | ||