diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/lru_cache.h | 141 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 13 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 61 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 89 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 |
6 files changed, 213 insertions, 101 deletions
diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..048e9c3da --- /dev/null +++ b/src/common/lru_cache.h | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2+ or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <deque> | ||
| 8 | #include <memory> | ||
| 9 | #include <type_traits> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace Common { | ||
| 14 | |||
/**
 * Intrusive least-recently-used list over pooled items.
 *
 * Items live in a std::deque pool and are addressed by the index returned from Insert().
 * They are chained in a doubly linked list: first_item is the head and last_item is the tail.
 * Insert() and Touch() always append at the tail, so walking from the head visits the items
 * that were inserted/touched longest ago first.
 *
 * Traits must provide:
 *  - ObjectType: the payload stored per item (copied in by Insert()).
 *  - TickType:   an integral timestamp type; comparisons tolerate wrap-around by going
 *                through the matching signed type.
 *
 * Ids handed to Touch() and Free() are not validated; passing an id that was never returned
 * by Insert(), or that has already been freed, is not detected.
 *
 * NOTE: items hold raw pointers into item_pool, so a copy of the cache would keep pointing
 * at the source object's items. Do not copy instances.
 */
template <class Traits>
class LeastRecentlyUsedCache {
    using ObjectType = typename Traits::ObjectType;
    using TickType = typename Traits::TickType;
    /// Signed counterpart of TickType, used for wrap-around tolerant tick comparisons.
    using SignedTickType = std::make_signed_t<TickType>;

    struct Item {
        ObjectType obj;
        TickType tick;
        Item* next{};
        Item* prev{};
    };

public:
    LeastRecentlyUsedCache() : first_item{}, last_item{} {}
    ~LeastRecentlyUsedCache() = default;

    /**
     * Stores obj with the given tick at the tail of the list.
     *
     * @param obj  Payload to store.
     * @param tick Timestamp associated with the payload.
     * @returns Id to pass to Touch() and Free(). Ids of freed items are reused.
     */
    size_t Insert(ObjectType obj, TickType tick) {
        const auto new_id = build();
        auto& item = item_pool[new_id];
        item.obj = obj;
        item.tick = tick;
        attach(item);
        return new_id;
    }

    /**
     * Refreshes the tick of an item and moves it to the tail of the list.
     * Does nothing when the stored tick is already greater than or equal to tick.
     *
     * @param id   Id returned by Insert().
     * @param tick New timestamp.
     */
    void Touch(size_t id, TickType tick) {
        auto& item = item_pool[id];
        if (item.tick >= tick) {
            return;
        }
        item.tick = tick;
        if (&item == last_item) {
            // Already at the tail, no relinking needed.
            return;
        }
        detach(item);
        attach(item);
    }

    /**
     * Unlinks an item from the list and makes its id available for reuse by Insert().
     *
     * @param id Id returned by Insert().
     */
    void Free(size_t id) {
        auto& item = item_pool[id];
        detach(item);
        item.prev = nullptr;
        item.next = nullptr;
        free_items.push_back(id);
    }

    /**
     * Invokes func on each item's object, walking from the head of the list, until an item
     * whose tick is above the given tick is reached.
     *
     * @param tick Upper bound (inclusive); the comparison is done on the signed difference so
     *             a bound that wrapped below zero matches nothing.
     * @param func Callable taking an ObjectType. If it returns exactly bool, returning true
     *             stops the walk; any other return type is ignored.
     */
    template <typename Func>
    void ForEachItemBelow(TickType tick, Func&& func) {
        // Query the result of the call expression actually used below (lvalue callable,
        // lvalue object). invoke_result_t is required here: comparing the invoke_result
        // trait struct itself against bool is always false and disables early exit.
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func&, ObjectType&>, bool>;
        Item* iterator = first_item;
        while (iterator) {
            if (static_cast<SignedTickType>(tick) - static_cast<SignedTickType>(iterator->tick) <
                0) {
                return;
            }
            // Read the successor first: the callback may Free() the current item, which
            // clears its links.
            Item* next = iterator->next;
            if constexpr (RETURNS_BOOL) {
                if (func(iterator->obj)) {
                    return;
                }
            } else {
                func(iterator->obj);
            }
            iterator = next;
        }
    }

private:
    /// Returns the id of an unlinked item, reusing a freed slot when one exists and growing
    /// the pool otherwise.
    size_t build() {
        size_t item_id;
        if (free_items.empty()) {
            item_id = item_pool.size();
            item_pool.emplace_back();
        } else {
            item_id = free_items.front();
            free_items.pop_front();
        }
        auto& item = item_pool[item_id];
        item.next = nullptr;
        item.prev = nullptr;
        return item_id;
    }

    /// Appends item at the tail of the list.
    void attach(Item& item) {
        if (!first_item) {
            first_item = &item;
        }
        if (!last_item) {
            last_item = &item;
        } else {
            item.prev = last_item;
            last_item->next = &item;
            item.next = nullptr;
            last_item = &item;
        }
    }

    /// Unlinks item from its neighbours and fixes up head/tail. The item's own prev/next
    /// pointers are left untouched; callers reset or overwrite them.
    void detach(Item& item) {
        if (item.prev) {
            item.prev->next = item.next;
        }
        if (item.next) {
            item.next->prev = item.prev;
        }
        if (&item == first_item) {
            first_item = item.next;
            if (first_item) {
                first_item->prev = nullptr;
            }
        }
        if (&item == last_item) {
            last_item = item.prev;
            if (last_item) {
                last_item->next = nullptr;
            }
        }
    }

    std::deque<Item> item_pool;    ///< Backing storage; deque growth keeps item addresses stable
    std::deque<size_t> free_items; ///< Ids released by Free(), reused in FIFO order
    Item* first_item;              ///< Head of the list (visited first by ForEachItemBelow)
    Item* last_item;               ///< Tail of the list (most recently inserted/touched)
};
| 140 | |||
| 141 | } // namespace Common | ||
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..4b696a60f 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -261,16 +261,6 @@ public: | |||
| 261 | stream_score += score; | 261 | stream_score += score; |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | /// Sets the new frame tick | ||
| 265 | void SetFrameTick(u64 new_frame_tick) noexcept { | ||
| 266 | frame_tick = new_frame_tick; | ||
| 267 | } | ||
| 268 | |||
| 269 | /// Returns the new frame tick | ||
| 270 | [[nodiscard]] u64 FrameTick() const noexcept { | ||
| 271 | return frame_tick; | ||
| 272 | } | ||
| 273 | |||
| 274 | /// Returns the likeliness of this being a stream buffer | 264 | /// Returns the likeliness of this being a stream buffer |
| 275 | [[nodiscard]] int StreamScore() const noexcept { | 265 | [[nodiscard]] int StreamScore() const noexcept { |
| 276 | return stream_score; | 266 | return stream_score; |
| @@ -307,6 +297,8 @@ public: | |||
| 307 | return words.size_bytes; | 297 | return words.size_bytes; |
| 308 | } | 298 | } |
| 309 | 299 | ||
| 300 | size_t lru_id; | ||
| 301 | |||
| 310 | private: | 302 | private: |
| 311 | template <Type type> | 303 | template <Type type> |
| 312 | u64* Array() noexcept { | 304 | u64* Array() noexcept { |
| @@ -603,7 +595,6 @@ private: | |||
| 603 | RasterizerInterface* rasterizer = nullptr; | 595 | RasterizerInterface* rasterizer = nullptr; |
| 604 | VAddr cpu_addr = 0; | 596 | VAddr cpu_addr = 0; |
| 605 | Words words; | 597 | Words words; |
| 606 | u64 frame_tick = 0; | ||
| 607 | BufferFlagBits flags{}; | 598 | BufferFlagBits flags{}; |
| 608 | int stream_score = 0; | 599 | int stream_score = 0; |
| 609 | }; | 600 | }; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..a0217908a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 21 | #include "common/div_ceil.h" | 21 | #include "common/div_ceil.h" |
| 22 | #include "common/literals.h" | 22 | #include "common/literals.h" |
| 23 | #include "common/lru_cache.h" | ||
| 23 | #include "common/microprofile.h" | 24 | #include "common/microprofile.h" |
| 24 | #include "common/scope_exit.h" | 25 | #include "common/scope_exit.h" |
| 25 | #include "common/settings.h" | 26 | #include "common/settings.h" |
| @@ -77,7 +78,7 @@ class BufferCache { | |||
| 77 | 78 | ||
| 78 | static constexpr BufferId NULL_BUFFER_ID{0}; | 79 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 79 | 80 | ||
| 80 | static constexpr u64 EXPECTED_MEMORY = 512_MiB; | 81 | static constexpr u64 EXPECTED_MEMORY = 256_MiB; |
| 81 | static constexpr u64 CRITICAL_MEMORY = 1_GiB; | 82 | static constexpr u64 CRITICAL_MEMORY = 1_GiB; |
| 82 | 83 | ||
| 83 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 84 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -330,7 +331,7 @@ private: | |||
| 330 | template <bool insert> | 331 | template <bool insert> |
| 331 | void ChangeRegister(BufferId buffer_id); | 332 | void ChangeRegister(BufferId buffer_id); |
| 332 | 333 | ||
| 333 | void TouchBuffer(Buffer& buffer) const noexcept; | 334 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; |
| 334 | 335 | ||
| 335 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 336 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 336 | 337 | ||
| @@ -428,7 +429,11 @@ private: | |||
| 428 | size_t immediate_buffer_capacity = 0; | 429 | size_t immediate_buffer_capacity = 0; |
| 429 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 430 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| 430 | 431 | ||
| 431 | typename SlotVector<Buffer>::Iterator deletion_iterator; | 432 | struct LRUItemParams { |
| 433 | using ObjectType = BufferId; | ||
| 434 | using TickType = u64; | ||
| 435 | }; | ||
| 436 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 432 | u64 frame_tick = 0; | 437 | u64 frame_tick = 0; |
| 433 | u64 total_used_memory = 0; | 438 | u64 total_used_memory = 0; |
| 434 | 439 | ||
| @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 445 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { | 450 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { |
| 446 | // Ensure the first slot is used for the null buffer | 451 | // Ensure the first slot is used for the null buffer |
| 447 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 452 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 448 | deletion_iterator = slot_buffers.end(); | ||
| 449 | common_ranges.clear(); | 453 | common_ranges.clear(); |
| 450 | } | 454 | } |
| 451 | 455 | ||
| @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { | |||
| 454 | const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; | 458 | const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; |
| 455 | const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; | 459 | const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; |
| 456 | int num_iterations = aggressive_gc ? 64 : 32; | 460 | int num_iterations = aggressive_gc ? 64 : 32; |
| 457 | for (; num_iterations > 0; --num_iterations) { | 461 | const auto clean_up = [this, &num_iterations](BufferId buffer_id) { |
| 458 | if (deletion_iterator == slot_buffers.end()) { | 462 | if (num_iterations == 0) { |
| 459 | deletion_iterator = slot_buffers.begin(); | 463 | return true; |
| 460 | } | ||
| 461 | ++deletion_iterator; | ||
| 462 | if (deletion_iterator == slot_buffers.end()) { | ||
| 463 | break; | ||
| 464 | } | ||
| 465 | const auto [buffer_id, buffer] = *deletion_iterator; | ||
| 466 | if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { | ||
| 467 | DownloadBufferMemory(*buffer); | ||
| 468 | DeleteBuffer(buffer_id); | ||
| 469 | } | 464 | } |
| 470 | } | 465 | --num_iterations; |
| 466 | auto& buffer = slot_buffers[buffer_id]; | ||
| 467 | DownloadBufferMemory(buffer); | ||
| 468 | DeleteBuffer(buffer_id); | ||
| 469 | return false; | ||
| 470 | }; | ||
| 471 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | ||
| 471 | } | 472 | } |
| 472 | 473 | ||
| 473 | template <class P> | 474 | template <class P> |
| @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | |||
| 954 | template <class P> | 955 | template <class P> |
| 955 | void BufferCache<P>::BindHostIndexBuffer() { | 956 | void BufferCache<P>::BindHostIndexBuffer() { |
| 956 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 957 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
| 957 | TouchBuffer(buffer); | 958 | TouchBuffer(buffer, index_buffer.buffer_id); |
| 958 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 959 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 959 | const u32 size = index_buffer.size; | 960 | const u32 size = index_buffer.size; |
| 960 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 961 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 975 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 976 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 976 | const Binding& binding = vertex_buffers[index]; | 977 | const Binding& binding = vertex_buffers[index]; |
| 977 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 978 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 978 | TouchBuffer(buffer); | 979 | TouchBuffer(buffer, binding.buffer_id); |
| 979 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 980 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |
| 980 | if (!flags[Dirty::VertexBuffer0 + index]) { | 981 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 981 | continue; | 982 | continue; |
| @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 1011 | const VAddr cpu_addr = binding.cpu_addr; | 1012 | const VAddr cpu_addr = binding.cpu_addr; |
| 1012 | const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); | 1013 | const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); |
| 1013 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1014 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1014 | TouchBuffer(buffer); | 1015 | TouchBuffer(buffer, binding.buffer_id); |
| 1015 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 1016 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 1016 | size <= uniform_buffer_skip_cache_size && | 1017 | size <= uniform_buffer_skip_cache_size && |
| 1017 | !buffer.IsRegionGpuModified(cpu_addr, size); | 1018 | !buffer.IsRegionGpuModified(cpu_addr, size); |
| @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 1083 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | 1084 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |
| 1084 | const Binding& binding = storage_buffers[stage][index]; | 1085 | const Binding& binding = storage_buffers[stage][index]; |
| 1085 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1086 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1086 | TouchBuffer(buffer); | 1087 | TouchBuffer(buffer, binding.buffer_id); |
| 1087 | const u32 size = binding.size; | 1088 | const u32 size = binding.size; |
| 1088 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1089 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1089 | 1090 | ||
| @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 1128 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | 1129 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 1129 | const Binding& binding = transform_feedback_buffers[index]; | 1130 | const Binding& binding = transform_feedback_buffers[index]; |
| 1130 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1131 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1131 | TouchBuffer(buffer); | 1132 | TouchBuffer(buffer, binding.buffer_id); |
| 1132 | const u32 size = binding.size; | 1133 | const u32 size = binding.size; |
| 1133 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1134 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1134 | 1135 | ||
| @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1148 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { | 1149 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |
| 1149 | const Binding& binding = compute_uniform_buffers[index]; | 1150 | const Binding& binding = compute_uniform_buffers[index]; |
| 1150 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1151 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1151 | TouchBuffer(buffer); | 1152 | TouchBuffer(buffer, binding.buffer_id); |
| 1152 | const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); | 1153 | const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); |
| 1153 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1154 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1154 | 1155 | ||
| @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1168 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 1169 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 1169 | const Binding& binding = compute_storage_buffers[index]; | 1170 | const Binding& binding = compute_storage_buffers[index]; |
| 1170 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1171 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1171 | TouchBuffer(buffer); | 1172 | TouchBuffer(buffer, binding.buffer_id); |
| 1172 | const u32 size = binding.size; | 1173 | const u32 size = binding.size; |
| 1173 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1174 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1174 | 1175 | ||
| @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1513 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1514 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1514 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1515 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1515 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1516 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1516 | TouchBuffer(slot_buffers[new_buffer_id]); | ||
| 1517 | for (const BufferId overlap_id : overlap.ids) { | 1517 | for (const BufferId overlap_id : overlap.ids) { |
| 1518 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1518 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1519 | } | 1519 | } |
| 1520 | Register(new_buffer_id); | 1520 | Register(new_buffer_id); |
| 1521 | TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); | ||
| 1521 | return new_buffer_id; | 1522 | return new_buffer_id; |
| 1522 | } | 1523 | } |
| 1523 | 1524 | ||
| @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { | |||
| 1534 | template <class P> | 1535 | template <class P> |
| 1535 | template <bool insert> | 1536 | template <bool insert> |
| 1536 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | 1537 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { |
| 1537 | const Buffer& buffer = slot_buffers[buffer_id]; | 1538 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1538 | const auto size = buffer.SizeBytes(); | 1539 | const auto size = buffer.SizeBytes(); |
| 1539 | if (insert) { | 1540 | if (insert) { |
| 1540 | total_used_memory += Common::AlignUp(size, 1024); | 1541 | total_used_memory += Common::AlignUp(size, 1024); |
| 1542 | buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick); | ||
| 1541 | } else { | 1543 | } else { |
| 1542 | total_used_memory -= Common::AlignUp(size, 1024); | 1544 | total_used_memory -= Common::AlignUp(size, 1024); |
| 1545 | lru_cache.Free(buffer.lru_id); | ||
| 1543 | } | 1546 | } |
| 1544 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1547 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1545 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1548 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1555 | } | 1558 | } |
| 1556 | 1559 | ||
| 1557 | template <class P> | 1560 | template <class P> |
| 1558 | void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | 1561 | void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { |
| 1559 | buffer.SetFrameTick(frame_tick); | 1562 | if (buffer_id != NULL_BUFFER_ID) { |
| 1563 | lru_cache.Touch(buffer.lru_id, frame_tick); | ||
| 1564 | } | ||
| 1560 | } | 1565 | } |
| 1561 | 1566 | ||
| 1562 | template <class P> | 1567 | template <class P> |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..662089e3d 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -80,7 +80,7 @@ struct ImageBase { | |||
| 80 | VAddr cpu_addr_end = 0; | 80 | VAddr cpu_addr_end = 0; |
| 81 | 81 | ||
| 82 | u64 modification_tick = 0; | 82 | u64 modification_tick = 0; |
| 83 | u64 frame_tick = 0; | 83 | size_t lru_index = ~0; |
| 84 | 84 | ||
| 85 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | 85 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; |
| 86 | 86 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..c16cc0838 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -43,8 +43,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 43 | void(slot_image_views.insert(runtime, NullImageParams{})); | 43 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 44 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 44 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 45 | 45 | ||
| 46 | deletion_iterator = slot_images.begin(); | ||
| 47 | |||
| 48 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 46 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 49 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 47 | const auto device_memory = runtime.GetDeviceLocalMemory(); |
| 50 | const u64 possible_expected_memory = (device_memory * 3) / 10; | 48 | const u64 possible_expected_memory = (device_memory * 3) / 10; |
| @@ -64,65 +62,33 @@ template <class P> | |||
| 64 | void TextureCache<P>::RunGarbageCollector() { | 62 | void TextureCache<P>::RunGarbageCollector() { |
| 65 | const bool high_priority_mode = total_used_memory >= expected_memory; | 63 | const bool high_priority_mode = total_used_memory >= expected_memory; |
| 66 | const bool aggressive_mode = total_used_memory >= critical_memory; | 64 | const bool aggressive_mode = total_used_memory >= critical_memory; |
| 67 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | 65 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 50ULL : 100ULL; |
| 68 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | 66 | size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 50 : 5); |
| 69 | for (; num_iterations > 0; --num_iterations) { | 67 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { |
| 70 | if (deletion_iterator == slot_images.end()) { | 68 | if (num_iterations == 0) { |
| 71 | deletion_iterator = slot_images.begin(); | 69 | return true; |
| 72 | if (deletion_iterator == slot_images.end()) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | 70 | } |
| 76 | auto [image_id, image_tmp] = *deletion_iterator; | 71 | --num_iterations; |
| 77 | Image* image = image_tmp; // fix clang error. | 72 | auto& image = slot_images[image_id]; |
| 78 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | 73 | const bool must_download = image.IsSafeDownload(); |
| 79 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | 74 | if (!high_priority_mode && must_download) { |
| 80 | const bool must_download = image->IsSafeDownload(); | 75 | return false; |
| 81 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 82 | const u64 ticks_needed = | ||
| 83 | is_bad_overlap | ||
| 84 | ? ticks_to_destroy >> 4 | ||
| 85 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 86 | should_care |= aggressive_mode; | ||
| 87 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 88 | if (is_bad_overlap) { | ||
| 89 | const bool overlap_check = std::ranges::all_of( | ||
| 90 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 91 | auto& overlap = slot_images[overlap_id]; | ||
| 92 | return overlap.frame_tick >= image->frame_tick; | ||
| 93 | }); | ||
| 94 | if (!overlap_check) { | ||
| 95 | ++deletion_iterator; | ||
| 96 | continue; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | if (!is_bad_overlap && must_download) { | ||
| 100 | const bool alias_check = std::ranges::none_of( | ||
| 101 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 102 | auto& alias_image = slot_images[alias.id]; | ||
| 103 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 104 | (alias_image.modification_tick < image->modification_tick); | ||
| 105 | }); | ||
| 106 | |||
| 107 | if (alias_check) { | ||
| 108 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 109 | const auto copies = FullDownloadCopies(image->info); | ||
| 110 | image->DownloadMemory(map, copies); | ||
| 111 | runtime.Finish(); | ||
| 112 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 116 | UntrackImage(*image, image_id); | ||
| 117 | } | ||
| 118 | UnregisterImage(image_id); | ||
| 119 | DeleteImage(image_id); | ||
| 120 | if (is_bad_overlap) { | ||
| 121 | ++num_iterations; | ||
| 122 | } | ||
| 123 | } | 76 | } |
| 124 | ++deletion_iterator; | 77 | if (must_download) { |
| 125 | } | 78 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 79 | const auto copies = FullDownloadCopies(image.info); | ||
| 80 | image.DownloadMemory(map, copies); | ||
| 81 | runtime.Finish(); | ||
| 82 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 83 | } | ||
| 84 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 85 | UntrackImage(image, image_id); | ||
| 86 | } | ||
| 87 | UnregisterImage(image_id); | ||
| 88 | DeleteImage(image_id); | ||
| 89 | return false; | ||
| 90 | }; | ||
| 91 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | ||
| 126 | } | 92 | } |
| 127 | 93 | ||
| 128 | template <class P> | 94 | template <class P> |
| @@ -1078,6 +1044,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1078 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1044 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1079 | } | 1045 | } |
| 1080 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1046 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1047 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | ||
| 1048 | |||
| 1081 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1049 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| 1082 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | 1050 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); |
| 1083 | if (False(image.flags & ImageFlagBits::Sparse)) { | 1051 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| @@ -1115,6 +1083,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1115 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1083 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1116 | } | 1084 | } |
| 1117 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | 1085 | total_used_memory -= Common::AlignUp(tentative_size, 1024); |
| 1086 | lru_cache.Free(image.lru_index); | ||
| 1118 | const auto& clear_page_table = | 1087 | const auto& clear_page_table = |
| 1119 | [this, image_id]( | 1088 | [this, image_id]( |
| 1120 | u64 page, | 1089 | u64 page, |
| @@ -1384,7 +1353,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool | |||
| 1384 | if (is_modification) { | 1353 | if (is_modification) { |
| 1385 | MarkModification(image); | 1354 | MarkModification(image); |
| 1386 | } | 1355 | } |
| 1387 | image.frame_tick = frame_tick; | 1356 | lru_cache.Touch(image.lru_index, frame_tick); |
| 1388 | } | 1357 | } |
| 1389 | 1358 | ||
| 1390 | template <class P> | 1359 | template <class P> |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | 14 | ||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/literals.h" | 16 | #include "common/literals.h" |
| 17 | #include "common/lru_cache.h" | ||
| 17 | #include "video_core/compatible_formats.h" | 18 | #include "video_core/compatible_formats.h" |
| 18 | #include "video_core/delayed_destruction_ring.h" | 19 | #include "video_core/delayed_destruction_ring.h" |
| 19 | #include "video_core/engines/fermi_2d.h" | 20 | #include "video_core/engines/fermi_2d.h" |
| @@ -370,6 +371,12 @@ private: | |||
| 370 | std::vector<ImageId> uncommitted_downloads; | 371 | std::vector<ImageId> uncommitted_downloads; |
| 371 | std::queue<std::vector<ImageId>> committed_downloads; | 372 | std::queue<std::vector<ImageId>> committed_downloads; |
| 372 | 373 | ||
| 374 | struct LRUItemParams { | ||
| 375 | using ObjectType = ImageId; | ||
| 376 | using TickType = u64; | ||
| 377 | }; | ||
| 378 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 379 | |||
| 373 | static constexpr size_t TICKS_TO_DESTROY = 6; | 380 | static constexpr size_t TICKS_TO_DESTROY = 6; |
| 374 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | 381 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; |
| 375 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | 382 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; |
| @@ -379,7 +386,6 @@ private: | |||
| 379 | 386 | ||
| 380 | u64 modification_tick = 0; | 387 | u64 modification_tick = 0; |
| 381 | u64 frame_tick = 0; | 388 | u64 frame_tick = 0; |
| 382 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 383 | }; | 389 | }; |
| 384 | 390 | ||
| 385 | } // namespace VideoCommon | 391 | } // namespace VideoCommon |