diff options
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 92 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 |
3 files changed, 38 insertions, 64 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -80,7 +80,7 @@ struct ImageBase { | |||
| 80 | VAddr cpu_addr_end = 0; | 80 | VAddr cpu_addr_end = 0; |
| 81 | 81 | ||
| 82 | u64 modification_tick = 0; | 82 | u64 modification_tick = 0; |
| 83 | u64 frame_tick = 0; | 83 | size_t lru_index = SIZE_MAX; |
| 84 | 84 | ||
| 85 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | 85 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; |
| 86 | 86 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 8 | #include "common/settings.h" | ||
| 9 | #include "video_core/dirty_flags.h" | 8 | #include "video_core/dirty_flags.h" |
| 10 | #include "video_core/texture_cache/samples_helper.h" | 9 | #include "video_core/texture_cache/samples_helper.h" |
| 11 | #include "video_core/texture_cache/texture_cache_base.h" | 10 | #include "video_core/texture_cache/texture_cache_base.h" |
| @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 43 | void(slot_image_views.insert(runtime, NullImageParams{})); | 42 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 44 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 43 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 45 | 44 | ||
| 46 | deletion_iterator = slot_images.begin(); | ||
| 47 | |||
| 48 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 45 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 49 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 46 | const auto device_memory = runtime.GetDeviceLocalMemory(); |
| 50 | const u64 possible_expected_memory = (device_memory * 3) / 10; | 47 | const u64 possible_expected_memory = (device_memory * 3) / 10; |
| @@ -64,70 +61,38 @@ template <class P> | |||
| 64 | void TextureCache<P>::RunGarbageCollector() { | 61 | void TextureCache<P>::RunGarbageCollector() { |
| 65 | const bool high_priority_mode = total_used_memory >= expected_memory; | 62 | const bool high_priority_mode = total_used_memory >= expected_memory; |
| 66 | const bool aggressive_mode = total_used_memory >= critical_memory; | 63 | const bool aggressive_mode = total_used_memory >= critical_memory; |
| 67 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | 64 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; |
| 68 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | 65 | size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); |
| 69 | for (; num_iterations > 0; --num_iterations) { | 66 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { |
| 70 | if (deletion_iterator == slot_images.end()) { | 67 | if (num_iterations == 0) { |
| 71 | deletion_iterator = slot_images.begin(); | 68 | return true; |
| 72 | if (deletion_iterator == slot_images.end()) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | 69 | } |
| 76 | auto [image_id, image_tmp] = *deletion_iterator; | 70 | --num_iterations; |
| 77 | Image* image = image_tmp; // fix clang error. | 71 | auto& image = slot_images[image_id]; |
| 78 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | 72 | const bool must_download = image.IsSafeDownload(); |
| 79 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | 73 | if (!high_priority_mode && must_download) { |
| 80 | const bool must_download = image->IsSafeDownload(); | 74 | return false; |
| 81 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 82 | const u64 ticks_needed = | ||
| 83 | is_bad_overlap | ||
| 84 | ? ticks_to_destroy >> 4 | ||
| 85 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 86 | should_care |= aggressive_mode; | ||
| 87 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 88 | if (is_bad_overlap) { | ||
| 89 | const bool overlap_check = std::ranges::all_of( | ||
| 90 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 91 | auto& overlap = slot_images[overlap_id]; | ||
| 92 | return overlap.frame_tick >= image->frame_tick; | ||
| 93 | }); | ||
| 94 | if (!overlap_check) { | ||
| 95 | ++deletion_iterator; | ||
| 96 | continue; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | if (!is_bad_overlap && must_download) { | ||
| 100 | const bool alias_check = std::ranges::none_of( | ||
| 101 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 102 | auto& alias_image = slot_images[alias.id]; | ||
| 103 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 104 | (alias_image.modification_tick < image->modification_tick); | ||
| 105 | }); | ||
| 106 | |||
| 107 | if (alias_check) { | ||
| 108 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 109 | const auto copies = FullDownloadCopies(image->info); | ||
| 110 | image->DownloadMemory(map, copies); | ||
| 111 | runtime.Finish(); | ||
| 112 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 116 | UntrackImage(*image, image_id); | ||
| 117 | } | ||
| 118 | UnregisterImage(image_id); | ||
| 119 | DeleteImage(image_id); | ||
| 120 | if (is_bad_overlap) { | ||
| 121 | ++num_iterations; | ||
| 122 | } | ||
| 123 | } | 75 | } |
| 124 | ++deletion_iterator; | 76 | if (must_download) { |
| 125 | } | 77 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 78 | const auto copies = FullDownloadCopies(image.info); | ||
| 79 | image.DownloadMemory(map, copies); | ||
| 80 | runtime.Finish(); | ||
| 81 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 82 | } | ||
| 83 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 84 | UntrackImage(image, image_id); | ||
| 85 | } | ||
| 86 | UnregisterImage(image_id); | ||
| 87 | DeleteImage(image_id); | ||
| 88 | return false; | ||
| 89 | }; | ||
| 90 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | ||
| 126 | } | 91 | } |
| 127 | 92 | ||
| 128 | template <class P> | 93 | template <class P> |
| 129 | void TextureCache<P>::TickFrame() { | 94 | void TextureCache<P>::TickFrame() { |
| 130 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { | 95 | if (total_used_memory > minimum_memory) { |
| 131 | RunGarbageCollector(); | 96 | RunGarbageCollector(); |
| 132 | } | 97 | } |
| 133 | sentenced_images.Tick(); | 98 | sentenced_images.Tick(); |
| @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1078 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1043 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1079 | } | 1044 | } |
| 1080 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1045 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1046 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | ||
| 1047 | |||
| 1081 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1048 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| 1082 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | 1049 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); |
| 1083 | if (False(image.flags & ImageFlagBits::Sparse)) { | 1050 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1115 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1082 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1116 | } | 1083 | } |
| 1117 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | 1084 | total_used_memory -= Common::AlignUp(tentative_size, 1024); |
| 1085 | lru_cache.Free(image.lru_index); | ||
| 1118 | const auto& clear_page_table = | 1086 | const auto& clear_page_table = |
| 1119 | [this, image_id]( | 1087 | [this, image_id]( |
| 1120 | u64 page, | 1088 | u64 page, |
| @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool | |||
| 1384 | if (is_modification) { | 1352 | if (is_modification) { |
| 1385 | MarkModification(image); | 1353 | MarkModification(image); |
| 1386 | } | 1354 | } |
| 1387 | image.frame_tick = frame_tick; | 1355 | lru_cache.Touch(image.lru_index, frame_tick); |
| 1388 | } | 1356 | } |
| 1389 | 1357 | ||
| 1390 | template <class P> | 1358 | template <class P> |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | 14 | ||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/literals.h" | 16 | #include "common/literals.h" |
| 17 | #include "common/lru_cache.h" | ||
| 17 | #include "video_core/compatible_formats.h" | 18 | #include "video_core/compatible_formats.h" |
| 18 | #include "video_core/delayed_destruction_ring.h" | 19 | #include "video_core/delayed_destruction_ring.h" |
| 19 | #include "video_core/engines/fermi_2d.h" | 20 | #include "video_core/engines/fermi_2d.h" |
| @@ -370,6 +371,12 @@ private: | |||
| 370 | std::vector<ImageId> uncommitted_downloads; | 371 | std::vector<ImageId> uncommitted_downloads; |
| 371 | std::queue<std::vector<ImageId>> committed_downloads; | 372 | std::queue<std::vector<ImageId>> committed_downloads; |
| 372 | 373 | ||
| 374 | struct LRUItemParams { | ||
| 375 | using ObjectType = ImageId; | ||
| 376 | using TickType = u64; | ||
| 377 | }; | ||
| 378 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 379 | |||
| 373 | static constexpr size_t TICKS_TO_DESTROY = 6; | 380 | static constexpr size_t TICKS_TO_DESTROY = 6; |
| 374 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | 381 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; |
| 375 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | 382 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; |
| @@ -379,7 +386,6 @@ private: | |||
| 379 | 386 | ||
| 380 | u64 modification_tick = 0; | 387 | u64 modification_tick = 0; |
| 381 | u64 frame_tick = 0; | 388 | u64 frame_tick = 0; |
| 382 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 383 | }; | 389 | }; |
| 384 | 390 | ||
| 385 | } // namespace VideoCommon | 391 | } // namespace VideoCommon |