diff options
| author | 2022-03-24 20:16:11 -0700 | |
|---|---|---|
| committer | 2022-03-24 20:16:11 -0700 | |
| commit | ab6a5784fa991016b5d8c097471fbda88853ba5d (patch) | |
| tree | 407ed36099630b4e52bcd727e799d8f22bb43c24 /src/video_core/texture_cache | |
| parent | Merge pull request #8050 from bunnei/nvflinger-rewrite (diff) | |
| parent | GC: Address Feedback. (diff) | |
| download | yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.tar.gz yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.tar.xz yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.zip | |
Merge pull request #7720 from FernandoS27/yfc-gc
First Nugget: Reworked Garbage Collection to be smarter [originally from Project YFC]
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 66 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 7 |
3 files changed, 61 insertions, 19 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..dd0106432 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -33,11 +33,12 @@ enum class ImageFlagBits : u32 { | |||
| 33 | ///< garbage collection priority | 33 | ///< garbage collection priority |
| 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage | 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage |
| 35 | ///< collection | 35 | ///< collection |
| 36 | CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back. | ||
| 36 | 37 | ||
| 37 | // Rescaler | 38 | // Rescaler |
| 38 | Rescaled = 1 << 12, | 39 | Rescaled = 1 << 13, |
| 39 | CheckingRescalable = 1 << 13, | 40 | CheckingRescalable = 1 << 14, |
| 40 | IsRescalable = 1 << 14, | 41 | IsRescalable = 1 << 15, |
| 41 | }; | 42 | }; |
| 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 43 | 44 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..efc1c4525 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 51 | 51 | ||
| 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; | 54 | const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; |
| 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; | 55 | const s64 min_spacing_critical = device_memory - 1_GiB; |
| 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); | 56 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); |
| 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); | 57 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 58 | minimum_memory = 0; | 58 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 59 | expected_memory = static_cast<u64>( | ||
| 60 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||
| 61 | DEFAULT_EXPECTED_MEMORY)); | ||
| 62 | critical_memory = static_cast<u64>( | ||
| 63 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||
| 64 | DEFAULT_CRITICAL_MEMORY)); | ||
| 65 | minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); | ||
| 59 | } else { | 66 | } else { |
| 60 | // On OpenGL we can be more conservatives as the driver takes care. | ||
| 61 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 67 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 62 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 68 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 63 | minimum_memory = 0; | 69 | minimum_memory = 0; |
| @@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 66 | 72 | ||
| 67 | template <class P> | 73 | template <class P> |
| 68 | void TextureCache<P>::RunGarbageCollector() { | 74 | void TextureCache<P>::RunGarbageCollector() { |
| 69 | const bool high_priority_mode = total_used_memory >= expected_memory; | 75 | bool high_priority_mode = total_used_memory >= expected_memory; |
| 70 | const bool aggressive_mode = total_used_memory >= critical_memory; | 76 | bool aggressive_mode = total_used_memory >= critical_memory; |
| 71 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; | 77 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; |
| 72 | size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); | 78 | size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); |
| 73 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { | 79 | const auto clean_up = [this, &num_iterations, &high_priority_mode, |
| 80 | &aggressive_mode](ImageId image_id) { | ||
| 74 | if (num_iterations == 0) { | 81 | if (num_iterations == 0) { |
| 75 | return true; | 82 | return true; |
| 76 | } | 83 | } |
| 77 | --num_iterations; | 84 | --num_iterations; |
| 78 | auto& image = slot_images[image_id]; | 85 | auto& image = slot_images[image_id]; |
| 79 | const bool must_download = image.IsSafeDownload(); | 86 | const bool must_download = |
| 80 | if (!high_priority_mode && must_download) { | 87 | image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); |
| 88 | if (!high_priority_mode && | ||
| 89 | (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { | ||
| 81 | return false; | 90 | return false; |
| 82 | } | 91 | } |
| 83 | if (must_download) { | 92 | if (must_download) { |
| @@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 92 | } | 101 | } |
| 93 | UnregisterImage(image_id); | 102 | UnregisterImage(image_id); |
| 94 | DeleteImage(image_id, image.scale_tick > frame_tick + 5); | 103 | DeleteImage(image_id, image.scale_tick > frame_tick + 5); |
| 104 | if (total_used_memory < critical_memory) { | ||
| 105 | if (aggressive_mode) { | ||
| 106 | // Sink the aggressiveness. | ||
| 107 | num_iterations >>= 2; | ||
| 108 | aggressive_mode = false; | ||
| 109 | return false; | ||
| 110 | } | ||
| 111 | if (high_priority_mode && total_used_memory < expected_memory) { | ||
| 112 | num_iterations >>= 1; | ||
| 113 | high_priority_mode = false; | ||
| 114 | } | ||
| 115 | } | ||
| 95 | return false; | 116 | return false; |
| 96 | }; | 117 | }; |
| 97 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | 118 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); |
| @@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 99 | 120 | ||
| 100 | template <class P> | 121 | template <class P> |
| 101 | void TextureCache<P>::TickFrame() { | 122 | void TextureCache<P>::TickFrame() { |
| 123 | // If we can obtain the memory info, use it instead of the estimate. | ||
| 124 | if (runtime.CanReportMemoryUsage()) { | ||
| 125 | total_used_memory = runtime.GetDeviceMemoryUsage(); | ||
| 126 | } | ||
| 102 | if (total_used_memory > minimum_memory) { | 127 | if (total_used_memory > minimum_memory) { |
| 103 | RunGarbageCollector(); | 128 | RunGarbageCollector(); |
| 104 | } | 129 | } |
| @@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() { | |||
| 106 | sentenced_framebuffers.Tick(); | 131 | sentenced_framebuffers.Tick(); |
| 107 | sentenced_image_view.Tick(); | 132 | sentenced_image_view.Tick(); |
| 108 | runtime.TickFrame(); | 133 | runtime.TickFrame(); |
| 134 | critical_gc = 0; | ||
| 109 | ++frame_tick; | 135 | ++frame_tick; |
| 110 | } | 136 | } |
| 111 | 137 | ||
| @@ -1052,6 +1078,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1052 | 1078 | ||
| 1053 | for (const ImageId overlap_id : overlap_ids) { | 1079 | for (const ImageId overlap_id : overlap_ids) { |
| 1054 | Image& overlap = slot_images[overlap_id]; | 1080 | Image& overlap = slot_images[overlap_id]; |
| 1081 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 1082 | new_image.flags |= ImageFlagBits::GpuModified; | ||
| 1083 | new_image.modification_tick = | ||
| 1084 | std::max(overlap.modification_tick, new_image.modification_tick); | ||
| 1085 | } | ||
| 1055 | if (overlap.info.num_samples != new_image.info.num_samples) { | 1086 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 1056 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | 1087 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 1057 | } else { | 1088 | } else { |
| @@ -1414,6 +1445,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1414 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1445 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1415 | } | 1446 | } |
| 1416 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1447 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1448 | if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { | ||
| 1449 | RunGarbageCollector(); | ||
| 1450 | critical_gc++; | ||
| 1451 | } | ||
| 1417 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | 1452 | image.lru_index = lru_cache.Insert(image_id, frame_tick); |
| 1418 | 1453 | ||
| 1419 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1454 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| @@ -1704,6 +1739,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1704 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1739 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1705 | aliased_images.push_back(&aliased); | 1740 | aliased_images.push_back(&aliased); |
| 1706 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | 1741 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); |
| 1742 | if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { | ||
| 1743 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1744 | } | ||
| 1707 | } | 1745 | } |
| 1708 | } | 1746 | } |
| 1709 | if (aliased_images.empty()) { | 1747 | if (aliased_images.empty()) { |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 647ca0730..b1324edf3 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -59,8 +59,10 @@ class TextureCache { | |||
| 59 | /// True when the API can provide info about the memory of the device. | 59 | /// True when the API can provide info about the memory of the device. |
| 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 61 | 61 | ||
| 62 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 62 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| 63 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 63 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; |
| 64 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; | ||
| 65 | static constexpr size_t GC_EMERGENCY_COUNTS = 2; | ||
| 64 | 66 | ||
| 65 | using Runtime = typename P::Runtime; | 67 | using Runtime = typename P::Runtime; |
| 66 | using Image = typename P::Image; | 68 | using Image = typename P::Image; |
| @@ -372,6 +374,7 @@ private: | |||
| 372 | u64 minimum_memory; | 374 | u64 minimum_memory; |
| 373 | u64 expected_memory; | 375 | u64 expected_memory; |
| 374 | u64 critical_memory; | 376 | u64 critical_memory; |
| 377 | size_t critical_gc; | ||
| 375 | 378 | ||
| 376 | SlotVector<Image> slot_images; | 379 | SlotVector<Image> slot_images; |
| 377 | SlotVector<ImageMapView> slot_map_views; | 380 | SlotVector<ImageMapView> slot_map_views; |