diff options
| author | 2022-01-16 04:43:06 +0100 | |
|---|---|---|
| committer | 2022-03-25 01:51:51 +0100 | |
| commit | ecb3342145780d811017a3a3c8f14f3e0725db75 (patch) | |
| tree | 2badf5f2b54a90cc3803d63f9f013c6abe1a6a2d /src/video_core/texture_cache | |
| parent | Merge pull request #8074 from liamwhite/cached-words (diff) | |
| download | yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.tar.gz yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.tar.xz yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.zip | |
Garbage Collection: Redesign the algorithm to make better use of memory.
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 15 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 42 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 9 |
3 files changed, 49 insertions, 17 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..279f39269 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -29,15 +29,16 @@ enum class ImageFlagBits : u32 { | |||
| 29 | Sparse = 1 << 9, ///< Image has non continuous submemory. | 29 | Sparse = 1 << 9, ///< Image has non continuous submemory. |
| 30 | 30 | ||
| 31 | // Garbage Collection Flags | 31 | // Garbage Collection Flags |
| 32 | BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher | 32 | BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher |
| 33 | ///< garbage collection priority | 33 | ///< garbage collection priority |
| 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage | 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage |
| 35 | ///< collection | 35 | ///< collection |
| 36 | GCProtected = 1 << 12, ///< Protected from low-tier GC as they are costly to load back. | ||
| 36 | 37 | ||
| 37 | // Rescaler | 38 | // Rescaler |
| 38 | Rescaled = 1 << 12, | 39 | Rescaled = 1 << 13, |
| 39 | CheckingRescalable = 1 << 13, | 40 | CheckingRescalable = 1 << 14, |
| 40 | IsRescalable = 1 << 14, | 41 | IsRescalable = 1 << 15, |
| 41 | }; | 42 | }; |
| 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 43 | 44 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..7b6bd8697 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -50,14 +50,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 51 | 51 | ||
| 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; | 54 | const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; |
| 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; | 55 | const s64 min_spacing_critical = device_memory - 1_GiB; |
| 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); | 56 | const s64 mem_tresshold = std::min(device_memory, TARGET_THRESHOLD); |
| 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); | 57 | const s64 min_vacancy_expected = (6 * mem_tresshold) / 10; |
| 58 | minimum_memory = 0; | 58 | const s64 min_vacancy_critical = (3 * mem_tresshold) / 10; |
| 59 | expected_memory = static_cast<u64>( | ||
| 60 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||
| 61 | DEFAULT_EXPECTED_MEMORY)); | ||
| 62 | critical_memory = static_cast<u64>( | ||
| 63 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||
| 64 | DEFAULT_CRITICAL_MEMORY)); | ||
| 65 | minimum_memory = static_cast<u64>((device_memory - mem_tresshold) / 2); | ||
| 66 | LOG_CRITICAL(Debug, "Available Memory: {}", device_memory / 1_MiB); | ||
| 59 | } else { | 67 | } else { |
| 60 | // On OpenGL we can be more conservatives as the driver takes care. | ||
| 61 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 68 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 62 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 69 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 63 | minimum_memory = 0; | 70 | minimum_memory = 0; |
| @@ -76,7 +83,8 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 76 | } | 83 | } |
| 77 | --num_iterations; | 84 | --num_iterations; |
| 78 | auto& image = slot_images[image_id]; | 85 | auto& image = slot_images[image_id]; |
| 79 | const bool must_download = image.IsSafeDownload(); | 86 | const bool must_download = |
| 87 | image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); | ||
| 80 | if (!high_priority_mode && must_download) { | 88 | if (!high_priority_mode && must_download) { |
| 81 | return false; | 89 | return false; |
| 82 | } | 90 | } |
| @@ -99,6 +107,10 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 99 | 107 | ||
| 100 | template <class P> | 108 | template <class P> |
| 101 | void TextureCache<P>::TickFrame() { | 109 | void TextureCache<P>::TickFrame() { |
| 110 | // If we can obtain the memory info, use it instead of the estimate. | ||
| 111 | if (runtime.CanReportMemoryUsage()) { | ||
| 112 | total_used_memory = runtime.GetDeviceMemoryUsage(); | ||
| 113 | } | ||
| 102 | if (total_used_memory > minimum_memory) { | 114 | if (total_used_memory > minimum_memory) { |
| 103 | RunGarbageCollector(); | 115 | RunGarbageCollector(); |
| 104 | } | 116 | } |
| @@ -106,7 +118,9 @@ void TextureCache<P>::TickFrame() { | |||
| 106 | sentenced_framebuffers.Tick(); | 118 | sentenced_framebuffers.Tick(); |
| 107 | sentenced_image_view.Tick(); | 119 | sentenced_image_view.Tick(); |
| 108 | runtime.TickFrame(); | 120 | runtime.TickFrame(); |
| 121 | critical_gc = 0; | ||
| 109 | ++frame_tick; | 122 | ++frame_tick; |
| 123 | LOG_CRITICAL(Debug, "Current memory: {}", total_used_memory / 1_MiB); | ||
| 110 | } | 124 | } |
| 111 | 125 | ||
| 112 | template <class P> | 126 | template <class P> |
| @@ -1052,6 +1066,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1052 | 1066 | ||
| 1053 | for (const ImageId overlap_id : overlap_ids) { | 1067 | for (const ImageId overlap_id : overlap_ids) { |
| 1054 | Image& overlap = slot_images[overlap_id]; | 1068 | Image& overlap = slot_images[overlap_id]; |
| 1069 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 1070 | new_image.flags |= ImageFlagBits::GpuModified; | ||
| 1071 | new_image.modification_tick = | ||
| 1072 | std::max(overlap.modification_tick, new_image.modification_tick); | ||
| 1073 | } | ||
| 1055 | if (overlap.info.num_samples != new_image.info.num_samples) { | 1074 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 1056 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | 1075 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 1057 | } else { | 1076 | } else { |
| @@ -1414,6 +1433,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1414 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1433 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1415 | } | 1434 | } |
| 1416 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1435 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1436 | if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { | ||
| 1437 | RunGarbageCollector(); | ||
| 1438 | critical_gc++; | ||
| 1439 | } | ||
| 1417 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | 1440 | image.lru_index = lru_cache.Insert(image_id, frame_tick); |
| 1418 | 1441 | ||
| 1419 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1442 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| @@ -1704,6 +1727,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1704 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1727 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1705 | aliased_images.push_back(&aliased); | 1728 | aliased_images.push_back(&aliased); |
| 1706 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | 1729 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); |
| 1730 | if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { | ||
| 1731 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1732 | } | ||
| 1707 | } | 1733 | } |
| 1708 | } | 1734 | } |
| 1709 | if (aliased_images.empty()) { | 1735 | if (aliased_images.empty()) { |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 647ca0730..5dabc344b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -59,8 +59,12 @@ class TextureCache { | |||
| 59 | /// True when the API can provide info about the memory of the device. | 59 | /// True when the API can provide info about the memory of the device. |
| 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 61 | 61 | ||
| 62 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 62 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| 63 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 63 | static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10; |
| 64 | static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10; | ||
| 65 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; | ||
| 66 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; | ||
| 67 | static constexpr size_t GC_EMERGENCY_COUNTS = 2; | ||
| 64 | 68 | ||
| 65 | using Runtime = typename P::Runtime; | 69 | using Runtime = typename P::Runtime; |
| 66 | using Image = typename P::Image; | 70 | using Image = typename P::Image; |
| @@ -372,6 +376,7 @@ private: | |||
| 372 | u64 minimum_memory; | 376 | u64 minimum_memory; |
| 373 | u64 expected_memory; | 377 | u64 expected_memory; |
| 374 | u64 critical_memory; | 378 | u64 critical_memory; |
| 379 | size_t critical_gc; | ||
| 375 | 380 | ||
| 376 | SlotVector<Image> slot_images; | 381 | SlotVector<Image> slot_images; |
| 377 | SlotVector<ImageMapView> slot_map_views; | 382 | SlotVector<ImageMapView> slot_map_views; |