summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: bunnei, 2022-03-24 20:16:11 -0700
committer: GitHub, 2022-03-24 20:16:11 -0700
commit: ab6a5784fa991016b5d8c097471fbda88853ba5d (patch)
tree: 407ed36099630b4e52bcd727e799d8f22bb43c24 /src/video_core/texture_cache
parent: Merge pull request #8050 from bunnei/nvflinger-rewrite (diff)
parent: GC: Address Feedback. (diff)
download: yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.tar.gz
download: yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.tar.xz
download: yuzu-ab6a5784fa991016b5d8c097471fbda88853ba5d.zip
Merge pull request #7720 from FernandoS27/yfc-gc
First Nugget: Reworked Garbage Collection to be smarter [originally from Project YFC]
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/image_base.h | 7
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 66
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 7
3 files changed, 61 insertions, 19 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 89c111c00..dd0106432 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -33,11 +33,12 @@ enum class ImageFlagBits : u32 {
33 ///< garbage collection priority 33 ///< garbage collection priority
34 Alias = 1 << 11, ///< This image has aliases and has priority on garbage 34 Alias = 1 << 11, ///< This image has aliases and has priority on garbage
35 ///< collection 35 ///< collection
36 CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back.
36 37
37 // Rescaler 38 // Rescaler
38 Rescaled = 1 << 12, 39 Rescaled = 1 << 13,
39 CheckingRescalable = 1 << 13, 40 CheckingRescalable = 1 << 14,
40 IsRescalable = 1 << 14, 41 IsRescalable = 1 << 15,
41}; 42};
42DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 43DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
43 44
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 72eeb8bbd..efc1c4525 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
50 void(slot_samplers.insert(runtime, sampler_descriptor)); 50 void(slot_samplers.insert(runtime, sampler_descriptor));
51 51
52 if constexpr (HAS_DEVICE_MEMORY_INFO) { 52 if constexpr (HAS_DEVICE_MEMORY_INFO) {
53 const auto device_memory = runtime.GetDeviceLocalMemory(); 53 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
54 const u64 possible_expected_memory = (device_memory * 4) / 10; 54 const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
55 const u64 possible_critical_memory = (device_memory * 7) / 10; 55 const s64 min_spacing_critical = device_memory - 1_GiB;
56 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); 56 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
57 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); 57 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
58 minimum_memory = 0; 58 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
59 expected_memory = static_cast<u64>(
60 std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
61 DEFAULT_EXPECTED_MEMORY));
62 critical_memory = static_cast<u64>(
63 std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
64 DEFAULT_CRITICAL_MEMORY));
65 minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
59 } else { 66 } else {
60 // On OpenGL we can be more conservative as the driver takes care.
61 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; 67 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
62 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; 68 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
63 minimum_memory = 0; 69 minimum_memory = 0;
@@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
66 72
67template <class P> 73template <class P>
68void TextureCache<P>::RunGarbageCollector() { 74void TextureCache<P>::RunGarbageCollector() {
69 const bool high_priority_mode = total_used_memory >= expected_memory; 75 bool high_priority_mode = total_used_memory >= expected_memory;
70 const bool aggressive_mode = total_used_memory >= critical_memory; 76 bool aggressive_mode = total_used_memory >= critical_memory;
71 const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; 77 const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
72 size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); 78 size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
73 const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { 79 const auto clean_up = [this, &num_iterations, &high_priority_mode,
80 &aggressive_mode](ImageId image_id) {
74 if (num_iterations == 0) { 81 if (num_iterations == 0) {
75 return true; 82 return true;
76 } 83 }
77 --num_iterations; 84 --num_iterations;
78 auto& image = slot_images[image_id]; 85 auto& image = slot_images[image_id];
79 const bool must_download = image.IsSafeDownload(); 86 const bool must_download =
80 if (!high_priority_mode && must_download) { 87 image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
88 if (!high_priority_mode &&
89 (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
81 return false; 90 return false;
82 } 91 }
83 if (must_download) { 92 if (must_download) {
@@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() {
92 } 101 }
93 UnregisterImage(image_id); 102 UnregisterImage(image_id);
94 DeleteImage(image_id, image.scale_tick > frame_tick + 5); 103 DeleteImage(image_id, image.scale_tick > frame_tick + 5);
104 if (total_used_memory < critical_memory) {
105 if (aggressive_mode) {
106 // Sink the aggressiveness.
107 num_iterations >>= 2;
108 aggressive_mode = false;
109 return false;
110 }
111 if (high_priority_mode && total_used_memory < expected_memory) {
112 num_iterations >>= 1;
113 high_priority_mode = false;
114 }
115 }
95 return false; 116 return false;
96 }; 117 };
97 lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); 118 lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
@@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() {
99 120
100template <class P> 121template <class P>
101void TextureCache<P>::TickFrame() { 122void TextureCache<P>::TickFrame() {
123 // If we can obtain the memory info, use it instead of the estimate.
124 if (runtime.CanReportMemoryUsage()) {
125 total_used_memory = runtime.GetDeviceMemoryUsage();
126 }
102 if (total_used_memory > minimum_memory) { 127 if (total_used_memory > minimum_memory) {
103 RunGarbageCollector(); 128 RunGarbageCollector();
104 } 129 }
@@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() {
106 sentenced_framebuffers.Tick(); 131 sentenced_framebuffers.Tick();
107 sentenced_image_view.Tick(); 132 sentenced_image_view.Tick();
108 runtime.TickFrame(); 133 runtime.TickFrame();
134 critical_gc = 0;
109 ++frame_tick; 135 ++frame_tick;
110} 136}
111 137
@@ -1052,6 +1078,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1052 1078
1053 for (const ImageId overlap_id : overlap_ids) { 1079 for (const ImageId overlap_id : overlap_ids) {
1054 Image& overlap = slot_images[overlap_id]; 1080 Image& overlap = slot_images[overlap_id];
1081 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1082 new_image.flags |= ImageFlagBits::GpuModified;
1083 new_image.modification_tick =
1084 std::max(overlap.modification_tick, new_image.modification_tick);
1085 }
1055 if (overlap.info.num_samples != new_image.info.num_samples) { 1086 if (overlap.info.num_samples != new_image.info.num_samples) {
1056 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); 1087 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
1057 } else { 1088 } else {
@@ -1414,6 +1445,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1414 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1445 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1415 } 1446 }
1416 total_used_memory += Common::AlignUp(tentative_size, 1024); 1447 total_used_memory += Common::AlignUp(tentative_size, 1024);
1448 if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
1449 RunGarbageCollector();
1450 critical_gc++;
1451 }
1417 image.lru_index = lru_cache.Insert(image_id, frame_tick); 1452 image.lru_index = lru_cache.Insert(image_id, frame_tick);
1418 1453
1419 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1454 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
@@ -1704,6 +1739,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1704 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); 1739 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1705 aliased_images.push_back(&aliased); 1740 aliased_images.push_back(&aliased);
1706 any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); 1741 any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
1742 if (True(aliased_image.flags & ImageFlagBits::GpuModified)) {
1743 image.flags |= ImageFlagBits::GpuModified;
1744 }
1707 } 1745 }
1708 } 1746 }
1709 if (aliased_images.empty()) { 1747 if (aliased_images.empty()) {
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 647ca0730..b1324edf3 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -59,8 +59,10 @@ class TextureCache {
59 /// True when the API can provide info about the memory of the device. 59 /// True when the API can provide info about the memory of the device.
60 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; 60 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
61 61
62 static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; 62 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
63 static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; 63 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
64 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
65 static constexpr size_t GC_EMERGENCY_COUNTS = 2;
64 66
65 using Runtime = typename P::Runtime; 67 using Runtime = typename P::Runtime;
66 using Image = typename P::Image; 68 using Image = typename P::Image;
@@ -372,6 +374,7 @@ private:
372 u64 minimum_memory; 374 u64 minimum_memory;
373 u64 expected_memory; 375 u64 expected_memory;
374 u64 critical_memory; 376 u64 critical_memory;
377 size_t critical_gc;
375 378
376 SlotVector<Image> slot_images; 379 SlotVector<Image> slot_images;
377 SlotVector<ImageMapView> slot_map_views; 380 SlotVector<ImageMapView> slot_map_views;