From ecb3342145780d811017a3a3c8f14f3e0725db75 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 16 Jan 2022 04:43:06 +0100 Subject: Garbage Collection: Redesign the algorithm to do a better use of memory. --- src/video_core/texture_cache/image_base.h | 15 ++++---- src/video_core/texture_cache/texture_cache.h | 42 ++++++++++++++++++----- src/video_core/texture_cache/texture_cache_base.h | 9 +++-- 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..279f39269 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -29,15 +29,16 @@ enum class ImageFlagBits : u32 { Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 11, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection + GCProtected = 1 << 12, ///< Protected from low-tier GC as they are costly to load back. // Rescaler - Rescaled = 1 << 12, - CheckingRescalable = 1 << 13, - IsRescalable = 1 << 14, + Rescaled = 1 << 13, + CheckingRescalable = 1 << 14, + IsRescalable = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..7b6bd8697 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,14 +50,21 @@ TextureCache
::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
void(slot_samplers.insert(runtime, sampler_descriptor));
if constexpr (HAS_DEVICE_MEMORY_INFO) {
- const auto device_memory = runtime.GetDeviceLocalMemory();
- const u64 possible_expected_memory = (device_memory * 4) / 10;
- const u64 possible_critical_memory = (device_memory * 7) / 10;
- expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
- critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
- minimum_memory = 0;
+ const s64 device_memory = static_cast ::RunGarbageCollector() {
}
--num_iterations;
auto& image = slot_images[image_id];
- const bool must_download = image.IsSafeDownload();
+ const bool must_download =
+ image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
if (!high_priority_mode && must_download) {
return false;
}
@@ -99,6 +107,10 @@ void TextureCache ::RunGarbageCollector() {
template ::TickFrame() {
+ // If we can obtain the memory info, use it instead of the estimate.
+ if (runtime.CanReportMemoryUsage()) {
+ total_used_memory = runtime.GetDeviceMemoryUsage();
+ }
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
@@ -106,7 +118,9 @@ void TextureCache ::TickFrame() {
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
runtime.TickFrame();
+ critical_gc = 0;
++frame_tick;
+ LOG_CRITICAL(Debug, "Current memory: {}", total_used_memory / 1_MiB);
}
template ::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId overlap_id : overlap_ids) {
Image& overlap = slot_images[overlap_id];
+ if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+ new_image.flags |= ImageFlagBits::GpuModified;
+ new_image.modification_tick =
+ std::max(overlap.modification_tick, new_image.modification_tick);
+ }
if (overlap.info.num_samples != new_image.info.num_samples) {
LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
} else {
@@ -1414,6 +1433,10 @@ void TextureCache ::RegisterImage(ImageId image_id) {
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
}
total_used_memory += Common::AlignUp(tentative_size, 1024);
+ if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
+ RunGarbageCollector();
+ critical_gc++;
+ }
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
@@ -1704,6 +1727,9 @@ void TextureCache ::SynchronizeAliases(ImageId image_id) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
+ if (True(aliased_image.flags & ImageFlagBits::GpuModified)) {
+ image.flags |= ImageFlagBits::GpuModified;
+ }
}
}
if (aliased_images.empty()) {
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 647ca0730..5dabc344b 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -59,8 +59,12 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
- static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
- static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
+ static constexpr s64 TARGET_THRESHOLD = 4_GiB;
+ static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10;
+ static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10;
+ static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
+ static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
+ static constexpr size_t GC_EMERGENCY_COUNTS = 2;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
@@ -372,6 +376,7 @@ private:
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
+ size_t critical_gc;
SlotVector