From ecb3342145780d811017a3a3c8f14f3e0725db75 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 16 Jan 2022 04:43:06 +0100 Subject: Garbage Collection: Redesign the algorithm to do a better use of memory. --- src/video_core/texture_cache/image_base.h | 15 ++++---- src/video_core/texture_cache/texture_cache.h | 42 ++++++++++++++++++----- src/video_core/texture_cache/texture_cache_base.h | 9 +++-- 3 files changed, 49 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..279f39269 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -29,15 +29,16 @@ enum class ImageFlagBits : u32 { Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 11, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection + GCProtected = 1 << 12, ///< Protected from low-tier GC as they are costy to load back. // Rescaler - Rescaled = 1 << 12, - CheckingRescalable = 1 << 13, - IsRescalable = 1 << 14, + Rescaled = 1 << 13, + CheckingRescalable = 1 << 14, + IsRescalable = 1 << 15, }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..7b6bd8697 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,14 +50,21 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 4) / 10; - const u64 possible_critical_memory = (device_memory * 7) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); - minimum_memory = 0; + const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); + const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; + const s64 min_spacing_critical = device_memory - 1_GiB; + const s64 mem_tresshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_tresshold) / 10; + const s64 min_vacancy_critical = (3 * mem_tresshold) / 10; + expected_memory = static_cast( + std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), + DEFAULT_EXPECTED_MEMORY)); + critical_memory = static_cast( + std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), + DEFAULT_CRITICAL_MEMORY)); + minimum_memory = static_cast((device_memory - mem_tresshold) / 2); + LOG_CRITICAL(Debug, "Available Memory: {}", device_memory / 1_MiB); } else { - // On OpenGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; minimum_memory = 0; @@ -76,7 +83,8 @@ void TextureCache

::RunGarbageCollector() { } --num_iterations; auto& image = slot_images[image_id]; - const bool must_download = image.IsSafeDownload(); + const bool must_download = + image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); if (!high_priority_mode && must_download) { return false; } @@ -99,6 +107,10 @@ void TextureCache

::RunGarbageCollector() { template void TextureCache

::TickFrame() { + // If we can obtain the memory info, use it instead of the estimate. + if (runtime.CanReportMemoryUsage()) { + total_used_memory = runtime.GetDeviceMemoryUsage(); + } if (total_used_memory > minimum_memory) { RunGarbageCollector(); } @@ -106,7 +118,9 @@ void TextureCache

::TickFrame() { sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); runtime.TickFrame(); + critical_gc = 0; ++frame_tick; + LOG_CRITICAL(Debug, "Current memory: {}", total_used_memory / 1_MiB); } template @@ -1052,6 +1066,11 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + new_image.flags |= ImageFlagBits::GpuModified; + new_image.modification_tick = + std::max(overlap.modification_tick, new_image.modification_tick); + } if (overlap.info.num_samples != new_image.info.num_samples) { LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); } else { @@ -1414,6 +1433,10 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { + RunGarbageCollector(); + critical_gc++; + } image.lru_index = lru_cache.Insert(image_id, frame_tick); ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, @@ -1704,6 +1727,9 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); + if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { + image.flags |= ImageFlagBits::GpuModified; + } } } if (aliased_images.empty()) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 647ca0730..5dabc344b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -59,8 +59,12 @@ class TextureCache { /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; - static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + static constexpr s64 TARGET_THRESHOLD = 4_GiB; + static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10; + static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10; + static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; + static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; + static constexpr size_t GC_EMERGENCY_COUNTS = 2; using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -372,6 +376,7 @@ private: u64 minimum_memory; u64 expected_memory; u64 critical_memory; + size_t critical_gc; SlotVector slot_images; SlotVector slot_map_views; -- cgit v1.2.3 From 5e982a781201a12c4cee6af2908e4732b4c8d945 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 16 Jan 2022 05:05:34 +0100 Subject: Buffer Cache: Tune to the levels of the new GC. 
--- src/video_core/texture_cache/texture_cache_base.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5dabc344b..b1324edf3 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -60,8 +60,6 @@ class TextureCache { static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; static constexpr s64 TARGET_THRESHOLD = 4_GiB; - static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10; - static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; static constexpr size_t GC_EMERGENCY_COUNTS = 2; -- cgit v1.2.3 From 9edbbf2af401f821c0be6a266e65975e3de25fb3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 16 Jan 2022 06:34:43 +0100 Subject: Garbage Collection: Final tuning. --- src/video_core/texture_cache/image_base.h | 10 ++++---- src/video_core/texture_cache/texture_cache.h | 36 ++++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 279f39269..dd0106432 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -29,11 +29,11 @@ enum class ImageFlagBits : u32 { Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 11, ///< This image has aliases and has priority on garbage - ///< collection - GCProtected = 1 << 12, ///< Protected from low-tier GC as they are costy to load back. 
+ BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection + CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back. // Rescaler Rescaled = 1 << 13, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7b6bd8697..efc1c4525 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -53,17 +53,16 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; const s64 min_spacing_critical = device_memory - 1_GiB; - const s64 mem_tresshold = std::min(device_memory, TARGET_THRESHOLD); - const s64 min_vacancy_expected = (6 * mem_tresshold) / 10; - const s64 min_vacancy_critical = (3 * mem_tresshold) / 10; + const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 min_vacancy_expected = (6 * mem_threshold) / 10; + const s64 min_vacancy_critical = (3 * mem_threshold) / 10; expected_memory = static_cast( std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), DEFAULT_EXPECTED_MEMORY)); critical_memory = static_cast( std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); - minimum_memory = static_cast((device_memory - mem_tresshold) / 2); - LOG_CRITICAL(Debug, "Available Memory: {}", device_memory / 1_MiB); + minimum_memory = static_cast((device_memory - mem_threshold) / 2); } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; @@ -73,11 +72,12 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template void TextureCache

::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); - const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + bool high_priority_mode = total_used_memory >= expected_memory; + bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; + size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); + const auto clean_up = [this, &num_iterations, &high_priority_mode, + &aggressive_mode](ImageId image_id) { if (num_iterations == 0) { return true; } @@ -85,7 +85,8 @@ void TextureCache

::RunGarbageCollector() { auto& image = slot_images[image_id]; const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); - if (!high_priority_mode && must_download) { + if (!high_priority_mode && + (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { return false; } if (must_download) { @@ -100,6 +101,18 @@ void TextureCache

::RunGarbageCollector() { } UnregisterImage(image_id); DeleteImage(image_id, image.scale_tick > frame_tick + 5); + if (total_used_memory < critical_memory) { + if (aggressive_mode) { + // Sink the aggressiveness. + num_iterations >>= 2; + aggressive_mode = false; + return false; + } + if (high_priority_mode && total_used_memory < expected_memory) { + num_iterations >>= 1; + high_priority_mode = false; + } + } return false; }; lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); @@ -120,7 +133,6 @@ void TextureCache

::TickFrame() { runtime.TickFrame(); critical_gc = 0; ++frame_tick; - LOG_CRITICAL(Debug, "Current memory: {}", total_used_memory / 1_MiB); } template -- cgit v1.2.3