summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2021-06-13 15:47:54 +0200
committerGravatar Fernando Sahmkow2021-06-16 21:35:02 +0200
commitd8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7 (patch)
tree21aced60b83b8bc16fb8920a4911b4a2070ddd81
parentInitial Reaper Setup (diff)
downloadyuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.tar.gz
yuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.tar.xz
yuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.zip
Reaper: Tune it up to be a smart GC.
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h27
-rw-r--r--src/video_core/texture_cache/image_base.cpp20
-rw-r--r--src/video_core/texture_cache/image_base.h10
-rw-r--r--src/video_core/texture_cache/texture_cache.h84
-rw-r--r--src/video_core/texture_cache/util.cpp2
5 files changed, 130 insertions, 13 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index ecb7d3dee..b4fa85c5b 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -65,6 +65,9 @@ class BufferCache {
65 65
66 static constexpr BufferId NULL_BUFFER_ID{0}; 66 static constexpr BufferId NULL_BUFFER_ID{0};
67 67
68 static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL;
69 static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL;
70
68 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 71 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
69 72
70 using Runtime = typename P::Runtime; 73 using Runtime = typename P::Runtime;
@@ -327,6 +330,7 @@ private:
327 330
328 typename SlotVector<Buffer>::Iterator deletion_iterator; 331 typename SlotVector<Buffer>::Iterator deletion_iterator;
329 u64 frame_tick = 0; 332 u64 frame_tick = 0;
333 u64 total_used_memory = 0;
330 334
331 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; 335 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
332}; 336};
@@ -346,6 +350,10 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
346 350
347template <class P> 351template <class P>
348void BufferCache<P>::TickFrame() { 352void BufferCache<P>::TickFrame() {
353 SCOPE_EXIT({
354 ++frame_tick;
355 delayed_destruction_ring.Tick();
356 });
349 // Calculate hits and shots and move hit bits to the right 357 // Calculate hits and shots and move hit bits to the right
350 const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); 358 const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
351 const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); 359 const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
@@ -359,8 +367,13 @@ void BufferCache<P>::TickFrame() {
359 const bool skip_preferred = hits * 256 < shots * 251; 367 const bool skip_preferred = hits * 256 < shots * 251;
360 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 368 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
361 369
362 static constexpr u64 ticks_to_destroy = 120; 370 const bool activate_gc = total_used_memory >= expected_memory;
363 int num_iterations = 32; 371 if (!activate_gc) {
372 return;
373 }
374 const bool agressive_gc = total_used_memory >= critical_memory;
375 const u64 ticks_to_destroy = agressive_gc ? 60 : 120;
376 int num_iterations = agressive_gc ? 64 : 32;
364 for (; num_iterations > 0; --num_iterations) { 377 for (; num_iterations > 0; --num_iterations) {
365 if (deletion_iterator == slot_buffers.end()) { 378 if (deletion_iterator == slot_buffers.end()) {
366 deletion_iterator = slot_buffers.begin(); 379 deletion_iterator = slot_buffers.begin();
@@ -375,8 +388,6 @@ void BufferCache<P>::TickFrame() {
375 DeleteBuffer(buffer_id); 388 DeleteBuffer(buffer_id);
376 } 389 }
377 } 390 }
378 delayed_destruction_ring.Tick();
379 ++frame_tick;
380} 391}
381 392
382template <class P> 393template <class P>
@@ -1115,8 +1126,14 @@ template <class P>
1115template <bool insert> 1126template <bool insert>
1116void BufferCache<P>::ChangeRegister(BufferId buffer_id) { 1127void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1117 const Buffer& buffer = slot_buffers[buffer_id]; 1128 const Buffer& buffer = slot_buffers[buffer_id];
1129 const auto size = buffer.SizeBytes();
1130 if (insert) {
1131 total_used_memory += Common::AlignUp(size, 1024);
1132 } else {
1133 total_used_memory -= Common::AlignUp(size, 1024);
1134 }
1118 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1135 const VAddr cpu_addr_begin = buffer.CpuAddr();
1119 const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); 1136 const VAddr cpu_addr_end = cpu_addr_begin + size;
1120 const u64 page_begin = cpu_addr_begin / PAGE_SIZE; 1137 const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
1121 const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); 1138 const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
1122 for (u64 page = page_begin; page != page_end; ++page) { 1139 for (u64 page = page_begin; page != page_end; ++page) {
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index bd0e7e64e..ad69d32d1 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -130,6 +130,26 @@ bool ImageBase::IsSafeDownload() const noexcept {
130 return true; 130 return true;
131} 131}
132 132
133void ImageBase::CheckBadOverlapState() {
134 if (False(flags & ImageFlagBits::BadOverlap)) {
135 return;
136 }
137 if (!overlapping_images.empty()) {
138 return;
139 }
140 flags &= ~ImageFlagBits::BadOverlap;
141}
142
143void ImageBase::CheckAliasState() {
144 if (False(flags & ImageFlagBits::Alias)) {
145 return;
146 }
147 if (!aliased_images.empty()) {
148 return;
149 }
150 flags &= ~ImageFlagBits::Alias;
151}
152
133void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { 153void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
134 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; 154 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
135 ASSERT(lhs.info.type == rhs.info.type); 155 ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 0f69d8a32..40c047ea1 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted 25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
26 Registered = 1 << 6, ///< True when the image is registered 26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked 27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28
29 // Garbage Collection Flags
30 BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
31 ///< garbage collection priority
32 Alias = 1 << 9, ///< This image has aliases and has priority on garbage
33 ///< collection
28}; 34};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 35DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30 36
@@ -51,6 +57,9 @@ struct ImageBase {
51 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; 57 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
52 } 58 }
53 59
60 void CheckBadOverlapState();
61 void CheckAliasState();
62
54 ImageInfo info; 63 ImageInfo info;
55 64
56 u32 guest_size_bytes = 0; 65 u32 guest_size_bytes = 0;
@@ -74,6 +83,7 @@ struct ImageBase {
74 std::vector<SubresourceBase> slice_subresources; 83 std::vector<SubresourceBase> slice_subresources;
75 84
76 std::vector<AliasedImage> aliased_images; 85 std::vector<AliasedImage> aliased_images;
86 std::vector<ImageId> overlapping_images;
77}; 87};
78 88
79struct ImageAllocBase { 89struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 45ef155b5..cf48f7b02 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -75,6 +75,9 @@ class TextureCache {
75 /// Sampler ID for bugged sampler ids 75 /// Sampler ID for bugged sampler ids
76 static constexpr SamplerId NULL_SAMPLER_ID{0}; 76 static constexpr SamplerId NULL_SAMPLER_ID{0};
77 77
78 static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL;
79 static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL;
80
78 using Runtime = typename P::Runtime; 81 using Runtime = typename P::Runtime;
79 using Image = typename P::Image; 82 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc; 83 using ImageAlloc = typename P::ImageAlloc;
@@ -333,6 +336,7 @@ private:
333 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; 336 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
334 337
335 bool has_deleted_images = false; 338 bool has_deleted_images = false;
339 u64 total_used_memory = 0;
336 340
337 SlotVector<Image> slot_images; 341 SlotVector<Image> slot_images;
338 SlotVector<ImageView> slot_image_views; 342 SlotVector<ImageView> slot_image_views;
@@ -380,8 +384,10 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
380 384
381template <class P> 385template <class P>
382void TextureCache<P>::TickFrame() { 386void TextureCache<P>::TickFrame() {
383 static constexpr u64 ticks_to_destroy = 120; 387 const bool high_priority_mode = total_used_memory >= expected_memory;
384 int num_iterations = 32; 388 const bool aggressive_mode = total_used_memory >= critical_memory;
389 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
390 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
385 for (; num_iterations > 0; --num_iterations) { 391 for (; num_iterations > 0; --num_iterations) {
386 if (deletion_iterator == slot_images.end()) { 392 if (deletion_iterator == slot_images.end()) {
387 deletion_iterator = slot_images.begin(); 393 deletion_iterator = slot_images.begin();
@@ -390,11 +396,42 @@ void TextureCache<P>::TickFrame() {
390 } 396 }
391 } 397 }
392 const auto [image_id, image] = *deletion_iterator; 398 const auto [image_id, image] = *deletion_iterator;
393 if (image->frame_tick + ticks_to_destroy < frame_tick) { 399 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
394 if (image->IsSafeDownload() && 400 if (is_alias && image->aliased_images.size() <= 1) {
395 std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { 401 ++deletion_iterator;
396 return slot_images[alias.id].modification_tick > image->modification_tick; 402 continue;
397 })) { 403 }
404 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
405 const bool must_download = image->IsSafeDownload();
406 const u64 ticks_needed = is_bad_overlap ? ticks_to_destroy >> 4 : ticks_to_destroy;
407 const bool should_care =
408 aggressive_mode || is_bad_overlap || is_alias || (high_priority_mode && !must_download);
409 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
410 if (is_bad_overlap) {
411 const bool overlap_check =
412 std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) {
413 auto& overlap = slot_images[overlap_id];
414 return (overlap.frame_tick >= image->frame_tick) &&
415 (overlap.modification_tick > image->modification_tick);
416 });
417 if (!overlap_check) {
418 ++deletion_iterator;
419 continue;
420 }
421 }
422 if (!is_bad_overlap && must_download) {
423 if (is_alias) {
424 const bool alias_check =
425 std::ranges::all_of(image->aliased_images, [&](const AliasedImage& alias) {
426 auto& alias_image = slot_images[alias.id];
427 return (alias_image.frame_tick >= image->frame_tick) &&
428 (alias_image.modification_tick > image->modification_tick);
429 });
430 if (!alias_check) {
431 ++deletion_iterator;
432 continue;
433 }
434 }
398 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); 435 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
399 const auto copies = FullDownloadCopies(image->info); 436 const auto copies = FullDownloadCopies(image->info);
400 image->DownloadMemory(map, copies); 437 image->DownloadMemory(map, copies);
@@ -406,10 +443,12 @@ void TextureCache<P>::TickFrame() {
406 } 443 }
407 UnregisterImage(image_id); 444 UnregisterImage(image_id);
408 DeleteImage(image_id); 445 DeleteImage(image_id);
446 if (is_bad_overlap) {
447 num_iterations++;
448 }
409 } 449 }
410 ++deletion_iterator; 450 ++deletion_iterator;
411 } 451 }
412 // Tick sentenced resources in this order to ensure they are destroyed in the right order
413 sentenced_images.Tick(); 452 sentenced_images.Tick();
414 sentenced_framebuffers.Tick(); 453 sentenced_framebuffers.Tick();
415 sentenced_image_view.Tick(); 454 sentenced_image_view.Tick();
@@ -989,6 +1028,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
989 std::vector<ImageId> overlap_ids; 1028 std::vector<ImageId> overlap_ids;
990 std::vector<ImageId> left_aliased_ids; 1029 std::vector<ImageId> left_aliased_ids;
991 std::vector<ImageId> right_aliased_ids; 1030 std::vector<ImageId> right_aliased_ids;
1031 std::vector<ImageId> bad_overlap_ids;
992 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { 1032 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
993 if (info.type != overlap.info.type) { 1033 if (info.type != overlap.info.type) {
994 return; 1034 return;
@@ -1014,9 +1054,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1014 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 1054 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
1015 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { 1055 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
1016 left_aliased_ids.push_back(overlap_id); 1056 left_aliased_ids.push_back(overlap_id);
1057 overlap.flags |= ImageFlagBits::Alias;
1017 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 1058 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
1018 broken_views, native_bgr)) { 1059 broken_views, native_bgr)) {
1019 right_aliased_ids.push_back(overlap_id); 1060 right_aliased_ids.push_back(overlap_id);
1061 overlap.flags |= ImageFlagBits::Alias;
1062 } else {
1063 bad_overlap_ids.push_back(overlap_id);
1064 overlap.flags |= ImageFlagBits::BadOverlap;
1020 } 1065 }
1021 }); 1066 });
1022 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1067 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
@@ -1044,10 +1089,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1044 for (const ImageId aliased_id : right_aliased_ids) { 1089 for (const ImageId aliased_id : right_aliased_ids) {
1045 ImageBase& aliased = slot_images[aliased_id]; 1090 ImageBase& aliased = slot_images[aliased_id];
1046 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); 1091 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1092 new_image.flags |= ImageFlagBits::Alias;
1047 } 1093 }
1048 for (const ImageId aliased_id : left_aliased_ids) { 1094 for (const ImageId aliased_id : left_aliased_ids) {
1049 ImageBase& aliased = slot_images[aliased_id]; 1095 ImageBase& aliased = slot_images[aliased_id];
1050 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); 1096 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1097 new_image.flags |= ImageFlagBits::Alias;
1098 }
1099 for (const ImageId aliased_id : bad_overlap_ids) {
1100 ImageBase& aliased = slot_images[aliased_id];
1101 aliased.overlapping_images.push_back(new_image_id);
1102 new_image.overlapping_images.push_back(aliased_id);
1103 new_image.flags |= ImageFlagBits::BadOverlap;
1051 } 1104 }
1052 RegisterImage(new_image_id); 1105 RegisterImage(new_image_id);
1053 return new_image_id; 1106 return new_image_id;
@@ -1217,6 +1270,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1217 image.flags |= ImageFlagBits::Registered; 1270 image.flags |= ImageFlagBits::Registered;
1218 ForEachPage(image.cpu_addr, image.guest_size_bytes, 1271 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1219 [this, image_id](u64 page) { page_table[page].push_back(image_id); }); 1272 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1273 total_used_memory +=
1274 Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
1220} 1275}
1221 1276
1222template <class P> 1277template <class P>
@@ -1225,6 +1280,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1225 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), 1280 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1226 "Trying to unregister an already registered image"); 1281 "Trying to unregister an already registered image");
1227 image.flags &= ~ImageFlagBits::Registered; 1282 image.flags &= ~ImageFlagBits::Registered;
1283 image.flags &= ~ImageFlagBits::BadOverlap;
1284 total_used_memory -=
1285 Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024);
1228 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { 1286 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1229 const auto page_it = page_table.find(page); 1287 const auto page_it = page_table.find(page);
1230 if (page_it == page_table.end()) { 1288 if (page_it == page_table.end()) {
@@ -1298,9 +1356,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
1298 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { 1356 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1299 return other_alias.id == image_id; 1357 return other_alias.id == image_id;
1300 }); 1358 });
1359 other_image.CheckAliasState();
1301 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", 1360 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1302 num_removed_aliases); 1361 num_removed_aliases);
1303 } 1362 }
1363 for (const ImageId overlap_id : image.overlapping_images) {
1364 ImageBase& other_image = slot_images[overlap_id];
1365 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1366 other_image.overlapping_images,
1367 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1368 other_image.CheckBadOverlapState();
1369 ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
1370 num_removed_overlaps);
1371 }
1304 for (const ImageViewId image_view_id : image_view_ids) { 1372 for (const ImageViewId image_view_id : image_view_ids) {
1305 sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); 1373 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1306 slot_image_views.erase(image_view_id); 1374 slot_image_views.erase(image_view_id);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0d3e0804f..9680167ee 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
581 581
582 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 582 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
583 const std::span<const u8> src = input.subspan(host_offset); 583 const std::span<const u8> src = input.subspan(host_offset);
584 gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
585
584 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 586 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
585 num_tiles.depth, block.height, block.depth); 587 num_tiles.depth, block.height, block.depth);
586 588