diff options
| author | 2021-06-13 15:47:54 +0200 | |
|---|---|---|
| committer | 2021-06-16 21:35:02 +0200 | |
| commit | d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7 (patch) | |
| tree | 21aced60b83b8bc16fb8920a4911b4a2070ddd81 /src/video_core/texture_cache | |
| parent | Initial Reaper Setup (diff) | |
| download | yuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.tar.gz yuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.tar.xz yuzu-d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7.zip | |
Reaper: Tune it up to be a smart GC.
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 10 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 84 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 2 |
4 files changed, 108 insertions, 8 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index bd0e7e64e..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -130,6 +130,26 @@ bool ImageBase::IsSafeDownload() const noexcept { | |||
| 130 | return true; | 130 | return true; |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | void ImageBase::CheckBadOverlapState() { | ||
| 134 | if (False(flags & ImageFlagBits::BadOverlap)) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | if (!overlapping_images.empty()) { | ||
| 138 | return; | ||
| 139 | } | ||
| 140 | flags &= ~ImageFlagBits::BadOverlap; | ||
| 141 | } | ||
| 142 | |||
| 143 | void ImageBase::CheckAliasState() { | ||
| 144 | if (False(flags & ImageFlagBits::Alias)) { | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | if (!aliased_images.empty()) { | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | flags &= ~ImageFlagBits::Alias; | ||
| 151 | } | ||
| 152 | |||
| 133 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | 153 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { |
| 134 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | 154 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; |
| 135 | ASSERT(lhs.info.type == rhs.info.type); | 155 | ASSERT(lhs.info.type == rhs.info.type); |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0f69d8a32..40c047ea1 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { | |||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted | 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted |
| 26 | Registered = 1 << 6, ///< True when the image is registered | 26 | Registered = 1 << 6, ///< True when the image is registered |
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked |
| 28 | |||
| 29 | // Garbage Collection Flags | ||
| 30 | BadOverlap = 1 << 8, ///< This image overlaps others but doesn't fit, has higher | ||
| 31 | ///< garbage collection priority | ||
| 32 | Alias = 1 << 9, ///< This image has aliases and has priority on garbage | ||
| 33 | ///< collection | ||
| 28 | }; | 34 | }; |
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 35 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 30 | 36 | ||
| @@ -51,6 +57,9 @@ struct ImageBase { | |||
| 51 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | 57 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; |
| 52 | } | 58 | } |
| 53 | 59 | ||
| 60 | void CheckBadOverlapState(); | ||
| 61 | void CheckAliasState(); | ||
| 62 | |||
| 54 | ImageInfo info; | 63 | ImageInfo info; |
| 55 | 64 | ||
| 56 | u32 guest_size_bytes = 0; | 65 | u32 guest_size_bytes = 0; |
| @@ -74,6 +83,7 @@ struct ImageBase { | |||
| 74 | std::vector<SubresourceBase> slice_subresources; | 83 | std::vector<SubresourceBase> slice_subresources; |
| 75 | 84 | ||
| 76 | std::vector<AliasedImage> aliased_images; | 85 | std::vector<AliasedImage> aliased_images; |
| 86 | std::vector<ImageId> overlapping_images; | ||
| 77 | }; | 87 | }; |
| 78 | 88 | ||
| 79 | struct ImageAllocBase { | 89 | struct ImageAllocBase { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 45ef155b5..cf48f7b02 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -75,6 +75,9 @@ class TextureCache { | |||
| 75 | /// Sampler ID for bugged sampler ids | 75 | /// Sampler ID for bugged sampler ids |
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; |
| 77 | 77 | ||
| 78 | static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL; | ||
| 79 | static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL; | ||
| 80 | |||
| 78 | using Runtime = typename P::Runtime; | 81 | using Runtime = typename P::Runtime; |
| 79 | using Image = typename P::Image; | 82 | using Image = typename P::Image; |
| 80 | using ImageAlloc = typename P::ImageAlloc; | 83 | using ImageAlloc = typename P::ImageAlloc; |
| @@ -333,6 +336,7 @@ private: | |||
| 333 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | 336 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; |
| 334 | 337 | ||
| 335 | bool has_deleted_images = false; | 338 | bool has_deleted_images = false; |
| 339 | u64 total_used_memory = 0; | ||
| 336 | 340 | ||
| 337 | SlotVector<Image> slot_images; | 341 | SlotVector<Image> slot_images; |
| 338 | SlotVector<ImageView> slot_image_views; | 342 | SlotVector<ImageView> slot_image_views; |
| @@ -380,8 +384,10 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 380 | 384 | ||
| 381 | template <class P> | 385 | template <class P> |
| 382 | void TextureCache<P>::TickFrame() { | 386 | void TextureCache<P>::TickFrame() { |
| 383 | static constexpr u64 ticks_to_destroy = 120; | 387 | const bool high_priority_mode = total_used_memory >= expected_memory; |
| 384 | int num_iterations = 32; | 388 | const bool aggressive_mode = total_used_memory >= critical_memory; |
| 389 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 390 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 385 | for (; num_iterations > 0; --num_iterations) { | 391 | for (; num_iterations > 0; --num_iterations) { |
| 386 | if (deletion_iterator == slot_images.end()) { | 392 | if (deletion_iterator == slot_images.end()) { |
| 387 | deletion_iterator = slot_images.begin(); | 393 | deletion_iterator = slot_images.begin(); |
| @@ -390,11 +396,42 @@ void TextureCache<P>::TickFrame() { | |||
| 390 | } | 396 | } |
| 391 | } | 397 | } |
| 392 | const auto [image_id, image] = *deletion_iterator; | 398 | const auto [image_id, image] = *deletion_iterator; |
| 393 | if (image->frame_tick + ticks_to_destroy < frame_tick) { | 399 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); |
| 394 | if (image->IsSafeDownload() && | 400 | if (is_alias && image->aliased_images.size() <= 1) { |
| 395 | std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { | 401 | ++deletion_iterator; |
| 396 | return slot_images[alias.id].modification_tick > image->modification_tick; | 402 | continue; |
| 397 | })) { | 403 | } |
| 404 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 405 | const bool must_download = image->IsSafeDownload(); | ||
| 406 | const u64 ticks_needed = is_bad_overlap ? ticks_to_destroy >> 4 : ticks_to_destroy; | ||
| 407 | const bool should_care = | ||
| 408 | aggressive_mode || is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 409 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 410 | if (is_bad_overlap) { | ||
| 411 | const bool overlap_check = | ||
| 412 | std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) { | ||
| 413 | auto& overlap = slot_images[overlap_id]; | ||
| 414 | return (overlap.frame_tick >= image->frame_tick) && | ||
| 415 | (overlap.modification_tick > image->modification_tick); | ||
| 416 | }); | ||
| 417 | if (!overlap_check) { | ||
| 418 | ++deletion_iterator; | ||
| 419 | continue; | ||
| 420 | } | ||
| 421 | } | ||
| 422 | if (!is_bad_overlap && must_download) { | ||
| 423 | if (is_alias) { | ||
| 424 | const bool alias_check = | ||
| 425 | std::ranges::all_of(image->aliased_images, [&](const AliasedImage& alias) { | ||
| 426 | auto& alias_image = slot_images[alias.id]; | ||
| 427 | return (alias_image.frame_tick >= image->frame_tick) && | ||
| 428 | (alias_image.modification_tick > image->modification_tick); | ||
| 429 | }); | ||
| 430 | if (!alias_check) { | ||
| 431 | ++deletion_iterator; | ||
| 432 | continue; | ||
| 433 | } | ||
| 434 | } | ||
| 398 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | 435 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); |
| 399 | const auto copies = FullDownloadCopies(image->info); | 436 | const auto copies = FullDownloadCopies(image->info); |
| 400 | image->DownloadMemory(map, copies); | 437 | image->DownloadMemory(map, copies); |
| @@ -406,10 +443,12 @@ void TextureCache<P>::TickFrame() { | |||
| 406 | } | 443 | } |
| 407 | UnregisterImage(image_id); | 444 | UnregisterImage(image_id); |
| 408 | DeleteImage(image_id); | 445 | DeleteImage(image_id); |
| 446 | if (is_bad_overlap) { | ||
| 447 | num_iterations++; | ||
| 448 | } | ||
| 409 | } | 449 | } |
| 410 | ++deletion_iterator; | 450 | ++deletion_iterator; |
| 411 | } | 451 | } |
| 412 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | ||
| 413 | sentenced_images.Tick(); | 452 | sentenced_images.Tick(); |
| 414 | sentenced_framebuffers.Tick(); | 453 | sentenced_framebuffers.Tick(); |
| 415 | sentenced_image_view.Tick(); | 454 | sentenced_image_view.Tick(); |
| @@ -989,6 +1028,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 989 | std::vector<ImageId> overlap_ids; | 1028 | std::vector<ImageId> overlap_ids; |
| 990 | std::vector<ImageId> left_aliased_ids; | 1029 | std::vector<ImageId> left_aliased_ids; |
| 991 | std::vector<ImageId> right_aliased_ids; | 1030 | std::vector<ImageId> right_aliased_ids; |
| 1031 | std::vector<ImageId> bad_overlap_ids; | ||
| 992 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | 1032 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { |
| 993 | if (info.type != overlap.info.type) { | 1033 | if (info.type != overlap.info.type) { |
| 994 | return; | 1034 | return; |
| @@ -1014,9 +1054,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1014 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | 1054 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |
| 1015 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | 1055 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { |
| 1016 | left_aliased_ids.push_back(overlap_id); | 1056 | left_aliased_ids.push_back(overlap_id); |
| 1057 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1017 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | 1058 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, |
| 1018 | broken_views, native_bgr)) { | 1059 | broken_views, native_bgr)) { |
| 1019 | right_aliased_ids.push_back(overlap_id); | 1060 | right_aliased_ids.push_back(overlap_id); |
| 1061 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1062 | } else { | ||
| 1063 | bad_overlap_ids.push_back(overlap_id); | ||
| 1064 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 1020 | } | 1065 | } |
| 1021 | }); | 1066 | }); |
| 1022 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1067 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| @@ -1044,10 +1089,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1044 | for (const ImageId aliased_id : right_aliased_ids) { | 1089 | for (const ImageId aliased_id : right_aliased_ids) { |
| 1045 | ImageBase& aliased = slot_images[aliased_id]; | 1090 | ImageBase& aliased = slot_images[aliased_id]; |
| 1046 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | 1091 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); |
| 1092 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1047 | } | 1093 | } |
| 1048 | for (const ImageId aliased_id : left_aliased_ids) { | 1094 | for (const ImageId aliased_id : left_aliased_ids) { |
| 1049 | ImageBase& aliased = slot_images[aliased_id]; | 1095 | ImageBase& aliased = slot_images[aliased_id]; |
| 1050 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | 1096 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); |
| 1097 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1098 | } | ||
| 1099 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 1100 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1101 | aliased.overlapping_images.push_back(new_image_id); | ||
| 1102 | new_image.overlapping_images.push_back(aliased_id); | ||
| 1103 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 1051 | } | 1104 | } |
| 1052 | RegisterImage(new_image_id); | 1105 | RegisterImage(new_image_id); |
| 1053 | return new_image_id; | 1106 | return new_image_id; |
| @@ -1217,6 +1270,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1217 | image.flags |= ImageFlagBits::Registered; | 1270 | image.flags |= ImageFlagBits::Registered; |
| 1218 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | 1271 | ForEachPage(image.cpu_addr, image.guest_size_bytes, |
| 1219 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | 1272 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); |
| 1273 | total_used_memory += | ||
| 1274 | Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); | ||
| 1220 | } | 1275 | } |
| 1221 | 1276 | ||
| 1222 | template <class P> | 1277 | template <class P> |
| @@ -1225,6 +1280,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1225 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | 1280 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1226 | "Trying to unregister an already registered image"); | 1281 | "Trying to unregister an already registered image"); |
| 1227 | image.flags &= ~ImageFlagBits::Registered; | 1282 | image.flags &= ~ImageFlagBits::Registered; |
| 1283 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1284 | total_used_memory -= | ||
| 1285 | Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); | ||
| 1228 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | 1286 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { |
| 1229 | const auto page_it = page_table.find(page); | 1287 | const auto page_it = page_table.find(page); |
| 1230 | if (page_it == page_table.end()) { | 1288 | if (page_it == page_table.end()) { |
| @@ -1298,9 +1356,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { | |||
| 1298 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | 1356 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { |
| 1299 | return other_alias.id == image_id; | 1357 | return other_alias.id == image_id; |
| 1300 | }); | 1358 | }); |
| 1359 | other_image.CheckAliasState(); | ||
| 1301 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | 1360 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", |
| 1302 | num_removed_aliases); | 1361 | num_removed_aliases); |
| 1303 | } | 1362 | } |
| 1363 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1364 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1365 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1366 | other_image.overlapping_images, | ||
| 1367 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1368 | other_image.CheckBadOverlapState(); | ||
| 1369 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", | ||
| 1370 | num_removed_overlaps); | ||
| 1371 | } | ||
| 1304 | for (const ImageViewId image_view_id : image_view_ids) { | 1372 | for (const ImageViewId image_view_id : image_view_ids) { |
| 1305 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | 1373 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); |
| 1306 | slot_image_views.erase(image_view_id); | 1374 | slot_image_views.erase(image_view_id); |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0d3e0804f..9680167ee 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 581 | 581 | ||
| 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 583 | const std::span<const u8> src = input.subspan(host_offset); | 583 | const std::span<const u8> src = input.subspan(host_offset); |
| 584 | gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | ||
| 585 | |||
| 584 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 586 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
| 585 | num_tiles.depth, block.height, block.depth); | 587 | num_tiles.depth, block.height, block.depth); |
| 586 | 588 | ||