diff options
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 12 | ||||
| -rw-r--r-- | src/video_core/texture_cache/slot_vector.h | 70 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 151 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 2 |
5 files changed, 259 insertions, 13 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 9914926b3..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie | |||
| 113 | image_view_ids.push_back(image_view_id); | 113 | image_view_ids.push_back(image_view_id); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | bool ImageBase::IsSafeDownload() const noexcept { | ||
| 117 | // Skip images that were not modified from the GPU | ||
| 118 | if (False(flags & ImageFlagBits::GpuModified)) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | // Skip images that .are. modified from the CPU | ||
| 122 | // We don't want to write sensitive data from the guest | ||
| 123 | if (True(flags & ImageFlagBits::CpuModified)) { | ||
| 124 | return false; | ||
| 125 | } | ||
| 126 | if (info.num_samples > 1) { | ||
| 127 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | return true; | ||
| 131 | } | ||
| 132 | |||
| 133 | void ImageBase::CheckBadOverlapState() { | ||
| 134 | if (False(flags & ImageFlagBits::BadOverlap)) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | if (!overlapping_images.empty()) { | ||
| 138 | return; | ||
| 139 | } | ||
| 140 | flags &= ~ImageFlagBits::BadOverlap; | ||
| 141 | } | ||
| 142 | |||
| 143 | void ImageBase::CheckAliasState() { | ||
| 144 | if (False(flags & ImageFlagBits::Alias)) { | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | if (!aliased_images.empty()) { | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | flags &= ~ImageFlagBits::Alias; | ||
| 151 | } | ||
| 152 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | 153 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { |
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | 154 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; |
| 118 | ASSERT(lhs.info.type == rhs.info.type); | 155 | ASSERT(lhs.info.type == rhs.info.type); |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index b7f3b7e43..e326cab71 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { | |||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted | 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted |
| 26 | Registered = 1 << 6, ///< True when the image is registered | 26 | Registered = 1 << 6, ///< True when the image is registered |
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked |
| 28 | |||
| 29 | // Garbage Collection Flags | ||
| 30 | BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher | ||
| 31 | ///< garbage collection priority | ||
| 32 | Alias = 1 << 9, ///< This image has aliases and has priority on garbage | ||
| 33 | ///< collection | ||
| 28 | }; | 34 | }; |
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 35 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 30 | 36 | ||
| @@ -44,11 +50,16 @@ struct ImageBase { | |||
| 44 | 50 | ||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | 51 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); |
| 46 | 52 | ||
| 53 | [[nodiscard]] bool IsSafeDownload() const noexcept; | ||
| 54 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | 55 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { |
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | 56 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; |
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | 57 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; |
| 50 | } | 58 | } |
| 51 | 59 | ||
| 60 | void CheckBadOverlapState(); | ||
| 61 | void CheckAliasState(); | ||
| 62 | |||
| 52 | ImageInfo info; | 63 | ImageInfo info; |
| 53 | 64 | ||
| 54 | u32 guest_size_bytes = 0; | 65 | u32 guest_size_bytes = 0; |
| @@ -72,6 +83,7 @@ struct ImageBase { | |||
| 72 | std::vector<SubresourceBase> slice_subresources; | 83 | std::vector<SubresourceBase> slice_subresources; |
| 73 | 84 | ||
| 74 | std::vector<AliasedImage> aliased_images; | 85 | std::vector<AliasedImage> aliased_images; |
| 86 | std::vector<ImageId> overlapping_images; | ||
| 75 | }; | 87 | }; |
| 76 | 88 | ||
| 77 | struct ImageAllocBase { | 89 | struct ImageAllocBase { |
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index eae3be6ea..6180b8c0e 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bit> | ||
| 8 | #include <concepts> | 9 | #include <concepts> |
| 9 | #include <numeric> | 10 | #include <numeric> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| @@ -32,6 +33,60 @@ template <class T> | |||
| 32 | requires std::is_nothrow_move_assignable_v<T>&& | 33 | requires std::is_nothrow_move_assignable_v<T>&& |
| 33 | std::is_nothrow_move_constructible_v<T> class SlotVector { | 34 | std::is_nothrow_move_constructible_v<T> class SlotVector { |
| 34 | public: | 35 | public: |
| 36 | class Iterator { | ||
| 37 | friend SlotVector<T>; | ||
| 38 | |||
| 39 | public: | ||
| 40 | constexpr Iterator() = default; | ||
| 41 | |||
| 42 | Iterator& operator++() noexcept { | ||
| 43 | const u64* const bitset = slot_vector->stored_bitset.data(); | ||
| 44 | const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; | ||
| 45 | if (id.index < size) { | ||
| 46 | do { | ||
| 47 | ++id.index; | ||
| 48 | } while (id.index < size && !IsValid(bitset)); | ||
| 49 | if (id.index == size) { | ||
| 50 | id.index = SlotId::INVALID_INDEX; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | Iterator operator++(int) noexcept { | ||
| 57 | const Iterator copy{*this}; | ||
| 58 | ++*this; | ||
| 59 | return copy; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool operator==(const Iterator& other) const noexcept { | ||
| 63 | return id.index == other.id.index; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool operator!=(const Iterator& other) const noexcept { | ||
| 67 | return id.index != other.id.index; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::pair<SlotId, T*> operator*() const noexcept { | ||
| 71 | return {id, std::addressof((*slot_vector)[id])}; | ||
| 72 | } | ||
| 73 | |||
| 74 | T* operator->() const noexcept { | ||
| 75 | return std::addressof((*slot_vector)[id]); | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept | ||
| 80 | : slot_vector{slot_vector_}, id{id_} {} | ||
| 81 | |||
| 82 | bool IsValid(const u64* bitset) const noexcept { | ||
| 83 | return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | SlotVector<T>* slot_vector; | ||
| 87 | SlotId id; | ||
| 88 | }; | ||
| 89 | |||
| 35 | ~SlotVector() noexcept { | 90 | ~SlotVector() noexcept { |
| 36 | size_t index = 0; | 91 | size_t index = 0; |
| 37 | for (u64 bits : stored_bitset) { | 92 | for (u64 bits : stored_bitset) { |
| @@ -70,6 +125,20 @@ public: | |||
| 70 | ResetStorageBit(id.index); | 125 | ResetStorageBit(id.index); |
| 71 | } | 126 | } |
| 72 | 127 | ||
| 128 | [[nodiscard]] Iterator begin() noexcept { | ||
| 129 | const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); | ||
| 130 | if (it == stored_bitset.end()) { | ||
| 131 | return end(); | ||
| 132 | } | ||
| 133 | const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin())); | ||
| 134 | const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))}; | ||
| 135 | return Iterator(this, first_id); | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] Iterator end() noexcept { | ||
| 139 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); | ||
| 140 | } | ||
| 141 | |||
| 73 | private: | 142 | private: |
| 74 | struct NonTrivialDummy { | 143 | struct NonTrivialDummy { |
| 75 | NonTrivialDummy() noexcept {} | 144 | NonTrivialDummy() noexcept {} |
| @@ -140,7 +209,6 @@ private: | |||
| 140 | 209 | ||
| 141 | Entry* values = nullptr; | 210 | Entry* values = nullptr; |
| 142 | size_t values_capacity = 0; | 211 | size_t values_capacity = 0; |
| 143 | size_t values_size = 0; | ||
| 144 | 212 | ||
| 145 | std::vector<u64> stored_bitset; | 213 | std::vector<u64> stored_bitset; |
| 146 | std::vector<u32> free_list; | 214 | std::vector<u32> free_list; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 59b7c678b..e7f8478b4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -20,8 +20,10 @@ | |||
| 20 | 20 | ||
| 21 | #include "common/alignment.h" | 21 | #include "common/alignment.h" |
| 22 | #include "common/common_funcs.h" | 22 | #include "common/common_funcs.h" |
| 23 | #include "common/common_sizes.h" | ||
| 23 | #include "common/common_types.h" | 24 | #include "common/common_types.h" |
| 24 | #include "common/logging/log.h" | 25 | #include "common/logging/log.h" |
| 26 | #include "common/settings.h" | ||
| 25 | #include "video_core/compatible_formats.h" | 27 | #include "video_core/compatible_formats.h" |
| 26 | #include "video_core/delayed_destruction_ring.h" | 28 | #include "video_core/delayed_destruction_ring.h" |
| 27 | #include "video_core/dirty_flags.h" | 29 | #include "video_core/dirty_flags.h" |
| @@ -69,12 +71,17 @@ class TextureCache { | |||
| 69 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | 71 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; |
| 70 | /// True when some copies have to be emulated | 72 | /// True when some copies have to be emulated |
| 71 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | 73 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; |
| 74 | /// True when the API can provide info about the memory of the device. | ||
| 75 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||
| 72 | 76 | ||
| 73 | /// Image view ID for null descriptors | 77 | /// Image view ID for null descriptors |
| 74 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | 78 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; |
| 75 | /// Sampler ID for bugged sampler ids | 79 | /// Sampler ID for bugged sampler ids |
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | 80 | static constexpr SamplerId NULL_SAMPLER_ID{0}; |
| 77 | 81 | ||
| 82 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; | ||
| 83 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; | ||
| 84 | |||
| 78 | using Runtime = typename P::Runtime; | 85 | using Runtime = typename P::Runtime; |
| 79 | using Image = typename P::Image; | 86 | using Image = typename P::Image; |
| 80 | using ImageAlloc = typename P::ImageAlloc; | 87 | using ImageAlloc = typename P::ImageAlloc; |
| @@ -103,6 +110,9 @@ public: | |||
| 103 | /// Notify the cache that a new frame has been queued | 110 | /// Notify the cache that a new frame has been queued |
| 104 | void TickFrame(); | 111 | void TickFrame(); |
| 105 | 112 | ||
| 113 | /// Runs the Garbage Collector. | ||
| 114 | void RunGarbageCollector(); | ||
| 115 | |||
| 106 | /// Return a constant reference to the given image view id | 116 | /// Return a constant reference to the given image view id |
| 107 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | 117 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 108 | 118 | ||
| @@ -333,6 +343,10 @@ private: | |||
| 333 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | 343 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; |
| 334 | 344 | ||
| 335 | bool has_deleted_images = false; | 345 | bool has_deleted_images = false; |
| 346 | u64 total_used_memory = 0; | ||
| 347 | u64 minimum_memory; | ||
| 348 | u64 expected_memory; | ||
| 349 | u64 critical_memory; | ||
| 336 | 350 | ||
| 337 | SlotVector<Image> slot_images; | 351 | SlotVector<Image> slot_images; |
| 338 | SlotVector<ImageView> slot_image_views; | 352 | SlotVector<ImageView> slot_image_views; |
| @@ -353,6 +367,7 @@ private: | |||
| 353 | 367 | ||
| 354 | u64 modification_tick = 0; | 368 | u64 modification_tick = 0; |
| 355 | u64 frame_tick = 0; | 369 | u64 frame_tick = 0; |
| 370 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 356 | }; | 371 | }; |
| 357 | 372 | ||
| 358 | template <class P> | 373 | template <class P> |
| @@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 373 | // This way the null resource becomes a compile time constant | 388 | // This way the null resource becomes a compile time constant |
| 374 | void(slot_image_views.insert(runtime, NullImageParams{})); | 389 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 375 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 390 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 391 | |||
| 392 | deletion_iterator = slot_images.begin(); | ||
| 393 | |||
| 394 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||
| 395 | const auto device_memory = runtime.GetDeviceLocalMemory(); | ||
| 396 | const u64 possible_expected_memory = (device_memory * 3) / 10; | ||
| 397 | const u64 possible_critical_memory = (device_memory * 6) / 10; | ||
| 398 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||
| 399 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||
| 400 | minimum_memory = 0; | ||
| 401 | } else { | ||
| 402 | // On OGL we can be more conservative, as the driver takes care. | ||
| 403 | expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB; | ||
| 404 | critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB; | ||
| 405 | minimum_memory = expected_memory; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | |||
| 409 | template <class P> | ||
| 410 | void TextureCache<P>::RunGarbageCollector() { | ||
| 411 | const bool high_priority_mode = total_used_memory >= expected_memory; | ||
| 412 | const bool aggressive_mode = total_used_memory >= critical_memory; | ||
| 413 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 414 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 415 | for (; num_iterations > 0; --num_iterations) { | ||
| 416 | if (deletion_iterator == slot_images.end()) { | ||
| 417 | deletion_iterator = slot_images.begin(); | ||
| 418 | if (deletion_iterator == slot_images.end()) { | ||
| 419 | break; | ||
| 420 | } | ||
| 421 | } | ||
| 422 | auto [image_id, image_tmp] = *deletion_iterator; | ||
| 423 | Image* image = image_tmp; // fix clang error. | ||
| 424 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | ||
| 425 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 426 | const bool must_download = image->IsSafeDownload(); | ||
| 427 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 428 | const u64 ticks_needed = | ||
| 429 | is_bad_overlap | ||
| 430 | ? ticks_to_destroy >> 4 | ||
| 431 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 432 | should_care |= aggressive_mode; | ||
| 433 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 434 | if (is_bad_overlap) { | ||
| 435 | const bool overlap_check = std::ranges::all_of( | ||
| 436 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 437 | auto& overlap = slot_images[overlap_id]; | ||
| 438 | return overlap.frame_tick >= image->frame_tick; | ||
| 439 | }); | ||
| 440 | if (!overlap_check) { | ||
| 441 | ++deletion_iterator; | ||
| 442 | continue; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | if (!is_bad_overlap && must_download) { | ||
| 446 | const bool alias_check = std::ranges::none_of( | ||
| 447 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 448 | auto& alias_image = slot_images[alias.id]; | ||
| 449 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 450 | (alias_image.modification_tick < image->modification_tick); | ||
| 451 | }); | ||
| 452 | |||
| 453 | if (alias_check) { | ||
| 454 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 455 | const auto copies = FullDownloadCopies(image->info); | ||
| 456 | image->DownloadMemory(map, copies); | ||
| 457 | runtime.Finish(); | ||
| 458 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 462 | UntrackImage(*image); | ||
| 463 | } | ||
| 464 | UnregisterImage(image_id); | ||
| 465 | DeleteImage(image_id); | ||
| 466 | if (is_bad_overlap) { | ||
| 467 | ++num_iterations; | ||
| 468 | } | ||
| 469 | } | ||
| 470 | ++deletion_iterator; | ||
| 471 | } | ||
| 376 | } | 472 | } |
| 377 | 473 | ||
| 378 | template <class P> | 474 | template <class P> |
| 379 | void TextureCache<P>::TickFrame() { | 475 | void TextureCache<P>::TickFrame() { |
| 380 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | 476 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { |
| 477 | RunGarbageCollector(); | ||
| 478 | } | ||
| 381 | sentenced_images.Tick(); | 479 | sentenced_images.Tick(); |
| 382 | sentenced_framebuffers.Tick(); | 480 | sentenced_framebuffers.Tick(); |
| 383 | sentenced_image_view.Tick(); | 481 | sentenced_image_view.Tick(); |
| @@ -568,17 +666,7 @@ template <class P> | |||
| 568 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 666 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 569 | std::vector<ImageId> images; | 667 | std::vector<ImageId> images; |
| 570 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | 668 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { |
| 571 | // Skip images that were not modified from the GPU | 669 | if (!image.IsSafeDownload()) { |
| 572 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 573 | return; | ||
| 574 | } | ||
| 575 | // Skip images that .are. modified from the CPU | ||
| 576 | // We don't want to write sensitive data from the guest | ||
| 577 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 578 | return; | ||
| 579 | } | ||
| 580 | if (image.info.num_samples > 1) { | ||
| 581 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 582 | return; | 670 | return; |
| 583 | } | 671 | } |
| 584 | image.flags &= ~ImageFlagBits::GpuModified; | 672 | image.flags &= ~ImageFlagBits::GpuModified; |
| @@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 967 | std::vector<ImageId> overlap_ids; | 1055 | std::vector<ImageId> overlap_ids; |
| 968 | std::vector<ImageId> left_aliased_ids; | 1056 | std::vector<ImageId> left_aliased_ids; |
| 969 | std::vector<ImageId> right_aliased_ids; | 1057 | std::vector<ImageId> right_aliased_ids; |
| 1058 | std::vector<ImageId> bad_overlap_ids; | ||
| 970 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | 1059 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { |
| 971 | if (info.type != overlap.info.type) { | 1060 | if (info.type != overlap.info.type) { |
| 972 | return; | 1061 | return; |
| @@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 992 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | 1081 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |
| 993 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | 1082 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { |
| 994 | left_aliased_ids.push_back(overlap_id); | 1083 | left_aliased_ids.push_back(overlap_id); |
| 1084 | overlap.flags |= ImageFlagBits::Alias; | ||
| 995 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | 1085 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, |
| 996 | broken_views, native_bgr)) { | 1086 | broken_views, native_bgr)) { |
| 997 | right_aliased_ids.push_back(overlap_id); | 1087 | right_aliased_ids.push_back(overlap_id); |
| 1088 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1089 | } else { | ||
| 1090 | bad_overlap_ids.push_back(overlap_id); | ||
| 1091 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 998 | } | 1092 | } |
| 999 | }); | 1093 | }); |
| 1000 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1094 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| @@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1022 | for (const ImageId aliased_id : right_aliased_ids) { | 1116 | for (const ImageId aliased_id : right_aliased_ids) { |
| 1023 | ImageBase& aliased = slot_images[aliased_id]; | 1117 | ImageBase& aliased = slot_images[aliased_id]; |
| 1024 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | 1118 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); |
| 1119 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1025 | } | 1120 | } |
| 1026 | for (const ImageId aliased_id : left_aliased_ids) { | 1121 | for (const ImageId aliased_id : left_aliased_ids) { |
| 1027 | ImageBase& aliased = slot_images[aliased_id]; | 1122 | ImageBase& aliased = slot_images[aliased_id]; |
| 1028 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | 1123 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); |
| 1124 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1125 | } | ||
| 1126 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 1127 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1128 | aliased.overlapping_images.push_back(new_image_id); | ||
| 1129 | new_image.overlapping_images.push_back(aliased_id); | ||
| 1130 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 1029 | } | 1131 | } |
| 1030 | RegisterImage(new_image_id); | 1132 | RegisterImage(new_image_id); |
| 1031 | return new_image_id; | 1133 | return new_image_id; |
| @@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1195 | image.flags |= ImageFlagBits::Registered; | 1297 | image.flags |= ImageFlagBits::Registered; |
| 1196 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | 1298 | ForEachPage(image.cpu_addr, image.guest_size_bytes, |
| 1197 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | 1299 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); |
| 1300 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1301 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1302 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1303 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1304 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1305 | } | ||
| 1306 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1198 | } | 1307 | } |
| 1199 | 1308 | ||
| 1200 | template <class P> | 1309 | template <class P> |
| @@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1203 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | 1312 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1204 | "Trying to unregister an already registered image"); | 1313 | "Trying to unregister an already registered image"); |
| 1205 | image.flags &= ~ImageFlagBits::Registered; | 1314 | image.flags &= ~ImageFlagBits::Registered; |
| 1315 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1316 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1317 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1318 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1319 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1320 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1321 | } | ||
| 1322 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1206 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | 1323 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { |
| 1207 | const auto page_it = page_table.find(page); | 1324 | const auto page_it = page_table.find(page); |
| 1208 | if (page_it == page_table.end()) { | 1325 | if (page_it == page_table.end()) { |
| @@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { | |||
| 1276 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | 1393 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { |
| 1277 | return other_alias.id == image_id; | 1394 | return other_alias.id == image_id; |
| 1278 | }); | 1395 | }); |
| 1396 | other_image.CheckAliasState(); | ||
| 1279 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | 1397 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", |
| 1280 | num_removed_aliases); | 1398 | num_removed_aliases); |
| 1281 | } | 1399 | } |
| 1400 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1401 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1402 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1403 | other_image.overlapping_images, | ||
| 1404 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1405 | other_image.CheckBadOverlapState(); | ||
| 1406 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", | ||
| 1407 | num_removed_overlaps); | ||
| 1408 | } | ||
| 1282 | for (const ImageViewId image_view_id : image_view_ids) { | 1409 | for (const ImageViewId image_view_id : image_view_ids) { |
| 1283 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | 1410 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); |
| 1284 | slot_image_views.erase(image_view_id); | 1411 | slot_image_views.erase(image_view_id); |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 6835fd747..4efe042b6 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 581 | 581 | ||
| 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 583 | const std::span<const u8> src = input.subspan(host_offset); | 583 | const std::span<const u8> src = input.subspan(host_offset); |
| 584 | gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | ||
| 585 | |||
| 584 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 586 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
| 585 | num_tiles.depth, block.height, block.depth); | 587 | num_tiles.depth, block.height, block.depth); |
| 586 | 588 | ||