| -rw-r--r-- | src/video_core/memory_manager.cpp | 106 |
| -rw-r--r-- | src/video_core/memory_manager.h | 20 |
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 5 |
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 310 |
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 31 |
| -rw-r--r-- | src/video_core/texture_cache/util.h | 2 |
6 files changed, 343 insertions, 131 deletions
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3589c72ea..e66af4443 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
| @@ -69,11 +69,17 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 69 | } else { | 69 | } else { |
| 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); | 70 | UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); |
| 71 | } | 71 | } |
| 72 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | ||
| 73 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); | ||
| 74 | ASSERT(cpu_addr); | ||
| 75 | 72 | ||
| 76 | rasterizer->UnmapMemory(*cpu_addr, size); | 73 | const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); |
| 74 | |||
| 75 | for (const auto& map : submapped_ranges) { | ||
| 76 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | ||
| 77 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first); | ||
| 78 | ASSERT(cpu_addr); | ||
| 79 | |||
| 80 | rasterizer->UnmapMemory(*cpu_addr, map.second); | ||
| 81 | } | ||
| 82 | |||
| 77 | 83 | ||
| 78 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); | 84 | UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); |
| 79 | } | 85 | } |
| @@ -128,7 +134,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s | |||
| 128 | //// Lock the new page | 134 | //// Lock the new page |
| 129 | // TryLockPage(page_entry, size); | 135 | // TryLockPage(page_entry, size); |
| 130 | auto& current_page = page_table[PageEntryIndex(gpu_addr)]; | 136 | auto& current_page = page_table[PageEntryIndex(gpu_addr)]; |
| 131 | if (current_page.IsValid() != page_entry.IsValid() || | 137 | |
| 138 | if ((!current_page.IsValid() && page_entry.IsValid()) || | ||
| 132 | current_page.ToAddress() != page_entry.ToAddress()) { | 139 | current_page.ToAddress() != page_entry.ToAddress()) { |
| 133 | rasterizer->ModifyGPUMemory(gpu_addr, size); | 140 | rasterizer->ModifyGPUMemory(gpu_addr, size); |
| 134 | } | 141 | } |
| @@ -179,6 +186,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | |||
| 179 | return page_entry.ToAddress() + (gpu_addr & page_mask); | 186 | return page_entry.ToAddress() + (gpu_addr & page_mask); |
| 180 | } | 187 | } |
| 181 | 188 | ||
| 189 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | ||
| 190 | size_t page_index{addr >> page_bits}; | ||
| 191 | const size_t page_last{(addr + size + page_size - 1) >> page_bits}; | ||
| 192 | while (page_index < page_last) { | ||
| 193 | const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; | ||
| 194 | if (page_addr && *page_addr != 0) { | ||
| 195 | return page_addr; | ||
| 196 | } | ||
| 197 | ++page_index; | ||
| 198 | } | ||
| 199 | return std::nullopt; | ||
| 200 | } | ||
| 201 | |||
| 182 | template <typename T> | 202 | template <typename T> |
| 183 | T MemoryManager::Read(GPUVAddr addr) const { | 203 | T MemoryManager::Read(GPUVAddr addr) const { |
| 184 | if (auto page_pointer{GetPointer(addr)}; page_pointer) { | 204 | if (auto page_pointer{GetPointer(addr)}; page_pointer) { |
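The new sized overload of GpuToCpuAddress scans the range one page at a time and returns the CPU address of the first mapped page, so a lookup no longer fails merely because the base page of a sparse allocation is a hole. A hedged usage sketch; the variable names are hypothetical:

// A sparse image whose first page is unmapped but whose later pages are not.
const GPUVAddr image_addr = sparse_texture_base;      // hypothetical base address
ASSERT(!memory_manager.GpuToCpuAddress(image_addr));  // base page is a hole
const std::optional<VAddr> cpu_addr =
    memory_manager.GpuToCpuAddress(image_addr, guest_size_bytes);
// cpu_addr now holds the CPU address of the first mapped page in the range, or
// std::nullopt when every page in [image_addr, image_addr + guest_size_bytes)
// is unmapped.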
| @@ -375,4 +395,80 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | |||
| 375 | return page <= Core::Memory::PAGE_SIZE; | 395 | return page <= Core::Memory::PAGE_SIZE; |
| 376 | } | 396 | } |
| 377 | 397 | ||
| 398 | bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { | ||
| 399 | size_t page_index{gpu_addr >> page_bits}; | ||
| 400 | const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; | ||
| 401 | std::optional<VAddr> old_page_addr{}; | ||
| 402 | while (page_index != page_last) { | ||
| 403 | const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; | ||
| 404 | if (!page_addr || *page_addr == 0) { | ||
| 405 | return false; | ||
| 406 | } | ||
| 407 | if (old_page_addr) { | ||
| 408 | if (*old_page_addr + page_size != *page_addr) { | ||
| 409 | return false; | ||
| 410 | } | ||
| 411 | } | ||
| 412 | old_page_addr = page_addr; | ||
| 413 | ++page_index; | ||
| 414 | } | ||
| 415 | return true; | ||
| 416 | } | ||
| 417 | |||
| 418 | bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { | ||
| 419 | size_t page_index{gpu_addr >> page_bits}; | ||
| 420 | const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; | ||
| 421 | while (page_index < page_last) { | ||
| 422 | if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { | ||
| 423 | return false; | ||
| 424 | } | ||
| 425 | ++page_index; | ||
| 426 | } | ||
| 427 | return true; | ||
| 428 | } | ||
| 429 | |||
| 430 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | ||
| 431 | GPUVAddr gpu_addr, std::size_t size) const { | ||
| 432 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | ||
| 433 | size_t page_index{gpu_addr >> page_bits}; | ||
| 434 | size_t remaining_size{size}; | ||
| 435 | size_t page_offset{gpu_addr & page_mask}; | ||
| 436 | std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; | ||
| 437 | std::optional<VAddr> old_page_addr{}; | ||
| 438 | const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) { | ||
| 439 | if (!last_segment) { | ||
| 440 | GPUVAddr new_base_addr = page_index << page_bits; | ||
| 441 | last_segment = {new_base_addr, bytes}; | ||
| 442 | } else { | ||
| 443 | last_segment->second += bytes; | ||
| 444 | } | ||
| 445 | }; | ||
| 446 | const auto split = [this, &last_segment, &result] { | ||
| 447 | if (last_segment) { | ||
| 448 | result.push_back(*last_segment); | ||
| 449 | last_segment = std::nullopt; | ||
| 450 | } | ||
| 451 | }; | ||
| 452 | while (remaining_size > 0) { | ||
| 453 | const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; | ||
| 454 | const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; | ||
| 455 | if (!page_addr) { | ||
| 456 | split(); | ||
| 457 | } else if (old_page_addr) { | ||
| 458 | if (*old_page_addr + page_size != *page_addr) { | ||
| 459 | split(); | ||
| 460 | } | ||
| 461 | extend_size(num_bytes); | ||
| 462 | } else { | ||
| 463 | extend_size(num_bytes); | ||
| 464 | } | ||
| 465 | old_page_addr = page_addr; | ||
| 466 | ++page_index; | ||
| 467 | page_offset = 0; | ||
| 468 | remaining_size -= num_bytes; | ||
| 469 | } | ||
| 470 | split(); | ||
| 471 | return result; | ||
| 472 | } | ||
| 473 | |||
| 378 | } // namespace Tegra | 474 | } // namespace Tegra
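GetSubmappedRange is the workhorse of the sparse support: it splits a GPU range into maximal segments that are both mapped and contiguous in CPU address space. The following standalone toy reproduces the splitting logic against a hand-built four-page table, where page 2 is a hole and page 3 maps to a non-adjacent CPU address; it is a sketch of the algorithm, not the emulator's code:

#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::uint64_t page_size = 0x1000;

int main() {
    // Page index -> CPU address; std::nullopt models an unmapped hole.
    const std::vector<std::optional<VAddr>> pages{
        VAddr{0x10000}, VAddr{0x11000}, std::nullopt, VAddr{0x40000}};

    std::vector<std::pair<std::uint64_t, std::uint64_t>> result; // {gpu_base, size}
    std::optional<std::pair<std::uint64_t, std::uint64_t>> segment;
    std::optional<VAddr> old_page_addr;
    for (std::uint64_t i = 0; i < pages.size(); ++i) {
        const std::optional<VAddr>& cpu = pages[i];
        const bool contiguous =
            cpu && (!old_page_addr || *old_page_addr + page_size == *cpu);
        if (!cpu || !contiguous) { // hole or discontinuity: close the open segment
            if (segment) {
                result.push_back(*segment);
                segment.reset();
            }
        }
        if (cpu) { // mapped page: start a new segment or extend the current one
            if (!segment) {
                segment.emplace(i * page_size, 0);
            }
            segment->second += page_size;
        }
        old_page_addr = cpu; // remember the previous page for the adjacency check
    }
    if (segment) {
        result.push_back(*segment);
    }
    for (const auto& [base, size] : result) {
        std::printf("segment gpu=0x%llx size=0x%llx\n",
                    static_cast<unsigned long long>(base),
                    static_cast<unsigned long long>(size));
    }
    // Prints two segments: gpu=0x0 size=0x2000 and gpu=0x3000 size=0x1000.
    return 0;
}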
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..305c4b1f1 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
| @@ -76,6 +76,8 @@ public: | |||
| 76 | 76 | ||
| 77 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | 77 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; |
| 78 | 78 | ||
| 79 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | ||
| 80 | |||
| 79 | template <typename T> | 81 | template <typename T> |
| 80 | [[nodiscard]] T Read(GPUVAddr addr) const; | 82 | [[nodiscard]] T Read(GPUVAddr addr) const; |
| 81 | 83 | ||
| @@ -116,6 +118,24 @@ public: | |||
| 116 | */ | 118 | */ |
| 117 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; | 119 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 118 | 120 | ||
| 121 | /** | ||
| 122 | * IsContinousRange checks if a gpu region is mapped by a single range of cpu addresses. | ||
| 123 | */ | ||
| 124 | [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const; | ||
| 125 | |||
| 126 | /** | ||
| 127 | * IsFullyMappedRange checks if a gpu region is mapped entirely. | ||
| 128 | */ | ||
| 129 | [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; | ||
| 130 | |||
| 131 | /** | ||
| 132 | * GetSubmappedRange returns a vector of all the GPU subranges within the region that are | ||
| 133 | * mapped to CPU addresses. If the region is fully continuous, a single pair is returned; | ||
| 134 | * if it is entirely unmapped, an empty vector is returned. | ||
| 135 | */ | ||
| 136 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, | ||
| 137 | std::size_t size) const; | ||
| 138 | |||
| 119 | [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); | 139 | [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); |
| 120 | [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); | 140 | [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); |
| 121 | [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); | 141 | [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); |
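Taken together, the new declarations give the texture cache a small vocabulary for querying sparse ranges. A hedged contract sketch, assuming a page-aligned GPU address whose first three pages map to consecutive CPU addresses while the fourth page is a hole:

void SparseQueryContracts(Tegra::MemoryManager& mm, GPUVAddr base, std::size_t page) {
    ASSERT(mm.IsContinousRange(base, 3 * page));     // one contiguous CPU range
    ASSERT(!mm.IsContinousRange(base, 4 * page));    // the hole breaks continuity
    ASSERT(mm.IsFullyMappedRange(base, 3 * page));   // every page entry is valid
    ASSERT(!mm.IsFullyMappedRange(base, 4 * page));  // the fourth page is unmapped
    const auto segments = mm.GetSubmappedRange(base, 4 * page);
    ASSERT(segments.size() == 1);                    // the hole is simply excluded
    ASSERT(segments[0] == std::make_pair(base, 3 * page));
}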
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 004ec23e4..fd14a3980 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
| @@ -25,11 +25,12 @@ enum class ImageFlagBits : u32 { | |||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted | 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted
| 26 | Registered = 1 << 6, ///< True when the image is registered | 26 | Registered = 1 << 6, ///< True when the image is registered |
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked |
| 28 | Remapped = 1 << 8, ///< Image has been remapped. | ||
| 28 | 29 | ||
| 29 | // Garbage Collection Flags | 30 | // Garbage Collection Flags |
| 30 | BadOverlap = 1 << 8, ///< This image overlaps others but doesn't fit, has higher | 31 | BadOverlap = 1 << 9, ///< This image overlaps others but doesn't fit, has higher
| 31 | ///< garbage collection priority | 32 | ///< garbage collection priority |
| 32 | Alias = 1 << 9, ///< This image has aliases and has priority on garbage | 33 | Alias = 1 << 10, ///< This image has aliases and has priority on garbage |
| 33 | ///< collection | 34 | ///< collection |
| 34 | }; | 35 | }; |
| 35 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 36 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
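Remapped claiming bit 8 is why the garbage-collection flags moved up to bits 9 and 10. The flag is set by UnmapGPUMemory (below, in texture_cache.h) in place of outright deletion; remapped images are then skipped by FindImage lookups and culled by JoinImages once a replacement is created over the same range. A minimal usage sketch of the generated flag operators, where True and False are yuzu's helpers for testing masked enum values:

image.flags |= ImageFlagBits::Remapped;             // mark: backing GPU memory went away
if (True(image.flags & ImageFlagBits::Remapped)) {  // test: lookups skip this image
    if (True(image.flags & ImageFlagBits::Tracked)) {
        UntrackImage(image, image_id);              // stop counting its CPU pages
    }
}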
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 9f6410d58..1704fc48c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <span> | 13 | #include <span> |
| 14 | #include <type_traits> | 14 | #include <type_traits> |
| 15 | #include <unordered_map> | 15 | #include <unordered_map> |
| 16 | #include <unordered_set> | ||
| 16 | #include <utility> | 17 | #include <utility> |
| 17 | #include <vector> | 18 | #include <vector> |
| 18 | 19 | ||
| @@ -155,6 +156,9 @@ public: | |||
| 155 | /// Remove images in a region | 156 | /// Remove images in a region |
| 156 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); | 157 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); |
| 157 | 158 | ||
| 159 | /// Used when GPU memory changes layout on sparse textures. | ||
| 160 | // void CheckRemaps(); | ||
| 161 | |||
| 158 | /// Blit an image with the given parameters | 162 | /// Blit an image with the given parameters |
| 159 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 163 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 160 | const Tegra::Engines::Fermi2D::Surface& src, | 164 | const Tegra::Engines::Fermi2D::Surface& src, |
| @@ -238,7 +242,7 @@ private: | |||
| 238 | FramebufferId GetFramebufferId(const RenderTargets& key); | 242 | FramebufferId GetFramebufferId(const RenderTargets& key); |
| 239 | 243 | ||
| 240 | /// Refresh the contents (pixel data) of an image | 244 | /// Refresh the contents (pixel data) of an image |
| 241 | void RefreshContents(Image& image); | 245 | void RefreshContents(Image& image, ImageId image_id); |
| 242 | 246 | ||
| 243 | /// Upload data from guest to an image | 247 | /// Upload data from guest to an image |
| 244 | template <typename StagingBuffer> | 248 | template <typename StagingBuffer> |
| @@ -290,6 +294,9 @@ private: | |||
| 290 | template <typename Func> | 294 | template <typename Func> |
| 291 | void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); | 295 | void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); |
| 292 | 296 | ||
| 297 | template <typename Func> | ||
| 298 | void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); | ||
| 299 | |||
| 293 | /// Iterates over all the images in a region calling func | 300 | /// Iterates over all the images in a region calling func |
| 294 | template <typename Func> | 301 | template <typename Func> |
| 295 | void ForEachSparseSegment(ImageBase& image, Func&& func); | 302 | void ForEachSparseSegment(ImageBase& image, Func&& func); |
| @@ -304,10 +311,10 @@ private: | |||
| 304 | void UnregisterImage(ImageId image); | 311 | void UnregisterImage(ImageId image); |
| 305 | 312 | ||
| 306 | /// Track CPU reads and writes for image | 313 | /// Track CPU reads and writes for image |
| 307 | void TrackImage(ImageBase& image); | 314 | void TrackImage(ImageBase& image, ImageId image_id); |
| 308 | 315 | ||
| 309 | /// Stop tracking CPU reads and writes for image | 316 | /// Stop tracking CPU reads and writes for image |
| 310 | void UntrackImage(ImageBase& image); | 317 | void UntrackImage(ImageBase& image, ImageId image_id); |
| 311 | 318 | ||
| 312 | /// Delete image from the cache | 319 | /// Delete image from the cache |
| 313 | void DeleteImage(ImageId image); | 320 | void DeleteImage(ImageId image); |
| @@ -367,6 +374,11 @@ private: | |||
| 367 | 374 | ||
| 368 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; | 375 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; |
| 369 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | 376 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; |
| 377 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; | ||
| 378 | |||
| 379 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | ||
| 380 | |||
| 381 | VAddr virtual_invalid_space{}; | ||
| 370 | 382 | ||
| 371 | bool has_deleted_images = false; | 383 | bool has_deleted_images = false; |
| 372 | u64 total_used_memory = 0; | 384 | u64 total_used_memory = 0; |
| @@ -685,7 +697,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 685 | return; | 697 | return; |
| 686 | } | 698 | } |
| 687 | image.flags |= ImageFlagBits::CpuModified; | 699 | image.flags |= ImageFlagBits::CpuModified; |
| 688 | UntrackImage(image); | 700 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 701 | UntrackImage(image, image_id); | ||
| 702 | } | ||
| 689 | }); | 703 | }); |
| 690 | } | 704 | } |
| 691 | 705 | ||
| @@ -722,7 +736,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | |||
| 722 | for (const ImageId id : deleted_images) { | 736 | for (const ImageId id : deleted_images) { |
| 723 | Image& image = slot_images[id]; | 737 | Image& image = slot_images[id]; |
| 724 | if (True(image.flags & ImageFlagBits::Tracked)) { | 738 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 725 | UntrackImage(image); | 739 | UntrackImage(image, id); |
| 726 | } | 740 | } |
| 727 | UnregisterImage(id); | 741 | UnregisterImage(id); |
| 728 | DeleteImage(id); | 742 | DeleteImage(id); |
| @@ -736,11 +750,13 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | |||
| 736 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 750 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 737 | for (const ImageId id : deleted_images) { | 751 | for (const ImageId id : deleted_images) { |
| 738 | Image& image = slot_images[id]; | 752 | Image& image = slot_images[id]; |
| 753 | if (True(image.flags & ImageFlagBits::Remapped)) { | ||
| 754 | continue; | ||
| 755 | } | ||
| 756 | image.flags |= ImageFlagBits::Remapped; | ||
| 739 | if (True(image.flags & ImageFlagBits::Tracked)) { | 757 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 740 | UntrackImage(image); | 758 | UntrackImage(image, id); |
| 741 | } | 759 | } |
| 742 | UnregisterImage(id); | ||
| 743 | DeleteImage(id); | ||
| 744 | } | 760 | } |
| 745 | } | 761 | } |
| 746 | 762 | ||
| @@ -958,13 +974,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 958 | } | 974 | } |
| 959 | 975 | ||
| 960 | template <class P> | 976 | template <class P> |
| 961 | void TextureCache<P>::RefreshContents(Image& image) { | 977 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { |
| 962 | if (False(image.flags & ImageFlagBits::CpuModified)) { | 978 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 963 | // Only upload modified images | 979 | // Only upload modified images |
| 964 | return; | 980 | return; |
| 965 | } | 981 | } |
| 966 | image.flags &= ~ImageFlagBits::CpuModified; | 982 | image.flags &= ~ImageFlagBits::CpuModified; |
| 967 | TrackImage(image); | 983 | TrackImage(image, image_id); |
| 968 | 984 | ||
| 969 | if (image.info.num_samples > 1) { | 985 | if (image.info.num_samples > 1) { |
| 970 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | 986 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| @@ -1043,14 +1059,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | |||
| 1043 | template <class P> | 1059 | template <class P> |
| 1044 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1060 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1045 | RelaxedOptions options) { | 1061 | RelaxedOptions options) { |
| 1046 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 1062 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 1047 | if (!cpu_addr) { | 1063 | if (!cpu_addr) { |
| 1048 | return ImageId{}; | 1064 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 1065 | if (!cpu_addr) { | ||
| 1066 | return ImageId{}; | ||
| 1067 | } | ||
| 1049 | } | 1068 | } |
| 1050 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | 1069 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| 1051 | const bool native_bgr = runtime.HasNativeBgr(); | 1070 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1052 | ImageId image_id; | 1071 | ImageId image_id; |
| 1053 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1072 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1073 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||
| 1074 | return false; | ||
| 1075 | } | ||
| 1054 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | 1076 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { |
| 1055 | const bool strict_size = False(options & RelaxedOptions::Size) && | 1077 | const bool strict_size = False(options & RelaxedOptions::Size) && |
| 1056 | True(existing_image.flags & ImageFlagBits::Strong); | 1078 | True(existing_image.flags & ImageFlagBits::Strong); |
| @@ -1069,14 +1091,23 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1069 | } | 1091 | } |
| 1070 | return false; | 1092 | return false; |
| 1071 | }; | 1093 | }; |
| 1072 | ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda); | 1094 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); |
| 1073 | return image_id; | 1095 | return image_id; |
| 1074 | } | 1096 | } |
| 1075 | 1097 | ||
| 1076 | template <class P> | 1098 | template <class P> |
| 1077 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1099 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1078 | RelaxedOptions options) { | 1100 | RelaxedOptions options) { |
| 1079 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 1101 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 1102 | if (!cpu_addr) { | ||
| 1103 | const auto size = CalculateGuestSizeInBytes(info); | ||
| 1104 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | ||
| 1105 | if (!cpu_addr) { | ||
| 1106 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||
| 1107 | virtual_invalid_space += Common::AlignUp(size, 32); | ||
| 1108 | cpu_addr = std::optional<VAddr>(fake_addr); | ||
| 1109 | } | ||
| 1110 | } | ||
| 1080 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | 1111 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); |
| 1081 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | 1112 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); |
| 1082 | const Image& image = slot_images[image_id]; | 1113 | const Image& image = slot_images[image_id]; |
| @@ -1096,10 +1127,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1096 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | 1127 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| 1097 | const bool native_bgr = runtime.HasNativeBgr(); | 1128 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1098 | std::vector<ImageId> overlap_ids; | 1129 | std::vector<ImageId> overlap_ids; |
| 1130 | std::unordered_set<ImageId> overlaps_found; | ||
| 1099 | std::vector<ImageId> left_aliased_ids; | 1131 | std::vector<ImageId> left_aliased_ids; |
| 1100 | std::vector<ImageId> right_aliased_ids; | 1132 | std::vector<ImageId> right_aliased_ids; |
| 1133 | std::unordered_set<ImageId> ignore_textures; | ||
| 1101 | std::vector<ImageId> bad_overlap_ids; | 1134 | std::vector<ImageId> bad_overlap_ids; |
| 1102 | ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | 1135 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { |
| 1136 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 1137 | ignore_textures.insert(overlap_id); | ||
| 1138 | return; | ||
| 1139 | } | ||
| 1103 | if (info.type == ImageType::Linear) { | 1140 | if (info.type == ImageType::Linear) { |
| 1104 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | 1141 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { |
| 1105 | // Alias linear images with the same pitch | 1142 | // Alias linear images with the same pitch |
| @@ -1107,6 +1144,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1107 | } | 1144 | } |
| 1108 | return; | 1145 | return; |
| 1109 | } | 1146 | } |
| 1147 | overlaps_found.insert(overlap_id); | ||
| 1110 | static constexpr bool strict_size = true; | 1148 | static constexpr bool strict_size = true; |
| 1111 | const std::optional<OverlapResult> solution = ResolveOverlap( | 1149 | const std::optional<OverlapResult> solution = ResolveOverlap( |
| 1112 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | 1150 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); |
| @@ -1130,30 +1168,34 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1130 | bad_overlap_ids.push_back(overlap_id); | 1168 | bad_overlap_ids.push_back(overlap_id); |
| 1131 | overlap.flags |= ImageFlagBits::BadOverlap; | 1169 | overlap.flags |= ImageFlagBits::BadOverlap; |
| 1132 | } | 1170 | } |
| 1133 | }); | 1171 | }; |
| 1172 | ForEachImageInRegion(cpu_addr, size_bytes, region_check); | ||
| 1173 | const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 1174 | if (!overlaps_found.contains(overlap_id)) { | ||
| 1175 | ignore_textures.insert(overlap_id); | ||
| 1176 | } | ||
| 1177 | }; | ||
| 1178 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||
| 1134 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1179 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| 1135 | Image& new_image = slot_images[new_image_id]; | 1180 | Image& new_image = slot_images[new_image_id]; |
| 1136 | 1181 | ||
| 1137 | new_image.is_sparse = false; | 1182 | new_image.is_sparse = |
| 1138 | if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) { | 1183 | !gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes); |
| 1139 | const LevelArray offsets = CalculateMipLevelOffsets(new_image.info); | 1184 | |
| 1140 | size_t level; | 1185 | for (const ImageId overlap_id : ignore_textures) { |
| 1141 | const size_t levels = static_cast<size_t>(new_image.info.resources.levels); | 1186 | Image& overlap = slot_images[overlap_id]; |
| 1142 | VAddr n_cpu_addr = new_image.cpu_addr; | 1187 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { |
| 1143 | GPUVAddr n_gpu_addr = new_image.gpu_addr; | 1188 | UNIMPLEMENTED(); |
| 1144 | for (level = 0; level < levels; level++) { | 1189 | } |
| 1145 | n_gpu_addr += offsets[level]; | 1190 | if (True(overlap.flags & ImageFlagBits::Tracked)) { |
| 1146 | n_cpu_addr += offsets[level]; | 1191 | UntrackImage(overlap, overlap_id); |
| 1147 | std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr); | ||
| 1148 | if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) { | ||
| 1149 | new_image.is_sparse = true; | ||
| 1150 | break; | ||
| 1151 | } | ||
| 1152 | } | 1192 | } |
| 1193 | UnregisterImage(overlap_id); | ||
| 1194 | DeleteImage(overlap_id); | ||
| 1153 | } | 1195 | } |
| 1154 | 1196 | ||
| 1155 | // TODO: Only upload what we need | 1197 | // TODO: Only upload what we need |
| 1156 | RefreshContents(new_image); | 1198 | RefreshContents(new_image, new_image_id); |
| 1157 | 1199 | ||
| 1158 | for (const ImageId overlap_id : overlap_ids) { | 1200 | for (const ImageId overlap_id : overlap_ids) { |
| 1159 | Image& overlap = slot_images[overlap_id]; | 1201 | Image& overlap = slot_images[overlap_id]; |
| @@ -1165,7 +1207,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1165 | runtime.CopyImage(new_image, overlap, copies); | 1207 | runtime.CopyImage(new_image, overlap, copies); |
| 1166 | } | 1208 | } |
| 1167 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | 1209 | if (True(overlap.flags & ImageFlagBits::Tracked)) { |
| 1168 | UntrackImage(overlap); | 1210 | UntrackImage(overlap, overlap_id); |
| 1169 | } | 1211 | } |
| 1170 | UnregisterImage(overlap_id); | 1212 | UnregisterImage(overlap_id); |
| 1171 | DeleteImage(overlap_id); | 1213 | DeleteImage(overlap_id); |
| @@ -1390,25 +1432,64 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu | |||
| 1390 | 1432 | ||
| 1391 | template <class P> | 1433 | template <class P> |
| 1392 | template <typename Func> | 1434 | template <typename Func> |
| 1393 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | 1435 | void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { |
| 1394 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | 1436 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; |
| 1395 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | 1437 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; |
| 1396 | GPUVAddr gpu_addr = image.gpu_addr; | 1438 | boost::container::small_vector<ImageId, 8> images; |
| 1397 | const size_t levels = image.info.resources.levels; | 1439 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { |
| 1398 | const auto mipmap_sizes = CalculateMipLevelSizes(image.info); | 1440 | const auto it = sparse_page_table.find(page); |
| 1399 | for (size_t level = 0; level < levels; level++) { | 1441 | if (it == sparse_page_table.end()) { |
| 1400 | const size_t size = mipmap_sizes[level]; | 1442 | if constexpr (BOOL_BREAK) { |
| 1401 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 1443 | return false; |
| 1402 | if (cpu_addr && *cpu_addr != 0) { | 1444 | } else { |
| 1445 | return; | ||
| 1446 | } | ||
| 1447 | } | ||
| 1448 | for (const ImageId image_id : it->second) { | ||
| 1449 | Image& image = slot_images[image_id]; | ||
| 1450 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1451 | continue; | ||
| 1452 | } | ||
| 1453 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1454 | continue; | ||
| 1455 | } | ||
| 1456 | image.flags |= ImageFlagBits::Picked; | ||
| 1457 | images.push_back(image_id); | ||
| 1403 | if constexpr (BOOL_BREAK) { | 1458 | if constexpr (BOOL_BREAK) { |
| 1404 | if (func(gpu_addr, *cpu_addr, size)) { | 1459 | if (func(image_id, image)) { |
| 1405 | return true; | 1460 | return true; |
| 1406 | } | 1461 | } |
| 1407 | } else { | 1462 | } else { |
| 1408 | func(gpu_addr, *cpu_addr, size); | 1463 | func(image_id, image); |
| 1464 | } | ||
| 1465 | } | ||
| 1466 | if constexpr (BOOL_BREAK) { | ||
| 1467 | return false; | ||
| 1468 | } | ||
| 1469 | }); | ||
| 1470 | for (const ImageId image_id : images) { | ||
| 1471 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1472 | } | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | template <class P> | ||
| 1476 | template <typename Func> | ||
| 1477 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1478 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1479 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1480 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1481 | for (auto& segment : segments) { | ||
| 1482 | const auto gpu_addr = segment.first; | ||
| 1483 | const auto size = segment.second; | ||
| 1484 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1485 | ASSERT(cpu_addr); | ||
| 1486 | if constexpr (BOOL_BREAK) { | ||
| 1487 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1488 | return true; | ||
| 1409 | } | 1489 | } |
| 1490 | } else { | ||
| 1491 | func(gpu_addr, *cpu_addr, size); | ||
| 1410 | } | 1492 | } |
| 1411 | gpu_addr += size; | ||
| 1412 | } | 1493 | } |
| 1413 | } | 1494 | } |
| 1414 | 1495 | ||
| @@ -1446,11 +1527,17 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1446 | image.map_view_id = map_id; | 1527 | image.map_view_id = map_id; |
| 1447 | return; | 1528 | return; |
| 1448 | } | 1529 | } |
| 1449 | ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1530 | std::vector<ImageViewId> sparse_maps{}; |
| 1450 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1531 | ForEachSparseSegment( |
| 1451 | ForEachCPUPage(cpu_addr, size, | 1532 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1452 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | 1533 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| 1453 | }); | 1534 | ForEachCPUPage(cpu_addr, size, |
| 1535 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1536 | sparse_maps.push_back(map_id); | ||
| 1537 | }); | ||
| 1538 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1539 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1540 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1454 | } | 1541 | } |
| 1455 | 1542 | ||
| 1456 | template <class P> | 1543 | template <class P> |
| @@ -1467,20 +1554,26 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1467 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1554 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1468 | } | 1555 | } |
| 1469 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | 1556 | total_used_memory -= Common::AlignUp(tentative_size, 1024); |
| 1470 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | 1557 | const auto& clear_page_table = |
| 1471 | const auto page_it = gpu_page_table.find(page); | 1558 | [this, image_id]( |
| 1472 | if (page_it == gpu_page_table.end()) { | 1559 | u64 page, |
| 1473 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | 1560 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { |
| 1474 | return; | 1561 | const auto page_it = selected_page_table.find(page); |
| 1475 | } | 1562 | if (page_it == selected_page_table.end()) { |
| 1476 | std::vector<ImageId>& image_ids = page_it->second; | 1563 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); |
| 1477 | const auto vector_it = std::ranges::find(image_ids, image_id); | 1564 | return; |
| 1478 | if (vector_it == image_ids.end()) { | 1565 | } |
| 1479 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); | 1566 | std::vector<ImageId>& image_ids = page_it->second; |
| 1480 | return; | 1567 | const auto vector_it = std::ranges::find(image_ids, image_id); |
| 1481 | } | 1568 | if (vector_it == image_ids.end()) { |
| 1482 | image_ids.erase(vector_it); | 1569 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", |
| 1483 | }); | 1570 | page << PAGE_BITS); |
| 1571 | return; | ||
| 1572 | } | ||
| 1573 | image_ids.erase(vector_it); | ||
| 1574 | }; | ||
| 1575 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1576 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1484 | if (!image.is_sparse) { | 1577 | if (!image.is_sparse) { |
| 1485 | const auto map_id = image.map_view_id; | 1578 | const auto map_id = image.map_view_id; |
| 1486 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | 1579 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { |
| @@ -1501,46 +1594,61 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1501 | slot_map_views.erase(map_id); | 1594 | slot_map_views.erase(map_id); |
| 1502 | return; | 1595 | return; |
| 1503 | } | 1596 | } |
| 1504 | boost::container::small_vector<ImageMapId, 8> maps_to_delete; | 1597 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { |
| 1505 | ForEachSparseSegment( | 1598 | clear_page_table(page, sparse_page_table); |
| 1506 | image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, | 1599 | }); |
| 1507 | size_t size) { | 1600 | auto it = sparse_views.find(image_id); |
| 1508 | ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) { | 1601 | ASSERT(it != sparse_views.end()); |
| 1509 | const auto page_it = page_table.find(page); | 1602 | auto& sparse_maps = it->second; |
| 1510 | if (page_it == page_table.end()) { | 1603 | for (auto& map_view_id : sparse_maps) { |
| 1511 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | 1604 | const auto& map = slot_map_views[map_view_id]; |
| 1512 | return; | 1605 | const VAddr cpu_addr = map.cpu_addr; |
| 1606 | const std::size_t size = map.size; | ||
| 1607 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1608 | const auto page_it = page_table.find(page); | ||
| 1609 | if (page_it == page_table.end()) { | ||
| 1610 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1611 | return; | ||
| 1612 | } | ||
| 1613 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1614 | auto vector_it = image_map_ids.begin(); | ||
| 1615 | while (vector_it != image_map_ids.end()) { | ||
| 1616 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1617 | if (map.image_id != image_id) { | ||
| 1618 | vector_it++; | ||
| 1619 | continue; | ||
| 1513 | } | 1620 | } |
| 1514 | std::vector<ImageMapId>& image_map_ids = page_it->second; | 1621 | if (!map.picked) { |
| 1515 | auto vector_it = image_map_ids.begin(); | 1622 | map.picked = true; |
| 1516 | while (vector_it != image_map_ids.end()) { | ||
| 1517 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1518 | if (map.image_id != image_id) { | ||
| 1519 | vector_it++; | ||
| 1520 | continue; | ||
| 1521 | } | ||
| 1522 | if (!map.picked) { | ||
| 1523 | maps_to_delete.push_back(*vector_it); | ||
| 1524 | map.picked = true; | ||
| 1525 | } | ||
| 1526 | vector_it = image_map_ids.erase(vector_it); | ||
| 1527 | } | 1623 | } |
| 1528 | }); | 1624 | vector_it = image_map_ids.erase(vector_it); |
| 1625 | } | ||
| 1529 | }); | 1626 | }); |
| 1530 | 1627 | slot_map_views.erase(map_view_id); | |
| 1531 | for (const ImageMapId map_id : maps_to_delete) { | ||
| 1532 | slot_map_views.erase(map_id); | ||
| 1533 | } | 1628 | } |
| 1629 | sparse_views.erase(it); | ||
| 1534 | } | 1630 | } |
| 1535 | 1631 | ||
| 1536 | template <class P> | 1632 | template <class P> |
| 1537 | void TextureCache<P>::TrackImage(ImageBase& image) { | 1633 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { |
| 1538 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 1634 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1539 | image.flags |= ImageFlagBits::Tracked; | 1635 | image.flags |= ImageFlagBits::Tracked; |
| 1540 | if (!image.is_sparse) { | 1636 | if (!image.is_sparse) { |
| 1541 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 1637 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 1542 | return; | 1638 | return; |
| 1543 | } | 1639 | } |
| 1640 | if (True(image.flags & ImageFlagBits::Registered)) { | ||
| 1641 | auto it = sparse_views.find(image_id); | ||
| 1642 | ASSERT(it != sparse_views.end()); | ||
| 1643 | auto& sparse_maps = it->second; | ||
| 1644 | for (auto& map_view_id : sparse_maps) { | ||
| 1645 | const auto& map = slot_map_views[map_view_id]; | ||
| 1646 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1647 | const std::size_t size = map.size; | ||
| 1648 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1649 | } | ||
| 1650 | return; | ||
| 1651 | } | ||
| 1544 | ForEachSparseSegment(image, | 1652 | ForEachSparseSegment(image, |
| 1545 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1653 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1546 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 1654 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| @@ -1548,17 +1656,23 @@ void TextureCache<P>::TrackImage(ImageBase& image) { | |||
| 1548 | } | 1656 | } |
| 1549 | 1657 | ||
| 1550 | template <class P> | 1658 | template <class P> |
| 1551 | void TextureCache<P>::UntrackImage(ImageBase& image) { | 1659 | void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { |
| 1552 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | 1660 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 1553 | image.flags &= ~ImageFlagBits::Tracked; | 1661 | image.flags &= ~ImageFlagBits::Tracked; |
| 1554 | if (!image.is_sparse) { | 1662 | if (!image.is_sparse) { |
| 1555 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | 1663 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); |
| 1556 | return; | 1664 | return; |
| 1557 | } | 1665 | } |
| 1558 | ForEachSparseSegment(image, | 1666 | ASSERT(True(image.flags & ImageFlagBits::Registered)); |
| 1559 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1667 | auto it = sparse_views.find(image_id); |
| 1560 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 1668 | ASSERT(it != sparse_views.end()); |
| 1561 | }); | 1669 | auto& sparse_maps = it->second; |
| 1670 | for (auto& map_view_id : sparse_maps) { | ||
| 1671 | const auto& map = slot_map_views[map_view_id]; | ||
| 1672 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1673 | const std::size_t size = map.size; | ||
| 1674 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 1675 | } | ||
| 1562 | } | 1676 | } |
| 1563 | 1677 | ||
| 1564 | template <class P> | 1678 | template <class P> |
| @@ -1700,10 +1814,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool | |||
| 1700 | if (invalidate) { | 1814 | if (invalidate) { |
| 1701 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | 1815 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); |
| 1702 | if (False(image.flags & ImageFlagBits::Tracked)) { | 1816 | if (False(image.flags & ImageFlagBits::Tracked)) { |
| 1703 | TrackImage(image); | 1817 | TrackImage(image, image_id); |
| 1704 | } | 1818 | } |
| 1705 | } else { | 1819 | } else { |
| 1706 | RefreshContents(image); | 1820 | RefreshContents(image, image_id); |
| 1707 | SynchronizeAliases(image_id); | 1821 | SynchronizeAliases(image_id); |
| 1708 | } | 1822 | } |
| 1709 | if (is_modification) { | 1823 | if (is_modification) { |
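The cache-side bookkeeping hangs off two new containers: sparse_page_table mirrors gpu_page_table for sparse images only, and sparse_views records, per image, the map views created for each CPU-backed segment at registration time. That cached list is what lets TrackImage, UntrackImage, and UnregisterImage walk a sparse image's segments without re-deriving them from the GPU page table on every call. A condensed sketch of the relationship, using the member names from this diff:

// page_table        : CPU page -> ImageMapId   (one entry per mapped segment)
// gpu_page_table    : GPU page -> ImageId      (all images, as before)
// sparse_page_table : GPU page -> ImageId      (sparse images only)
// sparse_views      : ImageId  -> map view ids (segments found at RegisterImage)
const auto& sparse_maps = sparse_views.at(image_id);
for (const auto map_view_id : sparse_maps) {
    const auto& map = slot_map_views[map_view_id];
    // +1 while tracking, -1 while untracking, exactly as the non-sparse path
    // does once for the whole image.
    rasterizer.UpdatePagesCachedCount(map.cpu_addr, map.size, 1);
}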
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 96bf8f8d9..10093a11d 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
| @@ -786,37 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 786 | return copies; | 786 | return copies; |
| 787 | } | 787 | } |
| 788 | 788 | ||
| 789 | bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) { | 789 | bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { |
| 790 | if (gpu_addr == 0) { | 790 | const GPUVAddr address = config.Address(); |
| 791 | if (address == 0) { | ||
| 791 | return false; | 792 | return false; |
| 792 | } | 793 | } |
| 793 | if (gpu_addr > (u64(1) << 48)) { | 794 | if (address > (1ULL << 48)) { |
| 794 | return false; | 795 | return false; |
| 795 | } | 796 | } |
| 796 | const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 797 | if (gpu_memory.GpuToCpuAddress(address).has_value()) { |
| 797 | return cpu_addr.has_value() && *cpu_addr != 0; | ||
| 798 | } | ||
| 799 | |||
| 800 | bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||
| 801 | const GPUVAddr gpu_addr = config.Address(); | ||
| 802 | if (IsValidAddress(gpu_memory, gpu_addr)) { | ||
| 803 | return true; | 798 | return true; |
| 804 | } | 799 | } |
| 805 | if (!config.IsBlockLinear()) { | ||
| 806 | return false; | ||
| 807 | } | ||
| 808 | const size_t levels = config.max_mip_level + 1; | ||
| 809 | if (levels <= 1) { | ||
| 810 | return false; | ||
| 811 | } | ||
| 812 | const ImageInfo info{config}; | 800 | const ImageInfo info{config}; |
| 813 | const LevelArray offsets = CalculateMipLevelOffsets(info); | 801 | const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); |
| 814 | for (size_t level = 1; level < levels; level++) { | 802 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); |
| 815 | if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) { | ||
| 816 | return true; | ||
| 817 | } | ||
| 818 | } | ||
| 819 | return false; | ||
| 820 | } | 803 | } |
| 821 | 804 | ||
| 822 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 805 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
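With the sized GpuToCpuAddress overload available, the per-mip probing that IsValidAddress existed for becomes unnecessary: a TIC entry is now considered valid when its address is plausible and at least one page anywhere in its guest footprint is mapped, which tolerates sparse textures whose level 0 lives in a hole. A hedged restatement of the consolidated check:

// Sketch of the new validity rule (not a verbatim copy of the function above):
// 1. the address must be non-zero and inside the 48-bit GPU address space;
// 2. a mapping of the base page is sufficient on its own;
// 3. failing that, any mapped page within the image's guest size qualifies.
const GPUVAddr address = config.Address();
const bool plausible = address != 0 && address <= (1ULL << 48);
return plausible &&
       (gpu_memory.GpuToCpuAddress(address).has_value() ||
        gpu_memory.GpuToCpuAddress(address, CalculateGuestSizeInBytes(ImageInfo{config}))
            .has_value());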
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index b73361484..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
| @@ -57,8 +57,6 @@ struct OverlapResult { | |||
| 57 | const ImageInfo& src, | 57 | const ImageInfo& src, |
| 58 | SubresourceBase base); | 58 | SubresourceBase base); |
| 59 | 59 | ||
| 60 | [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr); | ||
| 61 | |||
| 62 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 60 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 63 | 61 | ||
| 64 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 62 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |