 src/video_core/memory_manager.cpp            | 105
 src/video_core/memory_manager.h              |  20
 src/video_core/texture_cache/image_base.h    |   5
 src/video_core/texture_cache/texture_cache.h | 310
 src/video_core/texture_cache/util.cpp        |  31
 src/video_core/texture_cache/util.h          |   2
 6 files changed, 342 insertions(+), 131 deletions(-)
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3589c72ea..e66af4443 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,17 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     } else {
         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
     }
-    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
+
+    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
 
-    rasterizer->UnmapMemory(*cpu_addr, size);
+    for (const auto& map : submapped_ranges) {
+        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+        ASSERT(cpu_addr);
+
+        rasterizer->UnmapMemory(*cpu_addr, map.second);
+    }
+
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
@@ -128,7 +134,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
     //// Lock the new page
     // TryLockPage(page_entry, size);
     auto& current_page = page_table[PageEntryIndex(gpu_addr)];
-    if (current_page.IsValid() != page_entry.IsValid() ||
+
+    if ((!current_page.IsValid() && page_entry.IsValid()) ||
         current_page.ToAddress() != page_entry.ToAddress()) {
         rasterizer->ModifyGPUMemory(gpu_addr, size);
     }
@@ -179,6 +186,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
     return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+    size_t page_index{addr >> page_bits};
+    const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (page_addr && *page_addr != 0) {
+            return page_addr;
+        }
+        ++page_index;
+    }
+    return std::nullopt;
+}
+
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
     if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -375,4 +395,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::PAGE_SIZE;
 }
 
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    std::optional<VAddr> old_page_addr{};
+    while (page_index != page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr || *page_addr == 0) {
+            return false;
+        }
+        if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                return false;
+            }
+        }
+        old_page_addr = page_addr;
+        ++page_index;
+    }
+    return true;
+}
+
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+            return false;
+        }
+        ++page_index;
+    }
+    return true;
+}
+
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+    GPUVAddr gpu_addr, std::size_t size) const {
+    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+    size_t page_index{gpu_addr >> page_bits};
+    size_t remaining_size{size};
+    size_t page_offset{gpu_addr & page_mask};
+    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    std::optional<VAddr> old_page_addr{};
+    const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
+        if (!last_segment) {
+            GPUVAddr new_base_addr = page_index << page_bits;
+            last_segment = {new_base_addr, bytes};
+        } else {
+            last_segment->second += bytes;
+        }
+    };
+    const auto split = [this, &last_segment, &result] {
+        if (last_segment) {
+            result.push_back(*last_segment);
+            last_segment = std::nullopt;
+        }
+    };
+    while (remaining_size > 0) {
+        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr) {
+            split();
+        } else if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                split();
+            }
+            extend_size(num_bytes);
+        } else {
+            extend_size(num_bytes);
+        }
+        ++page_index;
+        page_offset = 0;
+        remaining_size -= num_bytes;
+    }
+    split();
+    return result;
+}
+
 } // namespace Tegra
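
For reference: GetSubmappedRange walks the requested region page by page, growing the current segment while pages are mapped and pushing the finished segment whenever the walk hits a hole, which is what lets Unmap above flush only memory that is actually backed. A worked example of the intended result (hypothetical addresses; page_size as configured by the allocator):

    // Hypothetical 4-page region at gpu_addr: pages 0 and 1 mapped to contiguous
    // CPU memory, page 2 unmapped, page 3 mapped somewhere else entirely.
    const auto segments = gpu_memory.GetSubmappedRange(gpu_addr, 4 * page_size);
    // segments == {{gpu_addr, 2 * page_size},               // pages 0-1, merged
    //              {gpu_addr + 3 * page_size, page_size}};  // page 3, after the hole
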
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..305c4b1f1 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
 
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
     template <typename T>
     [[nodiscard]] T Read(GPUVAddr addr) const;
 
@@ -116,6 +118,24 @@ public:
      */
     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
+    /**
+     * IsContinousRange checks if a gpu region is mapped by a single range of cpu addresses.
+     */
+    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * IsFullyMappedRange checks if a gpu region is mapped entirely.
+     */
+    [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * GetSubmappedRange returns a vector with all the subranges of cpu addresses mapped within
+     * the region. If the region is fully continuous, a single pair is returned; if it is fully
+     * unmapped, an empty vector is returned.
+     */
+    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+                                                                    std::size_t size) const;
+
     [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
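
The three new queries answer different questions about the same region; on the hypothetical 4-page mapping used above they would behave as follows (a sketch, not output of the real test suite):

    gpu_memory.IsContinousRange(gpu_addr, size);   // false: the walk fails at page 2
    gpu_memory.IsFullyMappedRange(gpu_addr, size); // false: page 2 has no valid entry
    gpu_memory.GetSubmappedRange(gpu_addr, size);  // the two mapped segments
    gpu_memory.GpuToCpuAddress(gpu_addr, size);    // CPU address of the first mapped page
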
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 004ec23e4..fd14a3980 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,11 +25,12 @@ enum class ImageFlagBits : u32 {
     Strong = 1 << 5,     ///< Exists in the image table, the dimensions can be trusted
     Registered = 1 << 6, ///< True when the image is registered
     Picked = 1 << 7,     ///< Temporary flag to mark the image as picked
+    Remapped = 1 << 8,   ///< Image has been remapped.
 
     // Garbage Collection Flags
-    BadOverlap = 1 << 8,  ///< This image overlaps other but doesn't fit, has higher
-                          ///< garbage collection priority
-    Alias = 1 << 9,       ///< This image has aliases and has priority on garbage
-                          ///< collection
+    BadOverlap = 1 << 9,  ///< This image overlaps other but doesn't fit, has higher
+                          ///< garbage collection priority
+    Alias = 1 << 10,      ///< This image has aliases and has priority on garbage
+                          ///< collection
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
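
Remapped takes over bit 8, so the two garbage-collection flags each move up one position; these flags are runtime-only state, so renumbering them is safe. The new bit is consumed with the usual True()/False() helpers, mirroring the UnmapGPUMemory hunk further down:

    // Pattern from the deleted-images loop in UnmapGPUMemory (texture_cache.h):
    if (True(image.flags & ImageFlagBits::Remapped)) {
        continue; // already detached from its GPU mapping; nothing more to do
    }
    image.flags |= ImageFlagBits::Remapped;
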
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 9f6410d58..1704fc48c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -155,6 +156,9 @@ public:
     /// Remove images in a region
     void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
 
+    /// Used when GPU memory changes layout on sparse textures.
+    // void CheckRemaps();
+
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
@@ -238,7 +242,7 @@ private:
     FramebufferId GetFramebufferId(const RenderTargets& key);
 
     /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image);
+    void RefreshContents(Image& image, ImageId image_id);
 
     /// Upload data from guest to an image
     template <typename StagingBuffer>
@@ -290,6 +294,9 @@ private:
     template <typename Func>
     void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
 
+    template <typename Func>
+    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
     /// Iterates over all the images in a region calling func
     template <typename Func>
     void ForEachSparseSegment(ImageBase& image, Func&& func);
@@ -304,10 +311,10 @@ private:
     void UnregisterImage(ImageId image);
 
     /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image);
+    void TrackImage(ImageBase& image, ImageId image_id);
 
     /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image);
+    void UntrackImage(ImageBase& image, ImageId image_id);
 
     /// Delete image from the cache
     void DeleteImage(ImageId image);
@@ -367,6 +374,11 @@ private:
 
     std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
     std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+    VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
     u64 total_used_memory = 0;
@@ -685,7 +697,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
             return;
         }
         image.flags |= ImageFlagBits::CpuModified;
-        UntrackImage(image);
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, image_id);
+        }
     });
 }
 
@@ -722,7 +736,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image);
+            UntrackImage(image, id);
         }
         UnregisterImage(id);
         DeleteImage(id);
@@ -736,11 +750,13 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
                             [&](ImageId id, Image&) { deleted_images.push_back(id); });
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Remapped)) {
+            continue;
+        }
+        image.flags |= ImageFlagBits::Remapped;
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image);
+            UntrackImage(image, id);
         }
-        UnregisterImage(id);
-        DeleteImage(id);
     }
 }
 
@@ -958,13 +974,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
     if (False(image.flags & ImageFlagBits::CpuModified)) {
         // Only upload modified images
         return;
     }
     image.flags &= ~ImageFlagBits::CpuModified;
-    TrackImage(image);
+    TrackImage(image, image_id);
 
     if (image.info.num_samples > 1) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -1043,14 +1059,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        return ImageId{};
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        if (!cpu_addr) {
+            return ImageId{};
+        }
     }
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+            return false;
+        }
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
             const bool strict_size = False(options & RelaxedOptions::Size) &&
                                      True(existing_image.flags & ImageFlagBits::Strong);
@@ -1069,14 +1091,23 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
         }
         return false;
     };
-    ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda);
+    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
     return image_id;
 }
 
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        const auto size = CalculateGuestSizeInBytes(info);
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        if (!cpu_addr) {
+            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+            virtual_invalid_space += Common::AlignUp(size, 32);
+            cpu_addr = std::optional<VAddr>(fake_addr);
+        }
+    }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
@@ -1096,10 +1127,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     std::vector<ImageId> overlap_ids;
+    std::unordered_set<ImageId> overlaps_found;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
+    std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
-    ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (True(overlap.flags & ImageFlagBits::Remapped)) {
+            ignore_textures.insert(overlap_id);
+            return;
+        }
         if (info.type == ImageType::Linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
@@ -1107,6 +1144,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
+        overlaps_found.insert(overlap_id);
         static constexpr bool strict_size = true;
         const std::optional<OverlapResult> solution = ResolveOverlap(
             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1130,30 +1168,34 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
         }
-    });
+    };
+    ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+    const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (!overlaps_found.contains(overlap_id)) {
+            ignore_textures.insert(overlap_id);
+        }
+    };
+    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    new_image.is_sparse = false;
-    if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) {
-        const LevelArray offsets = CalculateMipLevelOffsets(new_image.info);
-        size_t level;
-        const size_t levels = static_cast<size_t>(new_image.info.resources.levels);
-        VAddr n_cpu_addr = new_image.cpu_addr;
-        GPUVAddr n_gpu_addr = new_image.gpu_addr;
-        for (level = 0; level < levels; level++) {
-            n_gpu_addr += offsets[level];
-            n_cpu_addr += offsets[level];
-            std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr);
-            if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) {
-                new_image.is_sparse = true;
-                break;
-            }
+    new_image.is_sparse =
+        !gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes);
+
+    for (const ImageId overlap_id : ignore_textures) {
+        Image& overlap = slot_images[overlap_id];
+        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+            UNIMPLEMENTED();
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap, overlap_id);
         }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
     }
 
     // TODO: Only upload what we need
-    RefreshContents(new_image);
+    RefreshContents(new_image, new_image_id);
 
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
@@ -1165,7 +1207,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             runtime.CopyImage(new_image, overlap, copies);
         }
         if (True(overlap.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(overlap);
+            UntrackImage(overlap, overlap_id);
         }
         UnregisterImage(overlap_id);
         DeleteImage(overlap_id);
@@ -1390,25 +1432,64 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
-    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
-    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
-    GPUVAddr gpu_addr = image.gpu_addr;
-    const size_t levels = image.info.resources.levels;
-    const auto mipmap_sizes = CalculateMipLevelSizes(image.info);
-    for (size_t level = 0; level < levels; level++) {
-        const size_t size = mipmap_sizes[level];
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (cpu_addr && *cpu_addr != 0) {
-            if constexpr (BOOL_BREAK) {
-                if (func(gpu_addr, *cpu_addr, size)) {
-                    return true;
-                }
-            } else {
-                func(gpu_addr, *cpu_addr, size);
-            }
-        }
-        gpu_addr += size;
-    }
-}
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = sparse_page_table.find(page);
+        if (it == sparse_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : it->second) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            if (!image.OverlapsGPU(gpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    for (auto& segment : segments) {
+        const auto gpu_addr = segment.first;
+        const auto size = segment.second;
+        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        ASSERT(cpu_addr);
+        if constexpr (BOOL_BREAK) {
+            if (func(gpu_addr, *cpu_addr, size)) {
+                return true;
+            }
+        } else {
+            func(gpu_addr, *cpu_addr, size);
+        }
+    }
+}
 
@@ -1446,11 +1527,17 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
         image.map_view_id = map_id;
         return;
     }
-    ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
-        auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
-        ForEachCPUPage(cpu_addr, size,
-                       [this, map_id](u64 page) { page_table[page].push_back(map_id); });
-    });
+    std::vector<ImageViewId> sparse_maps{};
+    ForEachSparseSegment(
+        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+            auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
+            ForEachCPUPage(cpu_addr, size,
+                           [this, map_id](u64 page) { page_table[page].push_back(map_id); });
+            sparse_maps.push_back(map_id);
+        });
+    sparse_views.emplace(image_id, std::move(sparse_maps));
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
 }
 
 template <class P>
@@ -1467,20 +1554,26 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
     }
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
-        const auto page_it = gpu_page_table.find(page);
-        if (page_it == gpu_page_table.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        std::vector<ImageId>& image_ids = page_it->second;
-        const auto vector_it = std::ranges::find(image_ids, image_id);
-        if (vector_it == image_ids.end()) {
-            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
-            return;
-        }
-        image_ids.erase(vector_it);
-    });
+    const auto& clear_page_table =
+        [this, image_id](
+            u64 page,
+            std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
+            const auto page_it = selected_page_table.find(page);
+            if (page_it == selected_page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageId>& image_ids = page_it->second;
+            const auto vector_it = std::ranges::find(image_ids, image_id);
+            if (vector_it == image_ids.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
+                                page << PAGE_BITS);
+                return;
+            }
+            image_ids.erase(vector_it);
+        };
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
+                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
     if (!image.is_sparse) {
         const auto map_id = image.map_view_id;
         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1501,46 +1594,61 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         slot_map_views.erase(map_id);
         return;
     }
-    boost::container::small_vector<ImageMapId, 8> maps_to_delete;
-    ForEachSparseSegment(
-        image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr,
-                                                 size_t size) {
-            ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) {
-                const auto page_it = page_table.find(page);
-                if (page_it == page_table.end()) {
-                    UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
-                    return;
-                }
-                std::vector<ImageMapId>& image_map_ids = page_it->second;
-                auto vector_it = image_map_ids.begin();
-                while (vector_it != image_map_ids.end()) {
-                    ImageMapView& map = slot_map_views[*vector_it];
-                    if (map.image_id != image_id) {
-                        vector_it++;
-                        continue;
-                    }
-                    if (!map.picked) {
-                        maps_to_delete.push_back(*vector_it);
-                        map.picked = true;
-                    }
-                    vector_it = image_map_ids.erase(vector_it);
-                }
-            });
-        });
-
-    for (const ImageMapId map_id : maps_to_delete) {
-        slot_map_views.erase(map_id);
-    }
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, sparse_page_table);
+    });
+    auto it = sparse_views.find(image_id);
+    ASSERT(it != sparse_views.end());
+    auto& sparse_maps = it->second;
+    for (auto& map_view_id : sparse_maps) {
+        const auto& map = slot_map_views[map_view_id];
+        const VAddr cpu_addr = map.cpu_addr;
+        const std::size_t size = map.size;
+        ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
+            const auto page_it = page_table.find(page);
+            if (page_it == page_table.end()) {
+                UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+                return;
+            }
+            std::vector<ImageMapId>& image_map_ids = page_it->second;
+            auto vector_it = image_map_ids.begin();
+            while (vector_it != image_map_ids.end()) {
+                ImageMapView& map = slot_map_views[*vector_it];
+                if (map.image_id != image_id) {
+                    vector_it++;
+                    continue;
+                }
+                if (!map.picked) {
+                    map.picked = true;
+                }
+                vector_it = image_map_ids.erase(vector_it);
+            }
+        });
+        slot_map_views.erase(map_view_id);
+    }
+    sparse_views.erase(it);
 }
 
 template <class P>
-void TextureCache<P>::TrackImage(ImageBase& image) {
+void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
     image.flags |= ImageFlagBits::Tracked;
     if (!image.is_sparse) {
         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
         return;
     }
+    if (True(image.flags & ImageFlagBits::Registered)) {
+        auto it = sparse_views.find(image_id);
+        ASSERT(it != sparse_views.end());
+        auto& sparse_maps = it->second;
+        for (auto& map_view_id : sparse_maps) {
+            const auto& map = slot_map_views[map_view_id];
+            const VAddr cpu_addr = map.cpu_addr;
+            const std::size_t size = map.size;
+            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+        }
+        return;
+    }
     ForEachSparseSegment(image,
                          [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
                              rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
@@ -1548,17 +1656,23 @@ void TextureCache<P>::TrackImage(ImageBase& image) {
 }
 
 template <class P>
-void TextureCache<P>::UntrackImage(ImageBase& image) {
+void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(True(image.flags & ImageFlagBits::Tracked));
     image.flags &= ~ImageFlagBits::Tracked;
     if (!image.is_sparse) {
         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
         return;
     }
-    ForEachSparseSegment(image,
-                         [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
-                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
-                         });
+    ASSERT(True(image.flags & ImageFlagBits::Registered));
+    auto it = sparse_views.find(image_id);
+    ASSERT(it != sparse_views.end());
+    auto& sparse_maps = it->second;
+    for (auto& map_view_id : sparse_maps) {
+        const auto& map = slot_map_views[map_view_id];
+        const VAddr cpu_addr = map.cpu_addr;
+        const std::size_t size = map.size;
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+    }
 }
 
 template <class P>
@@ -1700,10 +1814,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
     if (invalidate) {
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
         if (False(image.flags & ImageFlagBits::Tracked)) {
-            TrackImage(image);
+            TrackImage(image, image_id);
         }
     } else {
-        RefreshContents(image);
+        RefreshContents(image, image_id);
         SynchronizeAliases(image_id);
     }
     if (is_modification) {
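
Taken together, the texture cache changes hang on two new indexes: sparse_page_table records, per GPU page, which sparse images touch it (walked by ForEachSparseImageInRegion), while sparse_views remembers the map views created for each mapped segment at RegisterImage time so that Track/Untrack/Unregister can replay exactly those segments later, even after the GPU mapping underneath has changed. A self-contained sketch of that bookkeeping with simplified stand-in types (the real code uses SlotVector ids, IdentityHash and boost::container::small_vector):

    #include <cstdint>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using ImageId = std::uint32_t; // stand-in for the cache's slot ids
    using MapId = std::uint32_t;   // stand-in for ImageMapView handles

    struct SparseIndex {
        // GPU page -> sparse images touching that page.
        std::unordered_map<std::uint64_t, std::vector<ImageId>> sparse_page_table;
        // Image -> per-segment map handles captured at registration time.
        std::unordered_map<ImageId, std::vector<MapId>> sparse_views;

        // Index every GPU page of the image and remember its segment maps.
        // Assumes size > 0; page_bits selects the page granularity.
        void Register(ImageId image, std::uint64_t gpu_addr, std::uint64_t size,
                      std::uint64_t page_bits, std::vector<MapId> segment_maps) {
            for (std::uint64_t page = gpu_addr >> page_bits;
                 page <= ((gpu_addr + size - 1) >> page_bits); ++page) {
                sparse_page_table[page].push_back(image);
            }
            sparse_views.emplace(image, std::move(segment_maps));
        }
    };
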
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 96bf8f8d9..10093a11d 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -786,37 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
     return copies;
 }
 
-bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) {
-    if (gpu_addr == 0) {
+bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
+    const GPUVAddr address = config.Address();
+    if (address == 0) {
         return false;
     }
-    if (gpu_addr > (u64(1) << 48)) {
+    if (address > (1ULL << 48)) {
         return false;
     }
-    const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-    return cpu_addr.has_value() && *cpu_addr != 0;
-}
-
-bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
-    const GPUVAddr gpu_addr = config.Address();
-    if (IsValidAddress(gpu_memory, gpu_addr)) {
+    if (gpu_memory.GpuToCpuAddress(address).has_value()) {
         return true;
     }
-    if (!config.IsBlockLinear()) {
-        return false;
-    }
-    const size_t levels = config.max_mip_level + 1;
-    if (levels <= 1) {
-        return false;
-    }
     const ImageInfo info{config};
-    const LevelArray offsets = CalculateMipLevelOffsets(info);
-    for (size_t level = 1; level < levels; level++) {
-        if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) {
-            return true;
-        }
-    }
-    return false;
+    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
+    return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
 }
 
 std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
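
With IsValidAddress gone, TIC validation no longer special-cases block-linear mip chains: an entry is accepted if its base page is backed or, failing that, if any page of its guest data is. The same logic as a standalone sketch (TICEntry/ImageInfo handling elided):

    bool IsValidEntrySketch(const Tegra::MemoryManager& gpu_memory, GPUVAddr address,
                            size_t guest_size_bytes) {
        if (address == 0 || address > (1ULL << 48)) {
            return false; // null pointer or beyond the 48-bit GPU address space
        }
        if (gpu_memory.GpuToCpuAddress(address).has_value()) {
            return true; // base page is backed
        }
        // Sparse case: the size-based overload returns the CPU address of the
        // first mapped page anywhere in the range, if any page is mapped at all.
        return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
    }
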
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index b73361484..766502908 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -57,8 +57,6 @@ struct OverlapResult {
                                                         const ImageInfo& src,
                                                         SubresourceBase base);
 
-[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr);
-
 [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
 
 [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,