summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2021-06-12 15:52:27 +0200
committerGravatar Fernando Sahmkow2021-07-04 22:32:03 +0200
commit38165fb7e3e486b5099cfa76f5a09ec9f3201acd (patch)
treea80e66a254960c9c8c602f256cd37361c780d188
parentMerge pull request #6553 from FernandoS27/bite-a-bat-change-the-world (diff)
downloadyuzu-38165fb7e3e486b5099cfa76f5a09ec9f3201acd.tar.gz
yuzu-38165fb7e3e486b5099cfa76f5a09ec9f3201acd.tar.xz
yuzu-38165fb7e3e486b5099cfa76f5a09ec9f3201acd.zip
Texture Cache: Initial Implementation of Sparse Textures.
-rw-r--r--src/video_core/memory_manager.cpp7
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/texture_cache/image_base.cpp3
-rw-r--r--src/video_core/texture_cache/image_base.h8
-rw-r--r--src/video_core/texture_cache/texture_cache.h248
-rw-r--r--src/video_core/texture_cache/types.h1
-rw-r--r--src/video_core/texture_cache/util.cpp41
-rw-r--r--src/video_core/texture_cache/util.h6
12 files changed, 310 insertions, 23 deletions
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..3589c72ea 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -127,8 +127,13 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
127 127
128 //// Lock the new page 128 //// Lock the new page
129 // TryLockPage(page_entry, size); 129 // TryLockPage(page_entry, size);
130 auto& current_page = page_table[PageEntryIndex(gpu_addr)];
131 if (current_page.IsValid() != page_entry.IsValid() ||
132 current_page.ToAddress() != page_entry.ToAddress()) {
133 rasterizer->ModifyGPUMemory(gpu_addr, size);
134 }
130 135
131 page_table[PageEntryIndex(gpu_addr)] = page_entry; 136 current_page = page_entry;
132} 137}
133 138
134std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, 139std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..c0ba32346 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
87 /// Unmap memory range 87 /// Unmap memory range
88 virtual void UnmapMemory(VAddr addr, u64 size) = 0; 88 virtual void UnmapMemory(VAddr addr, u64 size) = 0;
89 89
90 /// Notify the rasterizer that the mapping of a GPU virtual memory range has changed
91 virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
92
90 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 93 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
91 /// and invalidated 94 /// and invalidated
92 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 95 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
611 shader_cache.OnCPUWrite(addr, size); 611 shader_cache.OnCPUWrite(addr, size);
612} 612}
613 613
614void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
615 {
616 std::scoped_lock lock{texture_cache.mutex};
617 texture_cache.UnmapGPUMemory(addr, size);
618 }
619}
620
614void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 621void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
615 if (!gpu.IsAsync()) { 622 if (!gpu.IsAsync()) {
616 gpu_memory.Write<u32>(addr, value); 623 gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
80 void OnCPUWrite(VAddr addr, u64 size) override; 80 void OnCPUWrite(VAddr addr, u64 size) override;
81 void SyncGuestHost() override; 81 void SyncGuestHost() override;
82 void UnmapMemory(VAddr addr, u64 size) override; 82 void UnmapMemory(VAddr addr, u64 size) override;
83 void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
83 void SignalSemaphore(GPUVAddr addr, u32 value) override; 84 void SignalSemaphore(GPUVAddr addr, u32 value) override;
84 void SignalSyncPoint(u32 value) override; 85 void SignalSyncPoint(u32 value) override;
85 void ReleaseFences() override; 86 void ReleaseFences() override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
557 pipeline_cache.OnCPUWrite(addr, size); 557 pipeline_cache.OnCPUWrite(addr, size);
558} 558}
559 559
560void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
561 {
562 std::scoped_lock lock{texture_cache.mutex};
563 texture_cache.UnmapGPUMemory(addr, size);
564 }
565}
566
560void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 567void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
561 if (!gpu.IsAsync()) { 568 if (!gpu.IsAsync()) {
562 gpu_memory.Write<u32>(addr, value); 569 gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
72 void OnCPUWrite(VAddr addr, u64 size) override; 72 void OnCPUWrite(VAddr addr, u64 size) override;
73 void SyncGuestHost() override; 73 void SyncGuestHost() override;
74 void UnmapMemory(VAddr addr, u64 size) override; 74 void UnmapMemory(VAddr addr, u64 size) override;
75 void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
75 void SignalSemaphore(GPUVAddr addr, u32 value) override; 76 void SignalSemaphore(GPUVAddr addr, u32 value) override;
76 void SignalSyncPoint(u32 value) override; 77 void SignalSyncPoint(u32 value) override;
77 void ReleaseFences() override; 78 void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index ad69d32d1..2aae338b6 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
69 } 69 }
70} 70}
71 71
72ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
73 : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
74
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { 75std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) { 76 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base 77 // Subresource address can't be lower than the base
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..004ec23e4 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -57,6 +57,12 @@ struct ImageBase {
57 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; 57 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
58 } 58 }
59 59
60 [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
61 const VAddr overlap_end = overlap_gpu_addr + overlap_size;
62 const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
63 return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
64 }
65
60 void CheckBadOverlapState(); 66 void CheckBadOverlapState();
61 void CheckAliasState(); 67 void CheckAliasState();
62 68
@@ -84,6 +90,8 @@ struct ImageBase {
84 90
85 std::vector<AliasedImage> aliased_images; 91 std::vector<AliasedImage> aliased_images;
86 std::vector<ImageId> overlapping_images; 92 std::vector<ImageId> overlapping_images;
93 ImageMapId map_view_id{};
94 bool is_sparse{};
87}; 95};
88 96
89struct ImageAllocBase { 97struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8dbd3824..9f6410d58 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -152,6 +152,9 @@ public:
152 /// Remove images in a region 152 /// Remove images in a region
153 void UnmapMemory(VAddr cpu_addr, size_t size); 153 void UnmapMemory(VAddr cpu_addr, size_t size);
154 154
155 /// Remove images in a GPU virtual address region
156 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
157
155 /// Blit an image with the given parameters 158 /// Blit an image with the given parameters
156 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 159 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
157 const Tegra::Engines::Fermi2D::Surface& src, 160 const Tegra::Engines::Fermi2D::Surface& src,
@@ -190,7 +193,22 @@ public:
190private: 193private:
191 /// Iterate over all page indices in a range 194 /// Iterate over all page indices in a range
192 template <typename Func> 195 template <typename Func>
193 static void ForEachPage(VAddr addr, size_t size, Func&& func) { 196 static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
197 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
198 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
199 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
200 if constexpr (RETURNS_BOOL) {
201 if (func(page)) {
202 break;
203 }
204 } else {
205 func(page);
206 }
207 }
208 }
209
210 template <typename Func>
211 static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
194 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; 212 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
195 const u64 page_end = (addr + size - 1) >> PAGE_BITS; 213 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
196 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { 214 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -269,6 +287,13 @@ private:
269 template <typename Func> 287 template <typename Func>
270 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); 288 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
271 289
290 template <typename Func>
291 void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
292
293 /// Iterates over the CPU-mapped mip level segments of an image calling func
294 template <typename Func>
295 void ForEachSparseSegment(ImageBase& image, Func&& func);
296
272 /// Find or create an image view in the given image with the passed parameters 297 /// Find or create an image view in the given image with the passed parameters
273 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); 298 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
274 299
@@ -340,7 +365,8 @@ private:
340 std::unordered_map<TSCEntry, SamplerId> samplers; 365 std::unordered_map<TSCEntry, SamplerId> samplers;
341 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 366 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
342 367
343 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; 368 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
369 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
344 370
345 bool has_deleted_images = false; 371 bool has_deleted_images = false;
346 u64 total_used_memory = 0; 372 u64 total_used_memory = 0;
@@ -349,6 +375,7 @@ private:
349 u64 critical_memory; 375 u64 critical_memory;
350 376
351 SlotVector<Image> slot_images; 377 SlotVector<Image> slot_images;
378 SlotVector<ImageMapView> slot_map_views;
352 SlotVector<ImageView> slot_image_views; 379 SlotVector<ImageView> slot_image_views;
353 SlotVector<ImageAlloc> slot_image_allocs; 380 SlotVector<ImageAlloc> slot_image_allocs;
354 SlotVector<Sampler> slot_samplers; 381 SlotVector<Sampler> slot_samplers;
@@ -703,6 +730,21 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
703} 730}
704 731
705template <class P> 732template <class P>
733void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
734 std::vector<ImageId> deleted_images;
735 ForEachImageInRegionGPU(gpu_addr, size,
736 [&](ImageId id, Image&) { deleted_images.push_back(id); });
737 for (const ImageId id : deleted_images) {
738 Image& image = slot_images[id];
739 if (True(image.flags & ImageFlagBits::Tracked)) {
740 UntrackImage(image);
741 }
742 UnregisterImage(id);
743 DeleteImage(id);
744 }
745}
746
747template <class P>
706void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 748void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
707 const Tegra::Engines::Fermi2D::Surface& src, 749 const Tegra::Engines::Fermi2D::Surface& src,
708 const Tegra::Engines::Fermi2D::Config& copy, 750 const Tegra::Engines::Fermi2D::Config& copy,
@@ -833,9 +875,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
833 if (it == page_table.end()) { 875 if (it == page_table.end()) {
834 return nullptr; 876 return nullptr;
835 } 877 }
836 const auto& image_ids = it->second; 878 const auto& image_map_ids = it->second;
837 for (const ImageId image_id : image_ids) { 879 for (const ImageMapId map_id : image_map_ids) {
838 const ImageBase& image = slot_images[image_id]; 880 const ImageMapView& map = slot_map_views[map_id];
881 const ImageBase& image = slot_images[map.image_id];
839 if (image.cpu_addr != cpu_addr) { 882 if (image.cpu_addr != cpu_addr) {
840 continue; 883 continue;
841 } 884 }
@@ -958,7 +1001,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
958 1001
959template <class P> 1002template <class P>
960ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { 1003ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
961 if (!IsValidAddress(gpu_memory, config)) { 1004 if (!IsValidEntry(gpu_memory, config)) {
962 return NULL_IMAGE_VIEW_ID; 1005 return NULL_IMAGE_VIEW_ID;
963 } 1006 }
964 const auto [pair, is_new] = image_views.try_emplace(config); 1007 const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1026,7 +1069,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1026 } 1069 }
1027 return false; 1070 return false;
1028 }; 1071 };
1029 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); 1072 ForEachImageInRegionGPU(gpu_addr, CalculateGuestSizeInBytes(info), lambda);
1030 return image_id; 1073 return image_id;
1031} 1074}
1032 1075
@@ -1056,7 +1099,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1056 std::vector<ImageId> left_aliased_ids; 1099 std::vector<ImageId> left_aliased_ids;
1057 std::vector<ImageId> right_aliased_ids; 1100 std::vector<ImageId> right_aliased_ids;
1058 std::vector<ImageId> bad_overlap_ids; 1101 std::vector<ImageId> bad_overlap_ids;
1059 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { 1102 ForEachImageInRegionGPU(gpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
1060 if (info.type == ImageType::Linear) { 1103 if (info.type == ImageType::Linear) {
1061 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { 1104 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1062 // Alias linear images with the same pitch 1105 // Alias linear images with the same pitch
@@ -1091,6 +1134,24 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1091 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1134 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
1092 Image& new_image = slot_images[new_image_id]; 1135 Image& new_image = slot_images[new_image_id];
1093 1136
1137 new_image.is_sparse = false;
1138 if (new_image.info.type != ImageType::Linear && new_image.info.type != ImageType::Buffer) {
1139 const LevelArray offsets = CalculateMipLevelOffsets(new_image.info);
1140 size_t level;
1141 const size_t levels = static_cast<size_t>(new_image.info.resources.levels);
1142 VAddr n_cpu_addr = new_image.cpu_addr;
1143 GPUVAddr n_gpu_addr = new_image.gpu_addr;
1144 for (level = 0; level < levels; level++) {
1145 n_gpu_addr += offsets[level];
1146 n_cpu_addr += offsets[level];
1147 std::optional<VAddr> cpu_addr_opt = gpu_memory.GpuToCpuAddress(n_gpu_addr);
1148 if (!cpu_addr_opt || *cpu_addr_opt == 0 || n_cpu_addr != *cpu_addr_opt) {
1149 new_image.is_sparse = true;
1150 break;
1151 }
1152 }
1153 }
1154
1094 // TODO: Only upload what we need 1155 // TODO: Only upload what we need
1095 RefreshContents(new_image); 1156 RefreshContents(new_image);
1096 1157
@@ -1239,7 +1300,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1239 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; 1300 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1240 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; 1301 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1241 boost::container::small_vector<ImageId, 32> images; 1302 boost::container::small_vector<ImageId, 32> images;
1242 ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { 1303 boost::container::small_vector<ImageMapId, 32> maps;
1304 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
1243 const auto it = page_table.find(page); 1305 const auto it = page_table.find(page);
1244 if (it == page_table.end()) { 1306 if (it == page_table.end()) {
1245 if constexpr (BOOL_BREAK) { 1307 if constexpr (BOOL_BREAK) {
@@ -1248,12 +1310,63 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1248 return; 1310 return;
1249 } 1311 }
1250 } 1312 }
1313 for (const ImageMapId map_id : it->second) {
1314 ImageMapView& map = slot_map_views[map_id];
1315 if (map.picked) {
1316 continue;
1317 }
1318 if (!map.Overlaps(cpu_addr, size)) {
1319 continue;
1320 }
1321 map.picked = true;
1322 maps.push_back(map_id);
1323 Image& image = slot_images[map.image_id];
1324 if (True(image.flags & ImageFlagBits::Picked)) {
1325 continue;
1326 }
1327 image.flags |= ImageFlagBits::Picked;
1328 images.push_back(map.image_id);
1329 if constexpr (BOOL_BREAK) {
1330 if (func(map.image_id, image)) {
1331 return true;
1332 }
1333 } else {
1334 func(map.image_id, image);
1335 }
1336 }
1337 if constexpr (BOOL_BREAK) {
1338 return false;
1339 }
1340 });
1341 for (const ImageId image_id : images) {
1342 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1343 }
1344 for (const ImageMapId map_id : maps) {
1345 slot_map_views[map_id].picked = false;
1346 }
1347}
1348
1349template <class P>
1350template <typename Func>
1351void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
1352 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1353 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1354 boost::container::small_vector<ImageId, 8> images;
1355 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1356 const auto it = gpu_page_table.find(page);
1357 if (it == gpu_page_table.end()) {
1358 if constexpr (BOOL_BREAK) {
1359 return false;
1360 } else {
1361 return;
1362 }
1363 }
1251 for (const ImageId image_id : it->second) { 1364 for (const ImageId image_id : it->second) {
1252 Image& image = slot_images[image_id]; 1365 Image& image = slot_images[image_id];
1253 if (True(image.flags & ImageFlagBits::Picked)) { 1366 if (True(image.flags & ImageFlagBits::Picked)) {
1254 continue; 1367 continue;
1255 } 1368 }
1256 if (!image.Overlaps(cpu_addr, size)) { 1369 if (!image.OverlapsGPU(gpu_addr, size)) {
1257 continue; 1370 continue;
1258 } 1371 }
1259 image.flags |= ImageFlagBits::Picked; 1372 image.flags |= ImageFlagBits::Picked;
@@ -1276,6 +1389,30 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1276} 1389}
1277 1390
1278template <class P> 1391template <class P>
1392template <typename Func>
1393void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1394 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1395 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1396 GPUVAddr gpu_addr = image.gpu_addr;
1397 const size_t levels = image.info.resources.levels;
1398 const auto mipmap_sizes = CalculateMipLevelSizes(image.info);
1399 for (size_t level = 0; level < levels; level++) {
1400 const size_t size = mipmap_sizes[level];
1401 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1402 if (cpu_addr && *cpu_addr != 0) {
1403 if constexpr (BOOL_BREAK) {
1404 if (func(gpu_addr, *cpu_addr, size)) {
1405 return true;
1406 }
1407 } else {
1408 func(gpu_addr, *cpu_addr, size);
1409 }
1410 }
1411 gpu_addr += size;
1412 }
1413}
1414
1415template <class P>
1279ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { 1416ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1280 Image& image = slot_images[image_id]; 1417 Image& image = slot_images[image_id];
1281 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { 1418 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
@@ -1292,8 +1429,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1292 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), 1429 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1293 "Trying to register an already registered image"); 1430 "Trying to register an already registered image");
1294 image.flags |= ImageFlagBits::Registered; 1431 image.flags |= ImageFlagBits::Registered;
1295 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1296 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1297 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); 1432 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1298 if ((IsPixelFormatASTC(image.info.format) && 1433 if ((IsPixelFormatASTC(image.info.format) &&
1299 True(image.flags & ImageFlagBits::AcceleratedUpload)) || 1434 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
@@ -1301,6 +1436,21 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1301 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1436 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1302 } 1437 }
1303 total_used_memory += Common::AlignUp(tentative_size, 1024); 1438 total_used_memory += Common::AlignUp(tentative_size, 1024);
1439 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1440 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1441 if (!image.is_sparse) {
1442 auto map_id =
1443 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1444 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1445 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1446 image.map_view_id = map_id;
1447 return;
1448 }
1449 ForEachSparseSegment(image, [this, image_id](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1450 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1451 ForEachCPUPage(cpu_addr, size,
1452 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1453 });
1304} 1454}
1305 1455
1306template <class P> 1456template <class P>
@@ -1317,9 +1467,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1317 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1467 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1318 } 1468 }
1319 total_used_memory -= Common::AlignUp(tentative_size, 1024); 1469 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1320 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { 1470 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1321 const auto page_it = page_table.find(page); 1471 const auto page_it = gpu_page_table.find(page);
1322 if (page_it == page_table.end()) { 1472 if (page_it == gpu_page_table.end()) {
1323 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); 1473 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1324 return; 1474 return;
1325 } 1475 }
@@ -1331,20 +1481,84 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1331 } 1481 }
1332 image_ids.erase(vector_it); 1482 image_ids.erase(vector_it);
1333 }); 1483 });
1484 if (!image.is_sparse) {
1485 const auto map_id = image.map_view_id;
1486 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1487 const auto page_it = page_table.find(page);
1488 if (page_it == page_table.end()) {
1489 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1490 return;
1491 }
1492 std::vector<ImageMapId>& image_map_ids = page_it->second;
1493 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1494 if (vector_it == image_map_ids.end()) {
1495 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1496 page << PAGE_BITS);
1497 return;
1498 }
1499 image_map_ids.erase(vector_it);
1500 });
1501 slot_map_views.erase(map_id);
1502 return;
1503 }
1504 boost::container::small_vector<ImageMapId, 8> maps_to_delete;
1505 ForEachSparseSegment(
1506 image, [this, image_id, &maps_to_delete]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr,
1507 size_t size) {
1508 ForEachCPUPage(cpu_addr, size, [this, image_id, &maps_to_delete](u64 page) {
1509 const auto page_it = page_table.find(page);
1510 if (page_it == page_table.end()) {
1511 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1512 return;
1513 }
1514 std::vector<ImageMapId>& image_map_ids = page_it->second;
1515 auto vector_it = image_map_ids.begin();
1516 while (vector_it != image_map_ids.end()) {
1517 ImageMapView& map = slot_map_views[*vector_it];
1518 if (map.image_id != image_id) {
1519 vector_it++;
1520 continue;
1521 }
1522 if (!map.picked) {
1523 maps_to_delete.push_back(*vector_it);
1524 map.picked = true;
1525 }
1526 vector_it = image_map_ids.erase(vector_it);
1527 }
1528 });
1529 });
1530
1531 for (const ImageMapId map_id : maps_to_delete) {
1532 slot_map_views.erase(map_id);
1533 }
1334} 1534}
1335 1535
1336template <class P> 1536template <class P>
1337void TextureCache<P>::TrackImage(ImageBase& image) { 1537void TextureCache<P>::TrackImage(ImageBase& image) {
1338 ASSERT(False(image.flags & ImageFlagBits::Tracked)); 1538 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1339 image.flags |= ImageFlagBits::Tracked; 1539 image.flags |= ImageFlagBits::Tracked;
1340 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); 1540 if (!image.is_sparse) {
1541 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1542 return;
1543 }
1544 ForEachSparseSegment(image,
1545 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1546 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1547 });
1341} 1548}
1342 1549
1343template <class P> 1550template <class P>
1344void TextureCache<P>::UntrackImage(ImageBase& image) { 1551void TextureCache<P>::UntrackImage(ImageBase& image) {
1345 ASSERT(True(image.flags & ImageFlagBits::Tracked)); 1552 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1346 image.flags &= ~ImageFlagBits::Tracked; 1553 image.flags &= ~ImageFlagBits::Tracked;
1347 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); 1554 if (!image.is_sparse) {
1555 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1556 return;
1557 }
1558 ForEachSparseSegment(image,
1559 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1560 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1561 });
1348} 1562}
1349 1563
1350template <class P> 1564template <class P>
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index c9571f7e4..9fbdc1ac6 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
16constexpr SlotId CORRUPT_ID{0xfffffffe}; 16constexpr SlotId CORRUPT_ID{0xfffffffe};
17 17
18using ImageId = SlotId; 18using ImageId = SlotId;
19using ImageMapId = SlotId;
19using ImageViewId = SlotId; 20using ImageViewId = SlotId;
20using ImageAllocId = SlotId; 21using ImageAllocId = SlotId;
21using SamplerId = SlotId; 22using SamplerId = SlotId;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 4efe042b6..96bf8f8d9 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
664 return offsets; 664 return offsets;
665} 665}
666 666
667LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
668 const u32 num_levels = info.resources.levels;
669 const LevelInfo level_info = MakeLevelInfo(info);
670 LevelArray sizes{};
671 for (u32 level = 0; level < num_levels; ++level) {
672 sizes[level] = CalculateLevelSize(level_info, level);
673 }
674 return sizes;
675}
676
667std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { 677std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
668 ASSERT(info.type == ImageType::e3D); 678 ASSERT(info.type == ImageType::e3D);
669 std::vector<u32> offsets; 679 std::vector<u32> offsets;
@@ -776,14 +786,37 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
776 return copies; 786 return copies;
777} 787}
778 788
779bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { 789bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr) {
780 if (config.Address() == 0) { 790 if (gpu_addr == 0) {
781 return false; 791 return false;
782 } 792 }
783 if (config.Address() > (u64(1) << 48)) { 793 if (gpu_addr > (u64(1) << 48)) {
784 return false; 794 return false;
785 } 795 }
786 return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); 796 const auto cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
797 return cpu_addr.has_value() && *cpu_addr != 0;
798}
799
800bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
801 const GPUVAddr gpu_addr = config.Address();
802 if (IsValidAddress(gpu_memory, gpu_addr)) {
803 return true;
804 }
805 if (!config.IsBlockLinear()) {
806 return false;
807 }
808 const size_t levels = config.max_mip_level + 1;
809 if (levels <= 1) {
810 return false;
811 }
812 const ImageInfo info{config};
813 const LevelArray offsets = CalculateMipLevelOffsets(info);
814 for (size_t level = 1; level < levels; level++) {
815 if (IsValidAddress(gpu_memory, static_cast<GPUVAddr>(gpu_addr + offsets[level]))) {
816 return true;
817 }
818 }
819 return false;
787} 820}
788 821
789std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 822std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index cdc5cbc75..b73361484 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -40,6 +40,8 @@ struct OverlapResult {
40 40
41[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; 41[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
42 42
43[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
44
43[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); 45[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
44 46
45[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); 47[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
@@ -55,7 +57,9 @@ struct OverlapResult {
55 const ImageInfo& src, 57 const ImageInfo& src,
56 SubresourceBase base); 58 SubresourceBase base);
57 59
58[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); 60[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr);
61
62[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
59 63
60[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 64[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
61 GPUVAddr gpu_addr, const ImageInfo& info, 65 GPUVAddr gpu_addr, const ImageInfo& info,