summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/texture_cache/image_base.cpp7
-rw-r--r--src/video_core/texture_cache/image_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h141
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h16
4 files changed, 105 insertions, 61 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 91512022f..d79594ce5 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
155 flags &= ~ImageFlagBits::Alias; 155 flags &= ~ImageFlagBits::Alias;
156} 156}
157 157
158void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { 158bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; 159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
160 ASSERT(lhs.info.type == rhs.info.type); 160 ASSERT(lhs.info.type == rhs.info.type);
161 std::optional<SubresourceBase> base; 161 std::optional<SubresourceBase> base;
@@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
169 } 169 }
170 if (!base) { 170 if (!base) {
171 LOG_ERROR(HW_GPU, "Image alias should have been flipped"); 171 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
172 return; 172 return false;
173 } 173 }
174 const PixelFormat lhs_format = lhs.info.format; 174 const PixelFormat lhs_format = lhs.info.format;
175 const PixelFormat rhs_format = rhs.info.format; 175 const PixelFormat rhs_format = rhs.info.format;
@@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
248 } 248 }
249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); 249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
250 if (lhs_alias.copies.empty()) { 250 if (lhs_alias.copies.empty()) {
251 return; 251 return false;
252 } 252 }
253 lhs.aliased_images.push_back(std::move(lhs_alias)); 253 lhs.aliased_images.push_back(std::move(lhs_alias));
254 rhs.aliased_images.push_back(std::move(rhs_alias)); 254 rhs.aliased_images.push_back(std::move(rhs_alias));
255 lhs.flags &= ~ImageFlagBits::IsRescalable; 255 lhs.flags &= ~ImageFlagBits::IsRescalable;
256 rhs.flags &= ~ImageFlagBits::IsRescalable; 256 rhs.flags &= ~ImageFlagBits::IsRescalable;
257 return true;
257} 258}
258 259
259} // namespace VideoCommon 260} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 329396bb6..1b8a17ee8 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -142,6 +142,6 @@ struct ImageAllocBase {
142 std::vector<ImageId> images; 142 std::vector<ImageId> images;
143}; 143};
144 144
145void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); 145bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
146 146
147} // namespace VideoCommon 147} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index fe13cac93..2cf082c5d 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1311,17 +1311,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1311 const size_t size_bytes = CalculateGuestSizeInBytes(new_info); 1311 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
1312 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1312 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1313 const bool native_bgr = runtime.HasNativeBgr(); 1313 const bool native_bgr = runtime.HasNativeBgr();
1314 boost::container::small_vector<ImageId, 4> overlap_ids; 1314 join_overlap_ids.clear();
1315 std::unordered_set<ImageId> overlaps_found; 1315 join_overlaps_found.clear();
1316 boost::container::small_vector<ImageId, 4> left_aliased_ids; 1316 join_left_aliased_ids.clear();
1317 boost::container::small_vector<ImageId, 4> right_aliased_ids; 1317 join_right_aliased_ids.clear();
1318 std::unordered_set<ImageId> ignore_textures; 1318 join_ignore_textures.clear();
1319 boost::container::small_vector<ImageId, 4> bad_overlap_ids; 1319 join_bad_overlap_ids.clear();
1320 boost::container::small_vector<ImageId, 4> all_siblings; 1320 join_copies_to_do.clear();
1321 join_alias_indices.clear();
1321 const bool this_is_linear = info.type == ImageType::Linear; 1322 const bool this_is_linear = info.type == ImageType::Linear;
1322 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { 1323 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
1323 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1324 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1324 ignore_textures.insert(overlap_id); 1325 join_ignore_textures.insert(overlap_id);
1325 return; 1326 return;
1326 } 1327 }
1327 const bool overlap_is_linear = overlap.info.type == ImageType::Linear; 1328 const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
@@ -1331,11 +1332,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1331 if (this_is_linear && overlap_is_linear) { 1332 if (this_is_linear && overlap_is_linear) {
1332 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { 1333 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1333 // Alias linear images with the same pitch 1334 // Alias linear images with the same pitch
1334 left_aliased_ids.push_back(overlap_id); 1335 join_left_aliased_ids.push_back(overlap_id);
1335 } 1336 }
1336 return; 1337 return;
1337 } 1338 }
1338 overlaps_found.insert(overlap_id); 1339 join_overlaps_found.insert(overlap_id);
1339 static constexpr bool strict_size = true; 1340 static constexpr bool strict_size = true;
1340 const std::optional<OverlapResult> solution = ResolveOverlap( 1341 const std::optional<OverlapResult> solution = ResolveOverlap(
1341 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); 1342 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1343,33 +1344,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1343 gpu_addr = solution->gpu_addr; 1344 gpu_addr = solution->gpu_addr;
1344 cpu_addr = solution->cpu_addr; 1345 cpu_addr = solution->cpu_addr;
1345 new_info.resources = solution->resources; 1346 new_info.resources = solution->resources;
1346 overlap_ids.push_back(overlap_id); 1347 join_overlap_ids.push_back(overlap_id);
1347 all_siblings.push_back(overlap_id); 1348 join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
1348 return; 1349 return;
1349 } 1350 }
1350 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; 1351 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
1351 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 1352 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
1352 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { 1353 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
1353 left_aliased_ids.push_back(overlap_id); 1354 join_left_aliased_ids.push_back(overlap_id);
1354 overlap.flags |= ImageFlagBits::Alias; 1355 overlap.flags |= ImageFlagBits::Alias;
1355 all_siblings.push_back(overlap_id); 1356 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1356 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 1357 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
1357 broken_views, native_bgr)) { 1358 broken_views, native_bgr)) {
1358 right_aliased_ids.push_back(overlap_id); 1359 join_right_aliased_ids.push_back(overlap_id);
1359 overlap.flags |= ImageFlagBits::Alias; 1360 overlap.flags |= ImageFlagBits::Alias;
1360 all_siblings.push_back(overlap_id); 1361 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1361 } else { 1362 } else {
1362 bad_overlap_ids.push_back(overlap_id); 1363 join_bad_overlap_ids.push_back(overlap_id);
1363 } 1364 }
1364 }; 1365 };
1365 ForEachImageInRegion(cpu_addr, size_bytes, region_check); 1366 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
1366 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { 1367 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
1367 if (!overlaps_found.contains(overlap_id)) { 1368 if (!join_overlaps_found.contains(overlap_id)) {
1368 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1369 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1369 ignore_textures.insert(overlap_id); 1370 join_ignore_textures.insert(overlap_id);
1370 } 1371 }
1371 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { 1372 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
1372 ignore_textures.insert(overlap_id); 1373 join_ignore_textures.insert(overlap_id);
1373 } 1374 }
1374 } 1375 }
1375 }; 1376 };
@@ -1377,11 +1378,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1377 1378
1378 bool can_rescale = info.rescaleable; 1379 bool can_rescale = info.rescaleable;
1379 bool any_rescaled = false; 1380 bool any_rescaled = false;
1380 for (const ImageId sibling_id : all_siblings) { 1381 for (const auto& copy : join_copies_to_do) {
1381 if (!can_rescale) { 1382 if (!can_rescale) {
1382 break; 1383 break;
1383 } 1384 }
1384 Image& sibling = slot_images[sibling_id]; 1385 Image& sibling = slot_images[copy.id];
1385 can_rescale &= ImageCanRescale(sibling); 1386 can_rescale &= ImageCanRescale(sibling);
1386 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); 1387 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
1387 } 1388 }
@@ -1389,13 +1390,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1389 can_rescale &= any_rescaled; 1390 can_rescale &= any_rescaled;
1390 1391
1391 if (can_rescale) { 1392 if (can_rescale) {
1392 for (const ImageId sibling_id : all_siblings) { 1393 for (const auto& copy : join_copies_to_do) {
1393 Image& sibling = slot_images[sibling_id]; 1394 Image& sibling = slot_images[copy.id];
1394 ScaleUp(sibling); 1395 ScaleUp(sibling);
1395 } 1396 }
1396 } else { 1397 } else {
1397 for (const ImageId sibling_id : all_siblings) { 1398 for (const auto& copy : join_copies_to_do) {
1398 Image& sibling = slot_images[sibling_id]; 1399 Image& sibling = slot_images[copy.id];
1399 ScaleDown(sibling); 1400 ScaleDown(sibling);
1400 } 1401 }
1401 } 1402 }
@@ -1407,7 +1408,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1407 new_image.flags |= ImageFlagBits::Sparse; 1408 new_image.flags |= ImageFlagBits::Sparse;
1408 } 1409 }
1409 1410
1410 for (const ImageId overlap_id : ignore_textures) { 1411 for (const ImageId overlap_id : join_ignore_textures) {
1411 Image& overlap = slot_images[overlap_id]; 1412 Image& overlap = slot_images[overlap_id];
1412 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1413 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1413 UNIMPLEMENTED(); 1414 UNIMPLEMENTED();
@@ -1428,14 +1429,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1428 ScaleDown(new_image); 1429 ScaleDown(new_image);
1429 } 1430 }
1430 1431
1431 std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { 1432 std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
1432 const ImageBase& lhs_image = slot_images[lhs]; 1433 const ImageBase& lhs_image = slot_images[lhs.id];
1433 const ImageBase& rhs_image = slot_images[rhs]; 1434 const ImageBase& rhs_image = slot_images[rhs.id];
1434 return lhs_image.modification_tick < rhs_image.modification_tick; 1435 return lhs_image.modification_tick < rhs_image.modification_tick;
1435 }); 1436 });
1436 1437
1437 for (const ImageId overlap_id : overlap_ids) { 1438 ImageBase& new_image_base = new_image;
1438 Image& overlap = slot_images[overlap_id]; 1439 for (const ImageId aliased_id : join_right_aliased_ids) {
1440 ImageBase& aliased = slot_images[aliased_id];
1441 size_t alias_index = new_image_base.aliased_images.size();
1442 if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
1443 continue;
1444 }
1445 join_alias_indices.emplace(aliased_id, alias_index);
1446 new_image.flags |= ImageFlagBits::Alias;
1447 }
1448 for (const ImageId aliased_id : join_left_aliased_ids) {
1449 ImageBase& aliased = slot_images[aliased_id];
1450 size_t alias_index = new_image_base.aliased_images.size();
1451 if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
1452 continue;
1453 }
1454 join_alias_indices.emplace(aliased_id, alias_index);
1455 new_image.flags |= ImageFlagBits::Alias;
1456 }
1457 for (const ImageId aliased_id : join_bad_overlap_ids) {
1458 ImageBase& aliased = slot_images[aliased_id];
1459 aliased.overlapping_images.push_back(new_image_id);
1460 new_image.overlapping_images.push_back(aliased_id);
1461 if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
1462 aliased.overlapping_images.size() > 1) {
1463 aliased.flags |= ImageFlagBits::BadOverlap;
1464 }
1465 if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
1466 new_image.overlapping_images.size() > 1) {
1467 new_image.flags |= ImageFlagBits::BadOverlap;
1468 }
1469 }
1470
1471 for (const auto& copy_object : join_copies_to_do) {
1472 Image& overlap = slot_images[copy_object.id];
1473 if (copy_object.is_alias) {
1474 if (!overlap.IsSafeDownload()) {
1475 continue;
1476 }
1477 const auto alias_pointer = join_alias_indices.find(copy_object.id);
1478 if (alias_pointer == join_alias_indices.end()) {
1479 continue;
1480 }
1481 const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
1482 CopyImage(new_image_id, aliased.id, aliased.copies);
1483 new_image.modification_tick = overlap.modification_tick;
1484 continue;
1485 }
1439 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1486 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1440 new_image.flags |= ImageFlagBits::GpuModified; 1487 new_image.flags |= ImageFlagBits::GpuModified;
1441 const auto& resolution = Settings::values.resolution_info; 1488 const auto& resolution = Settings::values.resolution_info;
@@ -1448,35 +1495,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1448 } else { 1495 } else {
1449 runtime.CopyImage(new_image, overlap, std::move(copies)); 1496 runtime.CopyImage(new_image, overlap, std::move(copies));
1450 } 1497 }
1498 new_image.modification_tick = overlap.modification_tick;
1451 } 1499 }
1452 if (True(overlap.flags & ImageFlagBits::Tracked)) { 1500 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1453 UntrackImage(overlap, overlap_id); 1501 UntrackImage(overlap, copy_object.id);
1454 }
1455 UnregisterImage(overlap_id);
1456 DeleteImage(overlap_id);
1457 }
1458 ImageBase& new_image_base = new_image;
1459 for (const ImageId aliased_id : right_aliased_ids) {
1460 ImageBase& aliased = slot_images[aliased_id];
1461 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1462 new_image.flags |= ImageFlagBits::Alias;
1463 }
1464 for (const ImageId aliased_id : left_aliased_ids) {
1465 ImageBase& aliased = slot_images[aliased_id];
1466 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1467 new_image.flags |= ImageFlagBits::Alias;
1468 }
1469 for (const ImageId aliased_id : bad_overlap_ids) {
1470 ImageBase& aliased = slot_images[aliased_id];
1471 aliased.overlapping_images.push_back(new_image_id);
1472 new_image.overlapping_images.push_back(aliased_id);
1473 if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
1474 aliased.flags |= ImageFlagBits::BadOverlap;
1475 }
1476 if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
1477 new_image.flags |= ImageFlagBits::BadOverlap;
1478 } 1502 }
1503 UnregisterImage(copy_object.id);
1504 DeleteImage(copy_object.id);
1479 } 1505 }
1506
1480 RegisterImage(new_image_id); 1507 RegisterImage(new_image_id);
1481 return new_image_id; 1508 return new_image_id;
1482} 1509}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index cc27286f7..3bfa92154 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -10,7 +10,9 @@
10#include <span> 10#include <span>
11#include <type_traits> 11#include <type_traits>
12#include <unordered_map> 12#include <unordered_map>
13#include <unordered_set>
13#include <vector> 14#include <vector>
15#include <boost/container/small_vector.hpp>
14#include <queue> 16#include <queue>
15 17
16#include "common/common_types.h" 18#include "common/common_types.h"
@@ -476,6 +478,20 @@ private:
476 478
477 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; 479 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
478 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; 480 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
481
482 // Join caching
483 boost::container::small_vector<ImageId, 4> join_overlap_ids;
484 std::unordered_set<ImageId> join_overlaps_found;
485 boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
486 boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
487 std::unordered_set<ImageId> join_ignore_textures;
488 boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
489 struct JoinCopy {
490 bool is_alias;
491 ImageId id;
492 };
493 boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
494 std::unordered_map<ImageId, size_t> join_alias_indices;
479}; 495};
480 496
481} // namespace VideoCommon 497} // namespace VideoCommon