summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-05-23 01:34:46 -0400
committerGravatar Fernando Sahmkow2023-05-24 09:53:42 +0200
commit72c1ee1bf96be45e2794f26ad83dbd13e892c22d (patch)
treedd7fbc88c130471ae9f77049fa9782b33d929d22 /src
parentMerge pull request #10386 from german77/high (diff)
downloadyuzu-72c1ee1bf96be45e2794f26ad83dbd13e892c22d.tar.gz
yuzu-72c1ee1bf96be45e2794f26ad83dbd13e892c22d.tar.xz
yuzu-72c1ee1bf96be45e2794f26ad83dbd13e892c22d.zip
texture_cache: process aliases and overlaps in the correct order
Diffstat (limited to 'src')
-rw-r--r--src/video_core/texture_cache/image_base.cpp7
-rw-r--r--src/video_core/texture_cache/image_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h141
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h16
4 files changed, 105 insertions, 61 deletions
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 91512022f..d79594ce5 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
155 flags &= ~ImageFlagBits::Alias; 155 flags &= ~ImageFlagBits::Alias;
156} 156}
157 157
158void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { 158bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; 159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
160 ASSERT(lhs.info.type == rhs.info.type); 160 ASSERT(lhs.info.type == rhs.info.type);
161 std::optional<SubresourceBase> base; 161 std::optional<SubresourceBase> base;
@@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
169 } 169 }
170 if (!base) { 170 if (!base) {
171 LOG_ERROR(HW_GPU, "Image alias should have been flipped"); 171 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
172 return; 172 return false;
173 } 173 }
174 const PixelFormat lhs_format = lhs.info.format; 174 const PixelFormat lhs_format = lhs.info.format;
175 const PixelFormat rhs_format = rhs.info.format; 175 const PixelFormat rhs_format = rhs.info.format;
@@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
248 } 248 }
249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); 249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
250 if (lhs_alias.copies.empty()) { 250 if (lhs_alias.copies.empty()) {
251 return; 251 return false;
252 } 252 }
253 lhs.aliased_images.push_back(std::move(lhs_alias)); 253 lhs.aliased_images.push_back(std::move(lhs_alias));
254 rhs.aliased_images.push_back(std::move(rhs_alias)); 254 rhs.aliased_images.push_back(std::move(rhs_alias));
255 lhs.flags &= ~ImageFlagBits::IsRescalable; 255 lhs.flags &= ~ImageFlagBits::IsRescalable;
256 rhs.flags &= ~ImageFlagBits::IsRescalable; 256 rhs.flags &= ~ImageFlagBits::IsRescalable;
257 return true;
257} 258}
258 259
259} // namespace VideoCommon 260} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 329396bb6..1b8a17ee8 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -142,6 +142,6 @@ struct ImageAllocBase {
142 std::vector<ImageId> images; 142 std::vector<ImageId> images;
143}; 143};
144 144
145void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); 145bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
146 146
147} // namespace VideoCommon 147} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b24086fce..f5c12d992 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1274,17 +1274,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1274 const size_t size_bytes = CalculateGuestSizeInBytes(new_info); 1274 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
1275 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1275 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1276 const bool native_bgr = runtime.HasNativeBgr(); 1276 const bool native_bgr = runtime.HasNativeBgr();
1277 boost::container::small_vector<ImageId, 4> overlap_ids; 1277 join_overlap_ids.clear();
1278 std::unordered_set<ImageId> overlaps_found; 1278 join_overlaps_found.clear();
1279 boost::container::small_vector<ImageId, 4> left_aliased_ids; 1279 join_left_aliased_ids.clear();
1280 boost::container::small_vector<ImageId, 4> right_aliased_ids; 1280 join_right_aliased_ids.clear();
1281 std::unordered_set<ImageId> ignore_textures; 1281 join_ignore_textures.clear();
1282 boost::container::small_vector<ImageId, 4> bad_overlap_ids; 1282 join_bad_overlap_ids.clear();
1283 boost::container::small_vector<ImageId, 4> all_siblings; 1283 join_copies_to_do.clear();
1284 join_alias_indices.clear();
1284 const bool this_is_linear = info.type == ImageType::Linear; 1285 const bool this_is_linear = info.type == ImageType::Linear;
1285 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { 1286 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
1286 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1287 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1287 ignore_textures.insert(overlap_id); 1288 join_ignore_textures.insert(overlap_id);
1288 return; 1289 return;
1289 } 1290 }
1290 const bool overlap_is_linear = overlap.info.type == ImageType::Linear; 1291 const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
@@ -1294,11 +1295,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1294 if (this_is_linear && overlap_is_linear) { 1295 if (this_is_linear && overlap_is_linear) {
1295 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { 1296 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1296 // Alias linear images with the same pitch 1297 // Alias linear images with the same pitch
1297 left_aliased_ids.push_back(overlap_id); 1298 join_left_aliased_ids.push_back(overlap_id);
1298 } 1299 }
1299 return; 1300 return;
1300 } 1301 }
1301 overlaps_found.insert(overlap_id); 1302 join_overlaps_found.insert(overlap_id);
1302 static constexpr bool strict_size = true; 1303 static constexpr bool strict_size = true;
1303 const std::optional<OverlapResult> solution = ResolveOverlap( 1304 const std::optional<OverlapResult> solution = ResolveOverlap(
1304 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); 1305 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1306,33 +1307,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1306 gpu_addr = solution->gpu_addr; 1307 gpu_addr = solution->gpu_addr;
1307 cpu_addr = solution->cpu_addr; 1308 cpu_addr = solution->cpu_addr;
1308 new_info.resources = solution->resources; 1309 new_info.resources = solution->resources;
1309 overlap_ids.push_back(overlap_id); 1310 join_overlap_ids.push_back(overlap_id);
1310 all_siblings.push_back(overlap_id); 1311 join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
1311 return; 1312 return;
1312 } 1313 }
1313 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; 1314 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
1314 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 1315 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
1315 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { 1316 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
1316 left_aliased_ids.push_back(overlap_id); 1317 join_left_aliased_ids.push_back(overlap_id);
1317 overlap.flags |= ImageFlagBits::Alias; 1318 overlap.flags |= ImageFlagBits::Alias;
1318 all_siblings.push_back(overlap_id); 1319 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1319 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 1320 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
1320 broken_views, native_bgr)) { 1321 broken_views, native_bgr)) {
1321 right_aliased_ids.push_back(overlap_id); 1322 join_right_aliased_ids.push_back(overlap_id);
1322 overlap.flags |= ImageFlagBits::Alias; 1323 overlap.flags |= ImageFlagBits::Alias;
1323 all_siblings.push_back(overlap_id); 1324 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1324 } else { 1325 } else {
1325 bad_overlap_ids.push_back(overlap_id); 1326 join_bad_overlap_ids.push_back(overlap_id);
1326 } 1327 }
1327 }; 1328 };
1328 ForEachImageInRegion(cpu_addr, size_bytes, region_check); 1329 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
1329 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { 1330 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
1330 if (!overlaps_found.contains(overlap_id)) { 1331 if (!join_overlaps_found.contains(overlap_id)) {
1331 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1332 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1332 ignore_textures.insert(overlap_id); 1333 join_ignore_textures.insert(overlap_id);
1333 } 1334 }
1334 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { 1335 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
1335 ignore_textures.insert(overlap_id); 1336 join_ignore_textures.insert(overlap_id);
1336 } 1337 }
1337 } 1338 }
1338 }; 1339 };
@@ -1340,11 +1341,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1340 1341
1341 bool can_rescale = info.rescaleable; 1342 bool can_rescale = info.rescaleable;
1342 bool any_rescaled = false; 1343 bool any_rescaled = false;
1343 for (const ImageId sibling_id : all_siblings) { 1344 for (const auto& copy : join_copies_to_do) {
1344 if (!can_rescale) { 1345 if (!can_rescale) {
1345 break; 1346 break;
1346 } 1347 }
1347 Image& sibling = slot_images[sibling_id]; 1348 Image& sibling = slot_images[copy.id];
1348 can_rescale &= ImageCanRescale(sibling); 1349 can_rescale &= ImageCanRescale(sibling);
1349 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); 1350 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
1350 } 1351 }
@@ -1352,13 +1353,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1352 can_rescale &= any_rescaled; 1353 can_rescale &= any_rescaled;
1353 1354
1354 if (can_rescale) { 1355 if (can_rescale) {
1355 for (const ImageId sibling_id : all_siblings) { 1356 for (const auto& copy : join_copies_to_do) {
1356 Image& sibling = slot_images[sibling_id]; 1357 Image& sibling = slot_images[copy.id];
1357 ScaleUp(sibling); 1358 ScaleUp(sibling);
1358 } 1359 }
1359 } else { 1360 } else {
1360 for (const ImageId sibling_id : all_siblings) { 1361 for (const auto& copy : join_copies_to_do) {
1361 Image& sibling = slot_images[sibling_id]; 1362 Image& sibling = slot_images[copy.id];
1362 ScaleDown(sibling); 1363 ScaleDown(sibling);
1363 } 1364 }
1364 } 1365 }
@@ -1370,7 +1371,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1370 new_image.flags |= ImageFlagBits::Sparse; 1371 new_image.flags |= ImageFlagBits::Sparse;
1371 } 1372 }
1372 1373
1373 for (const ImageId overlap_id : ignore_textures) { 1374 for (const ImageId overlap_id : join_ignore_textures) {
1374 Image& overlap = slot_images[overlap_id]; 1375 Image& overlap = slot_images[overlap_id];
1375 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1376 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1376 UNIMPLEMENTED(); 1377 UNIMPLEMENTED();
@@ -1391,14 +1392,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1391 ScaleDown(new_image); 1392 ScaleDown(new_image);
1392 } 1393 }
1393 1394
1394 std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { 1395 std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
1395 const ImageBase& lhs_image = slot_images[lhs]; 1396 const ImageBase& lhs_image = slot_images[lhs.id];
1396 const ImageBase& rhs_image = slot_images[rhs]; 1397 const ImageBase& rhs_image = slot_images[rhs.id];
1397 return lhs_image.modification_tick < rhs_image.modification_tick; 1398 return lhs_image.modification_tick < rhs_image.modification_tick;
1398 }); 1399 });
1399 1400
1400 for (const ImageId overlap_id : overlap_ids) { 1401 ImageBase& new_image_base = new_image;
1401 Image& overlap = slot_images[overlap_id]; 1402 for (const ImageId aliased_id : join_right_aliased_ids) {
1403 ImageBase& aliased = slot_images[aliased_id];
1404 size_t alias_index = new_image_base.aliased_images.size();
1405 if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
1406 continue;
1407 }
1408 join_alias_indices.emplace(aliased_id, alias_index);
1409 new_image.flags |= ImageFlagBits::Alias;
1410 }
1411 for (const ImageId aliased_id : join_left_aliased_ids) {
1412 ImageBase& aliased = slot_images[aliased_id];
1413 size_t alias_index = new_image_base.aliased_images.size();
1414 if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
1415 continue;
1416 }
1417 join_alias_indices.emplace(aliased_id, alias_index);
1418 new_image.flags |= ImageFlagBits::Alias;
1419 }
1420 for (const ImageId aliased_id : join_bad_overlap_ids) {
1421 ImageBase& aliased = slot_images[aliased_id];
1422 aliased.overlapping_images.push_back(new_image_id);
1423 new_image.overlapping_images.push_back(aliased_id);
1424 if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
1425 aliased.overlapping_images.size() > 1) {
1426 aliased.flags |= ImageFlagBits::BadOverlap;
1427 }
1428 if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
1429 new_image.overlapping_images.size() > 1) {
1430 new_image.flags |= ImageFlagBits::BadOverlap;
1431 }
1432 }
1433
1434 for (const auto& copy_object : join_copies_to_do) {
1435 Image& overlap = slot_images[copy_object.id];
1436 if (copy_object.is_alias) {
1437 if (!overlap.IsSafeDownload()) {
1438 continue;
1439 }
1440 const auto alias_pointer = join_alias_indices.find(copy_object.id);
1441 if (alias_pointer == join_alias_indices.end()) {
1442 continue;
1443 }
1444 const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
1445 CopyImage(new_image_id, aliased.id, aliased.copies);
1446 new_image.modification_tick = overlap.modification_tick;
1447 continue;
1448 }
1402 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1449 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1403 new_image.flags |= ImageFlagBits::GpuModified; 1450 new_image.flags |= ImageFlagBits::GpuModified;
1404 const auto& resolution = Settings::values.resolution_info; 1451 const auto& resolution = Settings::values.resolution_info;
@@ -1411,35 +1458,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1411 } else { 1458 } else {
1412 runtime.CopyImage(new_image, overlap, std::move(copies)); 1459 runtime.CopyImage(new_image, overlap, std::move(copies));
1413 } 1460 }
1461 new_image.modification_tick = overlap.modification_tick;
1414 } 1462 }
1415 if (True(overlap.flags & ImageFlagBits::Tracked)) { 1463 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1416 UntrackImage(overlap, overlap_id); 1464 UntrackImage(overlap, copy_object.id);
1417 }
1418 UnregisterImage(overlap_id);
1419 DeleteImage(overlap_id);
1420 }
1421 ImageBase& new_image_base = new_image;
1422 for (const ImageId aliased_id : right_aliased_ids) {
1423 ImageBase& aliased = slot_images[aliased_id];
1424 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1425 new_image.flags |= ImageFlagBits::Alias;
1426 }
1427 for (const ImageId aliased_id : left_aliased_ids) {
1428 ImageBase& aliased = slot_images[aliased_id];
1429 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1430 new_image.flags |= ImageFlagBits::Alias;
1431 }
1432 for (const ImageId aliased_id : bad_overlap_ids) {
1433 ImageBase& aliased = slot_images[aliased_id];
1434 aliased.overlapping_images.push_back(new_image_id);
1435 new_image.overlapping_images.push_back(aliased_id);
1436 if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
1437 aliased.flags |= ImageFlagBits::BadOverlap;
1438 }
1439 if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
1440 new_image.flags |= ImageFlagBits::BadOverlap;
1441 } 1465 }
1466 UnregisterImage(copy_object.id);
1467 DeleteImage(copy_object.id);
1442 } 1468 }
1469
1443 RegisterImage(new_image_id); 1470 RegisterImage(new_image_id);
1444 return new_image_id; 1471 return new_image_id;
1445} 1472}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 0720494e5..9e5289977 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -10,7 +10,9 @@
10#include <span> 10#include <span>
11#include <type_traits> 11#include <type_traits>
12#include <unordered_map> 12#include <unordered_map>
13#include <unordered_set>
13#include <vector> 14#include <vector>
15#include <boost/container/small_vector.hpp>
14#include <queue> 16#include <queue>
15 17
16#include "common/common_types.h" 18#include "common/common_types.h"
@@ -474,6 +476,20 @@ private:
474 476
475 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; 477 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
476 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; 478 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
479
480 // Join caching
481 boost::container::small_vector<ImageId, 4> join_overlap_ids;
482 std::unordered_set<ImageId> join_overlaps_found;
483 boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
484 boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
485 std::unordered_set<ImageId> join_ignore_textures;
486 boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
487 struct JoinCopy {
488 bool is_alias;
489 ImageId id;
490 };
491 boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
492 std::unordered_map<ImageId, size_t> join_alias_indices;
477}; 493};
478 494
479} // namespace VideoCommon 495} // namespace VideoCommon