diff options
| author | 2023-06-22 21:53:07 -0700 | |
|---|---|---|
| committer | 2023-06-22 21:53:07 -0700 | |
| commit | 2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8 (patch) | |
| tree | d82f2cf4f7a5e9773616846c095a941b282a84f6 /src/video_core/texture_cache | |
| parent | Merge pull request #10806 from liamwhite/worst-fs-implementation-ever (diff) | |
| parent | Remove memory allocations in some hot paths (diff) | |
| download | yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.gz yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.xz yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.zip | |
Merge pull request #10457 from Kelebek1/optimise
Remove memory allocations in some hot paths
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 5 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 14 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.h | 31 |
5 files changed, 55 insertions, 47 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | 10 | ||
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -108,8 +109,8 @@ struct ImageBase { | |||
| 108 | std::vector<ImageViewInfo> image_view_infos; | 109 | std::vector<ImageViewInfo> image_view_infos; |
| 109 | std::vector<ImageViewId> image_view_ids; | 110 | std::vector<ImageViewId> image_view_ids; |
| 110 | 111 | ||
| 111 | std::vector<u32> slice_offsets; | 112 | boost::container::small_vector<u32, 16> slice_offsets; |
| 112 | std::vector<SubresourceBase> slice_subresources; | 113 | boost::container::small_vector<SubresourceBase, 16> slice_subresources; |
| 113 | 114 | ||
| 114 | std::vector<AliasedImage> aliased_images; | 115 | std::vector<AliasedImage> aliased_images; |
| 115 | std::vector<ImageId> overlapping_images; | 116 | std::vector<ImageId> overlapping_images; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d58bb69ff..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
| @@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 526 | 526 | ||
| 527 | template <class P> | 527 | template <class P> |
| 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 529 | std::vector<ImageId> images; | 529 | boost::container::small_vector<ImageId, 16> images; |
| 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 531 | if (!image.IsSafeDownload()) { | 531 | if (!image.IsSafeDownload()) { |
| 532 | return; | 532 | return; |
| @@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 579 | 579 | ||
| 580 | template <class P> | 580 | template <class P> |
| 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 582 | std::vector<ImageId> deleted_images; | 582 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 584 | for (const ImageId id : deleted_images) { | 584 | for (const ImageId id : deleted_images) { |
| 585 | Image& image = slot_images[id]; | 585 | Image& image = slot_images[id]; |
| @@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | |||
| 593 | 593 | ||
| 594 | template <class P> | 594 | template <class P> |
| 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { | 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { |
| 596 | std::vector<ImageId> deleted_images; | 596 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, | 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, |
| 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 599 | for (const ImageId id : deleted_images) { | 599 | for (const ImageId id : deleted_images) { |
| @@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1101 | const bool native_bgr = runtime.HasNativeBgr(); | 1101 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1102 | const bool flexible_formats = True(options & RelaxedOptions::Format); | 1102 | const bool flexible_formats = True(options & RelaxedOptions::Format); |
| 1103 | ImageId image_id{}; | 1103 | ImageId image_id{}; |
| 1104 | boost::container::small_vector<ImageId, 1> image_ids; | 1104 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1105 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1105 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1106 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1106 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1107 | return false; | 1107 | return false; |
| @@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) | |||
| 1622 | } | 1622 | } |
| 1623 | } | 1623 | } |
| 1624 | ImageId image_id{}; | 1624 | ImageId image_id{}; |
| 1625 | boost::container::small_vector<ImageId, 1> image_ids; | 1625 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1626 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1626 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1627 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1627 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1628 | return false; | 1628 | return false; |
| @@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1942 | image.map_view_id = map_id; | 1942 | image.map_view_id = map_id; |
| 1943 | return; | 1943 | return; |
| 1944 | } | 1944 | } |
| 1945 | std::vector<ImageViewId> sparse_maps{}; | 1945 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1946 | ForEachSparseSegment( | 1946 | ForEachSparseSegment( |
| 1947 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1947 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1948 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1948 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| @@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | |||
| 2217 | 2217 | ||
| 2218 | template <class P> | 2218 | template <class P> |
| 2219 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | 2219 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 2220 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 2220 | boost::container::small_vector<const AliasedImage*, 8> aliased_images; |
| 2221 | Image& image = slot_images[image_id]; | 2221 | Image& image = slot_images[image_id]; |
| 2222 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | 2222 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); |
| 2223 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); | 2223 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 44232b961..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
| @@ -56,7 +56,7 @@ struct ImageViewInOut { | |||
| 56 | struct AsyncDecodeContext { | 56 | struct AsyncDecodeContext { |
| 57 | ImageId image_id; | 57 | ImageId image_id; |
| 58 | Common::ScratchBuffer<u8> decoded_data; | 58 | Common::ScratchBuffer<u8> decoded_data; |
| 59 | std::vector<BufferImageCopy> copies; | 59 | boost::container::small_vector<BufferImageCopy, 16> copies; |
| 60 | std::mutex mutex; | 60 | std::mutex mutex; |
| 61 | std::atomic_bool complete; | 61 | std::atomic_bool complete; |
| 62 | }; | 62 | }; |
| @@ -429,7 +429,7 @@ private: | |||
| 429 | 429 | ||
| 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; | 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; |
| 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 432 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 432 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 433 | 433 | ||
| 434 | VAddr virtual_invalid_space{}; | 434 | VAddr virtual_invalid_space{}; |
| 435 | 435 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
| @@ -329,13 +329,13 @@ template <u32 GOB_EXTENT> | |||
| 329 | 329 | ||
| 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( |
| 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { |
| 332 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | 332 | const auto slice_offsets = CalculateSliceOffsets(new_info); |
| 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); |
| 334 | const auto it = std::ranges::find(slice_offsets, diff); | 334 | const auto it = std::ranges::find(slice_offsets, diff); |
| 335 | if (it == slice_offsets.end()) { | 335 | if (it == slice_offsets.end()) { |
| 336 | return std::nullopt; | 336 | return std::nullopt; |
| 337 | } | 337 | } |
| 338 | const std::vector subresources = CalculateSliceSubresources(new_info); | 338 | const auto subresources = CalculateSliceSubresources(new_info); |
| 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; |
| 340 | const ImageInfo& info = overlap.info; | 340 | const ImageInfo& info = overlap.info; |
| 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { |
| @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | |||
| 655 | return sizes; | 655 | return sizes; |
| 656 | } | 656 | } |
| 657 | 657 | ||
| 658 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | 658 | boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { |
| 659 | ASSERT(info.type == ImageType::e3D); | 659 | ASSERT(info.type == ImageType::e3D); |
| 660 | std::vector<u32> offsets; | 660 | boost::container::small_vector<u32, 16> offsets; |
| 661 | offsets.reserve(NumSlices(info)); | 661 | offsets.reserve(NumSlices(info)); |
| 662 | 662 | ||
| 663 | const LevelInfo level_info = MakeLevelInfo(info); | 663 | const LevelInfo level_info = MakeLevelInfo(info); |
| @@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | |||
| 679 | return offsets; | 679 | return offsets; |
| 680 | } | 680 | } |
| 681 | 681 | ||
| 682 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | 682 | boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 683 | const ImageInfo& info) { | ||
| 683 | ASSERT(info.type == ImageType::e3D); | 684 | ASSERT(info.type == ImageType::e3D); |
| 684 | std::vector<SubresourceBase> subresources; | 685 | boost::container::small_vector<SubresourceBase, 16> subresources; |
| 685 | subresources.reserve(NumSlices(info)); | 686 | subresources.reserve(NumSlices(info)); |
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | 687 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 687 | const s32 depth = AdjustMipSize(info.size.depth, level); | 688 | const s32 depth = AdjustMipSize(info.size.depth, level); |
| @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | |||
| 723 | } | 724 | } |
| 724 | } | 725 | } |
| 725 | 726 | ||
| 726 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | 727 | boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, |
| 727 | SubresourceBase base, u32 up_scale, u32 down_shift) { | 728 | const ImageInfo& src, |
| 729 | SubresourceBase base, | ||
| 730 | u32 up_scale, u32 down_shift) { | ||
| 728 | ASSERT(dst.resources.levels >= src.resources.levels); | 731 | ASSERT(dst.resources.levels >= src.resources.levels); |
| 729 | 732 | ||
| 730 | const bool is_dst_3d = dst.type == ImageType::e3D; | 733 | const bool is_dst_3d = dst.type == ImageType::e3D; |
| @@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 733 | ASSERT(src.resources.levels == 1); | 736 | ASSERT(src.resources.levels == 1); |
| 734 | } | 737 | } |
| 735 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; | 738 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; |
| 736 | std::vector<ImageCopy> copies; | 739 | boost::container::small_vector<ImageCopy, 16> copies; |
| 737 | copies.reserve(src.resources.levels); | 740 | copies.reserve(src.resources.levels); |
| 738 | for (s32 level = 0; level < src.resources.levels; ++level) { | 741 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| 739 | ImageCopy& copy = copies.emplace_back(); | 742 | ImageCopy& copy = copies.emplace_back(); |
| @@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 770 | return copies; | 773 | return copies; |
| 771 | } | 774 | } |
| 772 | 775 | ||
| 773 | std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, | 776 | boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, |
| 774 | u32 down_shift) { | 777 | u32 up_scale, |
| 775 | std::vector<ImageCopy> copies; | 778 | u32 down_shift) { |
| 779 | boost::container::small_vector<ImageCopy, 16> copies; | ||
| 776 | copies.reserve(src.resources.levels); | 780 | copies.reserve(src.resources.levels); |
| 777 | const bool is_3d = src.type == ImageType::e3D; | 781 | const bool is_3d = src.type == ImageType::e3D; |
| 778 | for (s32 level = 0; level < src.resources.levels; ++level) { | 782 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||
| 824 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); | 828 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); |
| 825 | } | 829 | } |
| 826 | 830 | ||
| 827 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 831 | boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 828 | const ImageInfo& info, std::span<const u8> input, | 832 | GPUVAddr gpu_addr, |
| 829 | std::span<u8> output) { | 833 | const ImageInfo& info, |
| 834 | std::span<const u8> input, | ||
| 835 | std::span<u8> output) { | ||
| 830 | const size_t guest_size_bytes = input.size_bytes(); | 836 | const size_t guest_size_bytes = input.size_bytes(); |
| 831 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 837 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 832 | const Extent3D size = info.size; | 838 | const Extent3D size = info.size; |
| @@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||
| 861 | info.tile_width_spacing); | 867 | info.tile_width_spacing); |
| 862 | size_t guest_offset = 0; | 868 | size_t guest_offset = 0; |
| 863 | u32 host_offset = 0; | 869 | u32 host_offset = 0; |
| 864 | std::vector<BufferImageCopy> copies(num_levels); | 870 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 865 | 871 | ||
| 866 | for (s32 level = 0; level < num_levels; ++level) { | 872 | for (s32 level = 0; level < num_levels; ++level) { |
| 867 | const Extent3D level_size = AdjustMipSize(size, level); | 873 | const Extent3D level_size = AdjustMipSize(size, level); |
| @@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 978 | } | 984 | } |
| 979 | } | 985 | } |
| 980 | 986 | ||
| 981 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | 987 | boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { |
| 982 | const Extent3D size = info.size; | 988 | const Extent3D size = info.size; |
| 983 | const u32 bytes_per_block = BytesPerBlock(info.format); | 989 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 984 | if (info.type == ImageType::Linear) { | 990 | if (info.type == ImageType::Linear) { |
| @@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | |||
| 1006 | 1012 | ||
| 1007 | u32 host_offset = 0; | 1013 | u32 host_offset = 0; |
| 1008 | 1014 | ||
| 1009 | std::vector<BufferImageCopy> copies(num_levels); | 1015 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 1010 | for (s32 level = 0; level < num_levels; ++level) { | 1016 | for (s32 level = 0; level < num_levels; ++level) { |
| 1011 | const Extent3D level_size = AdjustMipSize(size, level); | 1017 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1012 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | 1018 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); |
| @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | |||
| 1042 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | 1048 | return AdjustMipBlockSize(num_tiles, level_info.block, level); |
| 1043 | } | 1049 | } |
| 1044 | 1050 | ||
| 1045 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | 1051 | boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { |
| 1046 | const Extent2D tile_size = DefaultBlockSize(info.format); | 1052 | const Extent2D tile_size = DefaultBlockSize(info.format); |
| 1047 | if (info.type == ImageType::Linear) { | 1053 | if (info.type == ImageType::Linear) { |
| 1048 | return std::vector{SwizzleParameters{ | 1054 | return {SwizzleParameters{ |
| 1049 | .num_tiles = AdjustTileSize(info.size, tile_size), | 1055 | .num_tiles = AdjustTileSize(info.size, tile_size), |
| 1050 | .block = {}, | 1056 | .block = {}, |
| 1051 | .buffer_offset = 0, | 1057 | .buffer_offset = 0, |
| @@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | |||
| 1057 | const s32 num_levels = info.resources.levels; | 1063 | const s32 num_levels = info.resources.levels; |
| 1058 | 1064 | ||
| 1059 | u32 guest_offset = 0; | 1065 | u32 guest_offset = 0; |
| 1060 | std::vector<SwizzleParameters> params(num_levels); | 1066 | boost::container::small_vector<SwizzleParameters, 16> params(num_levels); |
| 1061 | for (s32 level = 0; level < num_levels; ++level) { | 1067 | for (s32 level = 0; level < num_levels; ++level) { |
| 1062 | const Extent3D level_size = AdjustMipSize(size, level); | 1068 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1063 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | 1069 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | 11 | #include "common/scratch_buffer.h" |
| @@ -40,9 +41,10 @@ struct OverlapResult { | |||
| 40 | 41 | ||
| 41 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | 42 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; |
| 42 | 43 | ||
| 43 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | 44 | [[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); |
| 44 | 45 | ||
| 45 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | 46 | [[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 47 | const ImageInfo& info); | ||
| 46 | 48 | ||
| 47 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | 49 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); |
| 48 | 50 | ||
| @@ -51,21 +53,18 @@ struct OverlapResult { | |||
| 51 | 53 | ||
| 52 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | 54 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; |
| 53 | 55 | ||
| 54 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | 56 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( |
| 55 | const ImageInfo& src, | 57 | const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, |
| 56 | SubresourceBase base, u32 up_scale = 1, | 58 | u32 down_shift = 0); |
| 57 | u32 down_shift = 0); | ||
| 58 | 59 | ||
| 59 | [[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, | 60 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( |
| 60 | u32 up_scale = 1, | 61 | const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); |
| 61 | u32 down_shift = 0); | ||
| 62 | 62 | ||
| 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 64 | 64 | ||
| 65 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 65 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( |
| 66 | GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | std::span<u8> output); | ||
| 69 | 68 | ||
| 70 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 71 | const ImageBase& image, std::span<u8> output); | 70 | const ImageBase& image, std::span<u8> output); |
| @@ -73,13 +72,15 @@ struct OverlapResult { | |||
| 73 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 74 | std::span<BufferImageCopy> copies); | 73 | std::span<BufferImageCopy> copies); |
| 75 | 74 | ||
| 76 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | 75 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( |
| 76 | const ImageInfo& info); | ||
| 77 | 77 | ||
| 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); |
| 79 | 79 | ||
| 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); |
| 81 | 81 | ||
| 82 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | 82 | [[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( |
| 83 | const ImageInfo& info); | ||
| 83 | 84 | ||
| 84 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 85 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 85 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, | 86 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |