diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 35 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.h | 5 |
4 files changed, 44 insertions, 22 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8e68a2e53..27c82cd20 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -39,6 +39,12 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 39 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | 39 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); |
| 40 | sampler_descriptor.cubemap_anisotropy.Assign(1); | 40 | sampler_descriptor.cubemap_anisotropy.Assign(1); |
| 41 | 41 | ||
| 42 | // These values were chosen based on typical peak swizzle data sizes seen in some titles | ||
| 43 | static constexpr size_t SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 8_MiB; | ||
| 44 | static constexpr size_t UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 1_MiB; | ||
| 45 | swizzle_data_buffer.resize_destructive(SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); | ||
| 46 | unswizzle_data_buffer.resize_destructive(UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY); | ||
| 47 | |||
| 42 | // Make sure the first index is reserved for the null resources | 48 | // Make sure the first index is reserved for the null resources |
| 43 | // This way the null resource becomes a compile time constant | 49 | // This way the null resource becomes a compile time constant |
| 44 | void(slot_images.insert(NullImageParams{})); | 50 | void(slot_images.insert(NullImageParams{})); |
| @@ -90,7 +96,8 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 90 | const auto copies = FullDownloadCopies(image.info); | 96 | const auto copies = FullDownloadCopies(image.info); |
| 91 | image.DownloadMemory(map, copies); | 97 | image.DownloadMemory(map, copies); |
| 92 | runtime.Finish(); | 98 | runtime.Finish(); |
| 93 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 99 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, |
| 100 | swizzle_data_buffer); | ||
| 94 | } | 101 | } |
| 95 | if (True(image.flags & ImageFlagBits::Tracked)) { | 102 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 96 | UntrackImage(image, image_id); | 103 | UntrackImage(image, image_id); |
| @@ -461,7 +468,8 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 461 | const auto copies = FullDownloadCopies(image.info); | 468 | const auto copies = FullDownloadCopies(image.info); |
| 462 | image.DownloadMemory(map, copies); | 469 | image.DownloadMemory(map, copies); |
| 463 | runtime.Finish(); | 470 | runtime.Finish(); |
| 464 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 471 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span, |
| 472 | swizzle_data_buffer); | ||
| 465 | } | 473 | } |
| 466 | } | 474 | } |
| 467 | 475 | ||
| @@ -672,7 +680,8 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 672 | for (const ImageId image_id : download_ids) { | 680 | for (const ImageId image_id : download_ids) { |
| 673 | const ImageBase& image = slot_images[image_id]; | 681 | const ImageBase& image = slot_images[image_id]; |
| 674 | const auto copies = FullDownloadCopies(image.info); | 682 | const auto copies = FullDownloadCopies(image.info); |
| 675 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); | 683 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, |
| 684 | swizzle_data_buffer); | ||
| 676 | download_map.offset += image.unswizzled_size_bytes; | 685 | download_map.offset += image.unswizzled_size_bytes; |
| 677 | download_span = download_span.subspan(image.unswizzled_size_bytes); | 686 | download_span = download_span.subspan(image.unswizzled_size_bytes); |
| 678 | } | 687 | } |
| @@ -734,13 +743,21 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 734 | gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | 743 | gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |
| 735 | const auto uploads = FullUploadSwizzles(image.info); | 744 | const auto uploads = FullUploadSwizzles(image.info); |
| 736 | runtime.AccelerateImageUpload(image, staging, uploads); | 745 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 737 | } else if (True(image.flags & ImageFlagBits::Converted)) { | 746 | return; |
| 738 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | 747 | } |
| 739 | auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); | 748 | const size_t guest_size_bytes = image.guest_size_bytes; |
| 740 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 749 | swizzle_data_buffer.resize_destructive(guest_size_bytes); |
| 750 | gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | ||
| 751 | |||
| 752 | if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 753 | unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | ||
| 754 | auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, | ||
| 755 | unswizzle_data_buffer); | ||
| 756 | ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); | ||
| 741 | image.UploadMemory(staging, copies); | 757 | image.UploadMemory(staging, copies); |
| 742 | } else { | 758 | } else { |
| 743 | const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); | 759 | const auto copies = |
| 760 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); | ||
| 744 | image.UploadMemory(staging, copies); | 761 | image.UploadMemory(staging, copies); |
| 745 | } | 762 | } |
| 746 | } | 763 | } |
| @@ -910,7 +927,7 @@ void TextureCache<P>::InvalidateScale(Image& image) { | |||
| 910 | } | 927 | } |
| 911 | 928 | ||
| 912 | template <class P> | 929 | template <class P> |
| 913 | u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { | 930 | u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) { |
| 914 | const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * | 931 | const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * |
| 915 | Settings::values.resolution_info.up_scale); | 932 | Settings::values.resolution_info.up_scale); |
| 916 | const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + | 933 | const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 587339a31..4fd677a80 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "common/literals.h" | 17 | #include "common/literals.h" |
| 18 | #include "common/lru_cache.h" | 18 | #include "common/lru_cache.h" |
| 19 | #include "common/polyfill_ranges.h" | 19 | #include "common/polyfill_ranges.h" |
| 20 | #include "common/scratch_buffer.h" | ||
| 20 | #include "video_core/compatible_formats.h" | 21 | #include "video_core/compatible_formats.h" |
| 21 | #include "video_core/control/channel_state_cache.h" | 22 | #include "video_core/control/channel_state_cache.h" |
| 22 | #include "video_core/delayed_destruction_ring.h" | 23 | #include "video_core/delayed_destruction_ring.h" |
| @@ -368,7 +369,7 @@ private: | |||
| 368 | void InvalidateScale(Image& image); | 369 | void InvalidateScale(Image& image); |
| 369 | bool ScaleUp(Image& image); | 370 | bool ScaleUp(Image& image); |
| 370 | bool ScaleDown(Image& image); | 371 | bool ScaleDown(Image& image); |
| 371 | u64 GetScaledImageSizeBytes(ImageBase& image); | 372 | u64 GetScaledImageSizeBytes(const ImageBase& image); |
| 372 | 373 | ||
| 373 | Runtime& runtime; | 374 | Runtime& runtime; |
| 374 | 375 | ||
| @@ -417,6 +418,9 @@ private: | |||
| 417 | 418 | ||
| 418 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | 419 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; |
| 419 | 420 | ||
| 421 | Common::ScratchBuffer<u8> swizzle_data_buffer; | ||
| 422 | Common::ScratchBuffer<u8> unswizzle_data_buffer; | ||
| 423 | |||
| 420 | u64 modification_tick = 0; | 424 | u64 modification_tick = 0; |
| 421 | u64 frame_tick = 0; | 425 | u64 frame_tick = 0; |
| 422 | }; | 426 | }; |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e8c908b42..03acc68d9 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -505,7 +505,7 @@ void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 505 | 505 | ||
| 506 | void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 506 | void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 507 | const ImageInfo& info, const BufferImageCopy& copy, | 507 | const ImageInfo& info, const BufferImageCopy& copy, |
| 508 | std::span<const u8> input) { | 508 | std::span<const u8> input, Common::ScratchBuffer<u8>& tmp_buffer) { |
| 509 | const Extent3D size = info.size; | 509 | const Extent3D size = info.size; |
| 510 | const LevelInfo level_info = MakeLevelInfo(info); | 510 | const LevelInfo level_info = MakeLevelInfo(info); |
| 511 | const Extent2D tile_size = DefaultBlockSize(info.format); | 511 | const Extent2D tile_size = DefaultBlockSize(info.format); |
| @@ -534,8 +534,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 534 | tile_size.height, info.tile_width_spacing); | 534 | tile_size.height, info.tile_width_spacing); |
| 535 | const size_t subresource_size = sizes[level]; | 535 | const size_t subresource_size = sizes[level]; |
| 536 | 536 | ||
| 537 | const auto dst_data = std::make_unique<u8[]>(subresource_size); | 537 | tmp_buffer.resize_destructive(subresource_size); |
| 538 | const std::span<u8> dst(dst_data.get(), subresource_size); | 538 | const std::span<u8> dst(tmp_buffer); |
| 539 | 539 | ||
| 540 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 540 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 541 | const std::span<const u8> src = input.subspan(host_offset); | 541 | const std::span<const u8> src = input.subspan(host_offset); |
| @@ -765,8 +765,9 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||
| 765 | } | 765 | } |
| 766 | 766 | ||
| 767 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 767 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 768 | const ImageInfo& info, std::span<u8> output) { | 768 | const ImageInfo& info, std::span<const u8> input, |
| 769 | const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); | 769 | std::span<u8> output) { |
| 770 | const size_t guest_size_bytes = input.size_bytes(); | ||
| 770 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 771 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 771 | const Extent3D size = info.size; | 772 | const Extent3D size = info.size; |
| 772 | 773 | ||
| @@ -789,10 +790,6 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||
| 789 | .image_extent = size, | 790 | .image_extent = size, |
| 790 | }}; | 791 | }}; |
| 791 | } | 792 | } |
| 792 | const auto input_data = std::make_unique<u8[]>(guest_size_bytes); | ||
| 793 | gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes); | ||
| 794 | const std::span<const u8> input(input_data.get(), guest_size_bytes); | ||
| 795 | |||
| 796 | const LevelInfo level_info = MakeLevelInfo(info); | 793 | const LevelInfo level_info = MakeLevelInfo(info); |
| 797 | const s32 num_layers = info.resources.layers; | 794 | const s32 num_layers = info.resources.layers; |
| 798 | const s32 num_levels = info.resources.levels; | 795 | const s32 num_levels = info.resources.levels; |
| @@ -980,13 +977,14 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | |||
| 980 | } | 977 | } |
| 981 | 978 | ||
| 982 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 979 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 983 | std::span<const BufferImageCopy> copies, std::span<const u8> memory) { | 980 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |
| 981 | Common::ScratchBuffer<u8>& tmp_buffer) { | ||
| 984 | const bool is_pitch_linear = info.type == ImageType::Linear; | 982 | const bool is_pitch_linear = info.type == ImageType::Linear; |
| 985 | for (const BufferImageCopy& copy : copies) { | 983 | for (const BufferImageCopy& copy : copies) { |
| 986 | if (is_pitch_linear) { | 984 | if (is_pitch_linear) { |
| 987 | SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); | 985 | SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); |
| 988 | } else { | 986 | } else { |
| 989 | SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); | 987 | SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory, tmp_buffer); |
| 990 | } | 988 | } |
| 991 | } | 989 | } |
| 992 | } | 990 | } |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 5e28f4ab3..d103db8ae 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <span> | 7 | #include <span> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | ||
| 10 | 11 | ||
| 11 | #include "video_core/surface.h" | 12 | #include "video_core/surface.h" |
| 12 | #include "video_core/texture_cache/image_base.h" | 13 | #include "video_core/texture_cache/image_base.h" |
| @@ -59,6 +60,7 @@ struct OverlapResult { | |||
| 59 | 60 | ||
| 60 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 61 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 61 | GPUVAddr gpu_addr, const ImageInfo& info, | 62 | GPUVAddr gpu_addr, const ImageInfo& info, |
| 63 | std::span<const u8> input, | ||
| 62 | std::span<u8> output); | 64 | std::span<u8> output); |
| 63 | 65 | ||
| 64 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 66 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| @@ -76,7 +78,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 76 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | 78 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); |
| 77 | 79 | ||
| 78 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 80 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 79 | std::span<const BufferImageCopy> copies, std::span<const u8> memory); | 81 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |
| 82 | Common::ScratchBuffer<u8>& tmp_buffer); | ||
| 80 | 83 | ||
| 81 | [[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, | 84 | [[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, |
| 82 | const ImageInfo& overlap_info, u32 new_level, | 85 | const ImageInfo& overlap_info, u32 new_level, |