summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 35
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h 6
-rw-r--r-- src/video_core/texture_cache/util.cpp 20
-rw-r--r-- src/video_core/texture_cache/util.h 5
4 files changed, 44 insertions, 22 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8e68a2e53..27c82cd20 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -39,6 +39,12 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
39 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); 39 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
40 sampler_descriptor.cubemap_anisotropy.Assign(1); 40 sampler_descriptor.cubemap_anisotropy.Assign(1);
41 41
42 // These values were chosen based on typical peak swizzle data sizes seen in some titles
43 static constexpr size_t SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 8_MiB;
44 static constexpr size_t UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY = 1_MiB;
45 swizzle_data_buffer.resize_destructive(SWIZZLE_DATA_BUFFER_INITIAL_CAPACITY);
46 unswizzle_data_buffer.resize_destructive(UNSWIZZLE_DATA_BUFFER_INITIAL_CAPACITY);
47
42 // Make sure the first index is reserved for the null resources 48 // Make sure the first index is reserved for the null resources
43 // This way the null resource becomes a compile time constant 49 // This way the null resource becomes a compile time constant
44 void(slot_images.insert(NullImageParams{})); 50 void(slot_images.insert(NullImageParams{}));
@@ -90,7 +96,8 @@ void TextureCache<P>::RunGarbageCollector() {
90 const auto copies = FullDownloadCopies(image.info); 96 const auto copies = FullDownloadCopies(image.info);
91 image.DownloadMemory(map, copies); 97 image.DownloadMemory(map, copies);
92 runtime.Finish(); 98 runtime.Finish();
93 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 99 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
100 swizzle_data_buffer);
94 } 101 }
95 if (True(image.flags & ImageFlagBits::Tracked)) { 102 if (True(image.flags & ImageFlagBits::Tracked)) {
96 UntrackImage(image, image_id); 103 UntrackImage(image, image_id);
@@ -461,7 +468,8 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
461 const auto copies = FullDownloadCopies(image.info); 468 const auto copies = FullDownloadCopies(image.info);
462 image.DownloadMemory(map, copies); 469 image.DownloadMemory(map, copies);
463 runtime.Finish(); 470 runtime.Finish();
464 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 471 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
472 swizzle_data_buffer);
465 } 473 }
466} 474}
467 475
@@ -672,7 +680,8 @@ void TextureCache<P>::PopAsyncFlushes() {
672 for (const ImageId image_id : download_ids) { 680 for (const ImageId image_id : download_ids) {
673 const ImageBase& image = slot_images[image_id]; 681 const ImageBase& image = slot_images[image_id];
674 const auto copies = FullDownloadCopies(image.info); 682 const auto copies = FullDownloadCopies(image.info);
675 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); 683 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
684 swizzle_data_buffer);
676 download_map.offset += image.unswizzled_size_bytes; 685 download_map.offset += image.unswizzled_size_bytes;
677 download_span = download_span.subspan(image.unswizzled_size_bytes); 686 download_span = download_span.subspan(image.unswizzled_size_bytes);
678 } 687 }
@@ -734,13 +743,21 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
734 gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 743 gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
735 const auto uploads = FullUploadSwizzles(image.info); 744 const auto uploads = FullUploadSwizzles(image.info);
736 runtime.AccelerateImageUpload(image, staging, uploads); 745 runtime.AccelerateImageUpload(image, staging, uploads);
737 } else if (True(image.flags & ImageFlagBits::Converted)) { 746 return;
738 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 747 }
739 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); 748 const size_t guest_size_bytes = image.guest_size_bytes;
740 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 749 swizzle_data_buffer.resize_destructive(guest_size_bytes);
750 gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
751
752 if (True(image.flags & ImageFlagBits::Converted)) {
753 unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
754 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer,
755 unswizzle_data_buffer);
756 ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
741 image.UploadMemory(staging, copies); 757 image.UploadMemory(staging, copies);
742 } else { 758 } else {
743 const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); 759 const auto copies =
760 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span);
744 image.UploadMemory(staging, copies); 761 image.UploadMemory(staging, copies);
745 } 762 }
746} 763}
@@ -910,7 +927,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
910} 927}
911 928
912template <class P> 929template <class P>
913u64 TextureCache<P>::GetScaledImageSizeBytes(ImageBase& image) { 930u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
914 const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale * 931 const u64 scale_up = static_cast<u64>(Settings::values.resolution_info.up_scale *
915 Settings::values.resolution_info.up_scale); 932 Settings::values.resolution_info.up_scale);
916 const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift + 933 const u64 down_shift = static_cast<u64>(Settings::values.resolution_info.down_shift +
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 587339a31..4fd677a80 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -17,6 +17,7 @@
17#include "common/literals.h" 17#include "common/literals.h"
18#include "common/lru_cache.h" 18#include "common/lru_cache.h"
19#include "common/polyfill_ranges.h" 19#include "common/polyfill_ranges.h"
20#include "common/scratch_buffer.h"
20#include "video_core/compatible_formats.h" 21#include "video_core/compatible_formats.h"
21#include "video_core/control/channel_state_cache.h" 22#include "video_core/control/channel_state_cache.h"
22#include "video_core/delayed_destruction_ring.h" 23#include "video_core/delayed_destruction_ring.h"
@@ -368,7 +369,7 @@ private:
368 void InvalidateScale(Image& image); 369 void InvalidateScale(Image& image);
369 bool ScaleUp(Image& image); 370 bool ScaleUp(Image& image);
370 bool ScaleDown(Image& image); 371 bool ScaleDown(Image& image);
371 u64 GetScaledImageSizeBytes(ImageBase& image); 372 u64 GetScaledImageSizeBytes(const ImageBase& image);
372 373
373 Runtime& runtime; 374 Runtime& runtime;
374 375
@@ -417,6 +418,9 @@ private:
417 418
418 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; 419 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
419 420
421 Common::ScratchBuffer<u8> swizzle_data_buffer;
422 Common::ScratchBuffer<u8> unswizzle_data_buffer;
423
420 u64 modification_tick = 0; 424 u64 modification_tick = 0;
421 u64 frame_tick = 0; 425 u64 frame_tick = 0;
422}; 426};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index e8c908b42..03acc68d9 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -505,7 +505,7 @@ void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
505 505
506void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 506void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
507 const ImageInfo& info, const BufferImageCopy& copy, 507 const ImageInfo& info, const BufferImageCopy& copy,
508 std::span<const u8> input) { 508 std::span<const u8> input, Common::ScratchBuffer<u8>& tmp_buffer) {
509 const Extent3D size = info.size; 509 const Extent3D size = info.size;
510 const LevelInfo level_info = MakeLevelInfo(info); 510 const LevelInfo level_info = MakeLevelInfo(info);
511 const Extent2D tile_size = DefaultBlockSize(info.format); 511 const Extent2D tile_size = DefaultBlockSize(info.format);
@@ -534,8 +534,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
534 tile_size.height, info.tile_width_spacing); 534 tile_size.height, info.tile_width_spacing);
535 const size_t subresource_size = sizes[level]; 535 const size_t subresource_size = sizes[level];
536 536
537 const auto dst_data = std::make_unique<u8[]>(subresource_size); 537 tmp_buffer.resize_destructive(subresource_size);
538 const std::span<u8> dst(dst_data.get(), subresource_size); 538 const std::span<u8> dst(tmp_buffer);
539 539
540 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 540 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
541 const std::span<const u8> src = input.subspan(host_offset); 541 const std::span<const u8> src = input.subspan(host_offset);
@@ -765,8 +765,9 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
765} 765}
766 766
767std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 767std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
768 const ImageInfo& info, std::span<u8> output) { 768 const ImageInfo& info, std::span<const u8> input,
769 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); 769 std::span<u8> output) {
770 const size_t guest_size_bytes = input.size_bytes();
770 const u32 bpp_log2 = BytesPerBlockLog2(info.format); 771 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
771 const Extent3D size = info.size; 772 const Extent3D size = info.size;
772 773
@@ -789,10 +790,6 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
789 .image_extent = size, 790 .image_extent = size,
790 }}; 791 }};
791 } 792 }
792 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
793 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
794 const std::span<const u8> input(input_data.get(), guest_size_bytes);
795
796 const LevelInfo level_info = MakeLevelInfo(info); 793 const LevelInfo level_info = MakeLevelInfo(info);
797 const s32 num_layers = info.resources.layers; 794 const s32 num_layers = info.resources.layers;
798 const s32 num_levels = info.resources.levels; 795 const s32 num_levels = info.resources.levels;
@@ -980,13 +977,14 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
980} 977}
981 978
982void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 979void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
983 std::span<const BufferImageCopy> copies, std::span<const u8> memory) { 980 std::span<const BufferImageCopy> copies, std::span<const u8> memory,
981 Common::ScratchBuffer<u8>& tmp_buffer) {
984 const bool is_pitch_linear = info.type == ImageType::Linear; 982 const bool is_pitch_linear = info.type == ImageType::Linear;
985 for (const BufferImageCopy& copy : copies) { 983 for (const BufferImageCopy& copy : copies) {
986 if (is_pitch_linear) { 984 if (is_pitch_linear) {
987 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); 985 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
988 } else { 986 } else {
989 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); 987 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory, tmp_buffer);
990 } 988 }
991 } 989 }
992} 990}
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 5e28f4ab3..d103db8ae 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -7,6 +7,7 @@
7#include <span> 7#include <span>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/scratch_buffer.h"
10 11
11#include "video_core/surface.h" 12#include "video_core/surface.h"
12#include "video_core/texture_cache/image_base.h" 13#include "video_core/texture_cache/image_base.h"
@@ -59,6 +60,7 @@ struct OverlapResult {
59 60
60[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 61[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
61 GPUVAddr gpu_addr, const ImageInfo& info, 62 GPUVAddr gpu_addr, const ImageInfo& info,
63 std::span<const u8> input,
62 std::span<u8> output); 64 std::span<u8> output);
63 65
64[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 66[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
@@ -76,7 +78,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
76[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); 78[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
77 79
78void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 80void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
79 std::span<const BufferImageCopy> copies, std::span<const u8> memory); 81 std::span<const BufferImageCopy> copies, std::span<const u8> memory,
82 Common::ScratchBuffer<u8>& tmp_buffer);
80 83
81[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info, 84[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
82 const ImageInfo& overlap_info, u32 new_level, 85 const ImageInfo& overlap_info, u32 new_level,