diff options
| author | 2023-07-10 18:54:19 -0700 | |
|---|---|---|
| committer | 2023-07-10 18:54:19 -0700 | |
| commit | ce7c418e0cc05d92c18ad69c7cb37fecfa71b037 (patch) | |
| tree | ea1852111c1b3c3c340608ae518fc8711a4fcfe3 /src/video_core/texture_cache | |
| parent | Merge pull request #11050 from SuperSamus/sdl-button-labels (diff) | |
| parent | Fix ScratchBuffer moves (diff) | |
| download | yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.tar.gz yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.tar.xz yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.zip | |
Merge pull request #10996 from Kelebek1/readblock_optimisation
Use spans over guest memory where possible instead of copying data
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 24 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.h | 3 |
3 files changed, 21 insertions, 32 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 3a859139c..4457b366f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| 11 | #include "core/memory.h" | ||
| 11 | #include "video_core/control/channel_state.h" | 12 | #include "video_core/control/channel_state.h" |
| 12 | #include "video_core/dirty_flags.h" | 13 | #include "video_core/dirty_flags.h" |
| 13 | #include "video_core/engines/kepler_compute.h" | 14 | #include "video_core/engines/kepler_compute.h" |
| @@ -1026,19 +1027,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 1026 | runtime.AccelerateImageUpload(image, staging, uploads); | 1027 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 1027 | return; | 1028 | return; |
| 1028 | } | 1029 | } |
| 1029 | const size_t guest_size_bytes = image.guest_size_bytes; | 1030 | |
| 1030 | swizzle_data_buffer.resize_destructive(guest_size_bytes); | 1031 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1031 | gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | 1032 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1032 | 1033 | ||
| 1033 | if (True(image.flags & ImageFlagBits::Converted)) { | 1034 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 1034 | unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | 1035 | unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1035 | auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, | 1036 | auto copies = |
| 1036 | unswizzle_data_buffer); | 1037 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); |
| 1037 | ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); | 1038 | ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); |
| 1038 | image.UploadMemory(staging, copies); | 1039 | image.UploadMemory(staging, copies); |
| 1039 | } else { | 1040 | } else { |
| 1040 | const auto copies = | 1041 | const auto copies = |
| 1041 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); | 1042 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); |
| 1042 | image.UploadMemory(staging, copies); | 1043 | image.UploadMemory(staging, copies); |
| 1043 | } | 1044 | } |
| 1044 | } | 1045 | } |
| @@ -1231,11 +1232,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | |||
| 1231 | decode->image_id = image_id; | 1232 | decode->image_id = image_id; |
| 1232 | async_decodes.push_back(std::move(decode)); | 1233 | async_decodes.push_back(std::move(decode)); |
| 1233 | 1234 | ||
| 1234 | Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); | 1235 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; |
| 1235 | const size_t guest_size_bytes = image.guest_size_bytes; | 1236 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1236 | swizzle_data_buffer.resize_destructive(guest_size_bytes); | 1237 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1237 | gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | 1238 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1238 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, | 1239 | |
| 1240 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | ||
| 1239 | local_unswizzle_data_buffer); | 1241 | local_unswizzle_data_buffer); |
| 1240 | const size_t out_size = MapSizeBytes(image); | 1242 | const size_t out_size = MapSizeBytes(image); |
| 1241 | 1243 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0de6ed09d..a83f5d41c 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "common/div_ceil.h" | 20 | #include "common/div_ceil.h" |
| 21 | #include "common/scratch_buffer.h" | 21 | #include "common/scratch_buffer.h" |
| 22 | #include "common/settings.h" | 22 | #include "common/settings.h" |
| 23 | #include "core/memory.h" | ||
| 23 | #include "video_core/compatible_formats.h" | 24 | #include "video_core/compatible_formats.h" |
| 24 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | 26 | #include "video_core/memory_manager.h" |
| @@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 544 | tile_size.height, info.tile_width_spacing); | 545 | tile_size.height, info.tile_width_spacing); |
| 545 | const size_t subresource_size = sizes[level]; | 546 | const size_t subresource_size = sizes[level]; |
| 546 | 547 | ||
| 547 | tmp_buffer.resize_destructive(subresource_size); | ||
| 548 | const std::span<u8> dst(tmp_buffer); | ||
| 549 | |||
| 550 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 548 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 551 | const std::span<const u8> src = input.subspan(host_offset); | 549 | const std::span<const u8> src = input.subspan(host_offset); |
| 552 | gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | 550 | { |
| 553 | 551 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | |
| 554 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 552 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); |
| 555 | num_tiles.depth, block.height, block.depth); | ||
| 556 | 553 | ||
| 557 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | 554 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
| 555 | num_tiles.depth, block.height, block.depth); | ||
| 556 | } | ||
| 558 | 557 | ||
| 559 | host_offset += host_bytes_per_layer; | 558 | host_offset += host_bytes_per_layer; |
| 560 | guest_offset += layer_stride; | 559 | guest_offset += layer_stride; |
| @@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 837 | const Extent3D size = info.size; | 836 | const Extent3D size = info.size; |
| 838 | 837 | ||
| 839 | if (info.type == ImageType::Linear) { | 838 | if (info.type == ImageType::Linear) { |
| 839 | ASSERT(output.size_bytes() >= guest_size_bytes); | ||
| 840 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); | 840 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); |
| 841 | 841 | ||
| 842 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); | 842 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); |
| @@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 904 | return copies; | 904 | return copies; |
| 905 | } | 905 | } |
| 906 | 906 | ||
| 907 | BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 908 | const ImageBase& image, std::span<u8> output) { | ||
| 909 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); | ||
| 910 | return BufferCopy{ | ||
| 911 | .src_offset = 0, | ||
| 912 | .dst_offset = 0, | ||
| 913 | .size = image.guest_size_bytes, | ||
| 914 | }; | ||
| 915 | } | ||
| 916 | |||
| 917 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 907 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 918 | std::span<BufferImageCopy> copies) { | 908 | std::span<BufferImageCopy> copies) { |
| 919 | u32 output_offset = 0; | 909 | u32 output_offset = 0; |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index ab45a43c4..5a0649d24 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
| @@ -66,9 +66,6 @@ struct OverlapResult { | |||
| 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, std::span<u8> output); | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | 68 | ||
| 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 70 | const ImageBase& image, std::span<u8> output); | ||
| 71 | |||
| 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 69 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 73 | std::span<BufferImageCopy> copies); | 70 | std::span<BufferImageCopy> copies); |
| 74 | 71 | ||