summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: Kelebek1, 2023-05-29 00:35:51 +0100
committer: Kelebek1, 2023-07-02 23:09:48 +0100
commit: 6f7cb69c94bef0795f054d881e061745f69d1eda (patch)
tree: cc0bec2fed92a5645886dde773add00c84d8b9f4 /src/video_core/texture_cache
parent: Merge pull request #10998 from Morph1984/qt-stop-messing-with-me (diff)
download: yuzu-6f7cb69c94bef0795f054d881e061745f69d1eda.tar.gz
yuzu-6f7cb69c94bef0795f054d881e061745f69d1eda.tar.xz
yuzu-6f7cb69c94bef0795f054d881e061745f69d1eda.zip
Use spans over guest memory where possible instead of copying data.
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 24
-rw-r--r-- src/video_core/texture_cache/util.cpp 26
-rw-r--r-- src/video_core/texture_cache/util.h 3
3 files changed, 21 insertions, 32 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 79f158db4..a1457798a 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -8,6 +8,7 @@
8 8
9#include "common/alignment.h" 9#include "common/alignment.h"
10#include "common/settings.h" 10#include "common/settings.h"
11#include "core/memory.h"
11#include "video_core/control/channel_state.h" 12#include "video_core/control/channel_state.h"
12#include "video_core/dirty_flags.h" 13#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h" 14#include "video_core/engines/kepler_compute.h"
@@ -1022,19 +1023,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
1022 runtime.AccelerateImageUpload(image, staging, uploads); 1023 runtime.AccelerateImageUpload(image, staging, uploads);
1023 return; 1024 return;
1024 } 1025 }
1025 const size_t guest_size_bytes = image.guest_size_bytes; 1026
1026 swizzle_data_buffer.resize_destructive(guest_size_bytes); 1027 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1027 gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); 1028 *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1028 1029
1029 if (True(image.flags & ImageFlagBits::Converted)) { 1030 if (True(image.flags & ImageFlagBits::Converted)) {
1030 unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); 1031 unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
1031 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, 1032 auto copies =
1032 unswizzle_data_buffer); 1033 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
1033 ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); 1034 ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
1034 image.UploadMemory(staging, copies); 1035 image.UploadMemory(staging, copies);
1035 } else { 1036 } else {
1036 const auto copies = 1037 const auto copies =
1037 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); 1038 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
1038 image.UploadMemory(staging, copies); 1039 image.UploadMemory(staging, copies);
1039 } 1040 }
1040} 1041}
@@ -1227,11 +1228,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
1227 decode->image_id = image_id; 1228 decode->image_id = image_id;
1228 async_decodes.push_back(std::move(decode)); 1229 async_decodes.push_back(std::move(decode));
1229 1230
1230 Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); 1231 static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
1231 const size_t guest_size_bytes = image.guest_size_bytes; 1232 local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
1232 swizzle_data_buffer.resize_destructive(guest_size_bytes); 1233 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1233 gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); 1234 *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1234 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, 1235
1236 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
1235 local_unswizzle_data_buffer); 1237 local_unswizzle_data_buffer);
1236 const size_t out_size = MapSizeBytes(image); 1238 const size_t out_size = MapSizeBytes(image);
1237 1239
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0de6ed09d..a83f5d41c 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -20,6 +20,7 @@
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h" 21#include "common/scratch_buffer.h"
22#include "common/settings.h" 22#include "common/settings.h"
23#include "core/memory.h"
23#include "video_core/compatible_formats.h" 24#include "video_core/compatible_formats.h"
24#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
@@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
544 tile_size.height, info.tile_width_spacing); 545 tile_size.height, info.tile_width_spacing);
545 const size_t subresource_size = sizes[level]; 546 const size_t subresource_size = sizes[level];
546 547
547 tmp_buffer.resize_destructive(subresource_size);
548 const std::span<u8> dst(tmp_buffer);
549
550 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 548 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
551 const std::span<const u8> src = input.subspan(host_offset); 549 const std::span<const u8> src = input.subspan(host_offset);
552 gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); 550 {
553 551 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
554 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 552 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
555 num_tiles.depth, block.height, block.depth);
556 553
557 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); 554 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
555 num_tiles.depth, block.height, block.depth);
556 }
558 557
559 host_offset += host_bytes_per_layer; 558 host_offset += host_bytes_per_layer;
560 guest_offset += layer_stride; 559 guest_offset += layer_stride;
@@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
837 const Extent3D size = info.size; 836 const Extent3D size = info.size;
838 837
839 if (info.type == ImageType::Linear) { 838 if (info.type == ImageType::Linear) {
839 ASSERT(output.size_bytes() >= guest_size_bytes);
840 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); 840 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
841 841
842 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); 842 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
@@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
904 return copies; 904 return copies;
905} 905}
906 906
907BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
908 const ImageBase& image, std::span<u8> output) {
909 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
910 return BufferCopy{
911 .src_offset = 0,
912 .dst_offset = 0,
913 .size = image.guest_size_bytes,
914 };
915}
916
917void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 907void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
918 std::span<BufferImageCopy> copies) { 908 std::span<BufferImageCopy> copies) {
919 u32 output_offset = 0; 909 u32 output_offset = 0;
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index ab45a43c4..5a0649d24 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -66,9 +66,6 @@ struct OverlapResult {
66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
67 std::span<const u8> input, std::span<u8> output); 67 std::span<const u8> input, std::span<u8> output);
68 68
69[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
70 const ImageBase& image, std::span<u8> output);
71
72void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 69void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
73 std::span<BufferImageCopy> copies); 70 std::span<BufferImageCopy> copies);
74 71