diff options
| author | 2021-01-16 16:20:18 -0300 | |
|---|---|---|
| committer | 2021-02-13 02:17:24 -0300 | |
| commit | 35df1d1864ba721ea7b1cebf9a106dd771cde4f5 (patch) | |
| tree | 034a8281294246e2a8eea92d1937607ad00ed428 /src/video_core/texture_cache | |
| parent | vulkan_device: Enable robustBufferAccess (diff) | |
| download | yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.gz yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.xz yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.zip | |
vk_staging_buffer_pool: Add stream buffer for small uploads
This uses a ring buffer similar to OpenGL's stream buffer for small
uploads. This stops us from allocating several small buffers, reducing
memory fragmentation and improving cache locality.
It uses dedicated allocations when possible.
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 38 |
1 file changed, 19 insertions, 19 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f336b705f..b1da69971 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -212,7 +212,7 @@ private: | |||
| 212 | 212 | ||
| 213 | /// Upload data from guest to an image | 213 | /// Upload data from guest to an image |
| 214 | template <typename StagingBuffer> | 214 | template <typename StagingBuffer> |
| 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); | 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer); |
| 216 | 216 | ||
| 217 | /// Find or create an image view from a guest descriptor | 217 | /// Find or create an image view from a guest descriptor |
| 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| @@ -592,7 +592,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 592 | Image& image = slot_images[image_id]; | 592 | Image& image = slot_images[image_id]; |
| 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 594 | const auto copies = FullDownloadCopies(image.info); | 594 | const auto copies = FullDownloadCopies(image.info); |
| 595 | image.DownloadMemory(map, 0, copies); | 595 | image.DownloadMemory(map, copies); |
| 596 | runtime.Finish(); | 596 | runtime.Finish(); |
| 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 598 | } | 598 | } |
| @@ -750,24 +750,24 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; |
| 751 | } | 751 | } |
| 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 753 | size_t buffer_offset = 0; | 753 | const size_t original_offset = download_map.offset; |
| 754 | for (const ImageId image_id : download_ids) { | 754 | for (const ImageId image_id : download_ids) { |
| 755 | Image& image = slot_images[image_id]; | 755 | Image& image = slot_images[image_id]; |
| 756 | const auto copies = FullDownloadCopies(image.info); | 756 | const auto copies = FullDownloadCopies(image.info); |
| 757 | image.DownloadMemory(download_map, buffer_offset, copies); | 757 | image.DownloadMemory(download_map, copies); |
| 758 | buffer_offset += image.unswizzled_size_bytes; | 758 | download_map.offset += image.unswizzled_size_bytes; |
| 759 | } | 759 | } |
| 760 | // Wait for downloads to finish | 760 | // Wait for downloads to finish |
| 761 | runtime.Finish(); | 761 | runtime.Finish(); |
| 762 | 762 | ||
| 763 | buffer_offset = 0; | 763 | download_map.offset = original_offset; |
| 764 | const std::span<u8> download_span = download_map.mapped_span; | 764 | std::span<u8> download_span = download_map.mapped_span; |
| 765 | for (const ImageId image_id : download_ids) { | 765 | for (const ImageId image_id : download_ids) { |
| 766 | const ImageBase& image = slot_images[image_id]; | 766 | const ImageBase& image = slot_images[image_id]; |
| 767 | const auto copies = FullDownloadCopies(image.info); | 767 | const auto copies = FullDownloadCopies(image.info); |
| 768 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | 768 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); |
| 769 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | 769 | download_map.offset += image.unswizzled_size_bytes; |
| 770 | buffer_offset += image.unswizzled_size_bytes; | 770 | download_span = download_span.subspan(image.unswizzled_size_bytes); |
| 771 | } | 771 | } |
| 772 | committed_downloads.pop(); | 772 | committed_downloads.pop(); |
| 773 | } | 773 | } |
| @@ -798,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
| 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 799 | return; | 799 | return; |
| 800 | } | 800 | } |
| 801 | auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); | 801 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); |
| 802 | UploadImageContents(image, map, 0); | 802 | UploadImageContents(image, staging); |
| 803 | runtime.InsertUploadMemoryBarrier(); | 803 | runtime.InsertUploadMemoryBarrier(); |
| 804 | } | 804 | } |
| 805 | 805 | ||
| 806 | template <class P> | 806 | template <class P> |
| 807 | template <typename MapBuffer> | 807 | template <typename StagingBuffer> |
| 808 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | 808 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { |
| 809 | const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset); | 809 | const std::span<u8> mapped_span = staging.mapped_span; |
| 810 | const GPUVAddr gpu_addr = image.gpu_addr; | 810 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 811 | 811 | ||
| 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { |
| 813 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | 813 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |
| 814 | const auto uploads = FullUploadSwizzles(image.info); | 814 | const auto uploads = FullUploadSwizzles(image.info); |
| 815 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | 815 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 816 | } else if (True(image.flags & ImageFlagBits::Converted)) { | 816 | } else if (True(image.flags & ImageFlagBits::Converted)) { |
| 817 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | 817 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); |
| 818 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | 818 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); |
| 819 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 819 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); |
| 820 | image.UploadMemory(map, buffer_offset, copies); | 820 | image.UploadMemory(staging, copies); |
| 821 | } else if (image.info.type == ImageType::Buffer) { | 821 | } else if (image.info.type == ImageType::Buffer) { |
| 822 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | 822 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; |
| 823 | image.UploadMemory(map, buffer_offset, copies); | 823 | image.UploadMemory(staging, copies); |
| 824 | } else { | 824 | } else { |
| 825 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | 825 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); |
| 826 | image.UploadMemory(map, buffer_offset, copies); | 826 | image.UploadMemory(staging, copies); |
| 827 | } | 827 | } |
| 828 | } | 828 | } |
| 829 | 829 | ||