summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-04-14 18:07:38 +0200
committer: Fernando Sahmkow, 2023-04-29 00:18:21 +0200
commit: e3a2ca96bd2350471ebb6c2907c67b10254a4f7e (patch)
tree: 5238364cdea97449adb0766df0d6263123ed06da /src/video_core/texture_cache
parent: TextureCache: refactor DMA downloads to allow multiple buffers. (diff)
download: yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.gz
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.xz
yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.zip
Accelerate DMA: Use texture cache async downloads to perform the copies
to host. WIP
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 118
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 23
2 files changed, 103 insertions, 38 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2cd5aa31e..63b8b5af5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -661,27 +661,40 @@ template <class P>
661void TextureCache<P>::CommitAsyncFlushes() { 661void TextureCache<P>::CommitAsyncFlushes() {
662 // This is intentionally passing the value by copy 662 // This is intentionally passing the value by copy
663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
664 const std::span<const ImageId> download_ids = uncommitted_downloads; 664 auto& download_ids = uncommitted_downloads;
665 if (download_ids.empty()) { 665 if (download_ids.empty()) {
666 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 666 committed_downloads.emplace_back(std::move(uncommitted_downloads));
667 uncommitted_downloads.clear(); 667 uncommitted_downloads.clear();
668 async_buffers.emplace_back(std::optional<AsyncBuffer>{}); 668 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
669 uncommitted_async_buffers.clear();
669 return; 670 return;
670 } 671 }
671 size_t total_size_bytes = 0; 672 size_t total_size_bytes = 0;
672 for (const ImageId image_id : download_ids) { 673 size_t last_async_buffer_id = uncommitted_async_buffers.size();
673 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 674 bool any_none_dma = false;
675 for (PendingDownload& download_info : download_ids) {
676 if (download_info.is_swizzle) {
677 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
678 any_none_dma = true;
679 download_info.async_buffer_id = last_async_buffer_id;
680 }
674 } 681 }
675 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); 682 if (any_none_dma) {
676 for (const ImageId image_id : download_ids) { 683 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
677 Image& image = slot_images[image_id]; 684 for (const PendingDownload& download_info : download_ids) {
678 const auto copies = FullDownloadCopies(image.info); 685 if (download_info.is_swizzle) {
679 image.DownloadMemory(download_map, copies); 686 Image& image = slot_images[download_info.object_id];
680 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); 687 const auto copies = FullDownloadCopies(image.info);
688 image.DownloadMemory(download_map, copies);
689 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
690 }
691 }
692 uncommitted_async_buffers.emplace_back(download_map);
681 } 693 }
682 async_buffers.emplace_back(download_map);
683 } 694 }
684 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 695 committed_downloads.emplace_back(std::move(uncommitted_downloads));
696 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
697 uncommitted_async_buffers.clear();
685 uncommitted_downloads.clear(); 698 uncommitted_downloads.clear();
686} 699}
687 700
@@ -691,39 +704,57 @@ void TextureCache<P>::PopAsyncFlushes() {
691 return; 704 return;
692 } 705 }
693 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 706 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
694 const std::span<const ImageId> download_ids = committed_downloads.front(); 707 const auto& download_ids = committed_downloads.front();
695 if (download_ids.empty()) { 708 if (download_ids.empty()) {
696 committed_downloads.pop_front(); 709 committed_downloads.pop_front();
697 async_buffers.pop_front(); 710 async_buffers.pop_front();
698 return; 711 return;
699 } 712 }
700 auto download_map = *async_buffers.front(); 713 auto download_map = std::move(async_buffers.front());
701 std::span<u8> download_span = download_map.mapped_span;
702 for (size_t i = download_ids.size(); i > 0; i--) { 714 for (size_t i = download_ids.size(); i > 0; i--) {
703 const ImageBase& image = slot_images[download_ids[i - 1]]; 715 auto& download_info = download_ids[i - 1];
704 const auto copies = FullDownloadCopies(image.info); 716 auto& download_buffer = download_map[download_info.async_buffer_id];
705 download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); 717 if (download_info.is_swizzle) {
706 std::span<u8> download_span_alt = download_span.subspan(download_map.offset); 718 const ImageBase& image = slot_images[download_info.object_id];
707 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, 719 const auto copies = FullDownloadCopies(image.info);
708 swizzle_data_buffer); 720 download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
721 std::span<u8> download_span =
722 download_buffer.mapped_span.subspan(download_buffer.offset);
723 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
724 swizzle_data_buffer);
725 } else {
726 const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
727 std::span<u8> download_span =
728 download_buffer.mapped_span.subspan(download_buffer.offset);
729 gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
730 buffer_info.size);
731 slot_buffer_downloads.erase(download_info.object_id);
732 }
733 }
734 for (auto& download_buffer : download_map) {
735 runtime.FreeDeferredStagingBuffer(download_buffer);
709 } 736 }
710 runtime.FreeDeferredStagingBuffer(download_map);
711 committed_downloads.pop_front(); 737 committed_downloads.pop_front();
712 async_buffers.pop_front(); 738 async_buffers.pop_front();
713 } else { 739 } else {
714 const std::span<const ImageId> download_ids = committed_downloads.front(); 740 const auto& download_ids = committed_downloads.front();
715 if (download_ids.empty()) { 741 if (download_ids.empty()) {
716 committed_downloads.pop_front(); 742 committed_downloads.pop_front();
717 return; 743 return;
718 } 744 }
719 size_t total_size_bytes = 0; 745 size_t total_size_bytes = 0;
720 for (const ImageId image_id : download_ids) { 746 for (const PendingDownload& download_info : download_ids) {
721 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 747 if (download_info.is_swizzle) {
748 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
749 }
722 } 750 }
723 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); 751 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
724 const size_t original_offset = download_map.offset; 752 const size_t original_offset = download_map.offset;
725 for (const ImageId image_id : download_ids) { 753 for (const PendingDownload& download_info : download_ids) {
726 Image& image = slot_images[image_id]; 754 if (download_info.is_swizzle) {
755 continue;
756 }
757 Image& image = slot_images[download_info.object_id];
727 const auto copies = FullDownloadCopies(image.info); 758 const auto copies = FullDownloadCopies(image.info);
728 image.DownloadMemory(download_map, copies); 759 image.DownloadMemory(download_map, copies);
729 download_map.offset += image.unswizzled_size_bytes; 760 download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +763,11 @@ void TextureCache<P>::PopAsyncFlushes() {
732 runtime.Finish(); 763 runtime.Finish();
733 download_map.offset = original_offset; 764 download_map.offset = original_offset;
734 std::span<u8> download_span = download_map.mapped_span; 765 std::span<u8> download_span = download_map.mapped_span;
735 for (const ImageId image_id : download_ids) { 766 for (const PendingDownload& download_info : download_ids) {
736 const ImageBase& image = slot_images[image_id]; 767 if (download_info.is_swizzle) {
768 continue;
769 }
770 const ImageBase& image = slot_images[download_info.object_id];
737 const auto copies = FullDownloadCopies(image.info); 771 const auto copies = FullDownloadCopies(image.info);
738 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, 772 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
739 swizzle_data_buffer); 773 swizzle_data_buffer);
@@ -836,11 +870,27 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
836template <class P> 870template <class P>
837void TextureCache<P>::DownloadImageIntoBuffer( 871void TextureCache<P>::DownloadImageIntoBuffer(
838 typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer, 872 typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer,
839 size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) { 873 size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies, GPUVAddr address, size_t size) {
840 std::array buffers{ 874 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
841 buffer, 875 auto slot = slot_buffer_downloads.insert(address, size);
842 }; 876 uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot);
843 image->DownloadMemory(buffers, buffer_offset, copies); 877 auto download_map = runtime.DownloadStagingBuffer(size, true);
878 uncommitted_async_buffers.emplace_back(download_map);
879 std::array buffers{
880 buffer,
881 download_map.buffer,
882 };
883 std::array buffer_offsets{
884 buffer_offset,
885 download_map.offset,
886 };
887 image->DownloadMemory(buffers, buffer_offsets, copies);
888 } else {
889 std::array buffers{
890 buffer,
891 };
892 image->DownloadMemory(buffers, buffer_offset, copies);
893 }
844} 894}
845 895
846template <class P> 896template <class P>
@@ -2219,7 +2269,7 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
2219 if (new_id) { 2269 if (new_id) {
2220 const ImageViewBase& old_view = slot_image_views[new_id]; 2270 const ImageViewBase& old_view = slot_image_views[new_id];
2221 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { 2271 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
2222 uncommitted_downloads.push_back(old_view.image_id); 2272 uncommitted_downloads.emplace_back(true, 0, old_view.image_id);
2223 } 2273 }
2224 } 2274 }
2225 *old_id = new_id; 2275 *old_id = new_id;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 51f44aed5..d5bba3379 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -217,7 +217,8 @@ public:
217 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); 217 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
218 218
219 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, 219 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
220 std::span<const VideoCommon::BufferImageCopy> copies); 220 std::span<const VideoCommon::BufferImageCopy> copies,
221 GPUVAddr address = 0, size_t size = 0);
221 222
222 /// Return true when a CPU region is modified from the GPU 223 /// Return true when a CPU region is modified from the GPU
223 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 224 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
@@ -428,17 +429,31 @@ private:
428 u64 critical_memory; 429 u64 critical_memory;
429 size_t critical_gc; 430 size_t critical_gc;
430 431
432 struct BufferDownload {
433 GPUVAddr address;
434 size_t size;
435 };
436
437 struct PendingDownload {
438 bool is_swizzle;
439 size_t async_buffer_id;
440 SlotId object_id;
441 };
442
431 SlotVector<Image> slot_images; 443 SlotVector<Image> slot_images;
432 SlotVector<ImageMapView> slot_map_views; 444 SlotVector<ImageMapView> slot_map_views;
433 SlotVector<ImageView> slot_image_views; 445 SlotVector<ImageView> slot_image_views;
434 SlotVector<ImageAlloc> slot_image_allocs; 446 SlotVector<ImageAlloc> slot_image_allocs;
435 SlotVector<Sampler> slot_samplers; 447 SlotVector<Sampler> slot_samplers;
436 SlotVector<Framebuffer> slot_framebuffers; 448 SlotVector<Framebuffer> slot_framebuffers;
449 SlotVector<BufferDownload> slot_buffer_downloads;
437 450
438 // TODO: This data structure is not optimal and it should be reworked 451 // TODO: This data structure is not optimal and it should be reworked
439 std::vector<ImageId> uncommitted_downloads; 452
440 std::deque<std::vector<ImageId>> committed_downloads; 453 std::vector<PendingDownload> uncommitted_downloads;
441 std::deque<std::optional<AsyncBuffer>> async_buffers; 454 std::deque<std::vector<PendingDownload>> committed_downloads;
455 std::vector<AsyncBuffer> uncommitted_async_buffers;
456 std::deque<std::vector<AsyncBuffer>> async_buffers;
442 457
443 struct LRUItemParams { 458 struct LRUItemParams {
444 using ObjectType = ImageId; 459 using ObjectType = ImageId;