diff options
| author | 2023-04-14 18:07:38 +0200 | |
|---|---|---|
| committer | 2023-04-29 00:18:21 +0200 | |
| commit | e3a2ca96bd2350471ebb6c2907c67b10254a4f7e (patch) | |
| tree | 5238364cdea97449adb0766df0d6263123ed06da /src/video_core/texture_cache | |
| parent | TextureCache: refactor DMA downloads to allow multiple buffers. (diff) | |
| download | yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.gz yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.tar.xz yuzu-e3a2ca96bd2350471ebb6c2907c67b10254a4f7e.zip | |
Accelerate DMA: Use texture cache async downloads to perform the copies to host.
WIP
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 118 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 23 |
2 files changed, 103 insertions, 38 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cd5aa31e..63b8b5af5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -661,27 +661,40 @@ template <class P> | |||
| 661 | void TextureCache<P>::CommitAsyncFlushes() { | 661 | void TextureCache<P>::CommitAsyncFlushes() { |
| 662 | // This is intentionally passing the value by copy | 662 | // This is intentionally passing the value by copy |
| 663 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 663 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 664 | const std::span<const ImageId> download_ids = uncommitted_downloads; | 664 | auto& download_ids = uncommitted_downloads; |
| 665 | if (download_ids.empty()) { | 665 | if (download_ids.empty()) { |
| 666 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 666 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 667 | uncommitted_downloads.clear(); | 667 | uncommitted_downloads.clear(); |
| 668 | async_buffers.emplace_back(std::optional<AsyncBuffer>{}); | 668 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 669 | uncommitted_async_buffers.clear(); | ||
| 669 | return; | 670 | return; |
| 670 | } | 671 | } |
| 671 | size_t total_size_bytes = 0; | 672 | size_t total_size_bytes = 0; |
| 672 | for (const ImageId image_id : download_ids) { | 673 | size_t last_async_buffer_id = uncommitted_async_buffers.size(); |
| 673 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 674 | bool any_none_dma = false; |
| 675 | for (PendingDownload& download_info : download_ids) { | ||
| 676 | if (download_info.is_swizzle) { | ||
| 677 | total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; | ||
| 678 | any_none_dma = true; | ||
| 679 | download_info.async_buffer_id = last_async_buffer_id; | ||
| 680 | } | ||
| 674 | } | 681 | } |
| 675 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); | 682 | if (any_none_dma) { |
| 676 | for (const ImageId image_id : download_ids) { | 683 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 677 | Image& image = slot_images[image_id]; | 684 | for (const PendingDownload& download_info : download_ids) { |
| 678 | const auto copies = FullDownloadCopies(image.info); | 685 | if (download_info.is_swizzle) { |
| 679 | image.DownloadMemory(download_map, copies); | 686 | Image& image = slot_images[download_info.object_id]; |
| 680 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | 687 | const auto copies = FullDownloadCopies(image.info); |
| 688 | image.DownloadMemory(download_map, copies); | ||
| 689 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | ||
| 690 | } | ||
| 691 | } | ||
| 692 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 681 | } | 693 | } |
| 682 | async_buffers.emplace_back(download_map); | ||
| 683 | } | 694 | } |
| 684 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 695 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 696 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); | ||
| 697 | uncommitted_async_buffers.clear(); | ||
| 685 | uncommitted_downloads.clear(); | 698 | uncommitted_downloads.clear(); |
| 686 | } | 699 | } |
| 687 | 700 | ||
| @@ -691,39 +704,57 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 691 | return; | 704 | return; |
| 692 | } | 705 | } |
| 693 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 706 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 694 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 707 | const auto& download_ids = committed_downloads.front(); |
| 695 | if (download_ids.empty()) { | 708 | if (download_ids.empty()) { |
| 696 | committed_downloads.pop_front(); | 709 | committed_downloads.pop_front(); |
| 697 | async_buffers.pop_front(); | 710 | async_buffers.pop_front(); |
| 698 | return; | 711 | return; |
| 699 | } | 712 | } |
| 700 | auto download_map = *async_buffers.front(); | 713 | auto download_map = std::move(async_buffers.front()); |
| 701 | std::span<u8> download_span = download_map.mapped_span; | ||
| 702 | for (size_t i = download_ids.size(); i > 0; i--) { | 714 | for (size_t i = download_ids.size(); i > 0; i--) { |
| 703 | const ImageBase& image = slot_images[download_ids[i - 1]]; | 715 | auto& download_info = download_ids[i - 1]; |
| 704 | const auto copies = FullDownloadCopies(image.info); | 716 | auto& download_buffer = download_map[download_info.async_buffer_id]; |
| 705 | download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); | 717 | if (download_info.is_swizzle) { |
| 706 | std::span<u8> download_span_alt = download_span.subspan(download_map.offset); | 718 | const ImageBase& image = slot_images[download_info.object_id]; |
| 707 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, | 719 | const auto copies = FullDownloadCopies(image.info); |
| 708 | swizzle_data_buffer); | 720 | download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); |
| 721 | std::span<u8> download_span = | ||
| 722 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 723 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||
| 724 | swizzle_data_buffer); | ||
| 725 | } else { | ||
| 726 | const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; | ||
| 727 | std::span<u8> download_span = | ||
| 728 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 729 | gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), | ||
| 730 | buffer_info.size); | ||
| 731 | slot_buffer_downloads.erase(download_info.object_id); | ||
| 732 | } | ||
| 733 | } | ||
| 734 | for (auto& download_buffer : download_map) { | ||
| 735 | runtime.FreeDeferredStagingBuffer(download_buffer); | ||
| 709 | } | 736 | } |
| 710 | runtime.FreeDeferredStagingBuffer(download_map); | ||
| 711 | committed_downloads.pop_front(); | 737 | committed_downloads.pop_front(); |
| 712 | async_buffers.pop_front(); | 738 | async_buffers.pop_front(); |
| 713 | } else { | 739 | } else { |
| 714 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 740 | const auto& download_ids = committed_downloads.front(); |
| 715 | if (download_ids.empty()) { | 741 | if (download_ids.empty()) { |
| 716 | committed_downloads.pop_front(); | 742 | committed_downloads.pop_front(); |
| 717 | return; | 743 | return; |
| 718 | } | 744 | } |
| 719 | size_t total_size_bytes = 0; | 745 | size_t total_size_bytes = 0; |
| 720 | for (const ImageId image_id : download_ids) { | 746 | for (const PendingDownload& download_info : download_ids) { |
| 721 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 747 | if (download_info.is_swizzle) { |
| 748 | total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; | ||
| 749 | } | ||
| 722 | } | 750 | } |
| 723 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 751 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 724 | const size_t original_offset = download_map.offset; | 752 | const size_t original_offset = download_map.offset; |
| 725 | for (const ImageId image_id : download_ids) { | 753 | for (const PendingDownload& download_info : download_ids) { |
| 726 | Image& image = slot_images[image_id]; | 754 | if (download_info.is_swizzle) { |
| 755 | continue; | ||
| 756 | } | ||
| 757 | Image& image = slot_images[download_info.object_id]; | ||
| 727 | const auto copies = FullDownloadCopies(image.info); | 758 | const auto copies = FullDownloadCopies(image.info); |
| 728 | image.DownloadMemory(download_map, copies); | 759 | image.DownloadMemory(download_map, copies); |
| 729 | download_map.offset += image.unswizzled_size_bytes; | 760 | download_map.offset += image.unswizzled_size_bytes; |
| @@ -732,8 +763,11 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 732 | runtime.Finish(); | 763 | runtime.Finish(); |
| 733 | download_map.offset = original_offset; | 764 | download_map.offset = original_offset; |
| 734 | std::span<u8> download_span = download_map.mapped_span; | 765 | std::span<u8> download_span = download_map.mapped_span; |
| 735 | for (const ImageId image_id : download_ids) { | 766 | for (const PendingDownload& download_info : download_ids) { |
| 736 | const ImageBase& image = slot_images[image_id]; | 767 | if (download_info.is_swizzle) { |
| 768 | continue; | ||
| 769 | } | ||
| 770 | const ImageBase& image = slot_images[download_info.object_id]; | ||
| 737 | const auto copies = FullDownloadCopies(image.info); | 771 | const auto copies = FullDownloadCopies(image.info); |
| 738 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | 772 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, |
| 739 | swizzle_data_buffer); | 773 | swizzle_data_buffer); |
| @@ -836,11 +870,27 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm | |||
| 836 | template <class P> | 870 | template <class P> |
| 837 | void TextureCache<P>::DownloadImageIntoBuffer( | 871 | void TextureCache<P>::DownloadImageIntoBuffer( |
| 838 | typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer, | 872 | typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer, |
| 839 | size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) { | 873 | size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies, GPUVAddr address, size_t size) { |
| 840 | std::array buffers{ | 874 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 841 | buffer, | 875 | auto slot = slot_buffer_downloads.insert(address, size); |
| 842 | }; | 876 | uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot); |
| 843 | image->DownloadMemory(buffers, buffer_offset, copies); | 877 | auto download_map = runtime.DownloadStagingBuffer(size, true); |
| 878 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 879 | std::array buffers{ | ||
| 880 | buffer, | ||
| 881 | download_map.buffer, | ||
| 882 | }; | ||
| 883 | std::array buffer_offsets{ | ||
| 884 | buffer_offset, | ||
| 885 | download_map.offset, | ||
| 886 | }; | ||
| 887 | image->DownloadMemory(buffers, buffer_offsets, copies); | ||
| 888 | } else { | ||
| 889 | std::array buffers{ | ||
| 890 | buffer, | ||
| 891 | }; | ||
| 892 | image->DownloadMemory(buffers, buffer_offset, copies); | ||
| 893 | } | ||
| 844 | } | 894 | } |
| 845 | 895 | ||
| 846 | template <class P> | 896 | template <class P> |
| @@ -2219,7 +2269,7 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) | |||
| 2219 | if (new_id) { | 2269 | if (new_id) { |
| 2220 | const ImageViewBase& old_view = slot_image_views[new_id]; | 2270 | const ImageViewBase& old_view = slot_image_views[new_id]; |
| 2221 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | 2271 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { |
| 2222 | uncommitted_downloads.push_back(old_view.image_id); | 2272 | uncommitted_downloads.emplace_back(true, 0, old_view.image_id); |
| 2223 | } | 2273 | } |
| 2224 | } | 2274 | } |
| 2225 | *old_id = new_id; | 2275 | *old_id = new_id; |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 51f44aed5..d5bba3379 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -217,7 +217,8 @@ public: | |||
| 217 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); | 217 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); |
| 218 | 218 | ||
| 219 | void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, | 219 | void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, |
| 220 | std::span<const VideoCommon::BufferImageCopy> copies); | 220 | std::span<const VideoCommon::BufferImageCopy> copies, |
| 221 | GPUVAddr address = 0, size_t size = 0); | ||
| 221 | 222 | ||
| 222 | /// Return true when a CPU region is modified from the GPU | 223 | /// Return true when a CPU region is modified from the GPU |
| 223 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 224 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| @@ -428,17 +429,31 @@ private: | |||
| 428 | u64 critical_memory; | 429 | u64 critical_memory; |
| 429 | size_t critical_gc; | 430 | size_t critical_gc; |
| 430 | 431 | ||
| 432 | struct BufferDownload { | ||
| 433 | GPUVAddr address; | ||
| 434 | size_t size; | ||
| 435 | }; | ||
| 436 | |||
| 437 | struct PendingDownload { | ||
| 438 | bool is_swizzle; | ||
| 439 | size_t async_buffer_id; | ||
| 440 | SlotId object_id; | ||
| 441 | }; | ||
| 442 | |||
| 431 | SlotVector<Image> slot_images; | 443 | SlotVector<Image> slot_images; |
| 432 | SlotVector<ImageMapView> slot_map_views; | 444 | SlotVector<ImageMapView> slot_map_views; |
| 433 | SlotVector<ImageView> slot_image_views; | 445 | SlotVector<ImageView> slot_image_views; |
| 434 | SlotVector<ImageAlloc> slot_image_allocs; | 446 | SlotVector<ImageAlloc> slot_image_allocs; |
| 435 | SlotVector<Sampler> slot_samplers; | 447 | SlotVector<Sampler> slot_samplers; |
| 436 | SlotVector<Framebuffer> slot_framebuffers; | 448 | SlotVector<Framebuffer> slot_framebuffers; |
| 449 | SlotVector<BufferDownload> slot_buffer_downloads; | ||
| 437 | 450 | ||
| 438 | // TODO: This data structure is not optimal and it should be reworked | 451 | // TODO: This data structure is not optimal and it should be reworked |
| 439 | std::vector<ImageId> uncommitted_downloads; | 452 | |
| 440 | std::deque<std::vector<ImageId>> committed_downloads; | 453 | std::vector<PendingDownload> uncommitted_downloads; |
| 441 | std::deque<std::optional<AsyncBuffer>> async_buffers; | 454 | std::deque<std::vector<PendingDownload>> committed_downloads; |
| 455 | std::vector<AsyncBuffer> uncommitted_async_buffers; | ||
| 456 | std::deque<std::vector<AsyncBuffer>> async_buffers; | ||
| 442 | 457 | ||
| 443 | struct LRUItemParams { | 458 | struct LRUItemParams { |
| 444 | using ObjectType = ImageId; | 459 | using ObjectType = ImageId; |