diff options
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 136 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 33 |
2 files changed, 125 insertions, 44 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ed5c768d8..e601f8446 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -1,9 +1,10 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <unordered_set> | 6 | #include <unordered_set> |
| 7 | #include <boost/container/small_vector.hpp> | ||
| 7 | 8 | ||
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 9 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| @@ -17,15 +18,10 @@ | |||
| 17 | 18 | ||
| 18 | namespace VideoCommon { | 19 | namespace VideoCommon { |
| 19 | 20 | ||
| 20 | using Tegra::Texture::SwizzleSource; | ||
| 21 | using Tegra::Texture::TextureType; | ||
| 22 | using Tegra::Texture::TICEntry; | 21 | using Tegra::Texture::TICEntry; |
| 23 | using Tegra::Texture::TSCEntry; | 22 | using Tegra::Texture::TSCEntry; |
| 24 | using VideoCore::Surface::GetFormatType; | 23 | using VideoCore::Surface::GetFormatType; |
| 25 | using VideoCore::Surface::IsCopyCompatible; | ||
| 26 | using VideoCore::Surface::PixelFormat; | 24 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 28 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 29 | using VideoCore::Surface::SurfaceType; | 25 | using VideoCore::Surface::SurfaceType; |
| 30 | using namespace Common::Literals; | 26 | using namespace Common::Literals; |
| 31 | 27 | ||
| @@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() { | |||
| 143 | runtime.TickFrame(); | 139 | runtime.TickFrame(); |
| 144 | critical_gc = 0; | 140 | critical_gc = 0; |
| 145 | ++frame_tick; | 141 | ++frame_tick; |
| 142 | |||
| 143 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 144 | for (auto& buffer : async_buffers_death_ring) { | ||
| 145 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 146 | } | ||
| 147 | async_buffers_death_ring.clear(); | ||
| 148 | } | ||
| 146 | } | 149 | } |
| 147 | 150 | ||
| 148 | template <class P> | 151 | template <class P> |
| @@ -661,25 +664,39 @@ template <class P> | |||
| 661 | void TextureCache<P>::CommitAsyncFlushes() { | 664 | void TextureCache<P>::CommitAsyncFlushes() { |
| 662 | // This is intentionally passing the value by copy | 665 | // This is intentionally passing the value by copy |
| 663 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 666 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 664 | const std::span<const ImageId> download_ids = uncommitted_downloads; | 667 | auto& download_ids = uncommitted_downloads; |
| 665 | if (download_ids.empty()) { | 668 | if (download_ids.empty()) { |
| 666 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 669 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 667 | uncommitted_downloads.clear(); | 670 | uncommitted_downloads.clear(); |
| 668 | async_buffers.emplace_back(std::optional<AsyncBuffer>{}); | 671 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 672 | uncommitted_async_buffers.clear(); | ||
| 669 | return; | 673 | return; |
| 670 | } | 674 | } |
| 671 | size_t total_size_bytes = 0; | 675 | size_t total_size_bytes = 0; |
| 672 | for (const ImageId image_id : download_ids) { | 676 | size_t last_async_buffer_id = uncommitted_async_buffers.size(); |
| 673 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 677 | bool any_none_dma = false; |
| 678 | for (PendingDownload& download_info : download_ids) { | ||
| 679 | if (download_info.is_swizzle) { | ||
| 680 | total_size_bytes += | ||
| 681 | Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); | ||
| 682 | any_none_dma = true; | ||
| 683 | download_info.async_buffer_id = last_async_buffer_id; | ||
| 684 | } | ||
| 674 | } | 685 | } |
| 675 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); | 686 | if (any_none_dma) { |
| 676 | for (const ImageId image_id : download_ids) { | 687 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 677 | Image& image = slot_images[image_id]; | 688 | for (const PendingDownload& download_info : download_ids) { |
| 678 | const auto copies = FullDownloadCopies(image.info); | 689 | if (download_info.is_swizzle) { |
| 679 | image.DownloadMemory(download_map, copies); | 690 | Image& image = slot_images[download_info.object_id]; |
| 680 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | 691 | const auto copies = FullDownloadCopies(image.info); |
| 692 | image.DownloadMemory(download_map, copies); | ||
| 693 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | ||
| 694 | } | ||
| 695 | } | ||
| 696 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 681 | } | 697 | } |
| 682 | async_buffers.emplace_back(download_map); | 698 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 699 | uncommitted_async_buffers.clear(); | ||
| 683 | } | 700 | } |
| 684 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 701 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 685 | uncommitted_downloads.clear(); | 702 | uncommitted_downloads.clear(); |
| @@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 691 | return; | 708 | return; |
| 692 | } | 709 | } |
| 693 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 710 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 694 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 711 | const auto& download_ids = committed_downloads.front(); |
| 695 | if (download_ids.empty()) { | 712 | if (download_ids.empty()) { |
| 696 | committed_downloads.pop_front(); | 713 | committed_downloads.pop_front(); |
| 697 | async_buffers.pop_front(); | 714 | async_buffers.pop_front(); |
| 698 | return; | 715 | return; |
| 699 | } | 716 | } |
| 700 | auto download_map = *async_buffers.front(); | 717 | auto download_map = std::move(async_buffers.front()); |
| 701 | std::span<u8> download_span = download_map.mapped_span; | ||
| 702 | for (size_t i = download_ids.size(); i > 0; i--) { | 718 | for (size_t i = download_ids.size(); i > 0; i--) { |
| 703 | const ImageBase& image = slot_images[download_ids[i - 1]]; | 719 | auto& download_info = download_ids[i - 1]; |
| 704 | const auto copies = FullDownloadCopies(image.info); | 720 | auto& download_buffer = download_map[download_info.async_buffer_id]; |
| 705 | download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); | 721 | if (download_info.is_swizzle) { |
| 706 | std::span<u8> download_span_alt = download_span.subspan(download_map.offset); | 722 | const ImageBase& image = slot_images[download_info.object_id]; |
| 707 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, | 723 | const auto copies = FullDownloadCopies(image.info); |
| 708 | swizzle_data_buffer); | 724 | download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); |
| 725 | std::span<u8> download_span = | ||
| 726 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 727 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||
| 728 | swizzle_data_buffer); | ||
| 729 | } else { | ||
| 730 | const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; | ||
| 731 | std::span<u8> download_span = | ||
| 732 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 733 | gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), | ||
| 734 | buffer_info.size); | ||
| 735 | slot_buffer_downloads.erase(download_info.object_id); | ||
| 736 | } | ||
| 737 | } | ||
| 738 | for (auto& download_buffer : download_map) { | ||
| 739 | async_buffers_death_ring.emplace_back(download_buffer); | ||
| 709 | } | 740 | } |
| 710 | runtime.FreeDeferredStagingBuffer(download_map); | ||
| 711 | committed_downloads.pop_front(); | 741 | committed_downloads.pop_front(); |
| 712 | async_buffers.pop_front(); | 742 | async_buffers.pop_front(); |
| 713 | } else { | 743 | } else { |
| 714 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 744 | const auto& download_ids = committed_downloads.front(); |
| 715 | if (download_ids.empty()) { | 745 | if (download_ids.empty()) { |
| 716 | committed_downloads.pop_front(); | 746 | committed_downloads.pop_front(); |
| 717 | return; | 747 | return; |
| 718 | } | 748 | } |
| 719 | size_t total_size_bytes = 0; | 749 | size_t total_size_bytes = 0; |
| 720 | for (const ImageId image_id : download_ids) { | 750 | for (const PendingDownload& download_info : download_ids) { |
| 721 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 751 | if (download_info.is_swizzle) { |
| 752 | total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; | ||
| 753 | } | ||
| 722 | } | 754 | } |
| 723 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 755 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 724 | const size_t original_offset = download_map.offset; | 756 | const size_t original_offset = download_map.offset; |
| 725 | for (const ImageId image_id : download_ids) { | 757 | for (const PendingDownload& download_info : download_ids) { |
| 726 | Image& image = slot_images[image_id]; | 758 | if (!download_info.is_swizzle) { |
| 759 | continue; | ||
| 760 | } | ||
| 761 | Image& image = slot_images[download_info.object_id]; | ||
| 727 | const auto copies = FullDownloadCopies(image.info); | 762 | const auto copies = FullDownloadCopies(image.info); |
| 728 | image.DownloadMemory(download_map, copies); | 763 | image.DownloadMemory(download_map, copies); |
| 729 | download_map.offset += image.unswizzled_size_bytes; | 764 | download_map.offset += image.unswizzled_size_bytes; |
| @@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 732 | runtime.Finish(); | 767 | runtime.Finish(); |
| 733 | download_map.offset = original_offset; | 768 | download_map.offset = original_offset; |
| 734 | std::span<u8> download_span = download_map.mapped_span; | 769 | std::span<u8> download_span = download_map.mapped_span; |
| 735 | for (const ImageId image_id : download_ids) { | 770 | for (const PendingDownload& download_info : download_ids) { |
| 736 | const ImageBase& image = slot_images[image_id]; | 771 | if (!download_info.is_swizzle) { |
| 772 | continue; | ||
| 773 | } | ||
| 774 | const ImageBase& image = slot_images[download_info.object_id]; | ||
| 737 | const auto copies = FullDownloadCopies(image.info); | 775 | const auto copies = FullDownloadCopies(image.info); |
| 738 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | 776 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, |
| 739 | swizzle_data_buffer); | 777 | swizzle_data_buffer); |
| @@ -834,6 +872,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm | |||
| 834 | } | 872 | } |
| 835 | 873 | ||
| 836 | template <class P> | 874 | template <class P> |
| 875 | void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image, | ||
| 876 | typename TextureCache<P>::BufferType buffer, | ||
| 877 | size_t buffer_offset, | ||
| 878 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 879 | GPUVAddr address, size_t size) { | ||
| 880 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 881 | const BufferDownload new_buffer_download{address, size}; | ||
| 882 | auto slot = slot_buffer_downloads.insert(new_buffer_download); | ||
| 883 | const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; | ||
| 884 | uncommitted_downloads.emplace_back(new_download); | ||
| 885 | auto download_map = runtime.DownloadStagingBuffer(size, true); | ||
| 886 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 887 | std::array buffers{ | ||
| 888 | buffer, | ||
| 889 | download_map.buffer, | ||
| 890 | }; | ||
| 891 | std::array buffer_offsets{ | ||
| 892 | buffer_offset, | ||
| 893 | download_map.offset, | ||
| 894 | }; | ||
| 895 | image->DownloadMemory(buffers, buffer_offsets, copies); | ||
| 896 | } else { | ||
| 897 | image->DownloadMemory(buffer, buffer_offset, copies); | ||
| 898 | } | ||
| 899 | } | ||
| 900 | |||
| 901 | template <class P> | ||
| 837 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | 902 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { |
| 838 | if (False(image.flags & ImageFlagBits::CpuModified)) { | 903 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 839 | // Only upload modified images | 904 | // Only upload modified images |
| @@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) | |||
| 2209 | if (new_id) { | 2274 | if (new_id) { |
| 2210 | const ImageViewBase& old_view = slot_image_views[new_id]; | 2275 | const ImageViewBase& old_view = slot_image_views[new_id]; |
| 2211 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | 2276 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { |
| 2212 | uncommitted_downloads.push_back(old_view.image_id); | 2277 | const PendingDownload new_download{true, 0, old_view.image_id}; |
| 2278 | uncommitted_downloads.emplace_back(new_download); | ||
| 2213 | } | 2279 | } |
| 2214 | } | 2280 | } |
| 2215 | *old_id = new_id; | 2281 | *old_id = new_id; |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5a5b4179c..758b7e212 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| @@ -40,14 +40,9 @@ struct ChannelState; | |||
| 40 | 40 | ||
| 41 | namespace VideoCommon { | 41 | namespace VideoCommon { |
| 42 | 42 | ||
| 43 | using Tegra::Texture::SwizzleSource; | ||
| 44 | using Tegra::Texture::TICEntry; | 43 | using Tegra::Texture::TICEntry; |
| 45 | using Tegra::Texture::TSCEntry; | 44 | using Tegra::Texture::TSCEntry; |
| 46 | using VideoCore::Surface::GetFormatType; | ||
| 47 | using VideoCore::Surface::IsCopyCompatible; | ||
| 48 | using VideoCore::Surface::PixelFormat; | 45 | using VideoCore::Surface::PixelFormat; |
| 49 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 50 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 51 | using namespace Common::Literals; | 46 | using namespace Common::Literals; |
| 52 | 47 | ||
| 53 | struct ImageViewInOut { | 48 | struct ImageViewInOut { |
| @@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
| 119 | using Sampler = typename P::Sampler; | 114 | using Sampler = typename P::Sampler; |
| 120 | using Framebuffer = typename P::Framebuffer; | 115 | using Framebuffer = typename P::Framebuffer; |
| 121 | using AsyncBuffer = typename P::AsyncBuffer; | 116 | using AsyncBuffer = typename P::AsyncBuffer; |
| 117 | using BufferType = typename P::BufferType; | ||
| 122 | 118 | ||
| 123 | struct BlitImages { | 119 | struct BlitImages { |
| 124 | ImageId dst_id; | 120 | ImageId dst_id; |
| @@ -215,6 +211,10 @@ public: | |||
| 215 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, | 211 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, |
| 216 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); | 212 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); |
| 217 | 213 | ||
| 214 | void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, | ||
| 215 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 216 | GPUVAddr address = 0, size_t size = 0); | ||
| 217 | |||
| 218 | /// Return true when a CPU region is modified from the GPU | 218 | /// Return true when a CPU region is modified from the GPU |
| 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 220 | 220 | ||
| @@ -424,17 +424,32 @@ private: | |||
| 424 | u64 critical_memory; | 424 | u64 critical_memory; |
| 425 | size_t critical_gc; | 425 | size_t critical_gc; |
| 426 | 426 | ||
| 427 | struct BufferDownload { | ||
| 428 | GPUVAddr address; | ||
| 429 | size_t size; | ||
| 430 | }; | ||
| 431 | |||
| 432 | struct PendingDownload { | ||
| 433 | bool is_swizzle; | ||
| 434 | size_t async_buffer_id; | ||
| 435 | SlotId object_id; | ||
| 436 | }; | ||
| 437 | |||
| 427 | SlotVector<Image> slot_images; | 438 | SlotVector<Image> slot_images; |
| 428 | SlotVector<ImageMapView> slot_map_views; | 439 | SlotVector<ImageMapView> slot_map_views; |
| 429 | SlotVector<ImageView> slot_image_views; | 440 | SlotVector<ImageView> slot_image_views; |
| 430 | SlotVector<ImageAlloc> slot_image_allocs; | 441 | SlotVector<ImageAlloc> slot_image_allocs; |
| 431 | SlotVector<Sampler> slot_samplers; | 442 | SlotVector<Sampler> slot_samplers; |
| 432 | SlotVector<Framebuffer> slot_framebuffers; | 443 | SlotVector<Framebuffer> slot_framebuffers; |
| 444 | SlotVector<BufferDownload> slot_buffer_downloads; | ||
| 433 | 445 | ||
| 434 | // TODO: This data structure is not optimal and it should be reworked | 446 | // TODO: This data structure is not optimal and it should be reworked |
| 435 | std::vector<ImageId> uncommitted_downloads; | 447 | |
| 436 | std::deque<std::vector<ImageId>> committed_downloads; | 448 | std::vector<PendingDownload> uncommitted_downloads; |
| 437 | std::deque<std::optional<AsyncBuffer>> async_buffers; | 449 | std::deque<std::vector<PendingDownload>> committed_downloads; |
| 450 | std::vector<AsyncBuffer> uncommitted_async_buffers; | ||
| 451 | std::deque<std::vector<AsyncBuffer>> async_buffers; | ||
| 452 | std::deque<AsyncBuffer> async_buffers_death_ring; | ||
| 438 | 453 | ||
| 439 | struct LRUItemParams { | 454 | struct LRUItemParams { |
| 440 | using ObjectType = ImageId; | 455 | using ObjectType = ImageId; |