summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: bunnei, 2023-04-29 11:46:01 -0700
committer: GitHub, 2023-04-29 11:46:01 -0700
commit: fe57f3967639616142889bf1b638117ca6c879cf (patch)
tree: 43879297dd91ac1ab824010610e724c37ec3cb95 /src/video_core/texture_cache
parent: Merge pull request #10051 from liamwhite/surface-capabilities (diff)
parent: Texture Cache: Release stagging buffers on tick frame (diff)
download: yuzu-fe57f3967639616142889bf1b638117ca6c879cf.tar.gz
yuzu-fe57f3967639616142889bf1b638117ca6c879cf.tar.xz
yuzu-fe57f3967639616142889bf1b638117ca6c879cf.zip
Merge pull request #10082 from FernandoS27/the-testers-really-love-chocolate
Refactor Accelerate DMA and do downloads through TC.
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 136
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h 33
2 files changed, 125 insertions, 44 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ed5c768d8..e601f8446 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,9 +1,10 @@
1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <unordered_set> 6#include <unordered_set>
7#include <boost/container/small_vector.hpp>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
9#include "common/settings.h" 10#include "common/settings.h"
@@ -17,15 +18,10 @@
17 18
18namespace VideoCommon { 19namespace VideoCommon {
19 20
20using Tegra::Texture::SwizzleSource;
21using Tegra::Texture::TextureType;
22using Tegra::Texture::TICEntry; 21using Tegra::Texture::TICEntry;
23using Tegra::Texture::TSCEntry; 22using Tegra::Texture::TSCEntry;
24using VideoCore::Surface::GetFormatType; 23using VideoCore::Surface::GetFormatType;
25using VideoCore::Surface::IsCopyCompatible;
26using VideoCore::Surface::PixelFormat; 24using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::PixelFormatFromDepthFormat;
28using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
29using VideoCore::Surface::SurfaceType; 25using VideoCore::Surface::SurfaceType;
30using namespace Common::Literals; 26using namespace Common::Literals;
31 27
@@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() {
143 runtime.TickFrame(); 139 runtime.TickFrame();
144 critical_gc = 0; 140 critical_gc = 0;
145 ++frame_tick; 141 ++frame_tick;
142
143 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
144 for (auto& buffer : async_buffers_death_ring) {
145 runtime.FreeDeferredStagingBuffer(buffer);
146 }
147 async_buffers_death_ring.clear();
148 }
146} 149}
147 150
148template <class P> 151template <class P>
@@ -661,25 +664,39 @@ template <class P>
661void TextureCache<P>::CommitAsyncFlushes() { 664void TextureCache<P>::CommitAsyncFlushes() {
662 // This is intentionally passing the value by copy 665 // This is intentionally passing the value by copy
663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 666 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
664 const std::span<const ImageId> download_ids = uncommitted_downloads; 667 auto& download_ids = uncommitted_downloads;
665 if (download_ids.empty()) { 668 if (download_ids.empty()) {
666 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 669 committed_downloads.emplace_back(std::move(uncommitted_downloads));
667 uncommitted_downloads.clear(); 670 uncommitted_downloads.clear();
668 async_buffers.emplace_back(std::optional<AsyncBuffer>{}); 671 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
672 uncommitted_async_buffers.clear();
669 return; 673 return;
670 } 674 }
671 size_t total_size_bytes = 0; 675 size_t total_size_bytes = 0;
672 for (const ImageId image_id : download_ids) { 676 size_t last_async_buffer_id = uncommitted_async_buffers.size();
673 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 677 bool any_none_dma = false;
678 for (PendingDownload& download_info : download_ids) {
679 if (download_info.is_swizzle) {
680 total_size_bytes +=
681 Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64);
682 any_none_dma = true;
683 download_info.async_buffer_id = last_async_buffer_id;
684 }
674 } 685 }
675 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); 686 if (any_none_dma) {
676 for (const ImageId image_id : download_ids) { 687 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
677 Image& image = slot_images[image_id]; 688 for (const PendingDownload& download_info : download_ids) {
678 const auto copies = FullDownloadCopies(image.info); 689 if (download_info.is_swizzle) {
679 image.DownloadMemory(download_map, copies); 690 Image& image = slot_images[download_info.object_id];
680 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); 691 const auto copies = FullDownloadCopies(image.info);
692 image.DownloadMemory(download_map, copies);
693 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
694 }
695 }
696 uncommitted_async_buffers.emplace_back(download_map);
681 } 697 }
682 async_buffers.emplace_back(download_map); 698 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
699 uncommitted_async_buffers.clear();
683 } 700 }
684 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 701 committed_downloads.emplace_back(std::move(uncommitted_downloads));
685 uncommitted_downloads.clear(); 702 uncommitted_downloads.clear();
@@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() {
691 return; 708 return;
692 } 709 }
693 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 710 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
694 const std::span<const ImageId> download_ids = committed_downloads.front(); 711 const auto& download_ids = committed_downloads.front();
695 if (download_ids.empty()) { 712 if (download_ids.empty()) {
696 committed_downloads.pop_front(); 713 committed_downloads.pop_front();
697 async_buffers.pop_front(); 714 async_buffers.pop_front();
698 return; 715 return;
699 } 716 }
700 auto download_map = *async_buffers.front(); 717 auto download_map = std::move(async_buffers.front());
701 std::span<u8> download_span = download_map.mapped_span;
702 for (size_t i = download_ids.size(); i > 0; i--) { 718 for (size_t i = download_ids.size(); i > 0; i--) {
703 const ImageBase& image = slot_images[download_ids[i - 1]]; 719 auto& download_info = download_ids[i - 1];
704 const auto copies = FullDownloadCopies(image.info); 720 auto& download_buffer = download_map[download_info.async_buffer_id];
705 download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); 721 if (download_info.is_swizzle) {
706 std::span<u8> download_span_alt = download_span.subspan(download_map.offset); 722 const ImageBase& image = slot_images[download_info.object_id];
707 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, 723 const auto copies = FullDownloadCopies(image.info);
708 swizzle_data_buffer); 724 download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
725 std::span<u8> download_span =
726 download_buffer.mapped_span.subspan(download_buffer.offset);
727 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
728 swizzle_data_buffer);
729 } else {
730 const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
731 std::span<u8> download_span =
732 download_buffer.mapped_span.subspan(download_buffer.offset);
733 gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
734 buffer_info.size);
735 slot_buffer_downloads.erase(download_info.object_id);
736 }
737 }
738 for (auto& download_buffer : download_map) {
739 async_buffers_death_ring.emplace_back(download_buffer);
709 } 740 }
710 runtime.FreeDeferredStagingBuffer(download_map);
711 committed_downloads.pop_front(); 741 committed_downloads.pop_front();
712 async_buffers.pop_front(); 742 async_buffers.pop_front();
713 } else { 743 } else {
714 const std::span<const ImageId> download_ids = committed_downloads.front(); 744 const auto& download_ids = committed_downloads.front();
715 if (download_ids.empty()) { 745 if (download_ids.empty()) {
716 committed_downloads.pop_front(); 746 committed_downloads.pop_front();
717 return; 747 return;
718 } 748 }
719 size_t total_size_bytes = 0; 749 size_t total_size_bytes = 0;
720 for (const ImageId image_id : download_ids) { 750 for (const PendingDownload& download_info : download_ids) {
721 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 751 if (download_info.is_swizzle) {
752 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
753 }
722 } 754 }
723 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); 755 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
724 const size_t original_offset = download_map.offset; 756 const size_t original_offset = download_map.offset;
725 for (const ImageId image_id : download_ids) { 757 for (const PendingDownload& download_info : download_ids) {
726 Image& image = slot_images[image_id]; 758 if (!download_info.is_swizzle) {
759 continue;
760 }
761 Image& image = slot_images[download_info.object_id];
727 const auto copies = FullDownloadCopies(image.info); 762 const auto copies = FullDownloadCopies(image.info);
728 image.DownloadMemory(download_map, copies); 763 image.DownloadMemory(download_map, copies);
729 download_map.offset += image.unswizzled_size_bytes; 764 download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() {
732 runtime.Finish(); 767 runtime.Finish();
733 download_map.offset = original_offset; 768 download_map.offset = original_offset;
734 std::span<u8> download_span = download_map.mapped_span; 769 std::span<u8> download_span = download_map.mapped_span;
735 for (const ImageId image_id : download_ids) { 770 for (const PendingDownload& download_info : download_ids) {
736 const ImageBase& image = slot_images[image_id]; 771 if (!download_info.is_swizzle) {
772 continue;
773 }
774 const ImageBase& image = slot_images[download_info.object_id];
737 const auto copies = FullDownloadCopies(image.info); 775 const auto copies = FullDownloadCopies(image.info);
738 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, 776 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
739 swizzle_data_buffer); 777 swizzle_data_buffer);
@@ -834,6 +872,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
834} 872}
835 873
836template <class P> 874template <class P>
875void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image,
876 typename TextureCache<P>::BufferType buffer,
877 size_t buffer_offset,
878 std::span<const VideoCommon::BufferImageCopy> copies,
879 GPUVAddr address, size_t size) {
880 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
881 const BufferDownload new_buffer_download{address, size};
882 auto slot = slot_buffer_downloads.insert(new_buffer_download);
883 const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot};
884 uncommitted_downloads.emplace_back(new_download);
885 auto download_map = runtime.DownloadStagingBuffer(size, true);
886 uncommitted_async_buffers.emplace_back(download_map);
887 std::array buffers{
888 buffer,
889 download_map.buffer,
890 };
891 std::array buffer_offsets{
892 buffer_offset,
893 download_map.offset,
894 };
895 image->DownloadMemory(buffers, buffer_offsets, copies);
896 } else {
897 image->DownloadMemory(buffer, buffer_offset, copies);
898 }
899}
900
901template <class P>
837void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { 902void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
838 if (False(image.flags & ImageFlagBits::CpuModified)) { 903 if (False(image.flags & ImageFlagBits::CpuModified)) {
839 // Only upload modified images 904 // Only upload modified images
@@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
2209 if (new_id) { 2274 if (new_id) {
2210 const ImageViewBase& old_view = slot_image_views[new_id]; 2275 const ImageViewBase& old_view = slot_image_views[new_id];
2211 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { 2276 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
2212 uncommitted_downloads.push_back(old_view.image_id); 2277 const PendingDownload new_download{true, 0, old_view.image_id};
2278 uncommitted_downloads.emplace_back(new_download);
2213 } 2279 }
2214 } 2280 }
2215 *old_id = new_id; 2281 *old_id = new_id;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 5a5b4179c..758b7e212 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,4 +1,4 @@
1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
@@ -40,14 +40,9 @@ struct ChannelState;
40 40
41namespace VideoCommon { 41namespace VideoCommon {
42 42
43using Tegra::Texture::SwizzleSource;
44using Tegra::Texture::TICEntry; 43using Tegra::Texture::TICEntry;
45using Tegra::Texture::TSCEntry; 44using Tegra::Texture::TSCEntry;
46using VideoCore::Surface::GetFormatType;
47using VideoCore::Surface::IsCopyCompatible;
48using VideoCore::Surface::PixelFormat; 45using VideoCore::Surface::PixelFormat;
49using VideoCore::Surface::PixelFormatFromDepthFormat;
50using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
51using namespace Common::Literals; 46using namespace Common::Literals;
52 47
53struct ImageViewInOut { 48struct ImageViewInOut {
@@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
119 using Sampler = typename P::Sampler; 114 using Sampler = typename P::Sampler;
120 using Framebuffer = typename P::Framebuffer; 115 using Framebuffer = typename P::Framebuffer;
121 using AsyncBuffer = typename P::AsyncBuffer; 116 using AsyncBuffer = typename P::AsyncBuffer;
117 using BufferType = typename P::BufferType;
122 118
123 struct BlitImages { 119 struct BlitImages {
124 ImageId dst_id; 120 ImageId dst_id;
@@ -215,6 +211,10 @@ public:
215 const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, 211 const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
216 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); 212 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
217 213
214 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
215 std::span<const VideoCommon::BufferImageCopy> copies,
216 GPUVAddr address = 0, size_t size = 0);
217
218 /// Return true when a CPU region is modified from the GPU 218 /// Return true when a CPU region is modified from the GPU
219 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 219 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
220 220
@@ -424,17 +424,32 @@ private:
424 u64 critical_memory; 424 u64 critical_memory;
425 size_t critical_gc; 425 size_t critical_gc;
426 426
427 struct BufferDownload {
428 GPUVAddr address;
429 size_t size;
430 };
431
432 struct PendingDownload {
433 bool is_swizzle;
434 size_t async_buffer_id;
435 SlotId object_id;
436 };
437
427 SlotVector<Image> slot_images; 438 SlotVector<Image> slot_images;
428 SlotVector<ImageMapView> slot_map_views; 439 SlotVector<ImageMapView> slot_map_views;
429 SlotVector<ImageView> slot_image_views; 440 SlotVector<ImageView> slot_image_views;
430 SlotVector<ImageAlloc> slot_image_allocs; 441 SlotVector<ImageAlloc> slot_image_allocs;
431 SlotVector<Sampler> slot_samplers; 442 SlotVector<Sampler> slot_samplers;
432 SlotVector<Framebuffer> slot_framebuffers; 443 SlotVector<Framebuffer> slot_framebuffers;
444 SlotVector<BufferDownload> slot_buffer_downloads;
433 445
434 // TODO: This data structure is not optimal and it should be reworked 446 // TODO: This data structure is not optimal and it should be reworked
435 std::vector<ImageId> uncommitted_downloads; 447
436 std::deque<std::vector<ImageId>> committed_downloads; 448 std::vector<PendingDownload> uncommitted_downloads;
437 std::deque<std::optional<AsyncBuffer>> async_buffers; 449 std::deque<std::vector<PendingDownload>> committed_downloads;
450 std::vector<AsyncBuffer> uncommitted_async_buffers;
451 std::deque<std::vector<AsyncBuffer>> async_buffers;
452 std::deque<AsyncBuffer> async_buffers_death_ring;
438 453
439 struct LRUItemParams { 454 struct LRUItemParams {
440 using ObjectType = ImageId; 455 using ObjectType = ImageId;