summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp 21
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 118
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h 23
6 files changed, 123 insertions, 53 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2de533584..4993d4709 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
1287 } 1287 }
1288 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); 1288 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
1289 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; 1289 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
1290 const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing 1290 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
1291 : VideoCommon::ObtainBufferOperation::MarkAsWritten;
1292 const auto [buffer, offset] = 1291 const auto [buffer, offset] =
1293 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); 1292 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
1294 1293
@@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
1299 if constexpr (IS_IMAGE_UPLOAD) { 1298 if constexpr (IS_IMAGE_UPLOAD) {
1300 image->UploadMemory(buffer->Handle(), offset, copy_span); 1299 image->UploadMemory(buffer->Handle(), offset, copy_span);
1301 } else { 1300 } else {
1302 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); 1301 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
1302 buffer_operand.address, buffer_size);
1303 } 1303 }
1304 return true; 1304 return true;
1305} 1305}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 8fc783cc0..2559a3aa7 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
781 } 781 }
782 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); 782 const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
783 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; 783 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
784 const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing 784 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
785 : VideoCommon::ObtainBufferOperation::MarkAsWritten;
786 const auto [buffer, offset] = 785 const auto [buffer, offset] =
787 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); 786 buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
788 787
@@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
793 if constexpr (IS_IMAGE_UPLOAD) { 792 if constexpr (IS_IMAGE_UPLOAD) {
794 image->UploadMemory(buffer->Handle(), offset, copy_span); 793 image->UploadMemory(buffer->Handle(), offset, copy_span);
795 } else { 794 } else {
796 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span); 795 texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
796 buffer_operand.address, buffer_size);
797 } 797 }
798 return true; 798 return true;
799} 799}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index e4d077e63..da3841bb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1342,17 +1342,19 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
1342 UploadMemory(map.buffer, map.offset, copies); 1342 UploadMemory(map.buffer, map.offset, copies);
1343} 1343}
1344 1344
1345void Image::DownloadMemory(std::span<VkBuffer> buffers_span, VkDeviceSize offset, 1345void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
1346 std::span<const VideoCommon::BufferImageCopy> copies) { 1346 std::span<const VideoCommon::BufferImageCopy> copies) {
1347 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); 1347 const bool is_rescaled = True(flags & ImageFlagBits::Rescaled);
1348 if (is_rescaled) { 1348 if (is_rescaled) {
1349 ScaleDown(); 1349 ScaleDown();
1350 } 1350 }
1351 boost::container::small_vector<VkBuffer, 1> buffers_vector{}; 1351 boost::container::small_vector<VkBuffer, 1> buffers_vector{};
1352 for (auto& buffer : buffers_span) { 1352 boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies;
1353 buffers_vector.push_back(buffer); 1353 for (size_t index = 0; index < buffers_span.size(); index++) {
1354 buffers_vector.emplace_back(buffers_span[index]);
1355 vk_copies.emplace_back(
1356 TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
1354 } 1357 }
1355 std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
1356 scheduler->RequestOutsideRenderPassOperationContext(); 1358 scheduler->RequestOutsideRenderPassOperationContext();
1357 scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, 1359 scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
1358 aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { 1360 aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
@@ -1377,9 +1379,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, VkDeviceSize offset
1377 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 1379 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1378 0, read_barrier); 1380 0, read_barrier);
1379 1381
1380 for (auto buffer : buffers) { 1382 for (size_t index = 0; index < buffers.size(); index++) {
1381 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, 1383 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
1382 vk_copies); 1384 vk_copies[index]);
1383 } 1385 }
1384 1386
1385 const VkMemoryBarrier memory_write_barrier{ 1387 const VkMemoryBarrier memory_write_barrier{
@@ -1418,7 +1420,10 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
1418 std::array buffers{ 1420 std::array buffers{
1419 map.buffer, 1421 map.buffer,
1420 }; 1422 };
1421 DownloadMemory(buffers, map.offset, copies); 1423 std::array offsets{
1424 map.offset,
1425 };
1426 DownloadMemory(buffers, offsets, copies);
1422} 1427}
1423 1428
1424bool Image::IsRescaled() const noexcept { 1429bool Image::IsRescaled() const noexcept {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 422476188..bdaf43ba4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -138,7 +138,7 @@ public:
138 void UploadMemory(const StagingBufferRef& map, 138 void UploadMemory(const StagingBufferRef& map,
139 std::span<const VideoCommon::BufferImageCopy> copies); 139 std::span<const VideoCommon::BufferImageCopy> copies);
140 140
141 void DownloadMemory(std::span<VkBuffer> buffers, VkDeviceSize offset, 141 void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets,
142 std::span<const VideoCommon::BufferImageCopy> copies); 142 std::span<const VideoCommon::BufferImageCopy> copies);
143 143
144 void DownloadMemory(const StagingBufferRef& map, 144 void DownloadMemory(const StagingBufferRef& map,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2cd5aa31e..63b8b5af5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -661,27 +661,40 @@ template <class P>
661void TextureCache<P>::CommitAsyncFlushes() { 661void TextureCache<P>::CommitAsyncFlushes() {
662 // This is intentionally passing the value by copy 662 // This is intentionally passing the value by copy
663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 663 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
664 const std::span<const ImageId> download_ids = uncommitted_downloads; 664 auto& download_ids = uncommitted_downloads;
665 if (download_ids.empty()) { 665 if (download_ids.empty()) {
666 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 666 committed_downloads.emplace_back(std::move(uncommitted_downloads));
667 uncommitted_downloads.clear(); 667 uncommitted_downloads.clear();
668 async_buffers.emplace_back(std::optional<AsyncBuffer>{}); 668 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
669 uncommitted_async_buffers.clear();
669 return; 670 return;
670 } 671 }
671 size_t total_size_bytes = 0; 672 size_t total_size_bytes = 0;
672 for (const ImageId image_id : download_ids) { 673 size_t last_async_buffer_id = uncommitted_async_buffers.size();
673 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 674 bool any_none_dma = false;
675 for (PendingDownload& download_info : download_ids) {
676 if (download_info.is_swizzle) {
677 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
678 any_none_dma = true;
679 download_info.async_buffer_id = last_async_buffer_id;
680 }
674 } 681 }
675 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); 682 if (any_none_dma) {
676 for (const ImageId image_id : download_ids) { 683 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true);
677 Image& image = slot_images[image_id]; 684 for (const PendingDownload& download_info : download_ids) {
678 const auto copies = FullDownloadCopies(image.info); 685 if (download_info.is_swizzle) {
679 image.DownloadMemory(download_map, copies); 686 Image& image = slot_images[download_info.object_id];
680 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); 687 const auto copies = FullDownloadCopies(image.info);
688 image.DownloadMemory(download_map, copies);
689 download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
690 }
691 }
692 uncommitted_async_buffers.emplace_back(download_map);
681 } 693 }
682 async_buffers.emplace_back(download_map);
683 } 694 }
684 committed_downloads.emplace_back(std::move(uncommitted_downloads)); 695 committed_downloads.emplace_back(std::move(uncommitted_downloads));
696 async_buffers.emplace_back(std::move(uncommitted_async_buffers));
697 uncommitted_async_buffers.clear();
685 uncommitted_downloads.clear(); 698 uncommitted_downloads.clear();
686} 699}
687 700
@@ -691,39 +704,57 @@ void TextureCache<P>::PopAsyncFlushes() {
691 return; 704 return;
692 } 705 }
693 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 706 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
694 const std::span<const ImageId> download_ids = committed_downloads.front(); 707 const auto& download_ids = committed_downloads.front();
695 if (download_ids.empty()) { 708 if (download_ids.empty()) {
696 committed_downloads.pop_front(); 709 committed_downloads.pop_front();
697 async_buffers.pop_front(); 710 async_buffers.pop_front();
698 return; 711 return;
699 } 712 }
700 auto download_map = *async_buffers.front(); 713 auto download_map = std::move(async_buffers.front());
701 std::span<u8> download_span = download_map.mapped_span;
702 for (size_t i = download_ids.size(); i > 0; i--) { 714 for (size_t i = download_ids.size(); i > 0; i--) {
703 const ImageBase& image = slot_images[download_ids[i - 1]]; 715 auto& download_info = download_ids[i - 1];
704 const auto copies = FullDownloadCopies(image.info); 716 auto& download_buffer = download_map[download_info.async_buffer_id];
705 download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); 717 if (download_info.is_swizzle) {
706 std::span<u8> download_span_alt = download_span.subspan(download_map.offset); 718 const ImageBase& image = slot_images[download_info.object_id];
707 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, 719 const auto copies = FullDownloadCopies(image.info);
708 swizzle_data_buffer); 720 download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
721 std::span<u8> download_span =
722 download_buffer.mapped_span.subspan(download_buffer.offset);
723 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
724 swizzle_data_buffer);
725 } else {
726 const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
727 std::span<u8> download_span =
728 download_buffer.mapped_span.subspan(download_buffer.offset);
729 gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
730 buffer_info.size);
731 slot_buffer_downloads.erase(download_info.object_id);
732 }
733 }
734 for (auto& download_buffer : download_map) {
735 runtime.FreeDeferredStagingBuffer(download_buffer);
709 } 736 }
710 runtime.FreeDeferredStagingBuffer(download_map);
711 committed_downloads.pop_front(); 737 committed_downloads.pop_front();
712 async_buffers.pop_front(); 738 async_buffers.pop_front();
713 } else { 739 } else {
714 const std::span<const ImageId> download_ids = committed_downloads.front(); 740 const auto& download_ids = committed_downloads.front();
715 if (download_ids.empty()) { 741 if (download_ids.empty()) {
716 committed_downloads.pop_front(); 742 committed_downloads.pop_front();
717 return; 743 return;
718 } 744 }
719 size_t total_size_bytes = 0; 745 size_t total_size_bytes = 0;
720 for (const ImageId image_id : download_ids) { 746 for (const PendingDownload& download_info : download_ids) {
721 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 747 if (download_info.is_swizzle) {
748 total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes;
749 }
722 } 750 }
723 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); 751 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
724 const size_t original_offset = download_map.offset; 752 const size_t original_offset = download_map.offset;
725 for (const ImageId image_id : download_ids) { 753 for (const PendingDownload& download_info : download_ids) {
726 Image& image = slot_images[image_id]; 754 if (download_info.is_swizzle) {
755 continue;
756 }
757 Image& image = slot_images[download_info.object_id];
727 const auto copies = FullDownloadCopies(image.info); 758 const auto copies = FullDownloadCopies(image.info);
728 image.DownloadMemory(download_map, copies); 759 image.DownloadMemory(download_map, copies);
729 download_map.offset += image.unswizzled_size_bytes; 760 download_map.offset += image.unswizzled_size_bytes;
@@ -732,8 +763,11 @@ void TextureCache<P>::PopAsyncFlushes() {
732 runtime.Finish(); 763 runtime.Finish();
733 download_map.offset = original_offset; 764 download_map.offset = original_offset;
734 std::span<u8> download_span = download_map.mapped_span; 765 std::span<u8> download_span = download_map.mapped_span;
735 for (const ImageId image_id : download_ids) { 766 for (const PendingDownload& download_info : download_ids) {
736 const ImageBase& image = slot_images[image_id]; 767 if (download_info.is_swizzle) {
768 continue;
769 }
770 const ImageBase& image = slot_images[download_info.object_id];
737 const auto copies = FullDownloadCopies(image.info); 771 const auto copies = FullDownloadCopies(image.info);
738 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, 772 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
739 swizzle_data_buffer); 773 swizzle_data_buffer);
@@ -836,11 +870,27 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm
836template <class P> 870template <class P>
837void TextureCache<P>::DownloadImageIntoBuffer( 871void TextureCache<P>::DownloadImageIntoBuffer(
838 typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer, 872 typename TextureCache<P>::Image* image, typename TextureCache<P>::BufferType buffer,
839 size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies) { 873 size_t buffer_offset, std::span<const VideoCommon::BufferImageCopy> copies, GPUVAddr address, size_t size) {
840 std::array buffers{ 874 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
841 buffer, 875 auto slot = slot_buffer_downloads.insert(address, size);
842 }; 876 uncommitted_downloads.emplace_back(false, uncommitted_async_buffers.size(), slot);
843 image->DownloadMemory(buffers, buffer_offset, copies); 877 auto download_map = runtime.DownloadStagingBuffer(size, true);
878 uncommitted_async_buffers.emplace_back(download_map);
879 std::array buffers{
880 buffer,
881 download_map.buffer,
882 };
883 std::array buffer_offsets{
884 buffer_offset,
885 download_map.offset,
886 };
887 image->DownloadMemory(buffers, buffer_offsets, copies);
888 } else {
889 std::array buffers{
890 buffer,
891 };
892 image->DownloadMemory(buffers, buffer_offset, copies);
893 }
844} 894}
845 895
846template <class P> 896template <class P>
@@ -2219,7 +2269,7 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id)
2219 if (new_id) { 2269 if (new_id) {
2220 const ImageViewBase& old_view = slot_image_views[new_id]; 2270 const ImageViewBase& old_view = slot_image_views[new_id];
2221 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { 2271 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
2222 uncommitted_downloads.push_back(old_view.image_id); 2272 uncommitted_downloads.emplace_back(true, 0, old_view.image_id);
2223 } 2273 }
2224 } 2274 }
2225 *old_id = new_id; 2275 *old_id = new_id;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 51f44aed5..d5bba3379 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -217,7 +217,8 @@ public:
217 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); 217 const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
218 218
219 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, 219 void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
220 std::span<const VideoCommon::BufferImageCopy> copies); 220 std::span<const VideoCommon::BufferImageCopy> copies,
221 GPUVAddr address = 0, size_t size = 0);
221 222
222 /// Return true when a CPU region is modified from the GPU 223 /// Return true when a CPU region is modified from the GPU
223 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 224 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
@@ -428,17 +429,31 @@ private:
428 u64 critical_memory; 429 u64 critical_memory;
429 size_t critical_gc; 430 size_t critical_gc;
430 431
432 struct BufferDownload {
433 GPUVAddr address;
434 size_t size;
435 };
436
437 struct PendingDownload {
438 bool is_swizzle;
439 size_t async_buffer_id;
440 SlotId object_id;
441 };
442
431 SlotVector<Image> slot_images; 443 SlotVector<Image> slot_images;
432 SlotVector<ImageMapView> slot_map_views; 444 SlotVector<ImageMapView> slot_map_views;
433 SlotVector<ImageView> slot_image_views; 445 SlotVector<ImageView> slot_image_views;
434 SlotVector<ImageAlloc> slot_image_allocs; 446 SlotVector<ImageAlloc> slot_image_allocs;
435 SlotVector<Sampler> slot_samplers; 447 SlotVector<Sampler> slot_samplers;
436 SlotVector<Framebuffer> slot_framebuffers; 448 SlotVector<Framebuffer> slot_framebuffers;
449 SlotVector<BufferDownload> slot_buffer_downloads;
437 450
438 // TODO: This data structure is not optimal and it should be reworked 451 // TODO: This data structure is not optimal and it should be reworked
439 std::vector<ImageId> uncommitted_downloads; 452
440 std::deque<std::vector<ImageId>> committed_downloads; 453 std::vector<PendingDownload> uncommitted_downloads;
441 std::deque<std::optional<AsyncBuffer>> async_buffers; 454 std::deque<std::vector<PendingDownload>> committed_downloads;
455 std::vector<AsyncBuffer> uncommitted_async_buffers;
456 std::deque<std::vector<AsyncBuffer>> async_buffers;
442 457
443 struct LRUItemParams { 458 struct LRUItemParams {
444 using ObjectType = ImageId; 459 using ObjectType = ImageId;