author     Fernando Sahmkow  2023-04-22 13:36:18 +0200
committer  Fernando Sahmkow  2023-04-29 00:46:31 +0200
commit     f2d3212de97ebed710bc03792343fae45b3203f3 (patch)
tree       6faecb218f21dadc4eb128c8974228f0219beba0
parent     Buffer Cache: Fully rework the buffer cache. (diff)
Buffer Cache rework: Setup async downloads.
Diffstat
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h      | 229
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h |  65
2 files changed, 154 insertions(+), 140 deletions(-)
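
The core of the rework below replaces the per-commit std::deque<IntervalSet> async_downloads with a single reference-counted interval map, so overlapping in-flight downloads over the same addresses can be tracked jointly. A minimal standalone sketch of the idea, using a plain boost::icl::interval_map with its default additive combiner instead of the patch's clamped split_interval_map (illustrative code, not part of the commit):

    // Sketch: counting overlapping in-flight download ranges with Boost.ICL.
    #include <boost/icl/interval_map.hpp>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    using VAddr = std::uint64_t;
    using Iv = boost::icl::interval<VAddr>;

    int main() {
        boost::icl::interval_map<VAddr, int> async_downloads;
        // Two queued downloads overlap in [0x2000, 0x3000): that piece counts 2.
        async_downloads += std::make_pair(Iv::right_open(0x1000, 0x3000), 1);
        async_downloads += std::make_pair(Iv::right_open(0x2000, 0x4000), 1);
        for (const auto& [range, count] : async_downloads) {
            std::cout << range << " refs=" << count << '\n';
        }
        // Popping the first download decrements; pieces that reach 0 disappear.
        async_downloads -= std::make_pair(Iv::right_open(0x1000, 0x3000), 1);
        for (const auto& [range, count] : async_downloads) {
            std::cout << range << " refs=" << count << '\n';
        }
    }

Overlapping insertions aggregate the counts per sub-range, and the default partial_absorber trait erases any piece whose count returns to zero.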
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index a0701ce4e..43fe5b080 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -11,6 +11,8 @@
 
 namespace VideoCommon {
 
+using Core::Memory::YUZU_PAGESIZE;
+
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                             Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
@@ -87,9 +89,11 @@ void BufferCache<P>::TickFrame() {
 template <class P>
 void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
     memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
-    const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+        const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
+        ClearDownload(subtract_interval);
+        common_ranges.subtract(subtract_interval);
+    }
 }
 
 template <class P>
@@ -102,17 +106,33 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
 
 template <class P>
 void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+    WaitOnAsyncFlushes(cpu_addr, size);
     ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
         DownloadBufferMemory(buffer, cpu_addr, size);
     });
 }
 
 template <class P>
+void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
+    bool must_wait = false;
+    ForEachInOverlapCounter(async_downloads, cpu_addr, size,
+                            [&](VAddr, VAddr, int) { must_wait = true; });
+    bool must_release = false;
+    ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
+    if (must_release) {
+        std::function<void()> tmp([]() {});
+        rasterizer.SignalFence(std::move(tmp));
+    }
+    if (must_wait || must_release) {
+        rasterizer.ReleaseFences();
+    }
+}
+
+template <class P>
 void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
+    async_downloads -= std::make_pair(subtract_interval, std::numeric_limits<int>::max());
     uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : async_downloads) {
-        interval_set.subtract(subtract_interval);
-    }
+    pending_ranges.subtract(subtract_interval);
     for (auto& interval_set : committed_ranges) {
         interval_set.subtract(subtract_interval);
     }
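
Note the force-clear idiom above: ClearDownload subtracts std::numeric_limits<int>::max() from the whole interval, relying on the clamped combiner declared in buffer_cache_base.h (further down) to floor every count at zero so the absorber drops the segments. With a stock interval_map the same effect can be had by erasing the key range; a sketch under that assumption:

    // Sketch: force-clearing a range regardless of its reference counts.
    #include <boost/icl/interval_map.hpp>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    using VAddr = std::uint64_t;
    using Iv = boost::icl::interval<VAddr>;

    int main() {
        boost::icl::interval_map<VAddr, int> counts;
        counts += std::make_pair(Iv::right_open(0x1000, 0x4000), 3);
        // Equivalent of the patch's "-= {interval, INT_MAX}" under a combiner
        // that clamps at zero: drop the sub-range being invalidated outright.
        counts.erase(Iv::right_open(0x2000, 0x3000));
        for (const auto& [range, count] : counts) {
            std::cout << range << " refs=" << count << '\n'; // two pieces survive
        }
    }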
@@ -132,6 +152,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     }
 
     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+    WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
     ClearDownload(subtract_interval);
 
     BufferId buffer_a;
@@ -162,6 +183,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         tmp_intervals.push_back(add_interval);
         if (is_high_accuracy) {
             uncommitted_ranges.add(add_interval);
+            pending_ranges.add(add_interval);
         }
     };
     ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
@@ -413,18 +435,15 @@ template <class P>
 void BufferCache<P>::FlushCachedWrites() {
     cached_write_buffer_ids.clear();
     memory_tracker.FlushCachedWrites();
-    /*for (auto& interval : cached_ranges) {
-        VAddr cpu_addr = interval.lower();
-        const std::size_t size = interval.upper() - interval.lower();
-        memory_tracker.FlushCachedWrites(cpu_addr, size);
-        // common_ranges.subtract(interval);
-    }*/
+    for (auto& interval : cached_ranges) {
+        ClearDownload(interval);
+    }
     cached_ranges.clear();
 }
 
 template <class P>
 bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
-    return !uncommitted_ranges.empty() || !committed_ranges.empty() || !pending_queries.empty();
+    return !uncommitted_ranges.empty() || !committed_ranges.empty();
 }
 
 template <class P>
@@ -437,8 +456,11 @@ void BufferCache<P>::AccumulateFlushes() {
 
 template <class P>
 bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
-    return (!async_buffers.empty() && async_buffers.front().has_value()) ||
-           (!query_async_buffers.empty() && query_async_buffers.front().has_value());
+    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+        return (!async_buffers.empty() && async_buffers.front().has_value());
+    } else {
+        return false;
+    }
 }
 
 template <class P>
@@ -446,11 +468,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     AccumulateFlushes();
 
     if (committed_ranges.empty()) {
-        async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        }
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
 
+    pending_ranges.clear();
     auto it = committed_ranges.begin();
     while (it != committed_ranges.end()) {
         auto& current_intervals = *it;
@@ -491,7 +516,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                 buffer_id,
             });
             // Align up to avoid cache conflicts
-            constexpr u64 align = 8ULL;
+            constexpr u64 align = 64ULL;
             constexpr u64 mask = ~(align - 1ULL);
             total_size_bytes += (new_size + align - 1) & mask;
             largest_copy = std::max(largest_copy, new_size);
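
The staging alignment grows from 8 to 64 bytes here, so each packed copy starts on its own cache line, as the comment says. The align-up arithmetic is the usual power-of-two mask trick; a compile-time check of its behavior (sketch):

    // Sketch: align-up with a power-of-two mask, as used for total_size_bytes.
    #include <cstdint>

    constexpr std::uint64_t align = 64ULL;
    constexpr std::uint64_t mask = ~(align - 1ULL);

    constexpr std::uint64_t AlignUp(std::uint64_t size) {
        return (size + align - 1) & mask;
    }

    static_assert(AlignUp(1) == 64);
    static_assert(AlignUp(64) == 64);
    static_assert(AlignUp(65) == 128); // always the next multiple of 64, never down

    int main() {
        return 0;
    }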
@@ -504,7 +529,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     }
     committed_ranges.clear();
     if (downloads.empty()) {
-        async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        }
         return;
     }
     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
@@ -520,99 +547,54 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
             VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
             const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
-            new_async_range.add(base_interval);
+            async_downloads += std::make_pair(base_interval, 1);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
             normalized_copies.push_back(second_copy);
         }
-        async_downloads.emplace_back(std::move(new_async_range));
+        runtime.PostCopyBarrier();
         pending_downloads.emplace_back(std::move(normalized_copies));
         async_buffers.emplace_back(download_staging);
     } else {
-        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-        for (const auto& [copy, buffer_id] : downloads) {
-            Buffer& buffer = slot_buffers[buffer_id];
-            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
-            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
-        }
-    }
-}
-
-template <class P>
-void BufferCache<P>::CommitAsyncQueries() {
-    if (pending_queries.empty()) {
-        query_async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        return;
-    }
-
-    MICROPROFILE_SCOPE(GPU_DownloadMemory);
-    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 8> downloads;
-    u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
-    do {
-        has_deleted_buffers = false;
-        downloads.clear();
-        total_size_bytes = 0;
-        largest_copy = 0;
-        for (const auto& query_info : pending_queries) {
-            const std::size_t size = query_info.second;
-            const VAddr cpu_addr = query_info.first;
-            const BufferId buffer_id = FindBuffer(cpu_addr, static_cast<u32>(size));
-            Buffer& buffer = slot_buffers[buffer_id];
-            if (has_deleted_buffers) {
-                break;
+        if constexpr (USE_MEMORY_MAPS) {
+            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+            runtime.PreCopyBarrier();
+            for (auto& [copy, buffer_id] : downloads) {
+                // Have in mind the staging buffer offset for the copy
+                copy.dst_offset += download_staging.offset;
+                const std::array copies{copy};
+                runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
+            }
+            runtime.PostCopyBarrier();
+            runtime.Finish();
+            for (const auto& [copy, buffer_id] : downloads) {
+                const Buffer& buffer = slot_buffers[buffer_id];
+                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                // Undo the modified offset
+                const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+                cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+            }
+        } else {
+            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+            for (const auto& [copy, buffer_id] : downloads) {
+                Buffer& buffer = slot_buffers[buffer_id];
+                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
             }
-            downloads.push_back({
-                BufferCopy{
-                    .src_offset = buffer.Offset(cpu_addr),
-                    .dst_offset = total_size_bytes,
-                    .size = size,
-                },
-                buffer_id,
-            });
-            constexpr u64 align = 8ULL;
-            constexpr u64 mask = ~(align - 1ULL);
-            total_size_bytes += (size + align - 1) & mask;
-            largest_copy = std::max(largest_copy, size);
-        }
-    } while (has_deleted_buffers);
-    pending_queries.clear();
-    if (downloads.empty()) {
-        query_async_buffers.push_back(std::optional<Async_Buffer>{});
-        return;
-    }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        boost::container::small_vector<BufferCopy, 8> normalized_copies;
-        runtime.PreCopyBarrier();
-        for (auto& [copy, buffer_id] : downloads) {
-            // Have in mind the staging buffer offset for the copy
-            copy.dst_offset += download_staging.offset;
-            const std::array copies{copy};
-            const Buffer& buffer = slot_buffers[buffer_id];
-            BufferCopy second_copy{copy};
-            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + second_copy.src_offset;
-            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-            normalized_copies.push_back(second_copy);
         }
-        committed_queries.emplace_back(std::move(normalized_copies));
-        query_async_buffers.emplace_back(download_staging);
-    } else {
-        query_async_buffers.push_back(std::optional<Async_Buffer>{});
     }
 }
 
 template <class P>
 void BufferCache<P>::CommitAsyncFlushes() {
     CommitAsyncFlushesHigh();
-    CommitAsyncQueries();
 }
 
 template <class P>
 void BufferCache<P>::PopAsyncFlushes() {
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
     PopAsyncBuffers();
-    PopAsyncQueries();
 }
 
 template <class P>
@@ -627,59 +609,34 @@ void BufferCache<P>::PopAsyncBuffers() {
     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
         auto& downloads = pending_downloads.front();
         auto& async_buffer = async_buffers.front();
-        auto& async_range = async_downloads.front();
         u8* base = async_buffer->mapped_span.data();
        const size_t base_offset = async_buffer->offset;
         for (const auto& copy : downloads) {
             const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
             const u64 dst_offset = copy.dst_offset - base_offset;
             const u8* read_mapped_memory = base + dst_offset;
-            ForEachInRangeSet(async_range, cpu_addr, copy.size, [&](VAddr start, VAddr end) {
-                const size_t diff = start - cpu_addr;
-                const size_t new_size = end - start;
-                cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[diff], new_size);
-                const IntervalType base_interval{start, end};
-                common_ranges.subtract(base_interval);
-            });
+            ForEachInOverlapCounter(
+                async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
+                    cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
+                                                end - start);
+                    if (count == 1) {
+                        const IntervalType base_interval{start, end};
+                        common_ranges.subtract(base_interval);
+                    }
+                });
+            async_downloads -= std::make_pair(IntervalType(cpu_addr, cpu_addr + copy.size), 1);
         }
         runtime.FreeDeferredStagingBuffer(*async_buffer);
         async_buffers.pop_front();
         pending_downloads.pop_front();
-        async_downloads.pop_front();
-    }
-}
-
-template <class P>
-void BufferCache<P>::PopAsyncQueries() {
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        if (query_async_buffers.empty()) {
-            return;
-        }
-        if (!query_async_buffers.front().has_value()) {
-            query_async_buffers.pop_front();
-            return;
-        }
-        auto& downloads = committed_queries.front();
-        auto& async_buffer = query_async_buffers.front();
-        flushed_queries.clear();
-        u8* base = async_buffer->mapped_span.data();
-        const size_t base_offset = async_buffer->offset;
-        for (const auto& copy : downloads) {
-            const size_t dst_offset = copy.dst_offset - base_offset;
-            const u8* read_mapped_memory = base + dst_offset;
-            u64 new_value{};
-            std::memcpy(&new_value, read_mapped_memory, copy.size);
-            flushed_queries.push_back(new_value);
-        }
-        runtime.FreeDeferredStagingBuffer(*async_buffer);
-        committed_queries.pop_front();
-        query_async_buffers.pop_front();
     }
 }
 
 template <class P>
 bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
-    return memory_tracker.IsRegionGpuModified(addr, size);
+    bool is_dirty = false;
+    ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
+    return is_dirty;
 }
 
 template <class P>
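
PopAsyncBuffers now walks the shared counter map instead of a per-commit interval set: each overlapped piece is clipped to the copy window and written back, common_ranges is only cleaned when count == 1 (this download holds the last reference), and the whole window is decremented by one afterwards. A simplified stand-in for that walk, with manual clipping over a plain interval_map (hypothetical values, not yuzu code):

    // Sketch: visit pieces of [lo, hi) tracked in a counted interval map, clip
    // them to the window, and detect last-reference segments via count == 1.
    #include <algorithm>
    #include <boost/icl/interval_map.hpp>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    using VAddr = std::uint64_t;
    using Iv = boost::icl::interval<VAddr>;

    int main() {
        boost::icl::interval_map<VAddr, int> counts;
        counts += std::make_pair(Iv::right_open(0x1000, 0x3000), 1);
        counts += std::make_pair(Iv::right_open(0x2000, 0x4000), 1);

        const VAddr lo = 0x1800, hi = 0x3800; // window of the finished download
        for (const auto& [range, count] : counts) {
            const VAddr start = std::max<VAddr>(range.lower(), lo);
            const VAddr end = std::min<VAddr>(range.upper(), hi);
            if (start >= end) {
                continue; // segment lies outside the window
            }
            // Here the real code writes [start, end) back to CPU memory.
            if (count == 1) {
                std::cout << "last reference: " << start << '-' << end << '\n';
            }
        }
        counts -= std::make_pair(Iv::right_open(lo, hi), 1); // drop this reference
    }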
@@ -1232,16 +1189,18 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
 }
 
 template <class P>
-void BufferCache<P>::MarkWrittenBuffer(BufferId, VAddr cpu_addr, u32 size) {
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
     memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
 
+    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
+        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
+    }
+
     const IntervalType base_interval{cpu_addr, cpu_addr + size};
     common_ranges.add(base_interval);
-    for (auto& interval_set : async_downloads) {
-        interval_set.subtract(base_interval);
-    }
     if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
         uncommitted_ranges.add(base_interval);
+        pending_ranges.add(base_interval);
     }
 }
 
@@ -1530,7 +1489,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     if (!is_dirty) {
         return false;
     }
-    if (!IsRegionGpuModified(dest_address, copy_size)) {
+    VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
+    VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
+    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
         return false;
     }
 
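
InlineMemory widens its check to page granularity before calling the new, common_ranges-based IsRegionGpuModified. Assuming the usual 4 KiB guest page for YUZU_PAGESIZE, the alignment helpers behave like this (sketch, not yuzu's Common implementation):

    // Sketch: widen [dest_address, dest_address + copy_size) to page bounds.
    #include <cstdint>

    constexpr std::uint64_t YUZU_PAGESIZE = 0x1000; // assumed 4 KiB guest pages

    constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t size) {
        return value & ~(size - 1);
    }
    constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t size) {
        return (value + size - 1) & ~(size - 1);
    }

    static_assert(AlignDown(0x1234, YUZU_PAGESIZE) == 0x1000);
    static_assert(AlignUp(0x1234 + 0x40, YUZU_PAGESIZE) == 0x2000);

    int main() {
        return 0;
    }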
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 4b3677da3..6f29cba25 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -17,6 +17,7 @@
 #include <boost/pool/detail/mutex.hpp>
 #undef BOOST_NO_MT
 #include <boost/icl/interval_set.hpp>
+#include <boost/icl/split_interval_map.hpp>
 #include <boost/pool/pool.hpp>
 #include <boost/pool/pool_alloc.hpp>
 
@@ -44,8 +45,7 @@
 
 namespace boost {
 template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::default_mutex, 4096,
-                          0>;
+class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
 }
 
 namespace VideoCommon {
@@ -123,6 +123,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
         boost::icl::interval_set<VAddr, IntervalCompare, IntervalInstance, IntervalAllocator>;
     using IntervalType = typename IntervalSet::interval_type;
 
+    template <typename Type>
+    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
+        // types
+        typedef counter_add_functor<Type> type;
+        typedef boost::icl::identity_based_inplace_combine<Type> base_type;
+
+        // public member functions
+        void operator()(Type& current, const Type& added) const {
+            current += added;
+            if (current < base_type::identity_element()) {
+                current = base_type::identity_element();
+            }
+        }
+
+        // public static functions
+        static void version(Type&){};
+    };
+
+    using OverlapCombine = ICL_COMBINE_INSTANCE(counter_add_functor, int);
+    using OverlapSection = ICL_SECTION_INSTANCE(boost::icl::inter_section, int);
+    using OverlapCounter =
+        boost::icl::split_interval_map<VAddr, int, boost::icl::partial_absorber, IntervalCompare,
+                                       OverlapCombine, OverlapSection, IntervalInstance,
+                                       IntervalAllocator>;
+
     struct Empty {};
 
     struct OverlapResult {
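
counter_add_functor is an additive inplace combiner clamped at the identity element (0 for int), which is what makes the oversized subtraction in ClearDownload safe: a segment's count saturates at zero instead of going negative. Exercised in isolation (standalone copy of the functor, trimmed of the ICL version hook):

    // Sketch: the clamp-at-identity behavior of the patch's combiner.
    #include <boost/icl/functors.hpp>
    #include <iostream>
    #include <limits>

    template <typename Type>
    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
        typedef boost::icl::identity_based_inplace_combine<Type> base_type;

        void operator()(Type& current, const Type& added) const {
            current += added;
            if (current < base_type::identity_element()) {
                current = base_type::identity_element(); // never below zero
            }
        }
    };

    int main() {
        counter_add_functor<int> combine;
        int count = 2;
        combine(count, 1);                                // 3
        combine(count, -std::numeric_limits<int>::max()); // clamps to 0
        std::cout << count << '\n';                       // prints 0
    }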
@@ -219,12 +244,9 @@
     /// Commit asynchronous downloads
     void CommitAsyncFlushes();
     void CommitAsyncFlushesHigh();
-    void CommitAsyncQueries();
 
     /// Pop asynchronous downloads
     void PopAsyncFlushes();
-
-    void PopAsyncQueries();
     void PopAsyncBuffers();
 
     bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
@@ -302,6 +324,34 @@ private:
         }
     }
 
+    template <typename Func>
+    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
+                                 Func&& func) {
+        const VAddr start_address = cpu_addr;
+        const VAddr end_address = start_address + size;
+        const IntervalType search_interval{start_address, end_address};
+        auto it = current_range.lower_bound(search_interval);
+        if (it == current_range.end()) {
+            return;
+        }
+        auto end_it = current_range.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            auto& inter = it->first;
+            VAddr inter_addr_end = inter.upper();
+            VAddr inter_addr = inter.lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            if (it->second <= 0) {
+                __debugbreak();
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
     static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
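
ForEachInOverlapCounter clips each stored segment to the queried window before invoking the callback (the __debugbreak() is an MSVC intrinsic asserting that no segment ever carries a non-positive count). The container being a split_interval_map rather than a joining interval_map matters here: borders from distinct insertions survive even when neighbouring counts are equal, so the callback sees the same granularity the downloads were registered with. A sketch of the difference (default combiners, illustrative values):

    // Sketch: joining vs. splitting semantics in Boost.ICL.
    #include <boost/icl/interval_map.hpp>
    #include <boost/icl/split_interval_map.hpp>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    using VAddr = std::uint64_t;
    using Iv = boost::icl::interval<VAddr>;

    int main() {
        boost::icl::interval_map<VAddr, int> joining;
        boost::icl::split_interval_map<VAddr, int> splitting;

        const auto a = std::make_pair(Iv::right_open(0x0000, 0x2000), 1);
        const auto b = std::make_pair(Iv::right_open(0x2000, 0x4000), 1);
        joining += a;
        joining += b;
        splitting += a;
        splitting += b;

        // The joining map merges the equal-valued neighbours into one segment;
        // the splitting map keeps the 0x2000 border from the second insertion.
        std::cout << "joining:   " << joining.iterative_size() << " segment(s)\n";   // 1
        std::cout << "splitting: " << splitting.iterative_size() << " segment(s)\n"; // 2
    }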
@@ -309,6 +359,8 @@ private:
 
     void RunGarbageCollector();
 
+    void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size);
+
     void BindHostIndexBuffer();
 
     void BindHostVertexBuffers();
@@ -474,10 +526,11 @@ private:
     IntervalSet uncommitted_ranges;
     IntervalSet common_ranges;
     IntervalSet cached_ranges;
+    IntervalSet pending_ranges;
     std::deque<IntervalSet> committed_ranges;
 
     // Async Buffers
-    std::deque<IntervalSet> async_downloads;
+    OverlapCounter async_downloads;
     std::deque<std::optional<Async_Buffer>> async_buffers;
     std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
     std::optional<Async_Buffer> current_buffer;