author     Fernando Sahmkow  2024-02-04 19:16:07 +0100
committer  Fernando Sahmkow  2024-02-05 11:06:52 +0100
commit     0d5a3abeaefd3a1682c48a59c5a9170cfb0a39d0 (patch)
tree       850cfea521da30809e93d4ab2ce69f649961a9a1 /src/video_core/buffer_cache
parent     NVDRV: Refactor HeapMapper to use RangeSets (diff)
Buffer Cache: Refactor to use RangeSets instead of boost::icl interval containers
Diffstat (limited to 'src/video_core/buffer_cache')
 -rw-r--r--  src/video_core/buffer_cache/buffer_cache.h       | 250
 -rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h  | 131
 2 files changed, 103 insertions(+), 278 deletions(-)
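
The change replaces the boost::icl interval containers the buffer cache used for GPU-modified-range tracking with yuzu's own Common::RangeSet / Common::SplitRangeSet. The sketch below is commentary, not part of the commit: a minimal std::map-backed stand-in illustrating the Add/Subtract/ForEachInRange semantics the new call sites rely on. The real implementation lives in src/common/range_sets.h and range_sets.inc and will differ in detail; SimpleRangeSet and everything else in the sketch is hypothetical.

// Toy model of the range-set semantics assumed by the call sites in this diff.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <map>

template <typename AddrType>
class SimpleRangeSet {
public:
    // Insert [base, base + size) and merge with any touching ranges.
    void Add(AddrType base, AddrType size) {
        AddrType start = base;
        AddrType end = base + size;
        auto it = ranges_.lower_bound(start);
        if (it != ranges_.begin()) {
            auto prev = std::prev(it);
            if (prev->second >= start) { // previous range touches or overlaps
                start = prev->first;
                end = std::max(end, prev->second);
                it = ranges_.erase(prev);
            }
        }
        while (it != ranges_.end() && it->first <= end) { // absorb following ranges
            end = std::max(end, it->second);
            it = ranges_.erase(it);
        }
        ranges_.emplace(start, end);
    }

    // Remove [base, base + size), splitting ranges that straddle the bounds.
    void Subtract(AddrType base, AddrType size) {
        const AddrType start = base;
        const AddrType end = base + size;
        auto it = ranges_.lower_bound(start);
        if (it != ranges_.begin() && std::prev(it)->second > start) {
            --it; // the previous range overlaps the subtracted window
        }
        while (it != ranges_.end() && it->first < end) {
            const AddrType r_start = it->first;
            const AddrType r_end = it->second;
            it = ranges_.erase(it);
            if (r_start < start) {
                ranges_.emplace(r_start, start); // keep the left remainder
            }
            if (r_end > end) {
                ranges_.emplace(end, r_end); // keep the right remainder
            }
        }
    }

    // Visit the intersection of each stored range with [base, base + size).
    // A real implementation would bound this scan; the toy walks everything.
    template <typename Func>
    void ForEachInRange(AddrType base, AddrType size, Func&& func) const {
        const AddrType start = base;
        const AddrType end = base + size;
        for (const auto& [r_start, r_end] : ranges_) {
            const AddrType lo = std::max(r_start, start);
            const AddrType hi = std::min(r_end, end);
            if (lo < hi) {
                func(lo, hi);
            }
        }
    }

private:
    std::map<AddrType, AddrType> ranges_; // start -> end, disjoint
};

int main() {
    SimpleRangeSet<std::uint64_t> set;
    set.Add(0x1000, 0x100);
    set.Add(0x1100, 0x100);     // coalesces into [0x1000, 0x1200)
    set.Subtract(0x1080, 0x80); // splits into [0x1000, 0x1080) and [0x1100, 0x1200)
    set.ForEachInRange(0x1000, 0x200, [](std::uint64_t lo, std::uint64_t hi) {
        std::printf("[%llx, %llx)\n", static_cast<unsigned long long>(lo),
                    static_cast<unsigned long long>(hi));
    });
}

Note how ForEachInRange clamps each stored range to the query window; the removed ForEachInRangeSet helper in buffer_cache_base.h did the same clamping by hand against a boost::icl::interval_set.
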
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b4bf369d1..6d3d933c5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -7,6 +7,7 @@
 #include <memory>
 #include <numeric>
 
+#include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/guest_memory.h"
 #include "video_core/host1x/gpu_device_memory_manager.h"
@@ -20,7 +21,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
-    common_ranges.clear();
+    gpu_modified_ranges.Clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
     if (!runtime.CanReportMemoryUsage()) {
@@ -44,6 +45,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
 }
 
 template <class P>
+BufferCache<P>::~BufferCache() = default;
+
+template <class P>
 void BufferCache<P>::RunGarbageCollector() {
     const bool aggressive_gc = total_used_memory >= critical_memory;
     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
@@ -96,20 +100,17 @@ void BufferCache<P>::TickFrame() {
     ++frame_tick;
     delayed_destruction_ring.Tick();
 
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        for (auto& buffer : async_buffers_death_ring) {
-            runtime.FreeDeferredStagingBuffer(buffer);
-        }
-        async_buffers_death_ring.clear();
+    for (auto& buffer : async_buffers_death_ring) {
+        runtime.FreeDeferredStagingBuffer(buffer);
     }
+    async_buffers_death_ring.clear();
 }
 
 template <class P>
 void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
     if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
-        const IntervalType subtract_interval{device_addr, device_addr + size};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
+        ClearDownload(device_addr, size);
+        gpu_modified_ranges.Subtract(device_addr, size);
     }
     memory_tracker.MarkRegionAsCpuModified(device_addr, size);
 }
@@ -174,11 +175,11 @@ void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
 }
 
 template <class P>
-void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
-    RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
-    uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : committed_ranges) {
-        interval_set.subtract(subtract_interval);
+void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) {
+    async_downloads.DeleteAll(device_addr, size);
+    uncommitted_gpu_modified_ranges.Subtract(device_addr, size);
+    for (auto& interval_set : committed_gpu_modified_ranges) {
+        interval_set.Subtract(device_addr, size);
     }
 }
 
@@ -195,8 +196,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         return false;
     }
 
-    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
-    ClearDownload(subtract_interval);
+    ClearDownload(*cpu_dest_address, amount);
 
     BufferId buffer_a;
     BufferId buffer_b;
@@ -215,21 +215,20 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         .size = amount,
     }};
 
-    boost::container::small_vector<IntervalType, 4> tmp_intervals;
+    boost::container::small_vector<std::pair<DAddr, size_t>, 4> tmp_intervals;
     auto mirror = [&](DAddr base_address, DAddr base_address_end) {
         const u64 size = base_address_end - base_address;
         const DAddr diff = base_address - *cpu_src_address;
         const DAddr new_base_address = *cpu_dest_address + diff;
-        const IntervalType add_interval{new_base_address, new_base_address + size};
-        tmp_intervals.push_back(add_interval);
-        uncommitted_ranges.add(add_interval);
+        tmp_intervals.push_back({new_base_address, size});
+        uncommitted_gpu_modified_ranges.Add(new_base_address, size);
     };
-    ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
+    gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror);
     // This subtraction in this order is important for overlapping copies.
-    common_ranges.subtract(subtract_interval);
+    gpu_modified_ranges.Subtract(*cpu_dest_address, amount);
     const bool has_new_downloads = tmp_intervals.size() != 0;
-    for (const IntervalType& add_interval : tmp_intervals) {
-        common_ranges.add(add_interval);
+    for (const auto& pair : tmp_intervals) {
+        gpu_modified_ranges.Add(pair.first, pair.second);
     }
     const auto& copy = copies[0];
     src_buffer.MarkUsage(copy.src_offset, copy.size);
@@ -257,9 +256,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     }
 
     const size_t size = amount * sizeof(u32);
-    const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(*cpu_dst_address, size);
+    gpu_modified_ranges.Subtract(*cpu_dst_address, size);
 
     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
     Buffer& dest_buffer = slot_buffers[buffer];
@@ -300,11 +298,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
         MarkWrittenBuffer(buffer_id, device_addr, size);
         break;
     case ObtainBufferOperation::DiscardWrite: {
-        DAddr device_addr_start = Common::AlignDown(device_addr, 64);
-        DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
-        IntervalType interval{device_addr_start, device_addr_end};
-        ClearDownload(interval);
-        common_ranges.subtract(interval);
+        const DAddr device_addr_start = Common::AlignDown(device_addr, 64);
+        const DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
+        const size_t new_size = device_addr_end - device_addr_start;
+        ClearDownload(device_addr_start, new_size);
+        gpu_modified_ranges.Subtract(device_addr_start, new_size);
        break;
     }
     default:
@@ -504,46 +502,40 @@ void BufferCache<P>::FlushCachedWrites() {
 
 template <class P>
 bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
-    return !uncommitted_ranges.empty() || !committed_ranges.empty();
+    return !uncommitted_gpu_modified_ranges.Empty() || !committed_gpu_modified_ranges.empty();
 }
 
 template <class P>
 void BufferCache<P>::AccumulateFlushes() {
-    if (uncommitted_ranges.empty()) {
+    if (uncommitted_gpu_modified_ranges.Empty()) {
         return;
     }
-    committed_ranges.emplace_back(std::move(uncommitted_ranges));
+    committed_gpu_modified_ranges.emplace_back(std::move(uncommitted_gpu_modified_ranges));
 }
 
 template <class P>
 bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        return (!async_buffers.empty() && async_buffers.front().has_value());
-    } else {
-        return false;
-    }
+    return (!async_buffers.empty() && async_buffers.front().has_value());
 }
 
 template <class P>
 void BufferCache<P>::CommitAsyncFlushesHigh() {
     AccumulateFlushes();
 
-    if (committed_ranges.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+    if (committed_gpu_modified_ranges.empty()) {
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
 
-    auto it = committed_ranges.begin();
-    while (it != committed_ranges.end()) {
+    auto it = committed_gpu_modified_ranges.begin();
+    while (it != committed_gpu_modified_ranges.end()) {
         auto& current_intervals = *it;
         auto next_it = std::next(it);
-        while (next_it != committed_ranges.end()) {
-            for (auto& interval : *next_it) {
-                current_intervals.subtract(interval);
-            }
+        while (next_it != committed_gpu_modified_ranges.end()) {
+            next_it->ForEach([&current_intervals](DAddr start, DAddr end) {
+                current_intervals.Subtract(start, end - start);
+            });
            next_it++;
         }
         it++;
@@ -552,10 +544,10 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    for (const IntervalSet& intervals : committed_ranges) {
-        for (auto& interval : intervals) {
-            const std::size_t size = interval.upper() - interval.lower();
-            const DAddr device_addr = interval.lower();
+    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
+        range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
+            const std::size_t size = interval_upper - interval_lower;
+            const DAddr device_addr = interval_lower;
             ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
                 const DAddr buffer_start = buffer.CpuAddr();
                 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
@@ -583,77 +575,35 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                     largest_copy = std::max(largest_copy, new_size);
                 };
 
-                ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
+                gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
+                                                   add_download);
             });
         });
-        }
+        });
     }
-    committed_ranges.clear();
+    committed_gpu_modified_ranges.clear();
     if (downloads.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        boost::container::small_vector<BufferCopy, 4> normalized_copies;
-        IntervalSet new_async_range{};
-        runtime.PreCopyBarrier();
-        for (auto& [copy, buffer_id] : downloads) {
-            copy.dst_offset += download_staging.offset;
-            const std::array copies{copy};
-            BufferCopy second_copy{copy};
-            Buffer& buffer = slot_buffers[buffer_id];
-            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-            DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
-            const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
-            async_downloads += std::make_pair(base_interval, 1);
-            buffer.MarkUsage(copy.src_offset, copy.size);
-            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-            normalized_copies.push_back(second_copy);
-        }
-        runtime.PostCopyBarrier();
-        pending_downloads.emplace_back(std::move(normalized_copies));
-        async_buffers.emplace_back(download_staging);
-    } else {
-        if (!Settings::IsGPULevelHigh()) {
-            committed_ranges.clear();
-            uncommitted_ranges.clear();
-        } else {
-            if constexpr (USE_MEMORY_MAPS) {
-                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-                runtime.PreCopyBarrier();
-                for (auto& [copy, buffer_id] : downloads) {
-                    // Have in mind the staging buffer offset for the copy
-                    copy.dst_offset += download_staging.offset;
-                    const std::array copies{copy};
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.MarkUsage(copy.src_offset, copy.size);
-                    runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-                }
-                runtime.PostCopyBarrier();
-                runtime.Finish();
-                for (const auto& [copy, buffer_id] : downloads) {
-                    const Buffer& buffer = slot_buffers[buffer_id];
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    // Undo the modified offset
-                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
-                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
-                }
-            } else {
-                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-                for (const auto& [copy, buffer_id] : downloads) {
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.ImmediateDownload(copy.src_offset,
-                                             immediate_buffer.subspan(0, copy.size));
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
-                }
-            }
-        }
-    }
+    auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+    boost::container::small_vector<BufferCopy, 4> normalized_copies;
+    runtime.PreCopyBarrier();
+    for (auto& [copy, buffer_id] : downloads) {
+        copy.dst_offset += download_staging.offset;
+        const std::array copies{copy};
+        BufferCopy second_copy{copy};
+        Buffer& buffer = slot_buffers[buffer_id];
+        second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+        const DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
+        async_downloads.Add(orig_device_addr, copy.size);
+        buffer.MarkUsage(copy.src_offset, copy.size);
+        runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+        normalized_copies.push_back(second_copy);
+    }
+    runtime.PostCopyBarrier();
+    pending_downloads.emplace_back(std::move(normalized_copies));
+    async_buffers.emplace_back(download_staging);
 }
 
 template <class P>
@@ -676,37 +626,31 @@ void BufferCache<P>::PopAsyncBuffers() {
         async_buffers.pop_front();
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto& downloads = pending_downloads.front();
-        auto& async_buffer = async_buffers.front();
-        u8* base = async_buffer->mapped_span.data();
-        const size_t base_offset = async_buffer->offset;
-        for (const auto& copy : downloads) {
-            const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
-            const u64 dst_offset = copy.dst_offset - base_offset;
-            const u8* read_mapped_memory = base + dst_offset;
-            ForEachInOverlapCounter(
-                async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
-                    device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
-                                                   end - start);
-                    if (count == 1) {
-                        const IntervalType base_interval{start, end};
-                        common_ranges.subtract(base_interval);
-                    }
-                });
-            const IntervalType subtract_interval{device_addr, device_addr + copy.size};
-            RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
-        }
-        async_buffers_death_ring.emplace_back(*async_buffer);
-        async_buffers.pop_front();
-        pending_downloads.pop_front();
+    auto& downloads = pending_downloads.front();
+    auto& async_buffer = async_buffers.front();
+    u8* base = async_buffer->mapped_span.data();
+    const size_t base_offset = async_buffer->offset;
+    for (const auto& copy : downloads) {
+        const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
+        const u64 dst_offset = copy.dst_offset - base_offset;
+        const u8* read_mapped_memory = base + dst_offset;
+        async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
+            device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
+                                           end - start);
+        });
+        async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
+            gpu_modified_ranges.Subtract(start, end - start);
+        });
     }
+    async_buffers_death_ring.emplace_back(*async_buffer);
+    async_buffers.pop_front();
+    pending_downloads.pop_front();
 }
 
 template <class P>
 bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     bool is_dirty = false;
-    ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
+    gpu_modified_ranges.ForEachInRange(addr, size, [&](DAddr, DAddr) { is_dirty = true; });
     return is_dirty;
 }
 
@@ -1320,10 +1264,8 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
 template <class P>
 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
     memory_tracker.MarkRegionAsGpuModified(device_addr, size);
-
-    const IntervalType base_interval{device_addr, device_addr + size};
-    common_ranges.add(base_interval);
-    uncommitted_ranges.add(base_interval);
+    gpu_modified_ranges.Add(device_addr, size);
+    uncommitted_gpu_modified_ranges.Add(device_addr, size);
 }
 
 template <class P>
@@ -1600,9 +1542,8 @@ bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
 template <class P>
 void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
                                                 std::span<const u8> inlined_buffer) {
-    const IntervalType subtract_interval{dest_address, dest_address + copy_size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(dest_address, copy_size);
+    gpu_modified_ranges.Subtract(dest_address, copy_size);
 
     BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
     auto& buffer = slot_buffers[buffer_id];
@@ -1652,12 +1593,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
             largest_copy = std::max(largest_copy, new_size);
         };
 
-        const DAddr start_address = device_addr_out;
-        const DAddr end_address = start_address + range_size;
-        ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
-        const IntervalType subtract_interval{start_address, end_address};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
+        gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
+        ClearDownload(device_addr_out, range_size);
+        gpu_modified_ranges.Subtract(device_addr_out, range_size);
     });
     if (total_size_bytes == 0) {
         return;
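
Commentary, not part of the diff: async_downloads was previously an OverlapCounter (boost::icl::split_interval_map<DAddr, int>) that reference-counted overlapping in-flight downloads, and PopAsyncBuffers only dropped a range from common_ranges once the last overlapping download wrote it back (count == 1). The new Common::SplitRangeSet appears to keep the same counting behaviour behind Add and Subtract-with-callback. Below is a toy model of that idea, page-granular and with hypothetical names; the real set works on byte ranges and passes (start, end) pairs to the callback.

// Toy counted-range set modeling the OverlapCounter -> SplitRangeSet behaviour.
#include <cstdint>
#include <cstdio>
#include <map>

class CountedPageSet {
public:
    // Add one reference to every page in [base, base + count).
    void Add(std::uint64_t base, std::uint64_t count) {
        for (std::uint64_t page = base; page < base + count; ++page) {
            ++pages_[page];
        }
    }

    // Drop one reference; invoke func only on pages whose count reaches zero,
    // mirroring how PopAsyncBuffers erases gpu_modified_ranges entries once
    // the last overlapping download has completed.
    template <typename Func>
    void Subtract(std::uint64_t base, std::uint64_t count, Func&& func) {
        for (std::uint64_t page = base; page < base + count; ++page) {
            auto it = pages_.find(page);
            if (it == pages_.end()) {
                continue;
            }
            if (--it->second == 0) {
                pages_.erase(it);
                func(page);
            }
        }
    }

private:
    std::map<std::uint64_t, int> pages_; // page -> reference count
};

int main() {
    CountedPageSet set;
    set.Add(0, 4); // first in-flight download covers pages [0, 4)
    set.Add(2, 4); // second download overlaps it on pages [2, 4)
    auto report = [](std::uint64_t page) {
        std::printf("page %llu released\n", static_cast<unsigned long long>(page));
    };
    set.Subtract(0, 4, report); // pages 0,1 released; 2,3 still referenced
    set.Subtract(2, 4, report); // pages 2,3,4,5 released
}
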
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 59124458d..448516651 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -13,25 +13,15 @@
 #include <unordered_map>
 #include <vector>
 
-#include <boost/container/small_vector.hpp>
-#define BOOST_NO_MT
-#include <boost/pool/detail/mutex.hpp>
-#undef BOOST_NO_MT
-#include <boost/icl/interval.hpp>
-#include <boost/icl/interval_base_set.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/icl/split_interval_map.hpp>
-#include <boost/pool/pool.hpp>
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/pool/poolfwd.hpp>
-
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "common/literals.h"
 #include "common/lru_cache.h"
 #include "common/microprofile.h"
+#include "common/range_sets.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "common/slot_vector.h"
 #include "video_core/buffer_cache/buffer_base.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
@@ -41,14 +31,8 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/surface.h"
-#include "common/slot_vector.h"
 #include "video_core/texture_cache/types.h"
 
-namespace boost {
-template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
-}
-
 namespace VideoCommon {
 
 MICROPROFILE_DECLARE(GPU_PrepareBuffers);
@@ -184,7 +168,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
     static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
 
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
@@ -202,34 +185,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     using Async_Buffer = typename P::Async_Buffer;
     using MemoryTracker = typename P::MemoryTracker;
 
-    using IntervalCompare = std::less<DAddr>;
-    using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
-    using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
-    using IntervalSet = boost::icl::interval_set<DAddr>;
-    using IntervalType = typename IntervalSet::interval_type;
-
-    template <typename Type>
-    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
-        // types
-        typedef counter_add_functor<Type> type;
-        typedef boost::icl::identity_based_inplace_combine<Type> base_type;
-
-        // public member functions
-        void operator()(Type& current, const Type& added) const {
-            current += added;
-            if (current < base_type::identity_element()) {
-                current = base_type::identity_element();
-            }
-        }
-
-        // public static functions
-        static void version(Type&){};
-    };
-
-    using OverlapCombine = counter_add_functor<int>;
-    using OverlapSection = boost::icl::inter_section<int>;
-    using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
-
     struct OverlapResult {
         boost::container::small_vector<BufferId, 16> ids;
         DAddr begin;
@@ -240,6 +195,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
 public:
     explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
 
+    ~BufferCache();
+
     void TickFrame();
 
     void WriteMemory(DAddr device_addr, u64 size);
@@ -379,75 +336,6 @@ private:
         }
     }
 
-    template <typename Func>
-    void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            DAddr inter_addr_end = it->upper();
-            DAddr inter_addr = it->lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end);
-        }
-    }
-
-    template <typename Func>
-    void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
-                                 Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            auto& inter = it->first;
-            DAddr inter_addr_end = inter.upper();
-            DAddr inter_addr = inter.lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end, it->second);
-        }
-    }
-
-    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
-                                    const IntervalType search_interval, int subtract_value) {
-        bool any_removals = false;
-        current_range.add(std::make_pair(search_interval, subtract_value));
-        do {
-            any_removals = false;
-            auto it = current_range.lower_bound(search_interval);
-            if (it == current_range.end()) {
-                return;
-            }
-            auto end_it = current_range.upper_bound(search_interval);
-            for (; it != end_it; it++) {
-                if (it->second <= 0) {
-                    any_removals = true;
-                    current_range.erase(it);
-                    break;
-                }
-            }
-        } while (any_removals);
-    }
-
     static bool IsRangeGranular(DAddr device_addr, size_t size) {
         return (device_addr & ~Core::DEVICE_PAGEMASK) ==
                ((device_addr + size) & ~Core::DEVICE_PAGEMASK);
@@ -552,7 +440,7 @@ private:
 
     [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
 
-    void ClearDownload(IntervalType subtract_interval);
+    void ClearDownload(DAddr base_addr, u64 size);
 
     void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
                                     std::span<const u8> inlined_buffer);
@@ -567,13 +455,12 @@ private:
     u32 last_index_count = 0;
 
     MemoryTracker memory_tracker;
-    IntervalSet uncommitted_ranges;
-    IntervalSet common_ranges;
-    IntervalSet cached_ranges;
-    std::deque<IntervalSet> committed_ranges;
+    Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges;
+    Common::RangeSet<DAddr> gpu_modified_ranges;
+    std::deque<Common::RangeSet<DAddr>> committed_gpu_modified_ranges;
 
     // Async Buffers
-    OverlapCounter async_downloads;
+    Common::SplitRangeSet<DAddr> async_downloads;
     std::deque<std::optional<Async_Buffer>> async_buffers;
    std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
    std::optional<Async_Buffer> current_buffer;
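
A final piece of commentary on the header diff above: the commit declares ~BufferCache(); in buffer_cache_base.h and defines it as = default in buffer_cache.h, the only file that includes common/range_sets.inc. That is the standard out-of-line-destructor pattern for members whose full definition is hidden from the header (presumably Common::RangeSet keeps its boost internals behind the .inc). A minimal analogue with a hypothetical Widget/Impl pair:

// Sketch of the out-of-line destructor pattern; Widget and Impl are invented.
// widget.h would contain only the class with Impl forward-declared.
#include <memory>

class Widget {
public:
    Widget();
    ~Widget(); // declared only; defining it inline would need a complete Impl
private:
    struct Impl; // incomplete at the point of declaration
    std::unique_ptr<Impl> impl_;
};

// widget.cpp: Impl is complete here, so the defaulted destructor compiles.
struct Widget::Impl {
    int value = 0;
};

Widget::Widget() : impl_{std::make_unique<Impl>()} {}
Widget::~Widget() = default;

int main() {
    Widget w; // destruction goes through the out-of-line destructor
}

Defining the destructor inline in the header would force std::unique_ptr<Impl> to delete an incomplete type; moving the = default to the source file, where Impl is complete, sidesteps that, the same way BufferCache's = default lands in buffer_cache.h after range_sets.inc has been included.
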