Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt                        |   1
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h           | 250
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h      | 135
-rw-r--r--  src/video_core/query_cache.h                         |   6
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h     |   3
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h    |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp   |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h     |   3
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h    |   2
-rw-r--r--  src/video_core/texture_cache/slot_vector.h           | 227
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h    |  18
-rw-r--r--  src/video_core/texture_cache/types.h                 |  16
12 files changed, 130 insertions(+), 535 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 16c905db9..55180f4b5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -274,7 +274,6 @@ add_library(video_core STATIC
     texture_cache/image_view_info.h
     texture_cache/render_targets.h
     texture_cache/samples_helper.h
-    texture_cache/slot_vector.h
     texture_cache/texture_cache.cpp
     texture_cache/texture_cache.h
     texture_cache/texture_cache_base.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b4bf369d1..6d3d933c5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -7,6 +7,7 @@
 #include <memory>
 #include <numeric>
 
+#include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/guest_memory.h"
 #include "video_core/host1x/gpu_device_memory_manager.h"
@@ -20,7 +21,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
-    common_ranges.clear();
+    gpu_modified_ranges.Clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
     if (!runtime.CanReportMemoryUsage()) {
@@ -44,6 +45,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
 }
 
 template <class P>
+BufferCache<P>::~BufferCache() = default;
+
+template <class P>
 void BufferCache<P>::RunGarbageCollector() {
     const bool aggressive_gc = total_used_memory >= critical_memory;
     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
@@ -96,20 +100,17 @@ void BufferCache<P>::TickFrame() {
     ++frame_tick;
     delayed_destruction_ring.Tick();
 
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        for (auto& buffer : async_buffers_death_ring) {
-            runtime.FreeDeferredStagingBuffer(buffer);
-        }
-        async_buffers_death_ring.clear();
+    for (auto& buffer : async_buffers_death_ring) {
+        runtime.FreeDeferredStagingBuffer(buffer);
     }
+    async_buffers_death_ring.clear();
 }
 
 template <class P>
 void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
     if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
-        const IntervalType subtract_interval{device_addr, device_addr + size};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
+        ClearDownload(device_addr, size);
+        gpu_modified_ranges.Subtract(device_addr, size);
     }
     memory_tracker.MarkRegionAsCpuModified(device_addr, size);
 }
@@ -174,11 +175,11 @@ void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
 }
 
 template <class P>
-void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
-    RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
-    uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : committed_ranges) {
-        interval_set.subtract(subtract_interval);
+void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) {
+    async_downloads.DeleteAll(device_addr, size);
+    uncommitted_gpu_modified_ranges.Subtract(device_addr, size);
+    for (auto& interval_set : committed_gpu_modified_ranges) {
+        interval_set.Subtract(device_addr, size);
     }
 }
 
@@ -195,8 +196,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         return false;
     }
 
-    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
-    ClearDownload(subtract_interval);
+    ClearDownload(*cpu_dest_address, amount);
 
     BufferId buffer_a;
     BufferId buffer_b;
@@ -215,21 +215,20 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         .size = amount,
     }};
 
-    boost::container::small_vector<IntervalType, 4> tmp_intervals;
+    boost::container::small_vector<std::pair<DAddr, size_t>, 4> tmp_intervals;
     auto mirror = [&](DAddr base_address, DAddr base_address_end) {
         const u64 size = base_address_end - base_address;
         const DAddr diff = base_address - *cpu_src_address;
         const DAddr new_base_address = *cpu_dest_address + diff;
-        const IntervalType add_interval{new_base_address, new_base_address + size};
-        tmp_intervals.push_back(add_interval);
-        uncommitted_ranges.add(add_interval);
+        tmp_intervals.push_back({new_base_address, size});
+        uncommitted_gpu_modified_ranges.Add(new_base_address, size);
     };
-    ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
+    gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror);
     // This subtraction in this order is important for overlapping copies.
-    common_ranges.subtract(subtract_interval);
+    gpu_modified_ranges.Subtract(*cpu_dest_address, amount);
     const bool has_new_downloads = tmp_intervals.size() != 0;
-    for (const IntervalType& add_interval : tmp_intervals) {
-        common_ranges.add(add_interval);
+    for (const auto& pair : tmp_intervals) {
+        gpu_modified_ranges.Add(pair.first, pair.second);
     }
     const auto& copy = copies[0];
     src_buffer.MarkUsage(copy.src_offset, copy.size);
@@ -257,9 +256,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     }
 
     const size_t size = amount * sizeof(u32);
-    const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(*cpu_dst_address, size);
+    gpu_modified_ranges.Subtract(*cpu_dst_address, size);
 
     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
     Buffer& dest_buffer = slot_buffers[buffer];
@@ -300,11 +298,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
         MarkWrittenBuffer(buffer_id, device_addr, size);
         break;
     case ObtainBufferOperation::DiscardWrite: {
-        DAddr device_addr_start = Common::AlignDown(device_addr, 64);
-        DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
-        IntervalType interval{device_addr_start, device_addr_end};
-        ClearDownload(interval);
-        common_ranges.subtract(interval);
+        const DAddr device_addr_start = Common::AlignDown(device_addr, 64);
+        const DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
+        const size_t new_size = device_addr_end - device_addr_start;
+        ClearDownload(device_addr_start, new_size);
+        gpu_modified_ranges.Subtract(device_addr_start, new_size);
         break;
     }
     default:
@@ -504,46 +502,40 @@ void BufferCache<P>::FlushCachedWrites() {
 
 template <class P>
 bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
-    return !uncommitted_ranges.empty() || !committed_ranges.empty();
+    return !uncommitted_gpu_modified_ranges.Empty() || !committed_gpu_modified_ranges.empty();
 }
 
 template <class P>
 void BufferCache<P>::AccumulateFlushes() {
-    if (uncommitted_ranges.empty()) {
+    if (uncommitted_gpu_modified_ranges.Empty()) {
         return;
     }
-    committed_ranges.emplace_back(std::move(uncommitted_ranges));
+    committed_gpu_modified_ranges.emplace_back(std::move(uncommitted_gpu_modified_ranges));
 }
 
 template <class P>
 bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        return (!async_buffers.empty() && async_buffers.front().has_value());
-    } else {
-        return false;
-    }
+    return (!async_buffers.empty() && async_buffers.front().has_value());
 }
 
 template <class P>
 void BufferCache<P>::CommitAsyncFlushesHigh() {
     AccumulateFlushes();
 
-    if (committed_ranges.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+    if (committed_gpu_modified_ranges.empty()) {
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
 
-    auto it = committed_ranges.begin();
-    while (it != committed_ranges.end()) {
+    auto it = committed_gpu_modified_ranges.begin();
+    while (it != committed_gpu_modified_ranges.end()) {
         auto& current_intervals = *it;
         auto next_it = std::next(it);
-        while (next_it != committed_ranges.end()) {
-            for (auto& interval : *next_it) {
-                current_intervals.subtract(interval);
-            }
+        while (next_it != committed_gpu_modified_ranges.end()) {
+            next_it->ForEach([&current_intervals](DAddr start, DAddr end) {
+                current_intervals.Subtract(start, end - start);
+            });
             next_it++;
         }
         it++;
@@ -552,10 +544,10 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    for (const IntervalSet& intervals : committed_ranges) {
-        for (auto& interval : intervals) {
-            const std::size_t size = interval.upper() - interval.lower();
-            const DAddr device_addr = interval.lower();
+    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
+        range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
+            const std::size_t size = interval_upper - interval_lower;
+            const DAddr device_addr = interval_lower;
             ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
                 const DAddr buffer_start = buffer.CpuAddr();
                 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
@@ -583,77 +575,35 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                     largest_copy = std::max(largest_copy, new_size);
                 };
 
-                ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
+                gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
+                                                   add_download);
                 });
             });
-        }
+        });
     }
-    committed_ranges.clear();
+    committed_gpu_modified_ranges.clear();
     if (downloads.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        boost::container::small_vector<BufferCopy, 4> normalized_copies;
-        IntervalSet new_async_range{};
-        runtime.PreCopyBarrier();
-        for (auto& [copy, buffer_id] : downloads) {
-            copy.dst_offset += download_staging.offset;
-            const std::array copies{copy};
-            BufferCopy second_copy{copy};
-            Buffer& buffer = slot_buffers[buffer_id];
-            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-            DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
-            const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
-            async_downloads += std::make_pair(base_interval, 1);
-            buffer.MarkUsage(copy.src_offset, copy.size);
-            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-            normalized_copies.push_back(second_copy);
-        }
-        runtime.PostCopyBarrier();
-        pending_downloads.emplace_back(std::move(normalized_copies));
-        async_buffers.emplace_back(download_staging);
-    } else {
-        if (!Settings::IsGPULevelHigh()) {
-            committed_ranges.clear();
-            uncommitted_ranges.clear();
-        } else {
-            if constexpr (USE_MEMORY_MAPS) {
-                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-                runtime.PreCopyBarrier();
-                for (auto& [copy, buffer_id] : downloads) {
-                    // Have in mind the staging buffer offset for the copy
-                    copy.dst_offset += download_staging.offset;
-                    const std::array copies{copy};
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.MarkUsage(copy.src_offset, copy.size);
-                    runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-                }
-                runtime.PostCopyBarrier();
-                runtime.Finish();
-                for (const auto& [copy, buffer_id] : downloads) {
-                    const Buffer& buffer = slot_buffers[buffer_id];
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    // Undo the modified offset
-                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
-                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
-                }
-            } else {
-                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-                for (const auto& [copy, buffer_id] : downloads) {
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.ImmediateDownload(copy.src_offset,
-                                             immediate_buffer.subspan(0, copy.size));
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
-                }
-            }
-        }
-    }
+    auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+    boost::container::small_vector<BufferCopy, 4> normalized_copies;
+    runtime.PreCopyBarrier();
+    for (auto& [copy, buffer_id] : downloads) {
+        copy.dst_offset += download_staging.offset;
+        const std::array copies{copy};
+        BufferCopy second_copy{copy};
+        Buffer& buffer = slot_buffers[buffer_id];
+        second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+        const DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
+        async_downloads.Add(orig_device_addr, copy.size);
+        buffer.MarkUsage(copy.src_offset, copy.size);
+        runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+        normalized_copies.push_back(second_copy);
+    }
+    runtime.PostCopyBarrier();
+    pending_downloads.emplace_back(std::move(normalized_copies));
+    async_buffers.emplace_back(download_staging);
 }
 
 template <class P>
@@ -676,37 +626,31 @@ void BufferCache<P>::PopAsyncBuffers() {
         async_buffers.pop_front();
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto& downloads = pending_downloads.front();
-        auto& async_buffer = async_buffers.front();
-        u8* base = async_buffer->mapped_span.data();
-        const size_t base_offset = async_buffer->offset;
-        for (const auto& copy : downloads) {
-            const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
-            const u64 dst_offset = copy.dst_offset - base_offset;
-            const u8* read_mapped_memory = base + dst_offset;
-            ForEachInOverlapCounter(
-                async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
-                    device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
-                                                   end - start);
-                    if (count == 1) {
-                        const IntervalType base_interval{start, end};
-                        common_ranges.subtract(base_interval);
-                    }
-                });
-            const IntervalType subtract_interval{device_addr, device_addr + copy.size};
-            RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
-        }
-        async_buffers_death_ring.emplace_back(*async_buffer);
-        async_buffers.pop_front();
-        pending_downloads.pop_front();
+    auto& downloads = pending_downloads.front();
+    auto& async_buffer = async_buffers.front();
+    u8* base = async_buffer->mapped_span.data();
+    const size_t base_offset = async_buffer->offset;
+    for (const auto& copy : downloads) {
+        const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
+        const u64 dst_offset = copy.dst_offset - base_offset;
+        const u8* read_mapped_memory = base + dst_offset;
+        async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
+            device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
+                                           end - start);
+        });
+        async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
+            gpu_modified_ranges.Subtract(start, end - start);
+        });
     }
+    async_buffers_death_ring.emplace_back(*async_buffer);
+    async_buffers.pop_front();
+    pending_downloads.pop_front();
 }
 
 template <class P>
 bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     bool is_dirty = false;
-    ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
+    gpu_modified_ranges.ForEachInRange(addr, size, [&](DAddr, DAddr) { is_dirty = true; });
     return is_dirty;
 }
 
@@ -1320,10 +1264,8 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
 template <class P>
 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
     memory_tracker.MarkRegionAsGpuModified(device_addr, size);
-
-    const IntervalType base_interval{device_addr, device_addr + size};
-    common_ranges.add(base_interval);
-    uncommitted_ranges.add(base_interval);
+    gpu_modified_ranges.Add(device_addr, size);
+    uncommitted_gpu_modified_ranges.Add(device_addr, size);
 }
 
 template <class P>
@@ -1600,9 +1542,8 @@ bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
 template <class P>
 void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
                                                 std::span<const u8> inlined_buffer) {
-    const IntervalType subtract_interval{dest_address, dest_address + copy_size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(dest_address, copy_size);
+    gpu_modified_ranges.Subtract(dest_address, copy_size);
 
     BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
     auto& buffer = slot_buffers[buffer_id];
@@ -1652,12 +1593,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
                 largest_copy = std::max(largest_copy, new_size);
             };
 
-            const DAddr start_address = device_addr_out;
-            const DAddr end_address = start_address + range_size;
-            ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
-            const IntervalType subtract_interval{start_address, end_address};
-            ClearDownload(subtract_interval);
-            common_ranges.subtract(subtract_interval);
+            gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
+            ClearDownload(device_addr_out, range_size);
+            gpu_modified_ranges.Subtract(device_addr_out, range_size);
         });
     if (total_size_bytes == 0) {
         return;
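
The buffer cache changes above swap the boost::icl interval containers for a shared Common::RangeSet<DAddr> that tracks GPU-modified memory. The call sites in this diff imply roughly the following interface; this is a minimal sketch inferred from usage (the real definitions live in common/range_sets.h and common/range_sets.inc), not the actual implementation:

    // Sketch only: the method names below are taken from the call sites in
    // this diff; bodies and exact signatures are assumptions.
    template <typename AddrType>
    class RangeSet {
    public:
        void Add(AddrType base, size_t size);      // mark [base, base + size)
        void Subtract(AddrType base, size_t size); // unmark any overlap
        void Clear();
        bool Empty() const;

        // Visit every stored range as (start, end) pairs.
        template <typename Func>
        void ForEach(Func&& func) const;

        // Visit only the portions of stored ranges that intersect
        // [base, base + size), clamped to that window -- the replacement for
        // the hand-written ForEachInRangeSet helper deleted further below.
        template <typename Func>
        void ForEachInRange(AddrType base, size_t size, Func&& func) const;
    };

This would also explain the new out-of-line ~BufferCache(): declaring it in the header and defaulting it in buffer_cache.h, after common/range_sets.inc is included, is the usual pattern when a member such as RangeSet keeps its representation behind an incomplete type.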
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 80dbb81e7..240e9f015 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -13,25 +13,15 @@
 #include <unordered_map>
 #include <vector>
 
-#include <boost/container/small_vector.hpp>
-#define BOOST_NO_MT
-#include <boost/pool/detail/mutex.hpp>
-#undef BOOST_NO_MT
-#include <boost/icl/interval.hpp>
-#include <boost/icl/interval_base_set.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/icl/split_interval_map.hpp>
-#include <boost/pool/pool.hpp>
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/pool/poolfwd.hpp>
-
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "common/literals.h"
 #include "common/lru_cache.h"
 #include "common/microprofile.h"
+#include "common/range_sets.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "common/slot_vector.h"
 #include "video_core/buffer_cache/buffer_base.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
@@ -41,21 +31,15 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/surface.h"
-#include "video_core/texture_cache/slot_vector.h"
 #include "video_core/texture_cache/types.h"
 
-namespace boost {
-template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
-}
-
 namespace VideoCommon {
 
 MICROPROFILE_DECLARE(GPU_PrepareBuffers);
 MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
 MICROPROFILE_DECLARE(GPU_DownloadMemory);
 
-using BufferId = SlotId;
+using BufferId = Common::SlotId;
 
 using VideoCore::Surface::PixelFormat;
 using namespace Common::Literals;
@@ -184,7 +168,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
     static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
 
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
@@ -202,34 +185,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     using Async_Buffer = typename P::Async_Buffer;
     using MemoryTracker = typename P::MemoryTracker;
 
-    using IntervalCompare = std::less<DAddr>;
-    using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
-    using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
-    using IntervalSet = boost::icl::interval_set<DAddr>;
-    using IntervalType = typename IntervalSet::interval_type;
-
-    template <typename Type>
-    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
-        // types
-        typedef counter_add_functor<Type> type;
-        typedef boost::icl::identity_based_inplace_combine<Type> base_type;
-
-        // public member functions
-        void operator()(Type& current, const Type& added) const {
-            current += added;
-            if (current < base_type::identity_element()) {
-                current = base_type::identity_element();
-            }
-        }
-
-        // public static functions
-        static void version(Type&){};
-    };
-
-    using OverlapCombine = counter_add_functor<int>;
-    using OverlapSection = boost::icl::inter_section<int>;
-    using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
-
     struct OverlapResult {
         boost::container::small_vector<BufferId, 16> ids;
         DAddr begin;
@@ -240,6 +195,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
 public:
     explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
 
+    ~BufferCache();
+
     void TickFrame();
 
     void WriteMemory(DAddr device_addr, u64 size);
@@ -379,75 +336,6 @@ private:
         }
     }
 
-    template <typename Func>
-    void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            DAddr inter_addr_end = it->upper();
-            DAddr inter_addr = it->lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end);
-        }
-    }
-
-    template <typename Func>
-    void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
-                                 Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            auto& inter = it->first;
-            DAddr inter_addr_end = inter.upper();
-            DAddr inter_addr = inter.lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end, it->second);
-        }
-    }
-
-    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
-                                    const IntervalType search_interval, int subtract_value) {
-        bool any_removals = false;
-        current_range.add(std::make_pair(search_interval, subtract_value));
-        do {
-            any_removals = false;
-            auto it = current_range.lower_bound(search_interval);
-            if (it == current_range.end()) {
-                return;
-            }
-            auto end_it = current_range.upper_bound(search_interval);
-            for (; it != end_it; it++) {
-                if (it->second <= 0) {
-                    any_removals = true;
-                    current_range.erase(it);
-                    break;
-                }
-            }
-        } while (any_removals);
-    }
-
     static bool IsRangeGranular(DAddr device_addr, size_t size) {
         return (device_addr & ~Core::DEVICE_PAGEMASK) ==
                ((device_addr + size) & ~Core::DEVICE_PAGEMASK);
@@ -552,14 +440,14 @@ private:
552 440
553 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; 441 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
554 442
555 void ClearDownload(IntervalType subtract_interval); 443 void ClearDownload(DAddr base_addr, u64 size);
556 444
557 void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, 445 void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
558 std::span<const u8> inlined_buffer); 446 std::span<const u8> inlined_buffer);
559 447
560 Tegra::MaxwellDeviceMemoryManager& device_memory; 448 Tegra::MaxwellDeviceMemoryManager& device_memory;
561 449
562 SlotVector<Buffer> slot_buffers; 450 Common::SlotVector<Buffer> slot_buffers;
563 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 451 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
564 452
565 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; 453 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
@@ -567,13 +455,12 @@ private:
     u32 last_index_count = 0;
 
     MemoryTracker memory_tracker;
-    IntervalSet uncommitted_ranges;
-    IntervalSet common_ranges;
-    IntervalSet cached_ranges;
-    std::deque<IntervalSet> committed_ranges;
+    Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges;
+    Common::RangeSet<DAddr> gpu_modified_ranges;
+    std::deque<Common::RangeSet<DAddr>> committed_gpu_modified_ranges;
 
     // Async Buffers
-    OverlapCounter async_downloads;
+    Common::OverlapRangeSet<DAddr> async_downloads;
     std::deque<std::optional<Async_Buffer>> async_buffers;
     std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
     std::optional<Async_Buffer> current_buffer;
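
With these declarations, async_downloads changes from a boost::icl::split_interval_map<DAddr, int> maintained by hand (counter_add_functor, ForEachInOverlapCounter and RemoveEachInOverlapCounter, all deleted above) to a Common::OverlapRangeSet<DAddr> that owns the reference counting. A usage sketch, with semantics inferred from the buffer_cache.h call sites; the names come from this diff, the comments are assumptions:

    Common::OverlapRangeSet<DAddr> async_downloads;

    // Each queued download bumps a per-range counter.
    async_downloads.Add(device_addr, size);

    // Visit overlapping sub-ranges together with their current count.
    async_downloads.ForEachInRange(device_addr, size,
                                   [](DAddr start, DAddr end, s32 count) {
        // copy back [start, end) ...
    });

    // Decrement the counter; the callback fires for sub-ranges that drop to
    // zero, replacing the old "if (count == 1) common_ranges.subtract(...)".
    async_downloads.Subtract(device_addr, size, [](DAddr start, DAddr end) {
        // last pending download for [start, end) has landed ...
    });

    // Remove ranges outright, regardless of count (ClearDownload previously
    // forced this by adding -1024 to the counters).
    async_downloads.DeleteAll(device_addr, size);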
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 4861b123a..e1019f228 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -18,12 +18,12 @@
 
 #include "common/assert.h"
 #include "common/settings.h"
+#include "common/slot_vector.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/texture_cache/slot_vector.h"
 
 namespace VideoCore {
 enum class QueryType {
@@ -37,7 +37,7 @@ constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
 
 namespace VideoCommon {
 
-using AsyncJobId = SlotId;
+using AsyncJobId = Common::SlotId;
 
 static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
 
@@ -341,7 +341,7 @@ private:
     static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
     static constexpr unsigned YUZU_PAGEBITS = 12;
 
-    SlotVector<AsyncJob> slot_async_jobs;
+    Common::SlotVector<AsyncJob> slot_async_jobs;
 
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::MaxwellDeviceMemoryManager& device_memory;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index af34c272b..fd471e979 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -90,7 +90,7 @@ public:
     void PostCopyBarrier();
     void Finish();
 
-    void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
+    void TickFrame(Common::SlotVector<Buffer>&) noexcept {}
 
     void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
 
@@ -251,7 +251,6 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
 
     // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e54edcc2..d4165d8e4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -30,13 +30,13 @@ class Image;
 class ImageView;
 class Sampler;
 
+using Common::SlotVector;
 using VideoCommon::ImageId;
 using VideoCommon::ImageViewId;
 using VideoCommon::ImageViewType;
 using VideoCommon::NUM_RT;
 using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
-using VideoCommon::SlotVector;
 
 struct FormatProperties {
     GLenum compatibility_class;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 31001d142..e5e1e3ab6 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -368,7 +368,7 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
     return static_cast<u32>(device.GetStorageBufferAlignment());
 }
 
-void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
+void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept {
     for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
         it->ResetUsageTracking();
     }
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index e273f4988..efe960258 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -81,7 +81,7 @@ public:
                              ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                              DescriptorPool& descriptor_pool);
 
-    void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
+    void TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept;
 
     void Finish();
 
@@ -181,7 +181,6 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0dbde65d6..aaeb5ef93 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -20,11 +20,11 @@ struct ResolutionScalingInfo;
 
 namespace Vulkan {
 
+using Common::SlotVector;
 using VideoCommon::ImageId;
 using VideoCommon::NUM_RT;
 using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
-using VideoCommon::SlotVector;
 using VideoCore::Surface::PixelFormat;
 
 class BlitImageHelper;
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
deleted file mode 100644
index 3ffa2a661..000000000
--- a/src/video_core/texture_cache/slot_vector.h
+++ /dev/null
@@ -1,227 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <algorithm>
-#include <bit>
-#include <numeric>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/polyfill_ranges.h"
-
-namespace VideoCommon {
-
-struct SlotId {
-    static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
-
-    constexpr auto operator<=>(const SlotId&) const noexcept = default;
-
-    constexpr explicit operator bool() const noexcept {
-        return index != INVALID_INDEX;
-    }
-
-    u32 index = INVALID_INDEX;
-};
-
-template <class T>
-    requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
-class SlotVector {
-public:
-    class Iterator {
-        friend SlotVector<T>;
-
-    public:
-        constexpr Iterator() = default;
-
-        Iterator& operator++() noexcept {
-            const u64* const bitset = slot_vector->stored_bitset.data();
-            const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
-            if (id.index < size) {
-                do {
-                    ++id.index;
-                } while (id.index < size && !IsValid(bitset));
-                if (id.index == size) {
-                    id.index = SlotId::INVALID_INDEX;
-                }
-            }
-            return *this;
-        }
-
-        Iterator operator++(int) noexcept {
-            const Iterator copy{*this};
-            ++*this;
-            return copy;
-        }
-
-        bool operator==(const Iterator& other) const noexcept {
-            return id.index == other.id.index;
-        }
-
-        bool operator!=(const Iterator& other) const noexcept {
-            return id.index != other.id.index;
-        }
-
-        std::pair<SlotId, T*> operator*() const noexcept {
-            return {id, std::addressof((*slot_vector)[id])};
-        }
-
-        T* operator->() const noexcept {
-            return std::addressof((*slot_vector)[id]);
-        }
-
-    private:
-        Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
-            : slot_vector{slot_vector_}, id{id_} {}
-
-        bool IsValid(const u64* bitset) const noexcept {
-            return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
-        }
-
-        SlotVector<T>* slot_vector;
-        SlotId id;
-    };
-
-    ~SlotVector() noexcept {
-        size_t index = 0;
-        for (u64 bits : stored_bitset) {
-            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
-                if ((bits & 1) != 0) {
-                    values[index + bit].object.~T();
-                }
-            }
-            index += 64;
-        }
-        delete[] values;
-    }
-
-    [[nodiscard]] T& operator[](SlotId id) noexcept {
-        ValidateIndex(id);
-        return values[id.index].object;
-    }
-
-    [[nodiscard]] const T& operator[](SlotId id) const noexcept {
-        ValidateIndex(id);
-        return values[id.index].object;
-    }
-
-    template <typename... Args>
-    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
-        const u32 index = FreeValueIndex();
-        new (&values[index].object) T(std::forward<Args>(args)...);
-        SetStorageBit(index);
-
-        return SlotId{index};
-    }
-
-    void erase(SlotId id) noexcept {
-        values[id.index].object.~T();
-        free_list.push_back(id.index);
-        ResetStorageBit(id.index);
-    }
-
-    [[nodiscard]] Iterator begin() noexcept {
-        const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
-        if (it == stored_bitset.end()) {
-            return end();
-        }
-        const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
-        const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
-        return Iterator(this, first_id);
-    }
-
-    [[nodiscard]] Iterator end() noexcept {
-        return Iterator(this, SlotId{SlotId::INVALID_INDEX});
-    }
-
-    [[nodiscard]] size_t size() const noexcept {
-        return values_capacity - free_list.size();
-    }
-
-private:
-    struct NonTrivialDummy {
-        NonTrivialDummy() noexcept {}
-    };
-
-    union Entry {
-        Entry() noexcept : dummy{} {}
-        ~Entry() noexcept {}
-
-        NonTrivialDummy dummy;
-        T object;
-    };
-
-    void SetStorageBit(u32 index) noexcept {
-        stored_bitset[index / 64] |= u64(1) << (index % 64);
-    }
-
-    void ResetStorageBit(u32 index) noexcept {
-        stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
-    }
-
-    bool ReadStorageBit(u32 index) noexcept {
-        return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
-    }
-
-    void ValidateIndex(SlotId id) const noexcept {
-        DEBUG_ASSERT(id);
-        DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
-        DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
-    }
-
-    [[nodiscard]] u32 FreeValueIndex() noexcept {
-        if (free_list.empty()) {
-            Reserve(values_capacity ? (values_capacity << 1) : 1);
-        }
-        const u32 free_index = free_list.back();
-        free_list.pop_back();
-        return free_index;
-    }
-
-    void Reserve(size_t new_capacity) noexcept {
-        Entry* const new_values = new Entry[new_capacity];
-        size_t index = 0;
-        for (u64 bits : stored_bitset) {
-            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
-                const size_t i = index + bit;
-                if ((bits & 1) == 0) {
-                    continue;
-                }
-                T& old_value = values[i].object;
-                new (&new_values[i].object) T(std::move(old_value));
-                old_value.~T();
-            }
-            index += 64;
-        }
-
-        stored_bitset.resize((new_capacity + 63) / 64);
-
-        const size_t old_free_size = free_list.size();
-        free_list.resize(old_free_size + (new_capacity - values_capacity));
-        std::iota(free_list.begin() + old_free_size, free_list.end(),
-                  static_cast<u32>(values_capacity));
-
-        delete[] values;
-        values = new_values;
-        values_capacity = new_capacity;
-    }
-
-    Entry* values = nullptr;
-    size_t values_capacity = 0;
-
-    std::vector<u64> stored_bitset;
-    std::vector<u32> free_list;
-};
-
-} // namespace VideoCommon
-
-template <>
-struct std::hash<VideoCommon::SlotId> {
-    size_t operator()(const VideoCommon::SlotId& id) const noexcept {
-        return std::hash<u32>{}(id.index);
-    }
-};
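
The container itself is unchanged by this deletion: SlotVector and SlotId move verbatim from namespace VideoCommon in video_core/texture_cache/slot_vector.h to namespace Common in common/slot_vector.h, so the query cache and buffer cache can use them without pulling in the texture cache. Typical usage, for reference; Image here is a hypothetical stand-in element type:

    #include "common/slot_vector.h"

    struct Image {
        int width = 0;
        int height = 0;
    };

    Common::SlotVector<Image> slot_images;

    // insert() forwards its arguments to T's constructor in a free slot and
    // returns a stable SlotId handle that survives growth of the backing
    // storage (elements live in a bitset-tracked array of unions).
    const Common::SlotId id = slot_images.insert();
    slot_images[id].width = 640; // O(1) lookup, validity debug-asserted
    slot_images.erase(id);       // destroys the object, recycles the slot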
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index e7b910121..da98a634b 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -21,6 +21,7 @@
 #include "common/lru_cache.h"
 #include "common/polyfill_ranges.h"
 #include "common/scratch_buffer.h"
+#include "common/slot_vector.h"
 #include "common/thread_worker.h"
 #include "video_core/compatible_formats.h"
 #include "video_core/control/channel_state_cache.h"
@@ -32,7 +33,6 @@
 #include "video_core/texture_cache/image_info.h"
 #include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/render_targets.h"
-#include "video_core/texture_cache/slot_vector.h"
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"
 
@@ -451,16 +451,16 @@ private:
     struct PendingDownload {
         bool is_swizzle;
         size_t async_buffer_id;
-        SlotId object_id;
+        Common::SlotId object_id;
     };
 
-    SlotVector<Image> slot_images;
-    SlotVector<ImageMapView> slot_map_views;
-    SlotVector<ImageView> slot_image_views;
-    SlotVector<ImageAlloc> slot_image_allocs;
-    SlotVector<Sampler> slot_samplers;
-    SlotVector<Framebuffer> slot_framebuffers;
-    SlotVector<BufferDownload> slot_buffer_downloads;
+    Common::SlotVector<Image> slot_images;
+    Common::SlotVector<ImageMapView> slot_map_views;
+    Common::SlotVector<ImageView> slot_image_views;
+    Common::SlotVector<ImageAlloc> slot_image_allocs;
+    Common::SlotVector<Sampler> slot_samplers;
+    Common::SlotVector<Framebuffer> slot_framebuffers;
+    Common::SlotVector<BufferDownload> slot_buffer_downloads;
 
     // TODO: This data structure is not optimal and it should be reworked
 
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 0453456b4..07c304386 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -5,21 +5,21 @@
 
 #include "common/common_funcs.h"
 #include "common/common_types.h"
-#include "video_core/texture_cache/slot_vector.h"
+#include "common/slot_vector.h"
 
 namespace VideoCommon {
 
 constexpr size_t NUM_RT = 8;
 constexpr size_t MAX_MIP_LEVELS = 14;
 
-constexpr SlotId CORRUPT_ID{0xfffffffe};
+constexpr Common::SlotId CORRUPT_ID{0xfffffffe};
 
-using ImageId = SlotId;
-using ImageMapId = SlotId;
-using ImageViewId = SlotId;
-using ImageAllocId = SlotId;
-using SamplerId = SlotId;
-using FramebufferId = SlotId;
+using ImageId = Common::SlotId;
+using ImageMapId = Common::SlotId;
+using ImageViewId = Common::SlotId;
+using ImageAllocId = Common::SlotId;
+using SamplerId = Common::SlotId;
+using FramebufferId = Common::SlotId;
 
 /// Fake image ID for null image views
 constexpr ImageId NULL_IMAGE_ID{0};