diff options
| author | 2021-06-23 08:03:01 -0400 | |
|---|---|---|
| committer | 2021-06-23 08:03:01 -0400 | |
| commit | 17fff10e06e7935522a5a69705b9a750761aab79 (patch) | |
| tree | 7e7b3ae9fedbc0fed85f6c5c58e92e8d047efd87 /src/video_core/buffer_cache | |
| parent | Merge pull request #6508 from ReinUsesLisp/bootmanager-stop-token (diff) | |
| parent | Reaper: Set minimum cleaning limit on OGL. (diff) | |
| download | yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.gz yuzu-17fff10e06e7935522a5a69705b9a750761aab79.tar.xz yuzu-17fff10e06e7935522a5a69705b9a750761aab79.zip | |
Merge pull request #6465 from FernandoS27/sex-on-the-beach
GPU: Implement a garbage collector for GPU Caches (project Reaper+)
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 11 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 161 |
2 files changed, 127 insertions, 45 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a39505903..b121d36a3 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -256,6 +256,16 @@ public: | |||
| 256 | stream_score += score; | 256 | stream_score += score; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | /// Sets the new frame tick | ||
| 260 | void SetFrameTick(u64 new_frame_tick) noexcept { | ||
| 261 | frame_tick = new_frame_tick; | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the new frame tick | ||
| 265 | [[nodiscard]] u64 FrameTick() const noexcept { | ||
| 266 | return frame_tick; | ||
| 267 | } | ||
| 268 | |||
| 259 | /// Returns the likeliness of this being a stream buffer | 269 | /// Returns the likeliness of this being a stream buffer |
| 260 | [[nodiscard]] int StreamScore() const noexcept { | 270 | [[nodiscard]] int StreamScore() const noexcept { |
| 261 | return stream_score; | 271 | return stream_score; |
| @@ -586,6 +596,7 @@ private: | |||
| 586 | RasterizerInterface* rasterizer = nullptr; | 596 | RasterizerInterface* rasterizer = nullptr; |
| 587 | VAddr cpu_addr = 0; | 597 | VAddr cpu_addr = 0; |
| 588 | Words words; | 598 | Words words; |
| 599 | u64 frame_tick = 0; | ||
| 589 | BufferFlagBits flags{}; | 600 | BufferFlagBits flags{}; |
| 590 | int stream_score = 0; | 601 | int stream_score = 0; |
| 591 | }; | 602 | }; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d371b842f..6d04d00da 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | 17 | #include <boost/container/small_vector.hpp> |
| 18 | 18 | ||
| 19 | #include "common/common_sizes.h" | ||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | #include "common/div_ceil.h" | 21 | #include "common/div_ceil.h" |
| 21 | #include "common/microprofile.h" | 22 | #include "common/microprofile.h" |
| @@ -65,6 +66,9 @@ class BufferCache { | |||
| 65 | 66 | ||
| 66 | static constexpr BufferId NULL_BUFFER_ID{0}; | 67 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 67 | 68 | ||
| 69 | static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB; | ||
| 70 | static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB; | ||
| 71 | |||
| 68 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 72 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 69 | 73 | ||
| 70 | using Runtime = typename P::Runtime; | 74 | using Runtime = typename P::Runtime; |
| @@ -102,6 +106,8 @@ public: | |||
| 102 | 106 | ||
| 103 | void TickFrame(); | 107 | void TickFrame(); |
| 104 | 108 | ||
| 109 | void RunGarbageCollector(); | ||
| 110 | |||
| 105 | void WriteMemory(VAddr cpu_addr, u64 size); | 111 | void WriteMemory(VAddr cpu_addr, u64 size); |
| 106 | 112 | ||
| 107 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 113 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| @@ -243,6 +249,8 @@ private: | |||
| 243 | template <bool insert> | 249 | template <bool insert> |
| 244 | void ChangeRegister(BufferId buffer_id); | 250 | void ChangeRegister(BufferId buffer_id); |
| 245 | 251 | ||
| 252 | void TouchBuffer(Buffer& buffer) const noexcept; | ||
| 253 | |||
| 246 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 254 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 247 | 255 | ||
| 248 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 256 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| @@ -255,6 +263,10 @@ private: | |||
| 255 | 263 | ||
| 256 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | 264 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); |
| 257 | 265 | ||
| 266 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 267 | |||
| 268 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 269 | |||
| 258 | void DeleteBuffer(BufferId buffer_id); | 270 | void DeleteBuffer(BufferId buffer_id); |
| 259 | 271 | ||
| 260 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | 272 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); |
| @@ -319,6 +331,10 @@ private: | |||
| 319 | size_t immediate_buffer_capacity = 0; | 331 | size_t immediate_buffer_capacity = 0; |
| 320 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 332 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| 321 | 333 | ||
| 334 | typename SlotVector<Buffer>::Iterator deletion_iterator; | ||
| 335 | u64 frame_tick = 0; | ||
| 336 | u64 total_used_memory = 0; | ||
| 337 | |||
| 322 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | 338 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |
| 323 | }; | 339 | }; |
| 324 | 340 | ||
| @@ -332,6 +348,28 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 332 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 348 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { |
| 333 | // Ensure the first slot is used for the null buffer | 349 | // Ensure the first slot is used for the null buffer |
| 334 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 350 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 351 | deletion_iterator = slot_buffers.end(); | ||
| 352 | } | ||
| 353 | |||
| 354 | template <class P> | ||
| 355 | void BufferCache<P>::RunGarbageCollector() { | ||
| 356 | const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; | ||
| 357 | const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; | ||
| 358 | int num_iterations = aggressive_gc ? 64 : 32; | ||
| 359 | for (; num_iterations > 0; --num_iterations) { | ||
| 360 | if (deletion_iterator == slot_buffers.end()) { | ||
| 361 | deletion_iterator = slot_buffers.begin(); | ||
| 362 | } | ||
| 363 | ++deletion_iterator; | ||
| 364 | if (deletion_iterator == slot_buffers.end()) { | ||
| 365 | break; | ||
| 366 | } | ||
| 367 | const auto [buffer_id, buffer] = *deletion_iterator; | ||
| 368 | if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { | ||
| 369 | DownloadBufferMemory(*buffer); | ||
| 370 | DeleteBuffer(buffer_id); | ||
| 371 | } | ||
| 372 | } | ||
| 335 | } | 373 | } |
| 336 | 374 | ||
| 337 | template <class P> | 375 | template <class P> |
| @@ -349,6 +387,10 @@ void BufferCache<P>::TickFrame() { | |||
| 349 | const bool skip_preferred = hits * 256 < shots * 251; | 387 | const bool skip_preferred = hits * 256 < shots * 251; |
| 350 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 388 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 351 | 389 | ||
| 390 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { | ||
| 391 | RunGarbageCollector(); | ||
| 392 | } | ||
| 393 | ++frame_tick; | ||
| 352 | delayed_destruction_ring.Tick(); | 394 | delayed_destruction_ring.Tick(); |
| 353 | } | 395 | } |
| 354 | 396 | ||
| @@ -371,50 +413,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||
| 371 | 413 | ||
| 372 | template <class P> | 414 | template <class P> |
| 373 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 415 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 374 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 416 | ForEachBufferInRange(cpu_addr, size, |
| 375 | boost::container::small_vector<BufferCopy, 1> copies; | 417 | [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); |
| 376 | u64 total_size_bytes = 0; | ||
| 377 | u64 largest_copy = 0; | ||
| 378 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 379 | copies.push_back(BufferCopy{ | ||
| 380 | .src_offset = range_offset, | ||
| 381 | .dst_offset = total_size_bytes, | ||
| 382 | .size = range_size, | ||
| 383 | }); | ||
| 384 | total_size_bytes += range_size; | ||
| 385 | largest_copy = std::max(largest_copy, range_size); | ||
| 386 | }); | ||
| 387 | if (total_size_bytes == 0) { | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 391 | |||
| 392 | if constexpr (USE_MEMORY_MAPS) { | ||
| 393 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 394 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 395 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 396 | for (BufferCopy& copy : copies) { | ||
| 397 | // Modify copies to have the staging offset in mind | ||
| 398 | copy.dst_offset += download_staging.offset; | ||
| 399 | } | ||
| 400 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 401 | runtime.Finish(); | ||
| 402 | for (const BufferCopy& copy : copies) { | ||
| 403 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 404 | // Undo the modified offset | ||
| 405 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 406 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 407 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 408 | } | ||
| 409 | } else { | ||
| 410 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 411 | for (const BufferCopy& copy : copies) { | ||
| 412 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 413 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 414 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | }); | ||
| 418 | } | 418 | } |
| 419 | 419 | ||
| 420 | template <class P> | 420 | template <class P> |
| @@ -640,6 +640,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 640 | template <class P> | 640 | template <class P> |
| 641 | void BufferCache<P>::BindHostIndexBuffer() { | 641 | void BufferCache<P>::BindHostIndexBuffer() { |
| 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
| 643 | TouchBuffer(buffer); | ||
| 643 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 644 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 644 | const u32 size = index_buffer.size; | 645 | const u32 size = index_buffer.size; |
| 645 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 646 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| @@ -658,6 +659,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 658 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 659 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 659 | const Binding& binding = vertex_buffers[index]; | 660 | const Binding& binding = vertex_buffers[index]; |
| 660 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 661 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 662 | TouchBuffer(buffer); | ||
| 661 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 663 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |
| 662 | if (!flags[Dirty::VertexBuffer0 + index]) { | 664 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 663 | continue; | 665 | continue; |
| @@ -693,6 +695,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 693 | const VAddr cpu_addr = binding.cpu_addr; | 695 | const VAddr cpu_addr = binding.cpu_addr; |
| 694 | const u32 size = binding.size; | 696 | const u32 size = binding.size; |
| 695 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 697 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 698 | TouchBuffer(buffer); | ||
| 696 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 699 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 697 | size <= uniform_buffer_skip_cache_size && | 700 | size <= uniform_buffer_skip_cache_size && |
| 698 | !buffer.IsRegionGpuModified(cpu_addr, size); | 701 | !buffer.IsRegionGpuModified(cpu_addr, size); |
| @@ -744,6 +747,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 744 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | 747 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |
| 745 | const Binding& binding = storage_buffers[stage][index]; | 748 | const Binding& binding = storage_buffers[stage][index]; |
| 746 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 749 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 750 | TouchBuffer(buffer); | ||
| 747 | const u32 size = binding.size; | 751 | const u32 size = binding.size; |
| 748 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 752 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 749 | 753 | ||
| @@ -766,6 +770,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 766 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | 770 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 767 | const Binding& binding = transform_feedback_buffers[index]; | 771 | const Binding& binding = transform_feedback_buffers[index]; |
| 768 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 772 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 773 | TouchBuffer(buffer); | ||
| 769 | const u32 size = binding.size; | 774 | const u32 size = binding.size; |
| 770 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 775 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 771 | 776 | ||
| @@ -784,6 +789,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 784 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 789 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { |
| 785 | const Binding& binding = compute_uniform_buffers[index]; | 790 | const Binding& binding = compute_uniform_buffers[index]; |
| 786 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 791 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 792 | TouchBuffer(buffer); | ||
| 787 | const u32 size = binding.size; | 793 | const u32 size = binding.size; |
| 788 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 794 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 789 | 795 | ||
| @@ -803,6 +809,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 803 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 809 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 804 | const Binding& binding = compute_storage_buffers[index]; | 810 | const Binding& binding = compute_storage_buffers[index]; |
| 805 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 811 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 812 | TouchBuffer(buffer); | ||
| 806 | const u32 size = binding.size; | 813 | const u32 size = binding.size; |
| 807 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 814 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 808 | 815 | ||
| @@ -1101,6 +1108,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1101 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1108 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1102 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1109 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1103 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1110 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1111 | TouchBuffer(slot_buffers[new_buffer_id]); | ||
| 1104 | for (const BufferId overlap_id : overlap.ids) { | 1112 | for (const BufferId overlap_id : overlap.ids) { |
| 1105 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1113 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1106 | } | 1114 | } |
| @@ -1122,8 +1130,14 @@ template <class P> | |||
| 1122 | template <bool insert> | 1130 | template <bool insert> |
| 1123 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | 1131 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { |
| 1124 | const Buffer& buffer = slot_buffers[buffer_id]; | 1132 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 1133 | const auto size = buffer.SizeBytes(); | ||
| 1134 | if (insert) { | ||
| 1135 | total_used_memory += Common::AlignUp(size, 1024); | ||
| 1136 | } else { | ||
| 1137 | total_used_memory -= Common::AlignUp(size, 1024); | ||
| 1138 | } | ||
| 1125 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1139 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1126 | const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); | 1140 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| 1127 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; | 1141 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; |
| 1128 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); | 1142 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); |
| 1129 | for (u64 page = page_begin; page != page_end; ++page) { | 1143 | for (u64 page = page_begin; page != page_end; ++page) { |
| @@ -1136,6 +1150,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1136 | } | 1150 | } |
| 1137 | 1151 | ||
| 1138 | template <class P> | 1152 | template <class P> |
| 1153 | void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | ||
| 1154 | buffer.SetFrameTick(frame_tick); | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | template <class P> | ||
| 1139 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1158 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1140 | if (buffer.CpuAddr() == 0) { | 1159 | if (buffer.CpuAddr() == 0) { |
| 1141 | return true; | 1160 | return true; |
| @@ -1212,6 +1231,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1212 | } | 1231 | } |
| 1213 | 1232 | ||
| 1214 | template <class P> | 1233 | template <class P> |
| 1234 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | ||
| 1235 | DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | template <class P> | ||
| 1239 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | ||
| 1240 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 1241 | u64 total_size_bytes = 0; | ||
| 1242 | u64 largest_copy = 0; | ||
| 1243 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1244 | copies.push_back(BufferCopy{ | ||
| 1245 | .src_offset = range_offset, | ||
| 1246 | .dst_offset = total_size_bytes, | ||
| 1247 | .size = range_size, | ||
| 1248 | }); | ||
| 1249 | total_size_bytes += range_size; | ||
| 1250 | largest_copy = std::max(largest_copy, range_size); | ||
| 1251 | }); | ||
| 1252 | if (total_size_bytes == 0) { | ||
| 1253 | return; | ||
| 1254 | } | ||
| 1255 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 1256 | |||
| 1257 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1258 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 1259 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 1260 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 1261 | for (BufferCopy& copy : copies) { | ||
| 1262 | // Modify copies to have the staging offset in mind | ||
| 1263 | copy.dst_offset += download_staging.offset; | ||
| 1264 | } | ||
| 1265 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 1266 | runtime.Finish(); | ||
| 1267 | for (const BufferCopy& copy : copies) { | ||
| 1268 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1269 | // Undo the modified offset | ||
| 1270 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 1271 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 1272 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 1273 | } | ||
| 1274 | } else { | ||
| 1275 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1276 | for (const BufferCopy& copy : copies) { | ||
| 1277 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 1278 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1279 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1280 | } | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | |||
| 1284 | template <class P> | ||
| 1215 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1285 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { |
| 1216 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1286 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1217 | if (binding.buffer_id == buffer_id) { | 1287 | if (binding.buffer_id == buffer_id) { |
| @@ -1236,6 +1306,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1236 | 1306 | ||
| 1237 | Unregister(buffer_id); | 1307 | Unregister(buffer_id); |
| 1238 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1308 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| 1309 | slot_buffers.erase(buffer_id); | ||
| 1239 | 1310 | ||
| 1240 | NotifyBufferDeletion(); | 1311 | NotifyBufferDeletion(); |
| 1241 | } | 1312 | } |