diff options
| author | 2021-01-19 21:59:53 -0300 | |
|---|---|---|
| committer | 2021-06-16 21:35:02 +0200 | |
| commit | a11bc4a382ebca52bdf0aab1a9474351e8d85cef (patch) | |
| tree | 6392fde60f5ee2e414733a193329e18d7f7fde42 /src | |
| parent | vulkan_memory_allocator: Release allocations with no commits (diff) | |
| download | yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.gz yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.tar.xz yuzu-a11bc4a382ebca52bdf0aab1a9474351e8d85cef.zip | |
Initial Reaper Setup
WIP
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 11 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 138 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/slot_vector.h | 70 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 44 |
6 files changed, 226 insertions, 56 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a39505903..b121d36a3 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -256,6 +256,16 @@ public: | |||
| 256 | stream_score += score; | 256 | stream_score += score; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | /// Sets the new frame tick | ||
| 260 | void SetFrameTick(u64 new_frame_tick) noexcept { | ||
| 261 | frame_tick = new_frame_tick; | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the current frame tick | ||
| 265 | [[nodiscard]] u64 FrameTick() const noexcept { | ||
| 266 | return frame_tick; | ||
| 267 | } | ||
| 268 | |||
| 259 | /// Returns the likeliness of this being a stream buffer | 269 | /// Returns the likeliness of this being a stream buffer |
| 260 | [[nodiscard]] int StreamScore() const noexcept { | 270 | [[nodiscard]] int StreamScore() const noexcept { |
| 261 | return stream_score; | 271 | return stream_score; |
| @@ -586,6 +596,7 @@ private: | |||
| 586 | RasterizerInterface* rasterizer = nullptr; | 596 | RasterizerInterface* rasterizer = nullptr; |
| 587 | VAddr cpu_addr = 0; | 597 | VAddr cpu_addr = 0; |
| 588 | Words words; | 598 | Words words; |
| 599 | u64 frame_tick = 0; | ||
| 589 | BufferFlagBits flags{}; | 600 | BufferFlagBits flags{}; |
| 590 | int stream_score = 0; | 601 | int stream_score = 0; |
| 591 | }; | 602 | }; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d371b842f..ecb7d3dee 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -243,6 +243,8 @@ private: | |||
| 243 | template <bool insert> | 243 | template <bool insert> |
| 244 | void ChangeRegister(BufferId buffer_id); | 244 | void ChangeRegister(BufferId buffer_id); |
| 245 | 245 | ||
| 246 | void TouchBuffer(Buffer& buffer) const noexcept; | ||
| 247 | |||
| 246 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 248 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 247 | 249 | ||
| 248 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 250 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| @@ -255,6 +257,10 @@ private: | |||
| 255 | 257 | ||
| 256 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | 258 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); |
| 257 | 259 | ||
| 260 | void DownloadBufferMemory(Buffer& buffer); | ||
| 261 | |||
| 262 | void DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size); | ||
| 263 | |||
| 258 | void DeleteBuffer(BufferId buffer_id); | 264 | void DeleteBuffer(BufferId buffer_id); |
| 259 | 265 | ||
| 260 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | 266 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); |
| @@ -319,6 +325,9 @@ private: | |||
| 319 | size_t immediate_buffer_capacity = 0; | 325 | size_t immediate_buffer_capacity = 0; |
| 320 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 326 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| 321 | 327 | ||
| 328 | typename SlotVector<Buffer>::Iterator deletion_iterator; | ||
| 329 | u64 frame_tick = 0; | ||
| 330 | |||
| 322 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | 331 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |
| 323 | }; | 332 | }; |
| 324 | 333 | ||
| @@ -332,6 +341,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 332 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 341 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { |
| 333 | // Ensure the first slot is used for the null buffer | 342 | // Ensure the first slot is used for the null buffer |
| 334 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 343 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 344 | deletion_iterator = slot_buffers.end(); | ||
| 335 | } | 345 | } |
| 336 | 346 | ||
| 337 | template <class P> | 347 | template <class P> |
| @@ -349,7 +359,24 @@ void BufferCache<P>::TickFrame() { | |||
| 349 | const bool skip_preferred = hits * 256 < shots * 251; | 359 | const bool skip_preferred = hits * 256 < shots * 251; |
| 350 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 360 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 351 | 361 | ||
| 362 | static constexpr u64 ticks_to_destroy = 120; | ||
| 363 | int num_iterations = 32; | ||
| 364 | for (; num_iterations > 0; --num_iterations) { | ||
| 365 | if (deletion_iterator == slot_buffers.end()) { | ||
| 366 | deletion_iterator = slot_buffers.begin(); | ||
| 367 | } | ||
| 368 | ++deletion_iterator; | ||
| 369 | if (deletion_iterator == slot_buffers.end()) { | ||
| 370 | break; | ||
| 371 | } | ||
| 372 | const auto [buffer_id, buffer] = *deletion_iterator; | ||
| 373 | if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { | ||
| 374 | DownloadBufferMemory(*buffer); | ||
| 375 | DeleteBuffer(buffer_id); | ||
| 376 | } | ||
| 377 | } | ||
| 352 | delayed_destruction_ring.Tick(); | 378 | delayed_destruction_ring.Tick(); |
| 379 | ++frame_tick; | ||
| 353 | } | 380 | } |
| 354 | 381 | ||
| 355 | template <class P> | 382 | template <class P> |
| @@ -371,50 +398,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||
| 371 | 398 | ||
| 372 | template <class P> | 399 | template <class P> |
| 373 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 400 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 374 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 401 | ForEachBufferInRange(cpu_addr, size, |
| 375 | boost::container::small_vector<BufferCopy, 1> copies; | 402 | [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); |
| 376 | u64 total_size_bytes = 0; | ||
| 377 | u64 largest_copy = 0; | ||
| 378 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 379 | copies.push_back(BufferCopy{ | ||
| 380 | .src_offset = range_offset, | ||
| 381 | .dst_offset = total_size_bytes, | ||
| 382 | .size = range_size, | ||
| 383 | }); | ||
| 384 | total_size_bytes += range_size; | ||
| 385 | largest_copy = std::max(largest_copy, range_size); | ||
| 386 | }); | ||
| 387 | if (total_size_bytes == 0) { | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 391 | |||
| 392 | if constexpr (USE_MEMORY_MAPS) { | ||
| 393 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 394 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 395 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 396 | for (BufferCopy& copy : copies) { | ||
| 397 | // Modify copies to have the staging offset in mind | ||
| 398 | copy.dst_offset += download_staging.offset; | ||
| 399 | } | ||
| 400 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 401 | runtime.Finish(); | ||
| 402 | for (const BufferCopy& copy : copies) { | ||
| 403 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 404 | // Undo the modified offset | ||
| 405 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 406 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 407 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 408 | } | ||
| 409 | } else { | ||
| 410 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 411 | for (const BufferCopy& copy : copies) { | ||
| 412 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 413 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 414 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | }); | ||
| 418 | } | 403 | } |
| 419 | 404 | ||
| 420 | template <class P> | 405 | template <class P> |
| @@ -640,6 +625,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 640 | template <class P> | 625 | template <class P> |
| 641 | void BufferCache<P>::BindHostIndexBuffer() { | 626 | void BufferCache<P>::BindHostIndexBuffer() { |
| 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 627 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
| 628 | TouchBuffer(buffer); | ||
| 643 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 629 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 644 | const u32 size = index_buffer.size; | 630 | const u32 size = index_buffer.size; |
| 645 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 631 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| @@ -658,6 +644,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 658 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 644 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 659 | const Binding& binding = vertex_buffers[index]; | 645 | const Binding& binding = vertex_buffers[index]; |
| 660 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 646 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 647 | TouchBuffer(buffer); | ||
| 661 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 648 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |
| 662 | if (!flags[Dirty::VertexBuffer0 + index]) { | 649 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 663 | continue; | 650 | continue; |
| @@ -693,6 +680,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 693 | const VAddr cpu_addr = binding.cpu_addr; | 680 | const VAddr cpu_addr = binding.cpu_addr; |
| 694 | const u32 size = binding.size; | 681 | const u32 size = binding.size; |
| 695 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 682 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 683 | TouchBuffer(buffer); | ||
| 696 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 684 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 697 | size <= uniform_buffer_skip_cache_size && | 685 | size <= uniform_buffer_skip_cache_size && |
| 698 | !buffer.IsRegionGpuModified(cpu_addr, size); | 686 | !buffer.IsRegionGpuModified(cpu_addr, size); |
| @@ -744,6 +732,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 744 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | 732 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |
| 745 | const Binding& binding = storage_buffers[stage][index]; | 733 | const Binding& binding = storage_buffers[stage][index]; |
| 746 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 734 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 735 | TouchBuffer(buffer); | ||
| 747 | const u32 size = binding.size; | 736 | const u32 size = binding.size; |
| 748 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 737 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 749 | 738 | ||
| @@ -766,6 +755,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 766 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | 755 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 767 | const Binding& binding = transform_feedback_buffers[index]; | 756 | const Binding& binding = transform_feedback_buffers[index]; |
| 768 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 757 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 758 | TouchBuffer(buffer); | ||
| 769 | const u32 size = binding.size; | 759 | const u32 size = binding.size; |
| 770 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 760 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 771 | 761 | ||
| @@ -784,6 +774,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 784 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 774 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { |
| 785 | const Binding& binding = compute_uniform_buffers[index]; | 775 | const Binding& binding = compute_uniform_buffers[index]; |
| 786 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 776 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 777 | TouchBuffer(buffer); | ||
| 787 | const u32 size = binding.size; | 778 | const u32 size = binding.size; |
| 788 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 779 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 789 | 780 | ||
| @@ -803,6 +794,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 803 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 794 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 804 | const Binding& binding = compute_storage_buffers[index]; | 795 | const Binding& binding = compute_storage_buffers[index]; |
| 805 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 796 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 797 | TouchBuffer(buffer); | ||
| 806 | const u32 size = binding.size; | 798 | const u32 size = binding.size; |
| 807 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 799 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 808 | 800 | ||
| @@ -1101,6 +1093,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1101 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1093 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1102 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1094 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1103 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1095 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1096 | TouchBuffer(slot_buffers[new_buffer_id]); | ||
| 1104 | for (const BufferId overlap_id : overlap.ids) { | 1097 | for (const BufferId overlap_id : overlap.ids) { |
| 1105 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1098 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1106 | } | 1099 | } |
| @@ -1136,6 +1129,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1136 | } | 1129 | } |
| 1137 | 1130 | ||
| 1138 | template <class P> | 1131 | template <class P> |
| 1132 | void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | ||
| 1133 | buffer.SetFrameTick(frame_tick); | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | template <class P> | ||
| 1139 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1137 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1140 | if (buffer.CpuAddr() == 0) { | 1138 | if (buffer.CpuAddr() == 0) { |
| 1141 | return true; | 1139 | return true; |
| @@ -1212,6 +1210,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1212 | } | 1210 | } |
| 1213 | 1211 | ||
| 1214 | template <class P> | 1212 | template <class P> |
| 1213 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | ||
| 1214 | DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | template <class P> | ||
| 1218 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | ||
| 1219 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 1220 | u64 total_size_bytes = 0; | ||
| 1221 | u64 largest_copy = 0; | ||
| 1222 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1223 | copies.push_back(BufferCopy{ | ||
| 1224 | .src_offset = range_offset, | ||
| 1225 | .dst_offset = total_size_bytes, | ||
| 1226 | .size = range_size, | ||
| 1227 | }); | ||
| 1228 | total_size_bytes += range_size; | ||
| 1229 | largest_copy = std::max(largest_copy, range_size); | ||
| 1230 | }); | ||
| 1231 | if (total_size_bytes == 0) { | ||
| 1232 | return; | ||
| 1233 | } | ||
| 1234 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 1235 | |||
| 1236 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1237 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 1238 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 1239 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 1240 | for (BufferCopy& copy : copies) { | ||
| 1241 | // Modify copies to have the staging offset in mind | ||
| 1242 | copy.dst_offset += download_staging.offset; | ||
| 1243 | } | ||
| 1244 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 1245 | runtime.Finish(); | ||
| 1246 | for (const BufferCopy& copy : copies) { | ||
| 1247 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1248 | // Undo the modified offset | ||
| 1249 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 1250 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 1251 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 1252 | } | ||
| 1253 | } else { | ||
| 1254 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1255 | for (const BufferCopy& copy : copies) { | ||
| 1256 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 1257 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1258 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1259 | } | ||
| 1260 | } | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | template <class P> | ||
| 1215 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1264 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { |
| 1216 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1265 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1217 | if (binding.buffer_id == buffer_id) { | 1266 | if (binding.buffer_id == buffer_id) { |
| @@ -1236,6 +1285,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1236 | 1285 | ||
| 1237 | Unregister(buffer_id); | 1286 | Unregister(buffer_id); |
| 1238 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1287 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| 1288 | slot_buffers.erase(buffer_id); | ||
| 1239 | 1289 | ||
| 1240 | NotifyBufferDeletion(); | 1290 | NotifyBufferDeletion(); |
| 1241 | } | 1291 | } |
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 9914926b3..bd0e7e64e 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -113,6 +113,23 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie | |||
| 113 | image_view_ids.push_back(image_view_id); | 113 | image_view_ids.push_back(image_view_id); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | bool ImageBase::IsSafeDownload() const noexcept { | ||
| 117 | // Skip images that were not modified from the GPU | ||
| 118 | if (False(flags & ImageFlagBits::GpuModified)) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | // Skip images that .are. modified from the CPU | ||
| 122 | // We don't want to write sensitive data from the guest | ||
| 123 | if (True(flags & ImageFlagBits::CpuModified)) { | ||
| 124 | return false; | ||
| 125 | } | ||
| 126 | if (info.num_samples > 1) { | ||
| 127 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | return true; | ||
| 131 | } | ||
| 132 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | 133 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { |
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | 134 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; |
| 118 | ASSERT(lhs.info.type == rhs.info.type); | 135 | ASSERT(lhs.info.type == rhs.info.type); |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index b7f3b7e43..0f69d8a32 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -44,6 +44,8 @@ struct ImageBase { | |||
| 44 | 44 | ||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); |
| 46 | 46 | ||
| 47 | [[nodiscard]] bool IsSafeDownload() const noexcept; | ||
| 48 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | 49 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { |
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | 50 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; |
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | 51 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; |
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index eae3be6ea..1259e8263 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bit> | ||
| 8 | #include <concepts> | 9 | #include <concepts> |
| 9 | #include <numeric> | 10 | #include <numeric> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| @@ -32,6 +33,60 @@ template <class T> | |||
| 32 | requires std::is_nothrow_move_assignable_v<T>&& | 33 | requires std::is_nothrow_move_assignable_v<T>&& |
| 33 | std::is_nothrow_move_constructible_v<T> class SlotVector { | 34 | std::is_nothrow_move_constructible_v<T> class SlotVector { |
| 34 | public: | 35 | public: |
| 36 | class Iterator { | ||
| 37 | friend SlotVector<T>; | ||
| 38 | |||
| 39 | public: | ||
| 40 | constexpr Iterator() = default; | ||
| 41 | |||
| 42 | Iterator& operator++() noexcept { | ||
| 43 | const u64* const bitset = slot_vector->stored_bitset.data(); | ||
| 44 | const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; | ||
| 45 | if (id.index < size) { | ||
| 46 | do { | ||
| 47 | ++id.index; | ||
| 48 | } while (id.index < size && !IsValid(bitset)); | ||
| 49 | if (id.index == size) { | ||
| 50 | id.index = SlotId::INVALID_INDEX; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | Iterator operator++(int) noexcept { | ||
| 57 | const Iterator copy{*this}; | ||
| 58 | ++*this; | ||
| 59 | return copy; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool operator==(const Iterator& other) const noexcept { | ||
| 63 | return id.index == other.id.index; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool operator!=(const Iterator& other) const noexcept { | ||
| 67 | return id.index != other.id.index; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::pair<SlotId, T*> operator*() const noexcept { | ||
| 71 | return {id, std::addressof((*slot_vector)[id])}; | ||
| 72 | } | ||
| 73 | |||
| 74 | T* operator->() const noexcept { | ||
| 75 | return std::addressof((*slot_vector)[id]); | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept | ||
| 80 | : slot_vector{slot_vector_}, id{id_} {} | ||
| 81 | |||
| 82 | bool IsValid(const u64* bitset) noexcept { | ||
| 83 | return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | SlotVector<T>* slot_vector; | ||
| 87 | SlotId id; | ||
| 88 | }; | ||
| 89 | |||
| 35 | ~SlotVector() noexcept { | 90 | ~SlotVector() noexcept { |
| 36 | size_t index = 0; | 91 | size_t index = 0; |
| 37 | for (u64 bits : stored_bitset) { | 92 | for (u64 bits : stored_bitset) { |
| @@ -70,6 +125,20 @@ public: | |||
| 70 | ResetStorageBit(id.index); | 125 | ResetStorageBit(id.index); |
| 71 | } | 126 | } |
| 72 | 127 | ||
| 128 | [[nodiscard]] Iterator begin() noexcept { | ||
| 129 | const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); | ||
| 130 | if (it == stored_bitset.end()) { | ||
| 131 | return end(); | ||
| 132 | } | ||
| 133 | const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it)); | ||
| 134 | const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))}; | ||
| 135 | return Iterator(this, first_id); | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] Iterator end() noexcept { | ||
| 139 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); | ||
| 140 | } | ||
| 141 | |||
| 73 | private: | 142 | private: |
| 74 | struct NonTrivialDummy { | 143 | struct NonTrivialDummy { |
| 75 | NonTrivialDummy() noexcept {} | 144 | NonTrivialDummy() noexcept {} |
| @@ -140,7 +209,6 @@ private: | |||
| 140 | 209 | ||
| 141 | Entry* values = nullptr; | 210 | Entry* values = nullptr; |
| 142 | size_t values_capacity = 0; | 211 | size_t values_capacity = 0; |
| 143 | size_t values_size = 0; | ||
| 144 | 212 | ||
| 145 | std::vector<u64> stored_bitset; | 213 | std::vector<u64> stored_bitset; |
| 146 | std::vector<u32> free_list; | 214 | std::vector<u32> free_list; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 59b7c678b..45ef155b5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -353,6 +353,7 @@ private: | |||
| 353 | 353 | ||
| 354 | u64 modification_tick = 0; | 354 | u64 modification_tick = 0; |
| 355 | u64 frame_tick = 0; | 355 | u64 frame_tick = 0; |
| 356 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 356 | }; | 357 | }; |
| 357 | 358 | ||
| 358 | template <class P> | 359 | template <class P> |
| @@ -373,10 +374,41 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 373 | // This way the null resource becomes a compile time constant | 374 | // This way the null resource becomes a compile time constant |
| 374 | void(slot_image_views.insert(runtime, NullImageParams{})); | 375 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 375 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 376 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 377 | |||
| 378 | deletion_iterator = slot_images.begin(); | ||
| 376 | } | 379 | } |
| 377 | 380 | ||
| 378 | template <class P> | 381 | template <class P> |
| 379 | void TextureCache<P>::TickFrame() { | 382 | void TextureCache<P>::TickFrame() { |
| 383 | static constexpr u64 ticks_to_destroy = 120; | ||
| 384 | int num_iterations = 32; | ||
| 385 | for (; num_iterations > 0; --num_iterations) { | ||
| 386 | if (deletion_iterator == slot_images.end()) { | ||
| 387 | deletion_iterator = slot_images.begin(); | ||
| 388 | if (deletion_iterator == slot_images.end()) { | ||
| 389 | break; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | const auto [image_id, image] = *deletion_iterator; | ||
| 393 | if (image->frame_tick + ticks_to_destroy < frame_tick) { | ||
| 394 | if (image->IsSafeDownload() && | ||
| 395 | std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { | ||
| 396 | return slot_images[alias.id].modification_tick > image->modification_tick; | ||
| 397 | })) { | ||
| 398 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 399 | const auto copies = FullDownloadCopies(image->info); | ||
| 400 | image->DownloadMemory(map, copies); | ||
| 401 | runtime.Finish(); | ||
| 402 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 403 | } | ||
| 404 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 405 | UntrackImage(*image); | ||
| 406 | } | ||
| 407 | UnregisterImage(image_id); | ||
| 408 | DeleteImage(image_id); | ||
| 409 | } | ||
| 410 | ++deletion_iterator; | ||
| 411 | } | ||
| 380 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | 412 | // Tick sentenced resources in this order to ensure they are destroyed in the right order |
| 381 | sentenced_images.Tick(); | 413 | sentenced_images.Tick(); |
| 382 | sentenced_framebuffers.Tick(); | 414 | sentenced_framebuffers.Tick(); |
| @@ -568,17 +600,7 @@ template <class P> | |||
| 568 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 600 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 569 | std::vector<ImageId> images; | 601 | std::vector<ImageId> images; |
| 570 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | 602 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { |
| 571 | // Skip images that were not modified from the GPU | 603 | if (!image.IsSafeDownload()) { |
| 572 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 573 | return; | ||
| 574 | } | ||
| 575 | // Skip images that .are. modified from the CPU | ||
| 576 | // We don't want to write sensitive data from the guest | ||
| 577 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 578 | return; | ||
| 579 | } | ||
| 580 | if (image.info.num_samples > 1) { | ||
| 581 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 582 | return; | 604 | return; |
| 583 | } | 605 | } |
| 584 | image.flags &= ~ImageFlagBits::GpuModified; | 606 | image.flags &= ~ImageFlagBits::GpuModified; |