diff options
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 11 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 138 |
2 files changed, 105 insertions, 44 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a39505903..b121d36a3 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -256,6 +256,16 @@ public: | |||
| 256 | stream_score += score; | 256 | stream_score += score; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | /// Sets the new frame tick | ||
| 260 | void SetFrameTick(u64 new_frame_tick) noexcept { | ||
| 261 | frame_tick = new_frame_tick; | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the currently stored frame tick | ||
| 265 | [[nodiscard]] u64 FrameTick() const noexcept { | ||
| 266 | return frame_tick; | ||
| 267 | } | ||
| 268 | |||
| 259 | /// Returns the likeliness of this being a stream buffer | 269 | /// Returns the likeliness of this being a stream buffer |
| 260 | [[nodiscard]] int StreamScore() const noexcept { | 270 | [[nodiscard]] int StreamScore() const noexcept { |
| 261 | return stream_score; | 271 | return stream_score; |
| @@ -586,6 +596,7 @@ private: | |||
| 586 | RasterizerInterface* rasterizer = nullptr; | 596 | RasterizerInterface* rasterizer = nullptr; |
| 587 | VAddr cpu_addr = 0; | 597 | VAddr cpu_addr = 0; |
| 588 | Words words; | 598 | Words words; |
| 599 | u64 frame_tick = 0; | ||
| 589 | BufferFlagBits flags{}; | 600 | BufferFlagBits flags{}; |
| 590 | int stream_score = 0; | 601 | int stream_score = 0; |
| 591 | }; | 602 | }; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d371b842f..ecb7d3dee 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -243,6 +243,8 @@ private: | |||
| 243 | template <bool insert> | 243 | template <bool insert> |
| 244 | void ChangeRegister(BufferId buffer_id); | 244 | void ChangeRegister(BufferId buffer_id); |
| 245 | 245 | ||
| 246 | void TouchBuffer(Buffer& buffer) const noexcept; | ||
| 247 | |||
| 246 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 248 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 247 | 249 | ||
| 248 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 250 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| @@ -255,6 +257,10 @@ private: | |||
| 255 | 257 | ||
| 256 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | 258 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); |
| 257 | 259 | ||
| 260 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 261 | |||
| 262 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 263 | |||
| 258 | void DeleteBuffer(BufferId buffer_id); | 264 | void DeleteBuffer(BufferId buffer_id); |
| 259 | 265 | ||
| 260 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | 266 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); |
| @@ -319,6 +325,9 @@ private: | |||
| 319 | size_t immediate_buffer_capacity = 0; | 325 | size_t immediate_buffer_capacity = 0; |
| 320 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 326 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| 321 | 327 | ||
| 328 | typename SlotVector<Buffer>::Iterator deletion_iterator; | ||
| 329 | u64 frame_tick = 0; | ||
| 330 | |||
| 322 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | 331 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |
| 323 | }; | 332 | }; |
| 324 | 333 | ||
| @@ -332,6 +341,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 332 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 341 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { |
| 333 | // Ensure the first slot is used for the null buffer | 342 | // Ensure the first slot is used for the null buffer |
| 334 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 343 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 344 | deletion_iterator = slot_buffers.end(); | ||
| 335 | } | 345 | } |
| 336 | 346 | ||
| 337 | template <class P> | 347 | template <class P> |
| @@ -349,7 +359,24 @@ void BufferCache<P>::TickFrame() { | |||
| 349 | const bool skip_preferred = hits * 256 < shots * 251; | 359 | const bool skip_preferred = hits * 256 < shots * 251; |
| 350 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 360 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 351 | 361 | ||
| 362 | static constexpr u64 ticks_to_destroy = 120; | ||
| 363 | int num_iterations = 32; | ||
| 364 | for (; num_iterations > 0; --num_iterations) { | ||
| 365 | if (deletion_iterator == slot_buffers.end()) { | ||
| 366 | deletion_iterator = slot_buffers.begin(); | ||
| 367 | } | ||
| 368 | ++deletion_iterator; | ||
| 369 | if (deletion_iterator == slot_buffers.end()) { | ||
| 370 | break; | ||
| 371 | } | ||
| 372 | const auto [buffer_id, buffer] = *deletion_iterator; | ||
| 373 | if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { | ||
| 374 | DownloadBufferMemory(*buffer); | ||
| 375 | DeleteBuffer(buffer_id); | ||
| 376 | } | ||
| 377 | } | ||
| 352 | delayed_destruction_ring.Tick(); | 378 | delayed_destruction_ring.Tick(); |
| 379 | ++frame_tick; | ||
| 353 | } | 380 | } |
| 354 | 381 | ||
| 355 | template <class P> | 382 | template <class P> |
| @@ -371,50 +398,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||
| 371 | 398 | ||
| 372 | template <class P> | 399 | template <class P> |
| 373 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 400 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 374 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 401 | ForEachBufferInRange(cpu_addr, size, |
| 375 | boost::container::small_vector<BufferCopy, 1> copies; | 402 | [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); |
| 376 | u64 total_size_bytes = 0; | ||
| 377 | u64 largest_copy = 0; | ||
| 378 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 379 | copies.push_back(BufferCopy{ | ||
| 380 | .src_offset = range_offset, | ||
| 381 | .dst_offset = total_size_bytes, | ||
| 382 | .size = range_size, | ||
| 383 | }); | ||
| 384 | total_size_bytes += range_size; | ||
| 385 | largest_copy = std::max(largest_copy, range_size); | ||
| 386 | }); | ||
| 387 | if (total_size_bytes == 0) { | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 391 | |||
| 392 | if constexpr (USE_MEMORY_MAPS) { | ||
| 393 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 394 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 395 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 396 | for (BufferCopy& copy : copies) { | ||
| 397 | // Modify copies to have the staging offset in mind | ||
| 398 | copy.dst_offset += download_staging.offset; | ||
| 399 | } | ||
| 400 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 401 | runtime.Finish(); | ||
| 402 | for (const BufferCopy& copy : copies) { | ||
| 403 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 404 | // Undo the modified offset | ||
| 405 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 406 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 407 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 408 | } | ||
| 409 | } else { | ||
| 410 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 411 | for (const BufferCopy& copy : copies) { | ||
| 412 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 413 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 414 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | }); | ||
| 418 | } | 403 | } |
| 419 | 404 | ||
| 420 | template <class P> | 405 | template <class P> |
| @@ -640,6 +625,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 640 | template <class P> | 625 | template <class P> |
| 641 | void BufferCache<P>::BindHostIndexBuffer() { | 626 | void BufferCache<P>::BindHostIndexBuffer() { |
| 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 627 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
| 628 | TouchBuffer(buffer); | ||
| 643 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 629 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 644 | const u32 size = index_buffer.size; | 630 | const u32 size = index_buffer.size; |
| 645 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 631 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| @@ -658,6 +644,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 658 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 644 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 659 | const Binding& binding = vertex_buffers[index]; | 645 | const Binding& binding = vertex_buffers[index]; |
| 660 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 646 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 647 | TouchBuffer(buffer); | ||
| 661 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 648 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |
| 662 | if (!flags[Dirty::VertexBuffer0 + index]) { | 649 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 663 | continue; | 650 | continue; |
| @@ -693,6 +680,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 693 | const VAddr cpu_addr = binding.cpu_addr; | 680 | const VAddr cpu_addr = binding.cpu_addr; |
| 694 | const u32 size = binding.size; | 681 | const u32 size = binding.size; |
| 695 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 682 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 683 | TouchBuffer(buffer); | ||
| 696 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 684 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 697 | size <= uniform_buffer_skip_cache_size && | 685 | size <= uniform_buffer_skip_cache_size && |
| 698 | !buffer.IsRegionGpuModified(cpu_addr, size); | 686 | !buffer.IsRegionGpuModified(cpu_addr, size); |
| @@ -744,6 +732,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 744 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | 732 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |
| 745 | const Binding& binding = storage_buffers[stage][index]; | 733 | const Binding& binding = storage_buffers[stage][index]; |
| 746 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 734 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 735 | TouchBuffer(buffer); | ||
| 747 | const u32 size = binding.size; | 736 | const u32 size = binding.size; |
| 748 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 737 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 749 | 738 | ||
| @@ -766,6 +755,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 766 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | 755 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 767 | const Binding& binding = transform_feedback_buffers[index]; | 756 | const Binding& binding = transform_feedback_buffers[index]; |
| 768 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 757 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 758 | TouchBuffer(buffer); | ||
| 769 | const u32 size = binding.size; | 759 | const u32 size = binding.size; |
| 770 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 760 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 771 | 761 | ||
| @@ -784,6 +774,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 784 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 774 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { |
| 785 | const Binding& binding = compute_uniform_buffers[index]; | 775 | const Binding& binding = compute_uniform_buffers[index]; |
| 786 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 776 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 777 | TouchBuffer(buffer); | ||
| 787 | const u32 size = binding.size; | 778 | const u32 size = binding.size; |
| 788 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 779 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 789 | 780 | ||
| @@ -803,6 +794,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 803 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 794 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 804 | const Binding& binding = compute_storage_buffers[index]; | 795 | const Binding& binding = compute_storage_buffers[index]; |
| 805 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 796 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 797 | TouchBuffer(buffer); | ||
| 806 | const u32 size = binding.size; | 798 | const u32 size = binding.size; |
| 807 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 799 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 808 | 800 | ||
| @@ -1101,6 +1093,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1101 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1093 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1102 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1094 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1103 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1095 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1096 | TouchBuffer(slot_buffers[new_buffer_id]); | ||
| 1104 | for (const BufferId overlap_id : overlap.ids) { | 1097 | for (const BufferId overlap_id : overlap.ids) { |
| 1105 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1098 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1106 | } | 1099 | } |
| @@ -1136,6 +1129,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1136 | } | 1129 | } |
| 1137 | 1130 | ||
| 1138 | template <class P> | 1131 | template <class P> |
| 1132 | void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | ||
| 1133 | buffer.SetFrameTick(frame_tick); | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | template <class P> | ||
| 1139 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1137 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1140 | if (buffer.CpuAddr() == 0) { | 1138 | if (buffer.CpuAddr() == 0) { |
| 1141 | return true; | 1139 | return true; |
| @@ -1212,6 +1210,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1212 | } | 1210 | } |
| 1213 | 1211 | ||
| 1214 | template <class P> | 1212 | template <class P> |
| 1213 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | ||
| 1214 | DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | template <class P> | ||
| 1218 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | ||
| 1219 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 1220 | u64 total_size_bytes = 0; | ||
| 1221 | u64 largest_copy = 0; | ||
| 1222 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1223 | copies.push_back(BufferCopy{ | ||
| 1224 | .src_offset = range_offset, | ||
| 1225 | .dst_offset = total_size_bytes, | ||
| 1226 | .size = range_size, | ||
| 1227 | }); | ||
| 1228 | total_size_bytes += range_size; | ||
| 1229 | largest_copy = std::max(largest_copy, range_size); | ||
| 1230 | }); | ||
| 1231 | if (total_size_bytes == 0) { | ||
| 1232 | return; | ||
| 1233 | } | ||
| 1234 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 1235 | |||
| 1236 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1237 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 1238 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 1239 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 1240 | for (BufferCopy& copy : copies) { | ||
| 1241 | // Modify copies to have the staging offset in mind | ||
| 1242 | copy.dst_offset += download_staging.offset; | ||
| 1243 | } | ||
| 1244 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 1245 | runtime.Finish(); | ||
| 1246 | for (const BufferCopy& copy : copies) { | ||
| 1247 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1248 | // Undo the modified offset | ||
| 1249 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 1250 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 1251 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 1252 | } | ||
| 1253 | } else { | ||
| 1254 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1255 | for (const BufferCopy& copy : copies) { | ||
| 1256 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 1257 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1258 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1259 | } | ||
| 1260 | } | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | template <class P> | ||
| 1215 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1264 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { |
| 1216 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1265 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1217 | if (binding.buffer_id == buffer_id) { | 1266 | if (binding.buffer_id == buffer_id) { |
| @@ -1236,6 +1285,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1236 | 1285 | ||
| 1237 | Unregister(buffer_id); | 1286 | Unregister(buffer_id); |
| 1238 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1287 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| 1288 | slot_buffers.erase(buffer_id); | ||
| 1239 | 1289 | ||
| 1240 | NotifyBufferDeletion(); | 1290 | NotifyBufferDeletion(); |
| 1241 | } | 1291 | } |