Diffstat (limited to '')
 -rw-r--r--   src/video_core/buffer_cache/buffer_cache.h       229
 -rw-r--r--   src/video_core/buffer_cache/buffer_cache_base.h   65
 2 files changed, 154 insertions(+), 140 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index a0701ce4e..43fe5b080 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -11,6 +11,8 @@
 
 namespace VideoCommon {
 
+using Core::Memory::YUZU_PAGESIZE;
+
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                             Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
@@ -87,9 +89,11 @@ void BufferCache<P>::TickFrame() {
 template <class P>
 void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
     memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
-    const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+        const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
+        ClearDownload(subtract_interval);
+        common_ranges.subtract(subtract_interval);
+    }
 }
 
 template <class P>
@@ -102,17 +106,33 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
 
 template <class P>
 void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
+    WaitOnAsyncFlushes(cpu_addr, size);
     ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
         DownloadBufferMemory(buffer, cpu_addr, size);
     });
 }
 
 template <class P>
+void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
+    bool must_wait = false;
+    ForEachInOverlapCounter(async_downloads, cpu_addr, size,
+                            [&](VAddr, VAddr, int) { must_wait = true; });
+    bool must_release = false;
+    ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
+    if (must_release) {
+        std::function<void()> tmp([]() {});
+        rasterizer.SignalFence(std::move(tmp));
+    }
+    if (must_wait || must_release) {
+        rasterizer.ReleaseFences();
+    }
+}
+
+template <class P>
 void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
+    async_downloads -= std::make_pair(subtract_interval, std::numeric_limits<int>::max());
     uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : async_downloads) {
-        interval_set.subtract(subtract_interval);
-    }
+    pending_ranges.subtract(subtract_interval);
     for (auto& interval_set : committed_ranges) {
         interval_set.subtract(subtract_interval);
    }
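
Note: ClearDownload can now wipe the new reference-counted async_downloads map with a single subtraction instead of walking a deque of interval sets. Below is a minimal standalone sketch of how a Boost.ICL split_interval_map behaves as an overlap counter (illustrative names, not yuzu code); the std::numeric_limits<int>::max() subtraction above additionally relies on the clamped combiner defined in buffer_cache_base.h further down, which saturates counts at zero.

#include <boost/icl/discrete_interval.hpp>
#include <boost/icl/split_interval_map.hpp>
#include <cstdint>
#include <iostream>

using VAddr = std::uint64_t;
using Counter = boost::icl::split_interval_map<VAddr, int>;
using Interval = boost::icl::discrete_interval<VAddr>;

int main() {
    Counter downloads;
    // Two overlapping in-flight downloads: the shared range is counted twice.
    downloads += std::make_pair(Interval::right_open(100, 300), 1);
    downloads += std::make_pair(Interval::right_open(200, 400), 1);
    for (const auto& [interval, count] : downloads) {
        std::cout << interval << " -> " << count << '\n'; // [100,200)->1 [200,300)->2 [300,400)->1
    }
    // Finishing one download decrements only its own range; segments whose
    // count reaches zero are dropped by the partial_absorber traits.
    downloads -= std::make_pair(Interval::right_open(100, 300), 1);
    return 0;
}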
@@ -132,6 +152,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     }
 
     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+    WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
     ClearDownload(subtract_interval);
 
     BufferId buffer_a;
@@ -162,6 +183,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         tmp_intervals.push_back(add_interval);
         if (is_high_accuracy) {
             uncommitted_ranges.add(add_interval);
+            pending_ranges.add(add_interval);
         }
     };
     ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
@@ -413,18 +435,15 @@ template <class P>
 void BufferCache<P>::FlushCachedWrites() {
     cached_write_buffer_ids.clear();
     memory_tracker.FlushCachedWrites();
-    /*for (auto& interval : cached_ranges) {
-        VAddr cpu_addr = interval.lower();
-        const std::size_t size = interval.upper() - interval.lower();
-        memory_tracker.FlushCachedWrites(cpu_addr, size);
-        // common_ranges.subtract(interval);
-    }*/
+    for (auto& interval : cached_ranges) {
+        ClearDownload(interval);
+    }
     cached_ranges.clear();
 }
 
 template <class P>
 bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
-    return !uncommitted_ranges.empty() || !committed_ranges.empty() || !pending_queries.empty();
+    return !uncommitted_ranges.empty() || !committed_ranges.empty();
 }
 
 template <class P>
@@ -437,8 +456,11 @@ void BufferCache<P>::AccumulateFlushes() {
 
 template <class P>
 bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
-    return (!async_buffers.empty() && async_buffers.front().has_value()) ||
-           (!query_async_buffers.empty() && query_async_buffers.front().has_value());
+    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+        return (!async_buffers.empty() && async_buffers.front().has_value());
+    } else {
+        return false;
+    }
 }
 
 template <class P>
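
Note: ShouldWaitAsyncFlushes (and the matching early-outs in CommitAsyncFlushesHigh below) is now gated on the backend's compile-time IMPLEMENTS_ASYNC_DOWNLOADS capability flag. A hedged sketch of the pattern; ExampleParams and the int element type are stand-ins, not the real template parameters:

#include <deque>
#include <optional>

struct ExampleParams { // hypothetical backend description struct
    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
};

template <class P>
class ExampleCache {
public:
    bool ShouldWaitAsyncFlushes() const noexcept {
        if constexpr (P::IMPLEMENTS_ASYNC_DOWNLOADS) {
            // Only meaningful when the backend queues staging buffers.
            return !async_buffers.empty() && async_buffers.front().has_value();
        } else {
            return false; // synchronous backends never have buffers in flight
        }
    }

private:
    std::deque<std::optional<int>> async_buffers; // stand-in element type
};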
@@ -446,11 +468,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     AccumulateFlushes();
 
     if (committed_ranges.empty()) {
-        async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        }
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
 
+    pending_ranges.clear();
     auto it = committed_ranges.begin();
     while (it != committed_ranges.end()) {
         auto& current_intervals = *it;
@@ -491,7 +516,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                 buffer_id,
             });
             // Align up to avoid cache conflicts
-            constexpr u64 align = 8ULL;
+            constexpr u64 align = 64ULL;
             constexpr u64 mask = ~(align - 1ULL);
             total_size_bytes += (new_size + align - 1) & mask;
             largest_copy = std::max(largest_copy, new_size);
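
Note: the staging alignment grows from 8 to 64 bytes so consecutive downloads land on separate cache lines (matching the comment above). The rounding idiom itself, with compile-time checks:

#include <cstdint>

// (x + align - 1) & ~(align - 1) is the smallest multiple of a power-of-two
// align that is >= x; with align = 64 each copy starts on its own cache line.
constexpr std::uint64_t AlignUp(std::uint64_t x, std::uint64_t align) {
    const std::uint64_t mask = ~(align - 1ULL);
    return (x + align - 1ULL) & mask;
}
static_assert(AlignUp(1, 64) == 64);
static_assert(AlignUp(64, 64) == 64);
static_assert(AlignUp(65, 64) == 128);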
@@ -504,7 +529,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     }
     committed_ranges.clear();
     if (downloads.empty()) {
-        async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
+        }
         return;
     }
     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
@@ -520,99 +547,54 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
             VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
             const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
-            new_async_range.add(base_interval);
+            async_downloads += std::make_pair(base_interval, 1);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
             normalized_copies.push_back(second_copy);
         }
-        async_downloads.emplace_back(std::move(new_async_range));
+        runtime.PostCopyBarrier();
         pending_downloads.emplace_back(std::move(normalized_copies));
         async_buffers.emplace_back(download_staging);
     } else {
-        const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-        for (const auto& [copy, buffer_id] : downloads) {
-            Buffer& buffer = slot_buffers[buffer_id];
-            buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
-            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
-        }
-    }
-}
-
-template <class P>
-void BufferCache<P>::CommitAsyncQueries() {
-    if (pending_queries.empty()) {
-        query_async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        return;
-    }
-
-    MICROPROFILE_SCOPE(GPU_DownloadMemory);
-    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 8> downloads;
-    u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
-    do {
-        has_deleted_buffers = false;
-        downloads.clear();
-        total_size_bytes = 0;
-        largest_copy = 0;
-        for (const auto& query_info : pending_queries) {
-            const std::size_t size = query_info.second;
-            const VAddr cpu_addr = query_info.first;
-            const BufferId buffer_id = FindBuffer(cpu_addr, static_cast<u32>(size));
-            Buffer& buffer = slot_buffers[buffer_id];
-            if (has_deleted_buffers) {
-                break;
+        if constexpr (USE_MEMORY_MAPS) {
+            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+            runtime.PreCopyBarrier();
+            for (auto& [copy, buffer_id] : downloads) {
+                // Have in mind the staging buffer offset for the copy
+                copy.dst_offset += download_staging.offset;
+                const std::array copies{copy};
+                runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
+            }
+            runtime.PostCopyBarrier();
+            runtime.Finish();
+            for (const auto& [copy, buffer_id] : downloads) {
+                const Buffer& buffer = slot_buffers[buffer_id];
+                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                // Undo the modified offset
+                const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+                cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+            }
+        } else {
+            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+            for (const auto& [copy, buffer_id] : downloads) {
+                Buffer& buffer = slot_buffers[buffer_id];
+                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
             }
-            downloads.push_back({
-                BufferCopy{
-                    .src_offset = buffer.Offset(cpu_addr),
-                    .dst_offset = total_size_bytes,
-                    .size = size,
-                },
-                buffer_id,
-            });
-            constexpr u64 align = 8ULL;
-            constexpr u64 mask = ~(align - 1ULL);
-            total_size_bytes += (size + align - 1) & mask;
-            largest_copy = std::max(largest_copy, size);
-        }
-    } while (has_deleted_buffers);
-    pending_queries.clear();
-    if (downloads.empty()) {
-        query_async_buffers.push_back(std::optional<Async_Buffer>{});
-        return;
-    }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        boost::container::small_vector<BufferCopy, 8> normalized_copies;
-        runtime.PreCopyBarrier();
-        for (auto& [copy, buffer_id] : downloads) {
-            // Have in mind the staging buffer offset for the copy
-            copy.dst_offset += download_staging.offset;
-            const std::array copies{copy};
-            const Buffer& buffer = slot_buffers[buffer_id];
-            BufferCopy second_copy{copy};
-            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + second_copy.src_offset;
-            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-            normalized_copies.push_back(second_copy);
         }
-        committed_queries.emplace_back(std::move(normalized_copies));
-        query_async_buffers.emplace_back(download_staging);
-    } else {
-        query_async_buffers.push_back(std::optional<Async_Buffer>{});
     }
 }
 
 template <class P>
 void BufferCache<P>::CommitAsyncFlushes() {
     CommitAsyncFlushesHigh();
-    CommitAsyncQueries();
 }
 
 template <class P>
 void BufferCache<P>::PopAsyncFlushes() {
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
     PopAsyncBuffers();
-    PopAsyncQueries();
 }
 
 template <class P>
@@ -627,59 +609,34 @@ void BufferCache<P>::PopAsyncBuffers() {
     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
         auto& downloads = pending_downloads.front();
         auto& async_buffer = async_buffers.front();
-        auto& async_range = async_downloads.front();
         u8* base = async_buffer->mapped_span.data();
         const size_t base_offset = async_buffer->offset;
         for (const auto& copy : downloads) {
             const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
             const u64 dst_offset = copy.dst_offset - base_offset;
             const u8* read_mapped_memory = base + dst_offset;
-            ForEachInRangeSet(async_range, cpu_addr, copy.size, [&](VAddr start, VAddr end) {
-                const size_t diff = start - cpu_addr;
-                const size_t new_size = end - start;
-                cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[diff], new_size);
-                const IntervalType base_interval{start, end};
-                common_ranges.subtract(base_interval);
-            });
+            ForEachInOverlapCounter(
+                async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
+                    cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
+                                                end - start);
+                    if (count == 1) {
+                        const IntervalType base_interval{start, end};
+                        common_ranges.subtract(base_interval);
+                    }
+                });
+            async_downloads -= std::make_pair(IntervalType(cpu_addr, cpu_addr + copy.size), 1);
         }
         runtime.FreeDeferredStagingBuffer(*async_buffer);
         async_buffers.pop_front();
         pending_downloads.pop_front();
-        async_downloads.pop_front();
-    }
-}
-
-template <class P>
-void BufferCache<P>::PopAsyncQueries() {
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        if (query_async_buffers.empty()) {
-            return;
-        }
-        if (!query_async_buffers.front().has_value()) {
-            query_async_buffers.pop_front();
-            return;
-        }
-        auto& downloads = committed_queries.front();
-        auto& async_buffer = query_async_buffers.front();
-        flushed_queries.clear();
-        u8* base = async_buffer->mapped_span.data();
-        const size_t base_offset = async_buffer->offset;
-        for (const auto& copy : downloads) {
-            const size_t dst_offset = copy.dst_offset - base_offset;
-            const u8* read_mapped_memory = base + dst_offset;
-            u64 new_value{};
-            std::memcpy(&new_value, read_mapped_memory, copy.size);
-            flushed_queries.push_back(new_value);
-        }
-        runtime.FreeDeferredStagingBuffer(*async_buffer);
-        committed_queries.pop_front();
-        query_async_buffers.pop_front();
     }
 }
 
 template <class P>
 bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
-    return memory_tracker.IsRegionGpuModified(addr, size);
+    bool is_dirty = false;
+    ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
+    return is_dirty;
 }
 
 template <class P>
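
Note: IsRegionGpuModified now answers from common_ranges, the byte-precise interval set of GPU writes, instead of the page-granular memory_tracker, and PopAsyncBuffers only erases a range from common_ranges once the last overlapping download (count == 1) completes. A standalone equivalent of the new query, assuming only Boost.ICL rather than the yuzu helpers:

#include <boost/icl/interval_set.hpp>
#include <cstdint>

using VAddr = std::uint64_t;
using RangeSet = boost::icl::interval_set<VAddr>;

// True when any byte of [addr, addr + size) overlaps a tracked GPU write.
bool IsRegionGpuModified(const RangeSet& common_ranges, VAddr addr, std::uint64_t size) {
    const auto query = RangeSet::interval_type::right_open(addr, addr + size);
    return boost::icl::intersects(common_ranges, query);
}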
@@ -1232,16 +1189,18 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
 }
 
 template <class P>
-void BufferCache<P>::MarkWrittenBuffer(BufferId, VAddr cpu_addr, u32 size) {
+void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
     memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
 
+    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
+        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
+    }
+
     const IntervalType base_interval{cpu_addr, cpu_addr + size};
     common_ranges.add(base_interval);
-    for (auto& interval_set : async_downloads) {
-        interval_set.subtract(base_interval);
-    }
     if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
         uncommitted_ranges.add(base_interval);
+        pending_ranges.add(base_interval);
     }
 }
 
@@ -1530,7 +1489,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     if (!is_dirty) {
         return false;
     }
-    if (!IsRegionGpuModified(dest_address, copy_size)) {
+    VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
+    VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
+    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
         return false;
     }
 
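
Note: InlineMemory widens its probe to whole guest pages before calling the now interval-based IsRegionGpuModified. Worked numbers, assuming YUZU_PAGESIZE is the 4 KiB (0x1000) guest page size:

#include <cstdint>

constexpr std::uint64_t kPageSize = 0x1000; // assumed value of YUZU_PAGESIZE

constexpr std::uint64_t AlignDown(std::uint64_t addr) {
    return addr & ~(kPageSize - 1);
}
constexpr std::uint64_t AlignUp(std::uint64_t addr) {
    return (addr + kPageSize - 1) & ~(kPageSize - 1);
}

// A 0x20-byte inline write at 0x1234 now probes the whole page [0x1000, 0x2000).
static_assert(AlignDown(0x1234) == 0x1000);
static_assert(AlignUp(0x1234 + 0x20) == 0x2000);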
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 4b3677da3..6f29cba25 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -17,6 +17,7 @@
 #include <boost/pool/detail/mutex.hpp>
 #undef BOOST_NO_MT
 #include <boost/icl/interval_set.hpp>
+#include <boost/icl/split_interval_map.hpp>
 #include <boost/pool/pool.hpp>
 #include <boost/pool/pool_alloc.hpp>
 
@@ -44,8 +45,7 @@
 
 namespace boost {
 template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::default_mutex, 4096,
-                          0>;
+class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
 }
 
 namespace VideoCommon {
@@ -123,6 +123,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
         boost::icl::interval_set<VAddr, IntervalCompare, IntervalInstance, IntervalAllocator>;
     using IntervalType = typename IntervalSet::interval_type;
 
+    template <typename Type>
+    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
+        // types
+        typedef counter_add_functor<Type> type;
+        typedef boost::icl::identity_based_inplace_combine<Type> base_type;
+
+        // public member functions
+        void operator()(Type& current, const Type& added) const {
+            current += added;
+            if (current < base_type::identity_element()) {
+                current = base_type::identity_element();
+            }
+        }
+
+        // public static functions
+        static void version(Type&){};
+    };
+
+    using OverlapCombine = ICL_COMBINE_INSTANCE(counter_add_functor, int);
+    using OverlapSection = ICL_SECTION_INSTANCE(boost::icl::inter_section, int);
+    using OverlapCounter =
+        boost::icl::split_interval_map<VAddr, int, boost::icl::partial_absorber, IntervalCompare,
+                                       OverlapCombine, OverlapSection, IntervalInstance,
+                                       IntervalAllocator>;
+
     struct Empty {};
 
     struct OverlapResult {
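
Note: counter_add_functor clamps the per-interval count at the identity element (0 for int) so aggregation can never drive it negative, and partial_absorber drops zero-valued segments; together that is what the std::numeric_limits<int>::max() wipe in ClearDownload depends on. The clamping arithmetic in isolation, as a plain sketch without the ICL plumbing:

// Saturating add: counts never drop below zero, so subtracting INT_MAX
// empties a segment no matter how many outstanding downloads covered it.
void SaturatingAdd(int& current, int added) {
    current += added;  // added is negative when a segment is subtracted
    if (current < 0) { // identity_element() of the combiner is 0 for int
        current = 0;
    }
}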
@@ -219,12 +244,9 @@ public:
     /// Commit asynchronous downloads
     void CommitAsyncFlushes();
     void CommitAsyncFlushesHigh();
-    void CommitAsyncQueries();
 
     /// Pop asynchronous downloads
     void PopAsyncFlushes();
-
-    void PopAsyncQueries();
     void PopAsyncBuffers();
 
     bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
@@ -302,6 +324,34 @@ private:
         }
     }
 
+    template <typename Func>
+    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
+                                 Func&& func) {
+        const VAddr start_address = cpu_addr;
+        const VAddr end_address = start_address + size;
+        const IntervalType search_interval{start_address, end_address};
+        auto it = current_range.lower_bound(search_interval);
+        if (it == current_range.end()) {
+            return;
+        }
+        auto end_it = current_range.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            auto& inter = it->first;
+            VAddr inter_addr_end = inter.upper();
+            VAddr inter_addr = inter.lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            if (it->second <= 0) {
+                __debugbreak();
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
     static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
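
Note: ForEachInOverlapCounter clamps each stored segment to the queried window and hands the callback the segment's live overlap count. __debugbreak() is MSVC-specific, so the invariant check above does not build as-is on GCC/Clang; a portable stand-in (illustrative, not part of this change) could be:

#include <cassert>

// Portable sketch replacing the MSVC-only __debugbreak() above: trap when a
// stored overlap count is not positive, which the map invariants forbid.
inline void CheckOverlapCount(int count) {
    assert(count > 0 && "async download overlap count must stay positive");
}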
@@ -309,6 +359,8 @@ private:
 
     void RunGarbageCollector();
 
+    void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size);
+
     void BindHostIndexBuffer();
 
     void BindHostVertexBuffers();
@@ -474,10 +526,11 @@ private:
     IntervalSet uncommitted_ranges;
     IntervalSet common_ranges;
     IntervalSet cached_ranges;
+    IntervalSet pending_ranges;
     std::deque<IntervalSet> committed_ranges;
 
     // Async Buffers
-    std::deque<IntervalSet> async_downloads;
+    OverlapCounter async_downloads;
     std::deque<std::optional<Async_Buffer>> async_buffers;
     std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
     std::optional<Async_Buffer> current_buffer;