diff options
| author | 2021-07-04 18:28:20 +0200 | |
|---|---|---|
| committer | 2021-07-09 22:20:36 +0200 | |
| commit | 35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da (patch) | |
| tree | 05c463ac9a74c7a63a33ab7315d2a9133b61d6e5 /src/video_core/buffer_cache | |
| parent | Buffer Cache: Fix High Downloads and don't predownload on Extreme. (diff) | |
| download | yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.tar.gz yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.tar.xz yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.zip | |
Videocore: Address Feedback & CLANG Format.
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 151 |
2 files changed, 75 insertions, 78 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a56b4c3a8..9e39858c8 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -468,7 +468,7 @@ private: | |||
| 468 | 468 | ||
| 469 | const u64 current_word = state_words[word_index] & bits; | 469 | const u64 current_word = state_words[word_index] & bits; |
| 470 | if (clear) { | 470 | if (clear) { |
| 471 | state_words[word_index] &= ~bits; | 471 | state_words[word_index] &= ~bits; |
| 472 | } | 472 | } |
| 473 | 473 | ||
| 474 | if constexpr (type == Type::CPU) { | 474 | if constexpr (type == Type::CPU) { |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d28930e80..dc2b1f447 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #include <boost/icl/interval_set.hpp> | ||
| 18 | #include <boost/container/small_vector.hpp> | 17 | #include <boost/container/small_vector.hpp> |
| 18 | #include <boost/icl/interval_set.hpp> | ||
| 19 | 19 | ||
| 20 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 21 | #include "common/div_ceil.h" | 21 | #include "common/div_ceil.h" |
| @@ -333,10 +333,7 @@ private: | |||
| 333 | 333 | ||
| 334 | std::vector<BufferId> cached_write_buffer_ids; | 334 | std::vector<BufferId> cached_write_buffer_ids; |
| 335 | 335 | ||
| 336 | // TODO: This data structure is not optimal and it should be reworked | 336 | IntervalSet uncommitted_ranges; |
| 337 | IntervalSet uncommitted_ranges; | ||
| 338 | std::deque<IntervalSet> committed_ranges; | ||
| 339 | std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads; | ||
| 340 | 337 | ||
| 341 | size_t immediate_buffer_capacity = 0; | 338 | size_t immediate_buffer_capacity = 0; |
| 342 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 339 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| @@ -564,74 +561,75 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | |||
| 564 | 561 | ||
| 565 | template <class P> | 562 | template <class P> |
| 566 | void BufferCache<P>::CommitAsyncFlushesHigh() { | 563 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
| 567 | const IntervalSet& intervals = uncommitted_ranges; | 564 | const IntervalSet& intervals = uncommitted_ranges; |
| 568 | if (intervals.empty()) { | 565 | if (intervals.empty()) { |
| 569 | return; | 566 | return; |
| 570 | } | 567 | } |
| 571 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 568 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 572 | 569 | ||
| 573 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | 570 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; |
| 574 | u64 total_size_bytes = 0; | 571 | u64 total_size_bytes = 0; |
| 575 | u64 largest_copy = 0; | 572 | u64 largest_copy = 0; |
| 576 | for (auto& interval : intervals) { | 573 | for (auto& interval : intervals) { |
| 577 | const std::size_t size = interval.upper() - interval.lower(); | 574 | const std::size_t size = interval.upper() - interval.lower(); |
| 578 | const VAddr cpu_addr = interval.lower(); | 575 | const VAddr cpu_addr = interval.lower(); |
| 579 | const VAddr cpu_addr_end = interval.upper(); | 576 | const VAddr cpu_addr_end = interval.upper(); |
| 580 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 577 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 581 | boost::container::small_vector<BufferCopy, 1> copies; | 578 | boost::container::small_vector<BufferCopy, 1> copies; |
| 582 | buffer.ForEachDownloadRange(cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { | 579 | buffer.ForEachDownloadRange( |
| 583 | VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; | 580 | cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { |
| 584 | VAddr cpu_addr_end2 = cpu_addr_base + range_size; | 581 | VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; |
| 585 | const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); | 582 | VAddr cpu_addr_end2 = cpu_addr_base + range_size; |
| 586 | cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); | 583 | const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); |
| 587 | const s64 difference2 = s64(cpu_addr - cpu_addr_base); | 584 | cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); |
| 588 | cpu_addr_base += u64(std::max<s64>(difference2, 0)); | 585 | const s64 difference2 = s64(cpu_addr - cpu_addr_base); |
| 589 | const u64 new_size = cpu_addr_end2 - cpu_addr_base; | 586 | cpu_addr_base += u64(std::max<s64>(difference2, 0)); |
| 590 | const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); | 587 | const u64 new_size = cpu_addr_end2 - cpu_addr_base; |
| 591 | ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size)); | 588 | const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); |
| 592 | downloads.push_back({ | 589 | ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size)); |
| 593 | BufferCopy{ | 590 | downloads.push_back({ |
| 594 | .src_offset = new_offset, | 591 | BufferCopy{ |
| 595 | .dst_offset = total_size_bytes, | 592 | .src_offset = new_offset, |
| 596 | .size = new_size, | 593 | .dst_offset = total_size_bytes, |
| 597 | }, | 594 | .size = new_size, |
| 598 | buffer_id, | 595 | }, |
| 599 | }); | 596 | buffer_id, |
| 600 | total_size_bytes += new_size; | 597 | }); |
| 601 | buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size); | 598 | total_size_bytes += new_size; |
| 602 | largest_copy = std::max(largest_copy, new_size); | 599 | buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size); |
| 603 | }); | 600 | largest_copy = std::max(largest_copy, new_size); |
| 604 | }); | 601 | }); |
| 605 | } | 602 | }); |
| 606 | if (downloads.empty()) { | 603 | } |
| 607 | return; | 604 | if (downloads.empty()) { |
| 608 | } | 605 | return; |
| 609 | if constexpr (USE_MEMORY_MAPS) { | 606 | } |
| 610 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 607 | if constexpr (USE_MEMORY_MAPS) { |
| 611 | for (auto& [copy, buffer_id] : downloads) { | 608 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 612 | // Have in mind the staging buffer offset for the copy | 609 | for (auto& [copy, buffer_id] : downloads) { |
| 613 | copy.dst_offset += download_staging.offset; | 610 | // Have in mind the staging buffer offset for the copy |
| 614 | const std::array copies{copy}; | 611 | copy.dst_offset += download_staging.offset; |
| 615 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); | 612 | const std::array copies{copy}; |
| 616 | } | 613 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); |
| 617 | runtime.Finish(); | 614 | } |
| 618 | for (const auto& [copy, buffer_id] : downloads) { | 615 | runtime.Finish(); |
| 619 | const Buffer& buffer = slot_buffers[buffer_id]; | 616 | for (const auto& [copy, buffer_id] : downloads) { |
| 620 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 617 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 621 | // Undo the modified offset | 618 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 622 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 619 | // Undo the modified offset |
| 623 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 620 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 624 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 621 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; |
| 625 | } | 622 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); |
| 626 | } else { | 623 | } |
| 627 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 624 | } else { |
| 628 | for (const auto& [copy, buffer_id] : downloads) { | 625 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| 629 | Buffer& buffer = slot_buffers[buffer_id]; | 626 | for (const auto& [copy, buffer_id] : downloads) { |
| 630 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 627 | Buffer& buffer = slot_buffers[buffer_id]; |
| 631 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 628 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); |
| 632 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 629 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 633 | } | 630 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); |
| 634 | } | 631 | } |
| 632 | } | ||
| 635 | } | 633 | } |
| 636 | 634 | ||
| 637 | template <class P> | 635 | template <class P> |
| @@ -644,9 +642,7 @@ void BufferCache<P>::CommitAsyncFlushes() { | |||
| 644 | } | 642 | } |
| 645 | 643 | ||
| 646 | template <class P> | 644 | template <class P> |
| 647 | void BufferCache<P>::PopAsyncFlushes() { | 645 | void BufferCache<P>::PopAsyncFlushes() {} |
| 648 | |||
| 649 | } | ||
| 650 | 646 | ||
| 651 | template <class P> | 647 | template <class P> |
| 652 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 648 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| @@ -1055,7 +1051,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
| 1055 | Buffer& buffer = slot_buffers[buffer_id]; | 1051 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1056 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1052 | buffer.MarkRegionAsGpuModified(cpu_addr, size); |
| 1057 | 1053 | ||
| 1058 | const bool is_accuracy_high = Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | 1054 | const bool is_accuracy_high = |
| 1055 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 1059 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | 1056 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
| 1060 | if (!is_async && !is_accuracy_high) { | 1057 | if (!is_async && !is_accuracy_high) { |
| 1061 | return; | 1058 | return; |