summary | refs | log | tree | commit | diff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2021-07-04 18:28:20 +0200
committer: Fernando Sahmkow, 2021-07-09 22:20:36 +0200
commit: 35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da (patch)
tree: 05c463ac9a74c7a63a33ab7315d2a9133b61d6e5 /src/video_core/buffer_cache
parent: Buffer Cache: Fix High Downloads and don't predownload on Extreme. (diff)
download: yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.tar.gz
yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.tar.xz
yuzu-35327dbde348f5b9a1c7f2bbb7e03bc1f361c3da.zip
Videocore: Address Feedback & CLANG Format.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_base.h2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h151
2 files changed, 75 insertions, 78 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index a56b4c3a8..9e39858c8 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -468,7 +468,7 @@ private:
468 468
469 const u64 current_word = state_words[word_index] & bits; 469 const u64 current_word = state_words[word_index] & bits;
470 if (clear) { 470 if (clear) {
471 state_words[word_index] &= ~bits; 471 state_words[word_index] &= ~bits;
472 } 472 }
473 473
474 if constexpr (type == Type::CPU) { 474 if constexpr (type == Type::CPU) {
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d28930e80..dc2b1f447 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -14,8 +14,8 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/icl/interval_set.hpp>
18#include <boost/container/small_vector.hpp> 17#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_set.hpp>
19 19
20#include "common/common_types.h" 20#include "common/common_types.h"
21#include "common/div_ceil.h" 21#include "common/div_ceil.h"
@@ -333,10 +333,7 @@ private:
333 333
334 std::vector<BufferId> cached_write_buffer_ids; 334 std::vector<BufferId> cached_write_buffer_ids;
335 335
336 // TODO: This data structure is not optimal and it should be reworked 336 IntervalSet uncommitted_ranges;
337 IntervalSet uncommitted_ranges;
338 std::deque<IntervalSet> committed_ranges;
339 std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
340 337
341 size_t immediate_buffer_capacity = 0; 338 size_t immediate_buffer_capacity = 0;
342 std::unique_ptr<u8[]> immediate_buffer_alloc; 339 std::unique_ptr<u8[]> immediate_buffer_alloc;
@@ -564,74 +561,75 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
564 561
565template <class P> 562template <class P>
566void BufferCache<P>::CommitAsyncFlushesHigh() { 563void BufferCache<P>::CommitAsyncFlushesHigh() {
567 const IntervalSet& intervals = uncommitted_ranges; 564 const IntervalSet& intervals = uncommitted_ranges;
568 if (intervals.empty()) { 565 if (intervals.empty()) {
569 return; 566 return;
570 } 567 }
571 MICROPROFILE_SCOPE(GPU_DownloadMemory); 568 MICROPROFILE_SCOPE(GPU_DownloadMemory);
572 569
573 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; 570 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
574 u64 total_size_bytes = 0; 571 u64 total_size_bytes = 0;
575 u64 largest_copy = 0; 572 u64 largest_copy = 0;
576 for (auto& interval : intervals) { 573 for (auto& interval : intervals) {
577 const std::size_t size = interval.upper() - interval.lower(); 574 const std::size_t size = interval.upper() - interval.lower();
578 const VAddr cpu_addr = interval.lower(); 575 const VAddr cpu_addr = interval.lower();
579 const VAddr cpu_addr_end = interval.upper(); 576 const VAddr cpu_addr_end = interval.upper();
580 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 577 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
581 boost::container::small_vector<BufferCopy, 1> copies; 578 boost::container::small_vector<BufferCopy, 1> copies;
582 buffer.ForEachDownloadRange(cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { 579 buffer.ForEachDownloadRange(
583 VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; 580 cpu_addr, size, false, [&](u64 range_offset, u64 range_size) {
584 VAddr cpu_addr_end2 = cpu_addr_base + range_size; 581 VAddr cpu_addr_base = buffer.CpuAddr() + range_offset;
585 const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); 582 VAddr cpu_addr_end2 = cpu_addr_base + range_size;
586 cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); 583 const s64 difference = s64(cpu_addr_end2 - cpu_addr_end);
587 const s64 difference2 = s64(cpu_addr - cpu_addr_base); 584 cpu_addr_end2 -= u64(std::max<s64>(difference, 0));
588 cpu_addr_base += u64(std::max<s64>(difference2, 0)); 585 const s64 difference2 = s64(cpu_addr - cpu_addr_base);
589 const u64 new_size = cpu_addr_end2 - cpu_addr_base; 586 cpu_addr_base += u64(std::max<s64>(difference2, 0));
590 const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); 587 const u64 new_size = cpu_addr_end2 - cpu_addr_base;
591 ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size)); 588 const u64 new_offset = cpu_addr_base - buffer.CpuAddr();
592 downloads.push_back({ 589 ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size));
593 BufferCopy{ 590 downloads.push_back({
594 .src_offset = new_offset, 591 BufferCopy{
595 .dst_offset = total_size_bytes, 592 .src_offset = new_offset,
596 .size = new_size, 593 .dst_offset = total_size_bytes,
597 }, 594 .size = new_size,
598 buffer_id, 595 },
599 }); 596 buffer_id,
600 total_size_bytes += new_size; 597 });
601 buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size); 598 total_size_bytes += new_size;
602 largest_copy = std::max(largest_copy, new_size); 599 buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size);
603 }); 600 largest_copy = std::max(largest_copy, new_size);
604 }); 601 });
605 } 602 });
606 if (downloads.empty()) { 603 }
607 return; 604 if (downloads.empty()) {
608 } 605 return;
609 if constexpr (USE_MEMORY_MAPS) { 606 }
610 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 607 if constexpr (USE_MEMORY_MAPS) {
611 for (auto& [copy, buffer_id] : downloads) { 608 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
612 // Have in mind the staging buffer offset for the copy 609 for (auto& [copy, buffer_id] : downloads) {
613 copy.dst_offset += download_staging.offset; 610 // Have in mind the staging buffer offset for the copy
614 const std::array copies{copy}; 611 copy.dst_offset += download_staging.offset;
615 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); 612 const std::array copies{copy};
616 } 613 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
617 runtime.Finish(); 614 }
618 for (const auto& [copy, buffer_id] : downloads) { 615 runtime.Finish();
619 const Buffer& buffer = slot_buffers[buffer_id]; 616 for (const auto& [copy, buffer_id] : downloads) {
620 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 617 const Buffer& buffer = slot_buffers[buffer_id];
621 // Undo the modified offset 618 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
622 const u64 dst_offset = copy.dst_offset - download_staging.offset; 619 // Undo the modified offset
623 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 620 const u64 dst_offset = copy.dst_offset - download_staging.offset;
624 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 621 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
625 } 622 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
626 } else { 623 }
627 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 624 } else {
628 for (const auto& [copy, buffer_id] : downloads) { 625 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
629 Buffer& buffer = slot_buffers[buffer_id]; 626 for (const auto& [copy, buffer_id] : downloads) {
630 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 627 Buffer& buffer = slot_buffers[buffer_id];
631 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 628 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
632 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 629 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
633 } 630 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
634 } 631 }
632 }
635} 633}
636 634
637template <class P> 635template <class P>
@@ -644,9 +642,7 @@ void BufferCache<P>::CommitAsyncFlushes() {
644} 642}
645 643
646template <class P> 644template <class P>
647void BufferCache<P>::PopAsyncFlushes() { 645void BufferCache<P>::PopAsyncFlushes() {}
648
649}
650 646
651template <class P> 647template <class P>
652bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 648bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
@@ -1055,7 +1051,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1055 Buffer& buffer = slot_buffers[buffer_id]; 1051 Buffer& buffer = slot_buffers[buffer_id];
1056 buffer.MarkRegionAsGpuModified(cpu_addr, size); 1052 buffer.MarkRegionAsGpuModified(cpu_addr, size);
1057 1053
1058 const bool is_accuracy_high = Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; 1054 const bool is_accuracy_high =
1055 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
1059 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); 1056 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
1060 if (!is_async && !is_accuracy_high) { 1057 if (!is_async && !is_accuracy_high) {
1061 return; 1058 return;