summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h95
-rw-r--r--src/video_core/fence_manager.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
4 files changed, 71 insertions, 38 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2bd86f215..33fa5305e 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -152,6 +152,8 @@ public:
152 /// Return true when there are uncommitted buffers to be downloaded 152 /// Return true when there are uncommitted buffers to be downloaded
153 [[nodiscard]] bool HasUncommittedFlushes() const noexcept; 153 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
154 154
155 void AccumulateFlushes();
156
155 /// Return true when the caller should wait for async downloads 157 /// Return true when the caller should wait for async downloads
156 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; 158 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
157 159
@@ -334,6 +336,7 @@ private:
334 std::vector<BufferId> cached_write_buffer_ids; 336 std::vector<BufferId> cached_write_buffer_ids;
335 337
336 IntervalSet uncommitted_ranges; 338 IntervalSet uncommitted_ranges;
339 std::deque<IntervalSet> committed_ranges;
337 340
338 size_t immediate_buffer_capacity = 0; 341 size_t immediate_buffer_capacity = 0;
339 std::unique_ptr<u8[]> immediate_buffer_alloc; 342 std::unique_ptr<u8[]> immediate_buffer_alloc;
@@ -551,7 +554,19 @@ void BufferCache<P>::FlushCachedWrites() {
551 554
552template <class P> 555template <class P>
553bool BufferCache<P>::HasUncommittedFlushes() const noexcept { 556bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
554 return !uncommitted_ranges.empty(); 557 return !uncommitted_ranges.empty() || !committed_ranges.empty();
558}
559
560template <class P>
561void BufferCache<P>::AccumulateFlushes() {
562 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
563 uncommitted_ranges.clear();
564 return;
565 }
566 if (uncommitted_ranges.empty()) {
567 return;
568 }
569 committed_ranges.emplace_back(std::move(uncommitted_ranges));
555} 570}
556 571
557template <class P> 572template <class P>
@@ -561,8 +576,8 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
561 576
562template <class P> 577template <class P>
563void BufferCache<P>::CommitAsyncFlushesHigh() { 578void BufferCache<P>::CommitAsyncFlushesHigh() {
564 const IntervalSet& intervals = uncommitted_ranges; 579 AccumulateFlushes();
565 if (intervals.empty()) { 580 if (committed_ranges.empty()) {
566 return; 581 return;
567 } 582 }
568 MICROPROFILE_SCOPE(GPU_DownloadMemory); 583 MICROPROFILE_SCOPE(GPU_DownloadMemory);
@@ -570,43 +585,46 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
570 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; 585 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
571 u64 total_size_bytes = 0; 586 u64 total_size_bytes = 0;
572 u64 largest_copy = 0; 587 u64 largest_copy = 0;
573 for (auto& interval : intervals) { 588 for (const IntervalSet& intervals : committed_ranges) {
574 const std::size_t size = interval.upper() - interval.lower(); 589 for (auto& interval : intervals) {
575 const VAddr cpu_addr = interval.lower(); 590 const std::size_t size = interval.upper() - interval.lower();
576 const VAddr cpu_addr_end = interval.upper(); 591 const VAddr cpu_addr = interval.lower();
577 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 592 const VAddr cpu_addr_end = interval.upper();
578 boost::container::small_vector<BufferCopy, 1> copies; 593 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
579 buffer.ForEachDownloadRange( 594 boost::container::small_vector<BufferCopy, 1> copies;
580 cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { 595 buffer.ForEachDownloadRange(
581 VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; 596 cpu_addr, size, false, [&](u64 range_offset, u64 range_size) {
582 VAddr cpu_addr_end2 = cpu_addr_base + range_size; 597 VAddr cpu_addr_base = buffer.CpuAddr() + range_offset;
583 const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); 598 VAddr cpu_addr_end2 = cpu_addr_base + range_size;
584 cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); 599 const s64 difference = s64(cpu_addr_end2 - cpu_addr_end);
585 const s64 difference2 = s64(cpu_addr - cpu_addr_base); 600 cpu_addr_end2 -= u64(std::max<s64>(difference, 0));
586 cpu_addr_base += u64(std::max<s64>(difference2, 0)); 601 const s64 difference2 = s64(cpu_addr - cpu_addr_base);
587 const u64 new_size = cpu_addr_end2 - cpu_addr_base; 602 cpu_addr_base += u64(std::max<s64>(difference2, 0));
588 const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); 603 const u64 new_size = cpu_addr_end2 - cpu_addr_base;
589 downloads.push_back({ 604 const u64 new_offset = cpu_addr_base - buffer.CpuAddr();
590 BufferCopy{ 605 downloads.push_back({
591 .src_offset = new_offset, 606 BufferCopy{
592 .dst_offset = total_size_bytes, 607 .src_offset = new_offset,
593 .size = new_size, 608 .dst_offset = total_size_bytes,
594 }, 609 .size = new_size,
595 buffer_id, 610 },
611 buffer_id,
612 });
613 total_size_bytes += new_size;
614 largest_copy = std::max(largest_copy, new_size);
615 constexpr u64 align_mask = ~(32ULL - 1);
616 const VAddr align_up_address = (cpu_addr_base + 31) & align_mask;
617 const u64 difference_base = align_up_address - cpu_addr_base;
618 if (difference_base > new_size) {
619 return;
620 }
621 const u64 fixed_size = new_size - difference_base;
622 buffer.UnmarkRegionAsGpuModified(align_up_address, fixed_size & align_mask);
596 }); 623 });
597 total_size_bytes += new_size; 624 });
598 largest_copy = std::max(largest_copy, new_size); 625 }
599 constexpr u64 align_mask = ~(32ULL - 1);
600 const VAddr align_up_address = (cpu_addr_base + 31) & align_mask;
601 const u64 difference_base = align_up_address - cpu_addr_base;
602 if (difference_base > new_size) {
603 return;
604 }
605 const u64 fixed_size = new_size - difference_base;
606 buffer.UnmarkRegionAsGpuModified(align_up_address, fixed_size & align_mask);
607 });
608 });
609 } 626 }
627 committed_ranges.clear();
610 if (downloads.empty()) { 628 if (downloads.empty()) {
611 return; 629 return;
612 } 630 }
@@ -644,6 +662,7 @@ void BufferCache<P>::CommitAsyncFlushes() {
644 CommitAsyncFlushesHigh(); 662 CommitAsyncFlushesHigh();
645 } else { 663 } else {
646 uncommitted_ranges.clear(); 664 uncommitted_ranges.clear();
665 committed_ranges.clear();
647 } 666 }
648} 667}
649 668
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index cd17667cb..b702b2015 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -54,6 +54,12 @@ public:
54 delayed_destruction_ring.Tick(); 54 delayed_destruction_ring.Tick();
55 } 55 }
56 56
57 // Unlike other fences, this one doesn't wait on or signal the GPU; it only takes the buffer-cache lock and accumulates the pending uncommitted flush ranges so download ordering is preserved.
58 void SignalOrdering() {
59 std::scoped_lock lock{buffer_cache.mutex};
60 buffer_cache.AccumulateFlushes();
61 }
62
57 void SignalReference() { 63 void SignalReference() {
58 // Only sync references on High 64 // Only sync references on High
59 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { 65 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 64869abf9..514c2f47d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -657,6 +657,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
657 657
658void RasterizerOpenGL::WaitForIdle() { 658void RasterizerOpenGL::WaitForIdle() {
659 glMemoryBarrier(GL_ALL_BARRIER_BITS); 659 glMemoryBarrier(GL_ALL_BARRIER_BITS);
660 if (!gpu.IsAsync()) {
661 return;
662 }
663 fence_manager.SignalOrdering();
660} 664}
661 665
662void RasterizerOpenGL::FragmentBarrier() { 666void RasterizerOpenGL::FragmentBarrier() {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 687c13009..392de47d0 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -619,6 +619,10 @@ void RasterizerVulkan::WaitForIdle() {
619 cmdbuf.SetEvent(event, flags); 619 cmdbuf.SetEvent(event, flags);
620 cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); 620 cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
621 }); 621 });
622 if (!gpu.IsAsync()) {
623 return;
624 }
625 fence_manager.SignalOrdering();
622} 626}
623 627
624void RasterizerVulkan::FragmentBarrier() { 628void RasterizerVulkan::FragmentBarrier() {