diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 95 | ||||
| -rw-r--r-- | src/video_core/fence_manager.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 |
4 files changed, 71 insertions, 38 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2bd86f215..33fa5305e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -152,6 +152,8 @@ public: | |||
| 152 | /// Return true when there are uncommitted buffers to be downloaded | 152 | /// Return true when there are uncommitted buffers to be downloaded |
| 153 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | 153 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 154 | 154 | ||
| 155 | void AccumulateFlushes(); | ||
| 156 | |||
| 155 | /// Return true when the caller should wait for async downloads | 157 | /// Return true when the caller should wait for async downloads |
| 156 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | 158 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 157 | 159 | ||
| @@ -334,6 +336,7 @@ private: | |||
| 334 | std::vector<BufferId> cached_write_buffer_ids; | 336 | std::vector<BufferId> cached_write_buffer_ids; |
| 335 | 337 | ||
| 336 | IntervalSet uncommitted_ranges; | 338 | IntervalSet uncommitted_ranges; |
| 339 | std::deque<IntervalSet> committed_ranges; | ||
| 337 | 340 | ||
| 338 | size_t immediate_buffer_capacity = 0; | 341 | size_t immediate_buffer_capacity = 0; |
| 339 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 342 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| @@ -551,7 +554,19 @@ void BufferCache<P>::FlushCachedWrites() { | |||
| 551 | 554 | ||
| 552 | template <class P> | 555 | template <class P> |
| 553 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | 556 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { |
| 554 | return !uncommitted_ranges.empty(); | 557 | return !uncommitted_ranges.empty() || !committed_ranges.empty(); |
| 558 | } | ||
| 559 | |||
| 560 | template <class P> | ||
| 561 | void BufferCache<P>::AccumulateFlushes() { | ||
| 562 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 563 | uncommitted_ranges.clear(); | ||
| 564 | return; | ||
| 565 | } | ||
| 566 | if (uncommitted_ranges.empty()) { | ||
| 567 | return; | ||
| 568 | } | ||
| 569 | committed_ranges.emplace_back(std::move(uncommitted_ranges)); | ||
| 555 | } | 570 | } |
| 556 | 571 | ||
| 557 | template <class P> | 572 | template <class P> |
| @@ -561,8 +576,8 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | |||
| 561 | 576 | ||
| 562 | template <class P> | 577 | template <class P> |
| 563 | void BufferCache<P>::CommitAsyncFlushesHigh() { | 578 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
| 564 | const IntervalSet& intervals = uncommitted_ranges; | 579 | AccumulateFlushes(); |
| 565 | if (intervals.empty()) { | 580 | if (committed_ranges.empty()) { |
| 566 | return; | 581 | return; |
| 567 | } | 582 | } |
| 568 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 583 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| @@ -570,43 +585,46 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 570 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | 585 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; |
| 571 | u64 total_size_bytes = 0; | 586 | u64 total_size_bytes = 0; |
| 572 | u64 largest_copy = 0; | 587 | u64 largest_copy = 0; |
| 573 | for (auto& interval : intervals) { | 588 | for (const IntervalSet& intervals : committed_ranges) { |
| 574 | const std::size_t size = interval.upper() - interval.lower(); | 589 | for (auto& interval : intervals) { |
| 575 | const VAddr cpu_addr = interval.lower(); | 590 | const std::size_t size = interval.upper() - interval.lower(); |
| 576 | const VAddr cpu_addr_end = interval.upper(); | 591 | const VAddr cpu_addr = interval.lower(); |
| 577 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 592 | const VAddr cpu_addr_end = interval.upper(); |
| 578 | boost::container::small_vector<BufferCopy, 1> copies; | 593 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 579 | buffer.ForEachDownloadRange( | 594 | boost::container::small_vector<BufferCopy, 1> copies; |
| 580 | cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { | 595 | buffer.ForEachDownloadRange( |
| 581 | VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; | 596 | cpu_addr, size, false, [&](u64 range_offset, u64 range_size) { |
| 582 | VAddr cpu_addr_end2 = cpu_addr_base + range_size; | 597 | VAddr cpu_addr_base = buffer.CpuAddr() + range_offset; |
| 583 | const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); | 598 | VAddr cpu_addr_end2 = cpu_addr_base + range_size; |
| 584 | cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); | 599 | const s64 difference = s64(cpu_addr_end2 - cpu_addr_end); |
| 585 | const s64 difference2 = s64(cpu_addr - cpu_addr_base); | 600 | cpu_addr_end2 -= u64(std::max<s64>(difference, 0)); |
| 586 | cpu_addr_base += u64(std::max<s64>(difference2, 0)); | 601 | const s64 difference2 = s64(cpu_addr - cpu_addr_base); |
| 587 | const u64 new_size = cpu_addr_end2 - cpu_addr_base; | 602 | cpu_addr_base += u64(std::max<s64>(difference2, 0)); |
| 588 | const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); | 603 | const u64 new_size = cpu_addr_end2 - cpu_addr_base; |
| 589 | downloads.push_back({ | 604 | const u64 new_offset = cpu_addr_base - buffer.CpuAddr(); |
| 590 | BufferCopy{ | 605 | downloads.push_back({ |
| 591 | .src_offset = new_offset, | 606 | BufferCopy{ |
| 592 | .dst_offset = total_size_bytes, | 607 | .src_offset = new_offset, |
| 593 | .size = new_size, | 608 | .dst_offset = total_size_bytes, |
| 594 | }, | 609 | .size = new_size, |
| 595 | buffer_id, | 610 | }, |
| 611 | buffer_id, | ||
| 612 | }); | ||
| 613 | total_size_bytes += new_size; | ||
| 614 | largest_copy = std::max(largest_copy, new_size); | ||
| 615 | constexpr u64 align_mask = ~(32ULL - 1); | ||
| 616 | const VAddr align_up_address = (cpu_addr_base + 31) & align_mask; | ||
| 617 | const u64 difference_base = align_up_address - cpu_addr_base; | ||
| 618 | if (difference_base > new_size) { | ||
| 619 | return; | ||
| 620 | } | ||
| 621 | const u64 fixed_size = new_size - difference_base; | ||
| 622 | buffer.UnmarkRegionAsGpuModified(align_up_address, fixed_size & align_mask); | ||
| 596 | }); | 623 | }); |
| 597 | total_size_bytes += new_size; | 624 | }); |
| 598 | largest_copy = std::max(largest_copy, new_size); | 625 | } |
| 599 | constexpr u64 align_mask = ~(32ULL - 1); | ||
| 600 | const VAddr align_up_address = (cpu_addr_base + 31) & align_mask; | ||
| 601 | const u64 difference_base = align_up_address - cpu_addr_base; | ||
| 602 | if (difference_base > new_size) { | ||
| 603 | return; | ||
| 604 | } | ||
| 605 | const u64 fixed_size = new_size - difference_base; | ||
| 606 | buffer.UnmarkRegionAsGpuModified(align_up_address, fixed_size & align_mask); | ||
| 607 | }); | ||
| 608 | }); | ||
| 609 | } | 626 | } |
| 627 | committed_ranges.clear(); | ||
| 610 | if (downloads.empty()) { | 628 | if (downloads.empty()) { |
| 611 | return; | 629 | return; |
| 612 | } | 630 | } |
| @@ -644,6 +662,7 @@ void BufferCache<P>::CommitAsyncFlushes() { | |||
| 644 | CommitAsyncFlushesHigh(); | 662 | CommitAsyncFlushesHigh(); |
| 645 | } else { | 663 | } else { |
| 646 | uncommitted_ranges.clear(); | 664 | uncommitted_ranges.clear(); |
| 665 | committed_ranges.clear(); | ||
| 647 | } | 666 | } |
| 648 | } | 667 | } |
| 649 | 668 | ||
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index cd17667cb..b702b2015 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -54,6 +54,12 @@ public: | |||
| 54 | delayed_destruction_ring.Tick(); | 54 | delayed_destruction_ring.Tick(); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | // Unlike other fences, this one doesn't commit or wait on anything; it only accumulates the buffer cache's pending flushes under its lock | ||
| 58 | void SignalOrdering() { | ||
| 59 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 60 | buffer_cache.AccumulateFlushes(); | ||
| 61 | } | ||
| 62 | |||
| 57 | void SignalReference() { | 63 | void SignalReference() { |
| 58 | // Only sync references on High | 64 | // Only sync references on High |
| 59 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | 65 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 64869abf9..514c2f47d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -657,6 +657,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 657 | 657 | ||
| 658 | void RasterizerOpenGL::WaitForIdle() { | 658 | void RasterizerOpenGL::WaitForIdle() { |
| 659 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | 659 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
| 660 | if (!gpu.IsAsync()) { | ||
| 661 | return; | ||
| 662 | } | ||
| 663 | fence_manager.SignalOrdering(); | ||
| 660 | } | 664 | } |
| 661 | 665 | ||
| 662 | void RasterizerOpenGL::FragmentBarrier() { | 666 | void RasterizerOpenGL::FragmentBarrier() { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 687c13009..392de47d0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -619,6 +619,10 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 619 | cmdbuf.SetEvent(event, flags); | 619 | cmdbuf.SetEvent(event, flags); |
| 620 | cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); | 620 | cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {}); |
| 621 | }); | 621 | }); |
| 622 | if (!gpu.IsAsync()) { | ||
| 623 | return; | ||
| 624 | } | ||
| 625 | fence_manager.SignalOrdering(); | ||
| 622 | } | 626 | } |
| 623 | 627 | ||
| 624 | void RasterizerVulkan::FragmentBarrier() { | 628 | void RasterizerVulkan::FragmentBarrier() { |