diff options
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 113 |
1 files changed, 18 insertions, 95 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 98343628c..f86edaa3e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -170,11 +170,6 @@ public: | |||
| 170 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | 170 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |
| 171 | bool is_written, bool is_image); | 171 | bool is_written, bool is_image); |
| 172 | 172 | ||
| 173 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 174 | bool synchronize = true, | ||
| 175 | bool mark_as_written = false, | ||
| 176 | bool discard_downloads = false); | ||
| 177 | |||
| 178 | void FlushCachedWrites(); | 173 | void FlushCachedWrites(); |
| 179 | 174 | ||
| 180 | /// Return true when there are uncommitted buffers to be downloaded | 175 | /// Return true when there are uncommitted buffers to be downloaded |
| @@ -354,8 +349,6 @@ private: | |||
| 354 | 349 | ||
| 355 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 350 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 356 | 351 | ||
| 357 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 358 | |||
| 359 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 352 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 360 | std::span<BufferCopy> copies); | 353 | std::span<BufferCopy> copies); |
| 361 | 354 | ||
| @@ -442,7 +435,6 @@ private: | |||
| 442 | 435 | ||
| 443 | std::vector<BufferId> cached_write_buffer_ids; | 436 | std::vector<BufferId> cached_write_buffer_ids; |
| 444 | 437 | ||
| 445 | IntervalSet discarded_ranges; | ||
| 446 | IntervalSet uncommitted_ranges; | 438 | IntervalSet uncommitted_ranges; |
| 447 | IntervalSet common_ranges; | 439 | IntervalSet common_ranges; |
| 448 | std::deque<IntervalSet> committed_ranges; | 440 | std::deque<IntervalSet> committed_ranges; |
| @@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 600 | }}; | 592 | }}; |
| 601 | 593 | ||
| 602 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | 594 | boost::container::small_vector<IntervalType, 4> tmp_intervals; |
| 603 | const bool is_high_accuracy = | ||
| 604 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 605 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | 595 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { |
| 606 | const u64 size = base_address_end - base_address; | 596 | const u64 size = base_address_end - base_address; |
| 607 | const VAddr diff = base_address - *cpu_src_address; | 597 | const VAddr diff = base_address - *cpu_src_address; |
| 608 | const VAddr new_base_address = *cpu_dest_address + diff; | 598 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 609 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 599 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 600 | uncommitted_ranges.add(add_interval); | ||
| 610 | tmp_intervals.push_back(add_interval); | 601 | tmp_intervals.push_back(add_interval); |
| 611 | if (is_high_accuracy) { | ||
| 612 | uncommitted_ranges.add(add_interval); | ||
| 613 | } | ||
| 614 | }; | 602 | }; |
| 615 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 603 | ForEachWrittenRange(*cpu_src_address, amount, mirror); |
| 616 | // This subtraction in this order is important for overlapping copies. | 604 | // This subtraction in this order is important for overlapping copies. |
| @@ -822,32 +810,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 822 | } | 810 | } |
| 823 | 811 | ||
| 824 | template <class P> | 812 | template <class P> |
| 825 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 826 | bool synchronize, | ||
| 827 | bool mark_as_written, | ||
| 828 | bool discard_downloads) { | ||
| 829 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||
| 830 | if (!cpu_addr) { | ||
| 831 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||
| 832 | } | ||
| 833 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||
| 834 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 835 | if (synchronize) { | ||
| 836 | // SynchronizeBuffer(buffer, *cpu_addr, size); | ||
| 837 | SynchronizeBufferNoModified(buffer, *cpu_addr, size); | ||
| 838 | } | ||
| 839 | if (mark_as_written) { | ||
| 840 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||
| 841 | } | ||
| 842 | if (discard_downloads) { | ||
| 843 | IntervalType interval{*cpu_addr, size}; | ||
| 844 | ClearDownload(interval); | ||
| 845 | discarded_ranges.subtract(interval); | ||
| 846 | } | ||
| 847 | return {&buffer, buffer.Offset(*cpu_addr)}; | ||
| 848 | } | ||
| 849 | |||
| 850 | template <class P> | ||
| 851 | void BufferCache<P>::FlushCachedWrites() { | 813 | void BufferCache<P>::FlushCachedWrites() { |
| 852 | for (const BufferId buffer_id : cached_write_buffer_ids) { | 814 | for (const BufferId buffer_id : cached_write_buffer_ids) { |
| 853 | slot_buffers[buffer_id].FlushCachedWrites(); | 815 | slot_buffers[buffer_id].FlushCachedWrites(); |
| @@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 862 | 824 | ||
| 863 | template <class P> | 825 | template <class P> |
| 864 | void BufferCache<P>::AccumulateFlushes() { | 826 | void BufferCache<P>::AccumulateFlushes() { |
| 827 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 828 | uncommitted_ranges.clear(); | ||
| 829 | return; | ||
| 830 | } | ||
| 865 | if (uncommitted_ranges.empty()) { | 831 | if (uncommitted_ranges.empty()) { |
| 866 | return; | 832 | return; |
| 867 | } | 833 | } |
| @@ -877,14 +843,12 @@ template <class P> | |||
| 877 | void BufferCache<P>::CommitAsyncFlushesHigh() { | 843 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
| 878 | AccumulateFlushes(); | 844 | AccumulateFlushes(); |
| 879 | 845 | ||
| 880 | for (const auto& interval : discarded_ranges) { | ||
| 881 | common_ranges.subtract(interval); | ||
| 882 | } | ||
| 883 | |||
| 884 | if (committed_ranges.empty()) { | 846 | if (committed_ranges.empty()) { |
| 885 | return; | 847 | return; |
| 886 | } | 848 | } |
| 887 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 849 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 850 | const bool is_accuracy_normal = | ||
| 851 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 888 | 852 | ||
| 889 | auto it = committed_ranges.begin(); | 853 | auto it = committed_ranges.begin(); |
| 890 | while (it != committed_ranges.end()) { | 854 | while (it != committed_ranges.end()) { |
| @@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 909 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 873 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 910 | buffer.ForEachDownloadRangeAndClear( | 874 | buffer.ForEachDownloadRangeAndClear( |
| 911 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 875 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
| 876 | if (is_accuracy_normal) { | ||
| 877 | return; | ||
| 878 | } | ||
| 912 | const VAddr buffer_addr = buffer.CpuAddr(); | 879 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 913 | const auto add_download = [&](VAddr start, VAddr end) { | 880 | const auto add_download = [&](VAddr start, VAddr end) { |
| 914 | const u64 new_offset = start - buffer_addr; | 881 | const u64 new_offset = start - buffer_addr; |
| @@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 973 | 940 | ||
| 974 | template <class P> | 941 | template <class P> |
| 975 | void BufferCache<P>::CommitAsyncFlushes() { | 942 | void BufferCache<P>::CommitAsyncFlushes() { |
| 976 | CommitAsyncFlushesHigh(); | 943 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { |
| 944 | CommitAsyncFlushesHigh(); | ||
| 945 | } else { | ||
| 946 | uncommitted_ranges.clear(); | ||
| 947 | committed_ranges.clear(); | ||
| 948 | } | ||
| 977 | } | 949 | } |
| 978 | 950 | ||
| 979 | template <class P> | 951 | template <class P> |
| @@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1353 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 1325 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1354 | const auto& index_array = draw_state.index_buffer; | 1326 | const auto& index_array = draw_state.index_buffer; |
| 1355 | auto& flags = maxwell3d->dirty.flags; | 1327 | auto& flags = maxwell3d->dirty.flags; |
| 1356 | if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { | 1328 | if (!flags[Dirty::IndexBuffer]) { |
| 1357 | return; | 1329 | return; |
| 1358 | } | 1330 | } |
| 1359 | flags[Dirty::IndexBuffer] = false; | 1331 | flags[Dirty::IndexBuffer] = false; |
| @@ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
| 1574 | if (!is_async) { | 1546 | if (!is_async) { |
| 1575 | return; | 1547 | return; |
| 1576 | } | 1548 | } |
| 1577 | const bool is_high_accuracy = | 1549 | uncommitted_ranges.add(base_interval); |
| 1578 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 1579 | if (is_high_accuracy) { | ||
| 1580 | uncommitted_ranges.add(base_interval); | ||
| 1581 | } | ||
| 1582 | } | 1550 | } |
| 1583 | 1551 | ||
| 1584 | template <class P> | 1552 | template <class P> |
| @@ -1772,51 +1740,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1772 | } | 1740 | } |
| 1773 | 1741 | ||
| 1774 | template <class P> | 1742 | template <class P> |
| 1775 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1776 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1777 | u64 total_size_bytes = 0; | ||
| 1778 | u64 largest_copy = 0; | ||
| 1779 | IntervalSet found_sets{}; | ||
| 1780 | auto make_copies = [&] { | ||
| 1781 | for (auto& interval : found_sets) { | ||
| 1782 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1783 | const VAddr cpu_addr = interval.lower(); | ||
| 1784 | copies.push_back(BufferCopy{ | ||
| 1785 | .src_offset = total_size_bytes, | ||
| 1786 | .dst_offset = cpu_addr - buffer.CpuAddr(), | ||
| 1787 | .size = sub_size, | ||
| 1788 | }); | ||
| 1789 | total_size_bytes += sub_size; | ||
| 1790 | largest_copy = std::max(largest_copy, sub_size); | ||
| 1791 | } | ||
| 1792 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1793 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1794 | }; | ||
| 1795 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1796 | const VAddr base_adr = buffer.CpuAddr() + range_offset; | ||
| 1797 | const VAddr end_adr = base_adr + range_size; | ||
| 1798 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1799 | found_sets.add(add_interval); | ||
| 1800 | }); | ||
| 1801 | if (found_sets.empty()) { | ||
| 1802 | return true; | ||
| 1803 | } | ||
| 1804 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1805 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1806 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1807 | if (it == common_ranges.end()) { | ||
| 1808 | make_copies(); | ||
| 1809 | return false; | ||
| 1810 | } | ||
| 1811 | while (it != it_end) { | ||
| 1812 | found_sets.subtract(*it); | ||
| 1813 | it++; | ||
| 1814 | } | ||
| 1815 | make_copies(); | ||
| 1816 | return false; | ||
| 1817 | } | ||
| 1818 | |||
| 1819 | template <class P> | ||
| 1820 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1743 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1821 | std::span<BufferCopy> copies) { | 1744 | std::span<BufferCopy> copies) { |
| 1822 | if constexpr (USE_MEMORY_MAPS) { | 1745 | if constexpr (USE_MEMORY_MAPS) { |