summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h113
1 files changed, 18 insertions, 95 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 98343628c..f86edaa3e 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -170,11 +170,6 @@ public:
170 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, 170 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
171 bool is_written, bool is_image); 171 bool is_written, bool is_image);
172 172
173 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
174 bool synchronize = true,
175 bool mark_as_written = false,
176 bool discard_downloads = false);
177
178 void FlushCachedWrites(); 173 void FlushCachedWrites();
179 174
180 /// Return true when there are uncommitted buffers to be downloaded 175 /// Return true when there are uncommitted buffers to be downloaded
@@ -354,8 +349,6 @@ private:
354 349
355 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); 350 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
356 351
357 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
358
359 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 352 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
360 std::span<BufferCopy> copies); 353 std::span<BufferCopy> copies);
361 354
@@ -442,7 +435,6 @@ private:
442 435
443 std::vector<BufferId> cached_write_buffer_ids; 436 std::vector<BufferId> cached_write_buffer_ids;
444 437
445 IntervalSet discarded_ranges;
446 IntervalSet uncommitted_ranges; 438 IntervalSet uncommitted_ranges;
447 IntervalSet common_ranges; 439 IntervalSet common_ranges;
448 std::deque<IntervalSet> committed_ranges; 440 std::deque<IntervalSet> committed_ranges;
@@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
600 }}; 592 }};
601 593
602 boost::container::small_vector<IntervalType, 4> tmp_intervals; 594 boost::container::small_vector<IntervalType, 4> tmp_intervals;
603 const bool is_high_accuracy =
604 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
605 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 595 auto mirror = [&](VAddr base_address, VAddr base_address_end) {
606 const u64 size = base_address_end - base_address; 596 const u64 size = base_address_end - base_address;
607 const VAddr diff = base_address - *cpu_src_address; 597 const VAddr diff = base_address - *cpu_src_address;
608 const VAddr new_base_address = *cpu_dest_address + diff; 598 const VAddr new_base_address = *cpu_dest_address + diff;
609 const IntervalType add_interval{new_base_address, new_base_address + size}; 599 const IntervalType add_interval{new_base_address, new_base_address + size};
600 uncommitted_ranges.add(add_interval);
610 tmp_intervals.push_back(add_interval); 601 tmp_intervals.push_back(add_interval);
611 if (is_high_accuracy) {
612 uncommitted_ranges.add(add_interval);
613 }
614 }; 602 };
615 ForEachWrittenRange(*cpu_src_address, amount, mirror); 603 ForEachWrittenRange(*cpu_src_address, amount, mirror);
616 // This subtraction in this order is important for overlapping copies. 604 // This subtraction in this order is important for overlapping copies.
@@ -822,32 +810,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
822} 810}
823 811
824template <class P> 812template <class P>
825std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
826 bool synchronize,
827 bool mark_as_written,
828 bool discard_downloads) {
829 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
830 if (!cpu_addr) {
831 return {&slot_buffers[NULL_BUFFER_ID], 0};
832 }
833 const BufferId buffer_id = FindBuffer(*cpu_addr, size);
834 Buffer& buffer = slot_buffers[buffer_id];
835 if (synchronize) {
836 // SynchronizeBuffer(buffer, *cpu_addr, size);
837 SynchronizeBufferNoModified(buffer, *cpu_addr, size);
838 }
839 if (mark_as_written) {
840 MarkWrittenBuffer(buffer_id, *cpu_addr, size);
841 }
842 if (discard_downloads) {
843 IntervalType interval{*cpu_addr, size};
844 ClearDownload(interval);
845 discarded_ranges.subtract(interval);
846 }
847 return {&buffer, buffer.Offset(*cpu_addr)};
848}
849
850template <class P>
851void BufferCache<P>::FlushCachedWrites() { 813void BufferCache<P>::FlushCachedWrites() {
852 for (const BufferId buffer_id : cached_write_buffer_ids) { 814 for (const BufferId buffer_id : cached_write_buffer_ids) {
853 slot_buffers[buffer_id].FlushCachedWrites(); 815 slot_buffers[buffer_id].FlushCachedWrites();
@@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
862 824
863template <class P> 825template <class P>
864void BufferCache<P>::AccumulateFlushes() { 826void BufferCache<P>::AccumulateFlushes() {
827 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
828 uncommitted_ranges.clear();
829 return;
830 }
865 if (uncommitted_ranges.empty()) { 831 if (uncommitted_ranges.empty()) {
866 return; 832 return;
867 } 833 }
@@ -877,14 +843,12 @@ template <class P>
877void BufferCache<P>::CommitAsyncFlushesHigh() { 843void BufferCache<P>::CommitAsyncFlushesHigh() {
878 AccumulateFlushes(); 844 AccumulateFlushes();
879 845
880 for (const auto& interval : discarded_ranges) {
881 common_ranges.subtract(interval);
882 }
883
884 if (committed_ranges.empty()) { 846 if (committed_ranges.empty()) {
885 return; 847 return;
886 } 848 }
887 MICROPROFILE_SCOPE(GPU_DownloadMemory); 849 MICROPROFILE_SCOPE(GPU_DownloadMemory);
850 const bool is_accuracy_normal =
851 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
888 852
889 auto it = committed_ranges.begin(); 853 auto it = committed_ranges.begin();
890 while (it != committed_ranges.end()) { 854 while (it != committed_ranges.end()) {
@@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
909 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 873 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
910 buffer.ForEachDownloadRangeAndClear( 874 buffer.ForEachDownloadRangeAndClear(
911 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 875 cpu_addr, size, [&](u64 range_offset, u64 range_size) {
876 if (is_accuracy_normal) {
877 return;
878 }
912 const VAddr buffer_addr = buffer.CpuAddr(); 879 const VAddr buffer_addr = buffer.CpuAddr();
913 const auto add_download = [&](VAddr start, VAddr end) { 880 const auto add_download = [&](VAddr start, VAddr end) {
914 const u64 new_offset = start - buffer_addr; 881 const u64 new_offset = start - buffer_addr;
@@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
973 940
974template <class P> 941template <class P>
975void BufferCache<P>::CommitAsyncFlushes() { 942void BufferCache<P>::CommitAsyncFlushes() {
976 CommitAsyncFlushesHigh(); 943 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
944 CommitAsyncFlushesHigh();
945 } else {
946 uncommitted_ranges.clear();
947 committed_ranges.clear();
948 }
977} 949}
978 950
979template <class P> 951template <class P>
@@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
1353 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 1325 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1354 const auto& index_array = draw_state.index_buffer; 1326 const auto& index_array = draw_state.index_buffer;
1355 auto& flags = maxwell3d->dirty.flags; 1327 auto& flags = maxwell3d->dirty.flags;
1356 if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { 1328 if (!flags[Dirty::IndexBuffer]) {
1357 return; 1329 return;
1358 } 1330 }
1359 flags[Dirty::IndexBuffer] = false; 1331 flags[Dirty::IndexBuffer] = false;
@@ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1574 if (!is_async) { 1546 if (!is_async) {
1575 return; 1547 return;
1576 } 1548 }
1577 const bool is_high_accuracy = 1549 uncommitted_ranges.add(base_interval);
1578 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
1579 if (is_high_accuracy) {
1580 uncommitted_ranges.add(base_interval);
1581 }
1582} 1550}
1583 1551
1584template <class P> 1552template <class P>
@@ -1772,51 +1740,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1772} 1740}
1773 1741
1774template <class P> 1742template <class P>
1775bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1776 boost::container::small_vector<BufferCopy, 4> copies;
1777 u64 total_size_bytes = 0;
1778 u64 largest_copy = 0;
1779 IntervalSet found_sets{};
1780 auto make_copies = [&] {
1781 for (auto& interval : found_sets) {
1782 const std::size_t sub_size = interval.upper() - interval.lower();
1783 const VAddr cpu_addr = interval.lower();
1784 copies.push_back(BufferCopy{
1785 .src_offset = total_size_bytes,
1786 .dst_offset = cpu_addr - buffer.CpuAddr(),
1787 .size = sub_size,
1788 });
1789 total_size_bytes += sub_size;
1790 largest_copy = std::max(largest_copy, sub_size);
1791 }
1792 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1793 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1794 };
1795 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1796 const VAddr base_adr = buffer.CpuAddr() + range_offset;
1797 const VAddr end_adr = base_adr + range_size;
1798 const IntervalType add_interval{base_adr, end_adr};
1799 found_sets.add(add_interval);
1800 });
1801 if (found_sets.empty()) {
1802 return true;
1803 }
1804 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1805 auto it = common_ranges.lower_bound(search_interval);
1806 auto it_end = common_ranges.upper_bound(search_interval);
1807 if (it == common_ranges.end()) {
1808 make_copies();
1809 return false;
1810 }
1811 while (it != it_end) {
1812 found_sets.subtract(*it);
1813 it++;
1814 }
1815 make_copies();
1816 return false;
1817}
1818
1819template <class P>
1820void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1743void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1821 std::span<BufferCopy> copies) { 1744 std::span<BufferCopy> copies) {
1822 if constexpr (USE_MEMORY_MAPS) { 1745 if constexpr (USE_MEMORY_MAPS) {