summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 69
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h 25
-rw-r--r-- src/video_core/buffer_cache/word_manager.h 6
3 files changed, 61 insertions, 39 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 43fe5b080..faa48a678 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -22,6 +22,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
22 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
23 common_ranges.clear(); 23 common_ranges.clear();
24 24
25 active_async_buffers = IMPLEMENTS_ASYNC_DOWNLOADS && !Settings::IsGPULevelHigh();
26
25 if (!runtime.CanReportMemoryUsage()) { 27 if (!runtime.CanReportMemoryUsage()) {
26 minimum_memory = DEFAULT_EXPECTED_MEMORY; 28 minimum_memory = DEFAULT_EXPECTED_MEMORY;
27 critical_memory = DEFAULT_CRITICAL_MEMORY; 29 critical_memory = DEFAULT_CRITICAL_MEMORY;
@@ -72,6 +74,8 @@ void BufferCache<P>::TickFrame() {
72 uniform_cache_hits[0] = 0; 74 uniform_cache_hits[0] = 0;
73 uniform_cache_shots[0] = 0; 75 uniform_cache_shots[0] = 0;
74 76
77 active_async_buffers = IMPLEMENTS_ASYNC_DOWNLOADS && !Settings::IsGPULevelHigh();
78
75 const bool skip_preferred = hits * 256 < shots * 251; 79 const bool skip_preferred = hits * 256 < shots * 251;
76 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 80 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
77 81
@@ -130,7 +134,7 @@ void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
130 134
131template <class P> 135template <class P>
132void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { 136void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
133 async_downloads -= std::make_pair(subtract_interval, std::numeric_limits<int>::max()); 137 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
134 uncommitted_ranges.subtract(subtract_interval); 138 uncommitted_ranges.subtract(subtract_interval);
135 pending_ranges.subtract(subtract_interval); 139 pending_ranges.subtract(subtract_interval);
136 for (auto& interval_set : committed_ranges) { 140 for (auto& interval_set : committed_ranges) {
@@ -173,18 +177,14 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
173 }}; 177 }};
174 178
175 boost::container::small_vector<IntervalType, 4> tmp_intervals; 179 boost::container::small_vector<IntervalType, 4> tmp_intervals;
176 const bool is_high_accuracy =
177 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
178 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 180 auto mirror = [&](VAddr base_address, VAddr base_address_end) {
179 const u64 size = base_address_end - base_address; 181 const u64 size = base_address_end - base_address;
180 const VAddr diff = base_address - *cpu_src_address; 182 const VAddr diff = base_address - *cpu_src_address;
181 const VAddr new_base_address = *cpu_dest_address + diff; 183 const VAddr new_base_address = *cpu_dest_address + diff;
182 const IntervalType add_interval{new_base_address, new_base_address + size}; 184 const IntervalType add_interval{new_base_address, new_base_address + size};
183 tmp_intervals.push_back(add_interval); 185 tmp_intervals.push_back(add_interval);
184 if (is_high_accuracy) { 186 uncommitted_ranges.add(add_interval);
185 uncommitted_ranges.add(add_interval); 187 pending_ranges.add(add_interval);
186 pending_ranges.add(add_interval);
187 }
188 }; 188 };
189 ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); 189 ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
190 // This subtraction in this order is important for overlapping copies. 190 // This subtraction in this order is important for overlapping copies.
@@ -468,7 +468,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
468 AccumulateFlushes(); 468 AccumulateFlushes();
469 469
470 if (committed_ranges.empty()) { 470 if (committed_ranges.empty()) {
471 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 471 if (active_async_buffers) {
472 async_buffers.emplace_back(std::optional<Async_Buffer>{}); 472 async_buffers.emplace_back(std::optional<Async_Buffer>{});
473 } 473 }
474 return; 474 return;
@@ -529,31 +529,33 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
529 } 529 }
530 committed_ranges.clear(); 530 committed_ranges.clear();
531 if (downloads.empty()) { 531 if (downloads.empty()) {
532 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 532 if (active_async_buffers) {
533 async_buffers.emplace_back(std::optional<Async_Buffer>{}); 533 async_buffers.emplace_back(std::optional<Async_Buffer>{});
534 } 534 }
535 return; 535 return;
536 } 536 }
537 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 537 if (active_async_buffers) {
538 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); 538 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
539 boost::container::small_vector<BufferCopy, 4> normalized_copies; 539 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
540 IntervalSet new_async_range{}; 540 boost::container::small_vector<BufferCopy, 4> normalized_copies;
541 runtime.PreCopyBarrier(); 541 IntervalSet new_async_range{};
542 for (auto& [copy, buffer_id] : downloads) { 542 runtime.PreCopyBarrier();
543 copy.dst_offset += download_staging.offset; 543 for (auto& [copy, buffer_id] : downloads) {
544 const std::array copies{copy}; 544 copy.dst_offset += download_staging.offset;
545 BufferCopy second_copy{copy}; 545 const std::array copies{copy};
546 Buffer& buffer = slot_buffers[buffer_id]; 546 BufferCopy second_copy{copy};
547 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; 547 Buffer& buffer = slot_buffers[buffer_id];
548 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); 548 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
549 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; 549 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
550 async_downloads += std::make_pair(base_interval, 1); 550 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
551 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); 551 async_downloads += std::make_pair(base_interval, 1);
552 normalized_copies.push_back(second_copy); 552 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
553 normalized_copies.push_back(second_copy);
554 }
555 runtime.PostCopyBarrier();
556 pending_downloads.emplace_back(std::move(normalized_copies));
557 async_buffers.emplace_back(download_staging);
553 } 558 }
554 runtime.PostCopyBarrier();
555 pending_downloads.emplace_back(std::move(normalized_copies));
556 async_buffers.emplace_back(download_staging);
557 } else { 559 } else {
558 if constexpr (USE_MEMORY_MAPS) { 560 if constexpr (USE_MEMORY_MAPS) {
559 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 561 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
@@ -624,7 +626,8 @@ void BufferCache<P>::PopAsyncBuffers() {
624 common_ranges.subtract(base_interval); 626 common_ranges.subtract(base_interval);
625 } 627 }
626 }); 628 });
627 async_downloads -= std::make_pair(IntervalType(cpu_addr, cpu_addr + copy.size), 1); 629 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
630 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
628 } 631 }
629 runtime.FreeDeferredStagingBuffer(*async_buffer); 632 runtime.FreeDeferredStagingBuffer(*async_buffer);
630 async_buffers.pop_front(); 633 async_buffers.pop_front();
@@ -1198,10 +1201,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1198 1201
1199 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1202 const IntervalType base_interval{cpu_addr, cpu_addr + size};
1200 common_ranges.add(base_interval); 1203 common_ranges.add(base_interval);
1201 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { 1204 uncommitted_ranges.add(base_interval);
1202 uncommitted_ranges.add(base_interval); 1205 pending_ranges.add(base_interval);
1203 pending_ranges.add(base_interval);
1204 }
1205} 1206}
1206 1207
1207template <class P> 1208template <class P>
@@ -1542,7 +1543,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1542 .size = new_size, 1543 .size = new_size,
1543 }); 1544 });
1544 // Align up to avoid cache conflicts 1545 // Align up to avoid cache conflicts
1545 constexpr u64 align = 8ULL; 1546 constexpr u64 align = 64ULL;
1546 constexpr u64 mask = ~(align - 1ULL); 1547 constexpr u64 mask = ~(align - 1ULL);
1547 total_size_bytes += (new_size + align - 1) & mask; 1548 total_size_bytes += (new_size + align - 1) & mask;
1548 largest_copy = std::max(largest_copy, new_size); 1549 largest_copy = std::max(largest_copy, new_size);
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 6f29cba25..d4914a8f5 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -345,13 +345,30 @@ private:
345 if (inter_addr < start_address) { 345 if (inter_addr < start_address) {
346 inter_addr = start_address; 346 inter_addr = start_address;
347 } 347 }
348 if (it->second <= 0) {
349 __debugbreak();
350 }
351 func(inter_addr, inter_addr_end, it->second); 348 func(inter_addr, inter_addr_end, it->second);
352 } 349 }
353 } 350 }
354 351
352 void RemoveEachInOverlapCounter(OverlapCounter& current_range, const IntervalType search_interval, int subtract_value) {
353 bool any_removals = false;
354 current_range.add(std::make_pair(search_interval, subtract_value));
355 do {
356 any_removals = false;
357 auto it = current_range.lower_bound(search_interval);
358 if (it == current_range.end()) {
359 return;
360 }
361 auto end_it = current_range.upper_bound(search_interval);
362 for (; it != end_it; it++) {
363 if (it->second <= 0) {
364 any_removals = true;
365 current_range.erase(it);
366 break;
367 }
368 }
369 } while (any_removals);
370 }
371
355 static bool IsRangeGranular(VAddr cpu_addr, size_t size) { 372 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
356 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == 373 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
357 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); 374 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
@@ -554,6 +571,8 @@ private:
554 u64 minimum_memory = 0; 571 u64 minimum_memory = 0;
555 u64 critical_memory = 0; 572 u64 critical_memory = 0;
556 573
574 bool active_async_buffers = false;
575
557 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; 576 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
558}; 577};
559 578
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index 782951fe7..21729752b 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -273,7 +273,7 @@ public:
273 untracked_words[word_index] &= ~bits; 273 untracked_words[word_index] &= ~bits;
274 NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); 274 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
275 } 275 }
276 const u64 word = current_word; 276 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
277 u64 page = page_begin; 277 u64 page = page_begin;
278 page_begin = 0; 278 page_begin = 0;
279 279
@@ -321,6 +321,7 @@ public:
321 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 321 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
322 static_assert(type != Type::Untracked); 322 static_assert(type != Type::Untracked);
323 323
324 const u64* const untracked_words = Array<Type::Untracked>();
324 const u64* const state_words = Array<type>(); 325 const u64* const state_words = Array<type>();
325 const u64 num_query_words = size / BYTES_PER_WORD + 1; 326 const u64 num_query_words = size / BYTES_PER_WORD + 1;
326 const u64 word_begin = offset / BYTES_PER_WORD; 327 const u64 word_begin = offset / BYTES_PER_WORD;
@@ -328,7 +329,8 @@ public:
328 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 329 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
329 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 330 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
330 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 331 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
331 const u64 word = state_words[word_index]; 332 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
333 const u64 word = state_words[word_index] & ~off_word;
332 if (word == 0) { 334 if (word == 0) {
333 continue; 335 continue;
334 } 336 }