diff options
| -rw-r--r-- | src/tests/video_core/memory_tracker.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 114 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 2 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/word_manager.h | 13 |
4 files changed, 70 insertions, 63 deletions
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 3981907a2..618793668 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -535,12 +535,12 @@ TEST_CASE("MemoryTracker: Cached write downloads") { | |||
| 535 | memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); | 535 | memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); |
| 536 | int num = 0; | 536 | int num = 0; |
| 537 | memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 537 | memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); |
| 538 | REQUIRE(num == 1); | 538 | REQUIRE(num == 0); |
| 539 | num = 0; | 539 | num = 0; |
| 540 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 540 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); |
| 541 | REQUIRE(num == 0); | 541 | REQUIRE(num == 0); |
| 542 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | 542 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); |
| 543 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | 543 | REQUIRE(memory_track->IsRegionGpuModified(c + PAGE, PAGE)); |
| 544 | memory_track->FlushCachedWrites(); | 544 | memory_track->FlushCachedWrites(); |
| 545 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | 545 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); |
| 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 474822354..0b15944d6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -23,8 +23,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 23 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| 24 | inline_buffer_id = NULL_BUFFER_ID; | 24 | inline_buffer_id = NULL_BUFFER_ID; |
| 25 | 25 | ||
| 26 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 27 | |||
| 28 | if (!runtime.CanReportMemoryUsage()) { | 26 | if (!runtime.CanReportMemoryUsage()) { |
| 29 | minimum_memory = DEFAULT_EXPECTED_MEMORY; | 27 | minimum_memory = DEFAULT_EXPECTED_MEMORY; |
| 30 | critical_memory = DEFAULT_CRITICAL_MEMORY; | 28 | critical_memory = DEFAULT_CRITICAL_MEMORY; |
| @@ -75,8 +73,6 @@ void BufferCache<P>::TickFrame() { | |||
| 75 | uniform_cache_hits[0] = 0; | 73 | uniform_cache_hits[0] = 0; |
| 76 | uniform_cache_shots[0] = 0; | 74 | uniform_cache_shots[0] = 0; |
| 77 | 75 | ||
| 78 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 79 | |||
| 80 | const bool skip_preferred = hits * 256 < shots * 251; | 76 | const bool skip_preferred = hits * 256 < shots * 251; |
| 81 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 77 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 82 | 78 | ||
| @@ -491,9 +487,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 491 | 487 | ||
| 492 | if (committed_ranges.empty()) { | 488 | if (committed_ranges.empty()) { |
| 493 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 489 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 494 | if (active_async_buffers) { | 490 | |
| 495 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | 491 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); |
| 496 | } | ||
| 497 | } | 492 | } |
| 498 | return; | 493 | return; |
| 499 | } | 494 | } |
| @@ -554,64 +549,65 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 554 | committed_ranges.clear(); | 549 | committed_ranges.clear(); |
| 555 | if (downloads.empty()) { | 550 | if (downloads.empty()) { |
| 556 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 551 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 557 | if (active_async_buffers) { | 552 | |
| 558 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | 553 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); |
| 559 | } | ||
| 560 | } | 554 | } |
| 561 | return; | 555 | return; |
| 562 | } | 556 | } |
| 563 | if (active_async_buffers) { | 557 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 564 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 558 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 565 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); | 559 | boost::container::small_vector<BufferCopy, 4> normalized_copies; |
| 566 | boost::container::small_vector<BufferCopy, 4> normalized_copies; | 560 | IntervalSet new_async_range{}; |
| 567 | IntervalSet new_async_range{}; | 561 | runtime.PreCopyBarrier(); |
| 568 | runtime.PreCopyBarrier(); | 562 | for (auto& [copy, buffer_id] : downloads) { |
| 569 | for (auto& [copy, buffer_id] : downloads) { | 563 | copy.dst_offset += download_staging.offset; |
| 570 | copy.dst_offset += download_staging.offset; | 564 | const std::array copies{copy}; |
| 571 | const std::array copies{copy}; | 565 | BufferCopy second_copy{copy}; |
| 572 | BufferCopy second_copy{copy}; | 566 | Buffer& buffer = slot_buffers[buffer_id]; |
| 573 | Buffer& buffer = slot_buffers[buffer_id]; | 567 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 574 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; | 568 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); |
| 575 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); | 569 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; |
| 576 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; | 570 | async_downloads += std::make_pair(base_interval, 1); |
| 577 | async_downloads += std::make_pair(base_interval, 1); | 571 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| 578 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); | 572 | normalized_copies.push_back(second_copy); |
| 579 | normalized_copies.push_back(second_copy); | ||
| 580 | } | ||
| 581 | runtime.PostCopyBarrier(); | ||
| 582 | pending_downloads.emplace_back(std::move(normalized_copies)); | ||
| 583 | async_buffers.emplace_back(download_staging); | ||
| 584 | } else { | ||
| 585 | committed_ranges.clear(); | ||
| 586 | uncommitted_ranges.clear(); | ||
| 587 | } | 573 | } |
| 574 | runtime.PostCopyBarrier(); | ||
| 575 | pending_downloads.emplace_back(std::move(normalized_copies)); | ||
| 576 | async_buffers.emplace_back(download_staging); | ||
| 588 | } else { | 577 | } else { |
| 589 | if constexpr (USE_MEMORY_MAPS) { | 578 | if (!Settings::IsGPULevelHigh()) { |
| 590 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 579 | committed_ranges.clear(); |
| 591 | runtime.PreCopyBarrier(); | 580 | uncommitted_ranges.clear(); |
| 592 | for (auto& [copy, buffer_id] : downloads) { | ||
| 593 | // Have in mind the staging buffer offset for the copy | ||
| 594 | copy.dst_offset += download_staging.offset; | ||
| 595 | const std::array copies{copy}; | ||
| 596 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | ||
| 597 | } | ||
| 598 | runtime.PostCopyBarrier(); | ||
| 599 | runtime.Finish(); | ||
| 600 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 601 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 602 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 603 | // Undo the modified offset | ||
| 604 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 605 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 606 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 607 | } | ||
| 608 | } else { | 581 | } else { |
| 609 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 582 | if constexpr (USE_MEMORY_MAPS) { |
| 610 | for (const auto& [copy, buffer_id] : downloads) { | 583 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 611 | Buffer& buffer = slot_buffers[buffer_id]; | 584 | runtime.PreCopyBarrier(); |
| 612 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 585 | for (auto& [copy, buffer_id] : downloads) { |
| 613 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 586 | // Have in mind the staging buffer offset for the copy |
| 614 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 587 | copy.dst_offset += download_staging.offset; |
| 588 | const std::array copies{copy}; | ||
| 589 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, | ||
| 590 | false); | ||
| 591 | } | ||
| 592 | runtime.PostCopyBarrier(); | ||
| 593 | runtime.Finish(); | ||
| 594 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 595 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 596 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 597 | // Undo the modified offset | ||
| 598 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 599 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 600 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 601 | } | ||
| 602 | } else { | ||
| 603 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 604 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 605 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 606 | buffer.ImmediateDownload(copy.src_offset, | ||
| 607 | immediate_buffer.subspan(0, copy.size)); | ||
| 608 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 609 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 610 | } | ||
| 615 | } | 611 | } |
| 616 | } | 612 | } |
| 617 | } | 613 | } |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index e3914a53a..0445ec47f 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -572,8 +572,6 @@ private: | |||
| 572 | u64 critical_memory = 0; | 572 | u64 critical_memory = 0; |
| 573 | BufferId inline_buffer_id; | 573 | BufferId inline_buffer_id; |
| 574 | 574 | ||
| 575 | bool active_async_buffers = false; | ||
| 576 | |||
| 577 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 575 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; |
| 578 | std::vector<u8> tmp_buffer; | 576 | std::vector<u8> tmp_buffer; |
| 579 | }; | 577 | }; |
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 0fb199a54..a336bde41 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -302,6 +302,9 @@ public: | |||
| 302 | (pending_pointer - pending_offset) * BYTES_PER_PAGE); | 302 | (pending_pointer - pending_offset) * BYTES_PER_PAGE); |
| 303 | }; | 303 | }; |
| 304 | IterateWords(offset, size, [&](size_t index, u64 mask) { | 304 | IterateWords(offset, size, [&](size_t index, u64 mask) { |
| 305 | if constexpr (type == Type::GPU) { | ||
| 306 | mask &= ~untracked_words[index]; | ||
| 307 | } | ||
| 305 | const u64 word = state_words[index] & mask; | 308 | const u64 word = state_words[index] & mask; |
| 306 | if constexpr (clear) { | 309 | if constexpr (clear) { |
| 307 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | 310 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { |
| @@ -350,8 +353,13 @@ public: | |||
| 350 | static_assert(type != Type::Untracked); | 353 | static_assert(type != Type::Untracked); |
| 351 | 354 | ||
| 352 | const std::span<const u64> state_words = words.template Span<type>(); | 355 | const std::span<const u64> state_words = words.template Span<type>(); |
| 356 | [[maybe_unused]] const std::span<const u64> untracked_words = | ||
| 357 | words.template Span<Type::Untracked>(); | ||
| 353 | bool result = false; | 358 | bool result = false; |
| 354 | IterateWords(offset, size, [&](size_t index, u64 mask) { | 359 | IterateWords(offset, size, [&](size_t index, u64 mask) { |
| 360 | if constexpr (type == Type::GPU) { | ||
| 361 | mask &= ~untracked_words[index]; | ||
| 362 | } | ||
| 355 | const u64 word = state_words[index] & mask; | 363 | const u64 word = state_words[index] & mask; |
| 356 | if (word != 0) { | 364 | if (word != 0) { |
| 357 | result = true; | 365 | result = true; |
| @@ -372,9 +380,14 @@ public: | |||
| 372 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | 380 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { |
| 373 | static_assert(type != Type::Untracked); | 381 | static_assert(type != Type::Untracked); |
| 374 | const std::span<const u64> state_words = words.template Span<type>(); | 382 | const std::span<const u64> state_words = words.template Span<type>(); |
| 383 | [[maybe_unused]] const std::span<const u64> untracked_words = | ||
| 384 | words.template Span<Type::Untracked>(); | ||
| 375 | u64 begin = std::numeric_limits<u64>::max(); | 385 | u64 begin = std::numeric_limits<u64>::max(); |
| 376 | u64 end = 0; | 386 | u64 end = 0; |
| 377 | IterateWords(offset, size, [&](size_t index, u64 mask) { | 387 | IterateWords(offset, size, [&](size_t index, u64 mask) { |
| 388 | if constexpr (type == Type::GPU) { | ||
| 389 | mask &= ~untracked_words[index]; | ||
| 390 | } | ||
| 378 | const u64 word = state_words[index] & mask; | 391 | const u64 word = state_words[index] & mask; |
| 379 | if (word == 0) { | 392 | if (word == 0) { |
| 380 | return; | 393 | return; |