diff options
| author | 2021-07-20 17:35:05 -0400 | |
|---|---|---|
| committer | 2021-07-20 17:35:05 -0400 | |
| commit | c53b688411d916192820520ef5280ad5e378aa11 (patch) | |
| tree | 6b319d4eaa3b7807b9590268f308b1c03fb5d3a1 /src/video_core/buffer_cache | |
| parent | Merge pull request #6658 from Morph1984/render-window-fix (diff) | |
| parent | Buffer cache: Fixes, Clang and Feedback. (diff) | |
| download | yuzu-c53b688411d916192820520ef5280ad5e378aa11.tar.gz yuzu-c53b688411d916192820520ef5280ad5e378aa11.tar.xz yuzu-c53b688411d916192820520ef5280ad5e378aa11.zip | |
Merge pull request #6629 from FernandoS27/accel-dma-2
DMAEngine: Accelerate BufferClear [accelerateDMA Part 2]
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 78 |
1 files changed, 70 insertions, 8 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2871682f6..7373cb62d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -164,11 +164,16 @@ public: | |||
| 164 | /// Pop asynchronous downloads | 164 | /// Pop asynchronous downloads |
| 165 | void PopAsyncFlushes(); | 165 | void PopAsyncFlushes(); |
| 166 | 166 | ||
| 167 | [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | 167 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); |
| 168 | |||
| 169 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 168 | 170 | ||
| 169 | /// Return true when a CPU region is modified from the GPU | 171 | /// Return true when a CPU region is modified from the GPU |
| 170 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 172 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 171 | 173 | ||
| 174 | /// Return true when a region is registered on the cache | ||
| 175 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 176 | |||
| 172 | /// Return true when a CPU region is modified from the CPU | 177 | /// Return true when a CPU region is modified from the CPU |
| 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 178 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |
| 174 | 179 | ||
| @@ -324,6 +329,8 @@ private: | |||
| 324 | 329 | ||
| 325 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | 330 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; |
| 326 | 331 | ||
| 332 | void ClearDownload(IntervalType subtract_interval); | ||
| 333 | |||
| 327 | VideoCore::RasterizerInterface& rasterizer; | 334 | VideoCore::RasterizerInterface& rasterizer; |
| 328 | Tegra::Engines::Maxwell3D& maxwell3d; | 335 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 329 | Tegra::Engines::KeplerCompute& kepler_compute; | 336 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -463,23 +470,28 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | |||
| 463 | } | 470 | } |
| 464 | 471 | ||
| 465 | template <class P> | 472 | template <class P> |
| 473 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | ||
| 474 | uncommitted_ranges.subtract(subtract_interval); | ||
| 475 | for (auto& interval_set : committed_ranges) { | ||
| 476 | interval_set.subtract(subtract_interval); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | template <class P> | ||
| 466 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | 481 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { |
| 467 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); | 482 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); |
| 468 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); | 483 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); |
| 469 | if (!cpu_src_address || !cpu_dest_address) { | 484 | if (!cpu_src_address || !cpu_dest_address) { |
| 470 | return false; | 485 | return false; |
| 471 | } | 486 | } |
| 472 | const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | 487 | const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount); |
| 473 | const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | 488 | const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount); |
| 474 | if (!source_dirty && !dest_dirty) { | 489 | if (!source_dirty && !dest_dirty) { |
| 475 | return false; | 490 | return false; |
| 476 | } | 491 | } |
| 477 | 492 | ||
| 478 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | 493 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |
| 479 | uncommitted_ranges.subtract(subtract_interval); | 494 | ClearDownload(subtract_interval); |
| 480 | for (auto& interval_set : committed_ranges) { | ||
| 481 | interval_set.subtract(subtract_interval); | ||
| 482 | } | ||
| 483 | 495 | ||
| 484 | BufferId buffer_a; | 496 | BufferId buffer_a; |
| 485 | BufferId buffer_b; | 497 | BufferId buffer_b; |
| @@ -510,12 +522,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 510 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 522 | ForEachWrittenRange(*cpu_src_address, amount, mirror); |
| 511 | // This subtraction in this order is important for overlapping copies. | 523 | // This subtraction in this order is important for overlapping copies. |
| 512 | common_ranges.subtract(subtract_interval); | 524 | common_ranges.subtract(subtract_interval); |
| 525 | bool atleast_1_download = tmp_intervals.size() != 0; | ||
| 513 | for (const IntervalType add_interval : tmp_intervals) { | 526 | for (const IntervalType add_interval : tmp_intervals) { |
| 514 | common_ranges.add(add_interval); | 527 | common_ranges.add(add_interval); |
| 515 | } | 528 | } |
| 516 | 529 | ||
| 517 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 530 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 518 | if (source_dirty) { | 531 | if (atleast_1_download) { |
| 519 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 532 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 520 | } | 533 | } |
| 521 | std::vector<u8> tmp_buffer(amount); | 534 | std::vector<u8> tmp_buffer(amount); |
| @@ -525,6 +538,33 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 525 | } | 538 | } |
| 526 | 539 | ||
| 527 | template <class P> | 540 | template <class P> |
| 541 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | ||
| 542 | const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address); | ||
| 543 | if (!cpu_dst_address) { | ||
| 544 | return false; | ||
| 545 | } | ||
| 546 | const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount); | ||
| 547 | if (!dest_dirty) { | ||
| 548 | return false; | ||
| 549 | } | ||
| 550 | |||
| 551 | const size_t size = amount * sizeof(u32); | ||
| 552 | const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size}; | ||
| 553 | ClearDownload(subtract_interval); | ||
| 554 | common_ranges.subtract(subtract_interval); | ||
| 555 | |||
| 556 | BufferId buffer; | ||
| 557 | do { | ||
| 558 | has_deleted_buffers = false; | ||
| 559 | buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); | ||
| 560 | } while (has_deleted_buffers); | ||
| 561 | auto& dest_buffer = slot_buffers[buffer]; | ||
| 562 | const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); | ||
| 563 | runtime.ClearBuffer(dest_buffer, offset, size, value); | ||
| 564 | return true; | ||
| 565 | } | ||
| 566 | |||
| 567 | template <class P> | ||
| 528 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 568 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 529 | u32 size) { | 569 | u32 size) { |
| 530 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 570 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| @@ -782,6 +822,27 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 782 | } | 822 | } |
| 783 | 823 | ||
| 784 | template <class P> | 824 | template <class P> |
| 825 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | ||
| 826 | const VAddr end_addr = addr + size; | ||
| 827 | const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 828 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | ||
| 829 | const BufferId buffer_id = page_table[page]; | ||
| 830 | if (!buffer_id) { | ||
| 831 | ++page; | ||
| 832 | continue; | ||
| 833 | } | ||
| 834 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 835 | const VAddr buf_start_addr = buffer.CpuAddr(); | ||
| 836 | const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); | ||
| 837 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | ||
| 838 | return true; | ||
| 839 | } | ||
| 840 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 841 | } | ||
| 842 | return false; | ||
| 843 | } | ||
| 844 | |||
| 845 | template <class P> | ||
| 785 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 846 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { |
| 786 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); | 847 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); |
| 787 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | 848 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { |
| @@ -1425,6 +1486,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1425 | const VAddr end_address = start_address + range_size; | 1486 | const VAddr end_address = start_address + range_size; |
| 1426 | ForEachWrittenRange(start_address, range_size, add_download); | 1487 | ForEachWrittenRange(start_address, range_size, add_download); |
| 1427 | const IntervalType subtract_interval{start_address, end_address}; | 1488 | const IntervalType subtract_interval{start_address, end_address}; |
| 1489 | ClearDownload(subtract_interval); | ||
| 1428 | common_ranges.subtract(subtract_interval); | 1490 | common_ranges.subtract(subtract_interval); |
| 1429 | }); | 1491 | }); |
| 1430 | if (total_size_bytes == 0) { | 1492 | if (total_size_bytes == 0) { |