diff options
| author | 2021-07-13 03:33:08 +0200 | |
|---|---|---|
| committer | 2021-07-13 03:49:47 +0200 | |
| commit | b780d5b5c580a65a670de73140b743072efc0fd2 (patch) | |
| tree | ef6371d8107ab18932ac2514f3d498c62f1323e6 /src/video_core/buffer_cache | |
| parent | Merge pull request #6597 from FernandoS27/accelerate-dma (diff) | |
| download | yuzu-b780d5b5c580a65a670de73140b743072efc0fd2.tar.gz yuzu-b780d5b5c580a65a670de73140b743072efc0fd2.tar.xz yuzu-b780d5b5c580a65a670de73140b743072efc0fd2.zip | |
DMAEngine: Accelerate BufferClear
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 65 |
1 files changed, 61 insertions, 4 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2871682f6..5f5a59bba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -164,11 +164,16 @@ public: | |||
| 164 | /// Pop asynchronous downloads | 164 | /// Pop asynchronous downloads |
| 165 | void PopAsyncFlushes(); | 165 | void PopAsyncFlushes(); |
| 166 | 166 | ||
| 167 | [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | 167 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); |
| 168 | |||
| 169 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 168 | 170 | ||
| 169 | /// Return true when a CPU region is modified from the GPU | 171 | /// Return true when a CPU region is modified from the GPU |
| 170 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 172 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 171 | 173 | ||
| 174 | /// Return true when a region is registered on the cache | ||
| 175 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 176 | |||
| 172 | /// Return true when a CPU region is modified from the CPU | 177 | /// Return true when a CPU region is modified from the CPU |
| 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 178 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |
| 174 | 179 | ||
| @@ -469,8 +474,8 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 469 | if (!cpu_src_address || !cpu_dest_address) { | 474 | if (!cpu_src_address || !cpu_dest_address) { |
| 470 | return false; | 475 | return false; |
| 471 | } | 476 | } |
| 472 | const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | 477 | const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount); |
| 473 | const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | 478 | const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount); |
| 474 | if (!source_dirty && !dest_dirty) { | 479 | if (!source_dirty && !dest_dirty) { |
| 475 | return false; | 480 | return false; |
| 476 | } | 481 | } |
| @@ -515,7 +520,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 515 | } | 520 | } |
| 516 | 521 | ||
| 517 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 522 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 518 | if (source_dirty) { | 523 | if (IsRegionGpuModified(*cpu_src_address, amount)) { |
| 519 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 524 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 520 | } | 525 | } |
| 521 | std::vector<u8> tmp_buffer(amount); | 526 | std::vector<u8> tmp_buffer(amount); |
| @@ -525,6 +530,37 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 525 | } | 530 | } |
| 526 | 531 | ||
| 527 | template <class P> | 532 | template <class P> |
| 533 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | ||
| 534 | const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address); | ||
| 535 | if (!cpu_dst_address) { | ||
| 536 | return false; | ||
| 537 | } | ||
| 538 | const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount); | ||
| 539 | if (!dest_dirty) { | ||
| 540 | return false; | ||
| 541 | } | ||
| 542 | |||
| 543 | const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; | ||
| 544 | uncommitted_ranges.subtract(subtract_interval); | ||
| 545 | for (auto& interval_set : committed_ranges) { | ||
| 546 | interval_set.subtract(subtract_interval); | ||
| 547 | } | ||
| 548 | common_ranges.subtract(subtract_interval); | ||
| 549 | |||
| 550 | const size_t size = amount * sizeof(u32); | ||
| 551 | BufferId buffer; | ||
| 552 | do { | ||
| 553 | has_deleted_buffers = false; | ||
| 554 | buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); | ||
| 555 | } while (has_deleted_buffers); | ||
| 556 | |||
| 557 | auto& dest_buffer = slot_buffers[buffer]; | ||
| 558 | const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); | ||
| 559 | runtime.ClearBuffer(dest_buffer, offset, size, value); | ||
| 560 | return true; | ||
| 561 | } | ||
| 562 | |||
| 563 | template <class P> | ||
| 528 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 564 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 529 | u32 size) { | 565 | u32 size) { |
| 530 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 566 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| @@ -782,6 +818,27 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 782 | } | 818 | } |
| 783 | 819 | ||
| 784 | template <class P> | 820 | template <class P> |
| 821 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | ||
| 822 | const VAddr end_addr = addr + size; | ||
| 823 | const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 824 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | ||
| 825 | const BufferId buffer_id = page_table[page]; | ||
| 826 | if (!buffer_id) { | ||
| 827 | ++page; | ||
| 828 | continue; | ||
| 829 | } | ||
| 830 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 831 | const VAddr buf_start_addr = buffer.CpuAddr(); | ||
| 832 | const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); | ||
| 833 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | ||
| 834 | return true; | ||
| 835 | } | ||
| 836 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 837 | } | ||
| 838 | return false; | ||
| 839 | } | ||
| 840 | |||
| 841 | template <class P> | ||
| 785 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 842 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { |
| 786 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); | 843 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); |
| 787 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { | 844 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { |