author    bunnei  2021-07-20 17:35:05 -0400
committer GitHub  2021-07-20 17:35:05 -0400
commit    c53b688411d916192820520ef5280ad5e378aa11 (patch)
tree      6b319d4eaa3b7807b9590268f308b1c03fb5d3a1 /src/video_core/buffer_cache
parent    Merge pull request #6658 from Morph1984/render-window-fix (diff)
parent    Buffer cache: Fixes, Clang and Feedback. (diff)
download  yuzu-c53b688411d916192820520ef5280ad5e378aa11.tar.gz
          yuzu-c53b688411d916192820520ef5280ad5e378aa11.tar.xz
          yuzu-c53b688411d916192820520ef5280ad5e378aa11.zip
Merge pull request #6629 from FernandoS27/accel-dma-2
DMAEngine: Accelerate BufferClear [accelerateDMA Part 2]
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h  78
1 file changed, 70 insertions(+), 8 deletions(-)
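
The change adds a fast path for the DMA engine's constant-fill (BufferClear) operation: when the destination range is resident in the buffer cache, the clear is issued on the GPU through the new BufferCache::DMAClear instead of going through guest memory. As a rough, self-contained sketch of that dispatch (the callback names below are illustrative stand-ins, not yuzu's actual DMA-engine interface):

// Hypothetical dispatch sketch; 'dma_clear' stands in for BufferCache::DMAClear and
// 'write_guest' for a plain guest-memory write. Not actual yuzu DMA-engine code.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

using GPUVAddr = std::uint64_t;

void LaunchBufferClear(const std::function<bool(GPUVAddr, std::uint64_t, std::uint32_t)>& dma_clear,
                       const std::function<void(GPUVAddr, const void*, std::size_t)>& write_guest,
                       GPUVAddr dst, std::uint64_t word_count, std::uint32_t value) {
    if (dma_clear(dst, word_count, value)) {
        return;  // Destination is resident in the buffer cache; cleared on the GPU.
    }
    // Fallback: materialize the fill pattern and write it straight to guest memory.
    const std::vector<std::uint32_t> fill(word_count, value);
    write_guest(dst, fill.data(), fill.size() * sizeof(std::uint32_t));
}
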
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2871682f6..7373cb62d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -164,11 +164,16 @@ public:
     /// Pop asynchronous downloads
     void PopAsyncFlushes();
 
-    [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+    bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+
+    bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
 
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
+    /// Return true when a region is registered on the cache
+    [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
+
     /// Return true when a CPU region is modified from the CPU
     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
 
@@ -324,6 +329,8 @@ private:
 
     [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
 
+    void ClearDownload(IntervalType subtract_interval);
+
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::Engines::Maxwell3D& maxwell3d;
     Tegra::Engines::KeplerCompute& kepler_compute;
@@ -463,23 +470,28 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
 }
 
 template <class P>
+void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
+    uncommitted_ranges.subtract(subtract_interval);
+    for (auto& interval_set : committed_ranges) {
+        interval_set.subtract(subtract_interval);
+    }
+}
+
+template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
     const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
     const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
-    const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount);
-    const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount);
+    const bool source_dirty = IsRegionRegistered(*cpu_src_address, amount);
+    const bool dest_dirty = IsRegionRegistered(*cpu_dest_address, amount);
     if (!source_dirty && !dest_dirty) {
         return false;
     }
 
     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
-    uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : committed_ranges) {
-        interval_set.subtract(subtract_interval);
-    }
+    ClearDownload(subtract_interval);
 
     BufferId buffer_a;
     BufferId buffer_b;
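
Aside on ClearDownload, factored out above: it drops any queued asynchronous downloads that overlap the DMA destination, since the upcoming GPU-side write supersedes them. Assuming the range containers behave like Boost.ICL interval sets (the .add/.subtract member calls in this diff match that API; the actual typedef of IntervalType lives outside this file), subtraction trims partial overlaps rather than removing whole entries:

// Standalone illustration (assumes Boost.ICL; not taken from the yuzu tree).
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <iostream>

int main() {
    using VAddr = std::uint64_t;
    using IntervalSet = boost::icl::interval_set<VAddr>;
    const auto right_open = [](VAddr lo, VAddr hi) {
        return boost::icl::interval<VAddr>::right_open(lo, hi);
    };

    IntervalSet pending;                       // stands in for uncommitted_ranges
    pending.add(right_open(0x1000, 0x3000));   // a queued download over [0x1000, 0x3000)

    // Subtracting the DMA destination only trims the overlap; the tail stays queued.
    pending.subtract(right_open(0x1000, 0x2000));
    for (const auto& interval : pending) {
        std::cout << std::hex << interval.lower() << '-' << interval.upper() << '\n';  // 2000-3000
    }
    return 0;
}
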
@@ -510,12 +522,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     ForEachWrittenRange(*cpu_src_address, amount, mirror);
     // This subtraction in this order is important for overlapping copies.
     common_ranges.subtract(subtract_interval);
+    bool atleast_1_download = tmp_intervals.size() != 0;
     for (const IntervalType add_interval : tmp_intervals) {
         common_ranges.add(add_interval);
     }
 
     runtime.CopyBuffer(dest_buffer, src_buffer, copies);
-    if (source_dirty) {
+    if (atleast_1_download) {
         dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
     std::vector<u8> tmp_buffer(amount);
@@ -525,6 +538,33 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 }
 
 template <class P>
+bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
+    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    if (!cpu_dst_address) {
+        return false;
+    }
+    const bool dest_dirty = IsRegionRegistered(*cpu_dst_address, amount);
+    if (!dest_dirty) {
+        return false;
+    }
+
+    const size_t size = amount * sizeof(u32);
+    const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
+    ClearDownload(subtract_interval);
+    common_ranges.subtract(subtract_interval);
+
+    BufferId buffer;
+    do {
+        has_deleted_buffers = false;
+        buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
+    } while (has_deleted_buffers);
+    auto& dest_buffer = slot_buffers[buffer];
+    const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr());
+    runtime.ClearBuffer(dest_buffer, offset, size, value);
+    return true;
+}
+
+template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
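
A note on units in DMAClear above: amount counts 32-bit words, so the byte size handed to runtime.ClearBuffer is amount * sizeof(u32), and the clear is issued at the destination's byte offset inside whichever cached buffer backs it (the FindBuffer loop repeats until buffer recreation settles). A quick worked example of that arithmetic, with made-up addresses:

#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
    // Hypothetical values: a cached buffer starting at buffer_base backs the destination.
    const std::uint64_t buffer_base = 0x80000000;  // what Buffer::CpuAddr() would return
    const std::uint64_t cpu_dst     = 0x80001000;  // translated destination address
    const std::uint64_t amount      = 0x100;       // 32-bit words requested by the clear

    const std::size_t size     = amount * sizeof(std::uint32_t);            // bytes to clear
    const std::uint32_t offset = static_cast<std::uint32_t>(cpu_dst - buffer_base);

    assert(size == 0x400);     // 0x100 words -> 0x400 bytes
    assert(offset == 0x1000);  // clear starts 0x1000 bytes into the backing buffer
    return 0;
}
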
@@ -782,6 +822,27 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
 }
 
 template <class P>
+bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
+    const VAddr end_addr = addr + size;
+    const u64 page_end = Common::DivCeil(end_addr, PAGE_SIZE);
+    for (u64 page = addr >> PAGE_BITS; page < page_end;) {
+        const BufferId buffer_id = page_table[page];
+        if (!buffer_id) {
+            ++page;
+            continue;
+        }
+        Buffer& buffer = slot_buffers[buffer_id];
+        const VAddr buf_start_addr = buffer.CpuAddr();
+        const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
+        if (buf_start_addr < end_addr && addr < buf_end_addr) {
+            return true;
+        }
+        page = Common::DivCeil(end_addr, PAGE_SIZE);
+    }
+    return false;
+}
+
+template <class P>
 bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
     const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
     for (u64 page = addr >> PAGE_BITS; page < page_end;) {
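
IsRegionRegistered, added above, answers a weaker question than IsRegionGpuModified: it only asks whether any cached buffer overlaps the half-open range [addr, addr + size), regardless of modification state, which is what DMACopy and DMAClear need to decide whether acceleration applies. Its overlap condition reduces to the usual half-open interval test:

#include <cstdint>

// Generic restatement of the check `buf_start_addr < end_addr && addr < buf_end_addr`.
constexpr bool Overlaps(std::uint64_t a_begin, std::uint64_t a_end,
                        std::uint64_t b_begin, std::uint64_t b_end) {
    return a_begin < b_end && b_begin < a_end;
}

static_assert(Overlaps(0x1000, 0x2000, 0x1FFF, 0x3000), "one overlapping byte is enough");
static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000), "touching half-open ranges do not overlap");
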
@@ -1425,6 +1486,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
         const VAddr end_address = start_address + range_size;
         ForEachWrittenRange(start_address, range_size, add_download);
         const IntervalType subtract_interval{start_address, end_address};
+        ClearDownload(subtract_interval);
         common_ranges.subtract(subtract_interval);
     });
     if (total_size_bytes == 0) {