summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2021-07-10 18:19:10 +0200
committerGravatar Fernando Sahmkow2021-07-11 01:33:17 +0200
commitbe1a3f7a0fb330b7cc5ac007ccb2cb73d4795602 (patch)
tree1e32f15d4c4ad9917c6fc86ddad53fdd85603859 /src/video_core/buffer_cache
parentMerge pull request #6557 from FernandoS27/staceys-mom-has-got-it-goin-on (diff)
downloadyuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.gz
yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.xz
yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.zip
accelerateDMA: Accelerate Buffer Copies.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h81
1 files changed, 80 insertions, 1 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 502feddba..c73ebb1f4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -164,6 +164,8 @@ public:
164 /// Pop asynchronous downloads 164 /// Pop asynchronous downloads
165 void PopAsyncFlushes(); 165 void PopAsyncFlushes();
166 166
167 [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
168
167 /// Return true when a CPU region is modified from the GPU 169 /// Return true when a CPU region is modified from the GPU
168 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 170 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
169 171
@@ -431,6 +433,83 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
431} 433}
432 434
433template <class P> 435template <class P>
436bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
437 const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
438 const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
439 if (!cpu_src_address || !cpu_dest_address) {
440 return false;
441 }
442 const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount);
443 const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount);
444 if (!(source_dirty || dest_dirty)) {
445 return false;
446 }
447
448 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
449 common_ranges.subtract(subtract_interval);
450
451 BufferId buffer_a;
452 BufferId buffer_b;
453 do {
454 has_deleted_buffers = false;
455 buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
456 buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
457 } while (has_deleted_buffers);
458 auto& src_buffer = slot_buffers[buffer_a];
459 auto& dest_buffer = slot_buffers[buffer_b];
460 SynchronizeBuffer(src_buffer, *cpu_src_address, amount);
461 SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount);
462 std::array copies{BufferCopy{
463 .src_offset = src_buffer.Offset(*cpu_src_address),
464 .dst_offset = dest_buffer.Offset(*cpu_dest_address),
465 .size = amount,
466 }};
467
468 auto mirror = [&](VAddr base_address, u64 size) {
469 VAddr diff = base_address - *cpu_src_address;
470 VAddr new_base_address = *cpu_dest_address + diff;
471 const IntervalType add_interval{new_base_address, new_base_address + size};
472 common_ranges.add(add_interval);
473 };
474
475 const VAddr start_address = *cpu_src_address;
476 const VAddr end_address = start_address + amount;
477 const IntervalType search_interval{start_address - amount, 1};
478 auto it = common_ranges.lower_bound(search_interval);
479 if (it == common_ranges.end()) {
480 it = common_ranges.begin();
481 }
482 while (it != common_ranges.end()) {
483 VAddr inter_addr_end = it->upper();
484 VAddr inter_addr = it->lower();
485 if (inter_addr >= end_address) {
486 break;
487 }
488 if (inter_addr_end <= start_address) {
489 it++;
490 continue;
491 }
492 if (inter_addr_end > end_address) {
493 inter_addr_end = end_address;
494 }
495 if (inter_addr < start_address) {
496 inter_addr = start_address;
497 }
498 mirror(inter_addr, inter_addr_end - inter_addr);
499 it++;
500 }
501
502 runtime.CopyBuffer(dest_buffer, src_buffer, copies);
503 if (source_dirty) {
504 dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
505 }
506 std::vector<u8> tmp_buffer(amount);
507 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
508 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
509 return true;
510}
511
512template <class P>
434void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 513void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
435 u32 size) { 514 u32 size) {
436 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 515 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
@@ -951,7 +1030,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
951 const GPUVAddr gpu_addr_end = index_array.EndAddress(); 1030 const GPUVAddr gpu_addr_end = index_array.EndAddress();
952 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); 1031 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
953 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1032 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
954 const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); 1033 const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
955 const u32 size = std::min(address_size, draw_size); 1034 const u32 size = std::min(address_size, draw_size);
956 if (size == 0 || !cpu_addr) { 1035 if (size == 0 || !cpu_addr) {
957 index_buffer = NULL_BINDING; 1036 index_buffer = NULL_BINDING;