From be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Sat, 10 Jul 2021 18:19:10 +0200
Subject: accelerateDMA: Accelerate Buffer Copies.
---
src/video_core/buffer_cache/buffer_cache.h | 81 +++++++++++++++++++++++++++++-
1 file changed, 80 insertions(+), 1 deletion(-)
(limited to 'src/video_core/buffer_cache')
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 502feddba..c73ebb1f4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -164,6 +164,8 @@ public:
/// Pop asynchronous downloads
void PopAsyncFlushes();
+ /// Try to perform a DMA copy of `amount` bytes between two GPU addresses through the cache.
+ /// Returns true when the cache handled the copy, false when it did nothing.
+ [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
@@ -430,6 +432,83 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
});
}
+/// Performs a DMA copy between two GPU virtual addresses on the cached buffers.
+///
+/// @param src_address  GPU virtual address to copy from.
+/// @param dest_address GPU virtual address to copy to.
+/// @param amount       Number of bytes to copy.
+/// @return false when either address has no CPU translation or when neither region is
+///         GPU-modified (nothing is done in that case); true once the copy has been issued on
+///         the cached buffers and the same bytes have been copied in CPU memory.
+template <class P>
+bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    if (!cpu_src_address || !cpu_dest_address) {
+        return false;
+    }
+    const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount);
+    const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount);
+    if (!(source_dirty || dest_dirty)) {
+        return false;
+    }
+
+    // The destination is about to be overwritten: drop whatever was tracked for it.
+    // NOTE(review): when source and destination overlap, this also removes tracked source
+    // ranges before they are mirrored below - confirm whether overlapping copies matter here.
+    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+    common_ranges.subtract(subtract_interval);
+
+    // Repeat both lookups for as long as a lookup reports deleted buffers
+    // (presumably a previously returned id may be stale in that case - TODO confirm).
+    BufferId buffer_a;
+    BufferId buffer_b;
+    do {
+        has_deleted_buffers = false;
+        buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
+        buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
+    } while (has_deleted_buffers);
+    auto& src_buffer = slot_buffers[buffer_a];
+    auto& dest_buffer = slot_buffers[buffer_b];
+    SynchronizeBuffer(src_buffer, *cpu_src_address, amount);
+    SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount);
+    std::array copies{BufferCopy{
+        .src_offset = src_buffer.Offset(*cpu_src_address),
+        .dst_offset = dest_buffer.Offset(*cpu_dest_address),
+        .size = amount,
+    }};
+
+    // Tracked ranges found inside the source window are translated to the destination window.
+    // They are collected here and only added to common_ranges after the scan: adding while
+    // iterating the same set can join/replace stored intervals (invalidating the iterator) and
+    // can make the scan revisit intervals it has just inserted.
+    std::vector<IntervalType> mirrored_intervals;
+    auto mirror = [&](VAddr base_address, u64 size) {
+        const VAddr diff = base_address - *cpu_src_address;
+        const VAddr new_base_address = *cpu_dest_address + diff;
+        const IntervalType add_interval{new_base_address, new_base_address + size};
+        mirrored_intervals.push_back(add_interval);
+    };
+
+    const VAddr start_address = *cpu_src_address;
+    const VAddr end_address = start_address + amount;
+    // NOTE(review): the upper bound of 1 looks unusual for a search key - confirm intent.
+    const IntervalType search_interval{start_address - amount, 1};
+    auto it = common_ranges.lower_bound(search_interval);
+    if (it == common_ranges.end()) {
+        it = common_ranges.begin();
+    }
+    for (; it != common_ranges.end(); ++it) {
+        VAddr inter_addr_end = it->upper();
+        VAddr inter_addr = it->lower();
+        if (inter_addr >= end_address) {
+            // Starts at or past the end of the source window; stop scanning.
+            break;
+        }
+        if (inter_addr_end <= start_address) {
+            // Entirely before the source window.
+            continue;
+        }
+        // Clamp the tracked interval to the source window before mirroring it.
+        if (inter_addr_end > end_address) {
+            inter_addr_end = end_address;
+        }
+        if (inter_addr < start_address) {
+            inter_addr = start_address;
+        }
+        mirror(inter_addr, inter_addr_end - inter_addr);
+    }
+    for (const IntervalType& add_interval : mirrored_intervals) {
+        common_ranges.add(add_interval);
+    }
+
+    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+    if (source_dirty) {
+        dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
+    }
+    // Copy the same bytes in CPU memory as well, through a temporary buffer.
+    std::vector<u8> tmp_buffer(amount);
+    cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
+    cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
+    return true;
+}
+
template
void BufferCache::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
@@ -951,7 +1030,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_end = index_array.EndAddress();
const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin);
- const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+ const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
if (size == 0 || !cpu_addr) {
index_buffer = NULL_BINDING;
--
cgit v1.2.3