author    Ameer J    2021-07-12 12:49:11 -0400
committer GitHub     2021-07-12 12:49:11 -0400
commit    776f391ff6cb37e63241d3513a140662c5a69f08
tree      9cf19af1b1826261a27a75a744c6415271dadd2d
parent    Merge pull request #6577 from ReinUsesLisp/precommit
parent    accelerateDMA: Fixes and feedback.
Merge pull request #6597 from FernandoS27/accelerate-dma
DMAEngine: Introduce Accelerate DMA.
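
DMACopy is the buffer cache's entry point for the new accelerated path: it returns false when the copy cannot or need not be accelerated (unmapped addresses, or neither range GPU-modified), letting the DMA engine fall back to its ordinary CPU copy. Below is a minimal caller-side sketch of that contract, not part of this diff; the buffer_cache and memory_manager members and the helper name are illustrative:

// Hypothetical DMA-engine caller: try the accelerated buffer-to-buffer copy
// first, and fall back to a plain guest-memory copy when it declines.
void CopyBufferToBuffer(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
    if (buffer_cache.DMACopy(src_address, dest_address, amount)) {
        return; // Copy was performed on the host GPU buffers.
    }
    std::vector<u8> staging(amount); // CPU fallback path
    memory_manager.ReadBlock(src_address, staging.data(), amount);
    memory_manager.WriteBlock(dest_address, staging.data(), amount);
}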
Diffstat (limited to 'src/video_core/buffer_cache')
 src/video_core/buffer_cache/buffer_cache.h | 150 ++++++++++------
 1 file changed, 100 insertions(+), 50 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 502feddba..2871682f6 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -164,6 +164,8 @@ public:
     /// Pop asynchronous downloads
     void PopAsyncFlushes();
 
+    [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
+
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
 
@@ -200,6 +202,36 @@ private:
         }
     }
 
+    template <typename Func>
+    void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
+        const VAddr start_address = cpu_addr;
+        const VAddr end_address = start_address + size;
+        const VAddr search_base =
+            static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
+        const IntervalType search_interval{search_base, search_base + 1};
+        auto it = common_ranges.lower_bound(search_interval);
+        if (it == common_ranges.end()) {
+            it = common_ranges.begin();
+        }
+        for (; it != common_ranges.end(); it++) {
+            VAddr inter_addr_end = it->upper();
+            VAddr inter_addr = it->lower();
+            if (inter_addr >= end_address) {
+                break;
+            }
+            if (inter_addr_end <= start_address) {
+                continue;
+            }
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end);
+        }
+    }
+
     static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
         return (cpu_addr & ~Core::Memory::PAGE_MASK) ==
                ((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
@@ -431,6 +463,68 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
 }
 
 template <class P>
+bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    if (!cpu_src_address || !cpu_dest_address) {
+        return false;
+    }
+    const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount);
+    const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount);
+    if (!source_dirty && !dest_dirty) {
+        return false;
+    }
+
+    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
+    uncommitted_ranges.subtract(subtract_interval);
+    for (auto& interval_set : committed_ranges) {
+        interval_set.subtract(subtract_interval);
+    }
+
+    BufferId buffer_a;
+    BufferId buffer_b;
+    do {
+        has_deleted_buffers = false;
+        buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
+        buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
+    } while (has_deleted_buffers);
+    auto& src_buffer = slot_buffers[buffer_a];
+    auto& dest_buffer = slot_buffers[buffer_b];
+    SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
+    SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
+    std::array copies{BufferCopy{
+        .src_offset = src_buffer.Offset(*cpu_src_address),
+        .dst_offset = dest_buffer.Offset(*cpu_dest_address),
+        .size = amount,
+    }};
+
+    boost::container::small_vector<IntervalType, 4> tmp_intervals;
+    auto mirror = [&](VAddr base_address, VAddr base_address_end) {
+        const u64 size = base_address_end - base_address;
+        const VAddr diff = base_address - *cpu_src_address;
+        const VAddr new_base_address = *cpu_dest_address + diff;
+        const IntervalType add_interval{new_base_address, new_base_address + size};
+        uncommitted_ranges.add(add_interval);
+        tmp_intervals.push_back(add_interval);
+    };
+    ForEachWrittenRange(*cpu_src_address, amount, mirror);
+    // This subtraction in this order is important for overlapping copies.
+    common_ranges.subtract(subtract_interval);
+    for (const IntervalType add_interval : tmp_intervals) {
+        common_ranges.add(add_interval);
+    }
+
+    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+    if (source_dirty) {
+        dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount);
+    }
+    std::vector<u8> tmp_buffer(amount);
+    cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
+    cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
+    return true;
+}
+
+template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
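
The in-code comment about subtraction order matters when source and destination overlap: the source's written sub-ranges must be mirrored while the source state is still intact, and only then is the destination interval subtracted from common_ranges and the mirrors re-added; subtracting first would erase the overlapping part of the source's written state before it could be mirrored. A standalone sketch of the same interval bookkeeping with boost::icl, using made-up addresses:

#include <boost/icl/interval_set.hpp>
#include <cassert>
#include <cstdint>

using VAddr = std::uint64_t;
using IntervalSet = boost::icl::interval_set<VAddr>;
using IntervalType = IntervalSet::interval_type; // right-open [lower, upper)

int main() {
    // GPU-written bytes: [0x1000, 0x1100). A DMA copies [0x1000, 0x1100)
    // onto the overlapping destination [0x1080, 0x1180).
    IntervalSet written;
    written.add(IntervalType{0x1000, 0x1100});

    // 1) Mirror while the source state is intact: the whole copy window is
    //    written, so its image in the destination is [0x1080, 0x1180).
    const IntervalType mirrored{0x1080, 0x1180};

    // 2) Only now subtract the destination range and re-add the mirror.
    //    Subtracting first would shrink the written state to [0x1000, 0x1080)
    //    and the mirror would miss [0x1100, 0x1180).
    written.subtract(IntervalType{0x1080, 0x1180});
    written.add(mirrored);

    IntervalSet expected;
    expected.add(IntervalType{0x1000, 0x1180});
    assert(written == expected);
}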
@@ -616,30 +710,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
 
             const VAddr start_address = buffer_addr + range_offset;
             const VAddr end_address = start_address + range_size;
-            const IntervalType search_interval{cpu_addr, 1};
-            auto it = common_ranges.lower_bound(search_interval);
-            if (it == common_ranges.end()) {
-                it = common_ranges.begin();
-            }
-            while (it != common_ranges.end()) {
-                VAddr inter_addr_end = it->upper();
-                VAddr inter_addr = it->lower();
-                if (inter_addr >= end_address) {
-                    break;
-                }
-                if (inter_addr_end <= start_address) {
-                    it++;
-                    continue;
-                }
-                if (inter_addr_end > end_address) {
-                    inter_addr_end = end_address;
-                }
-                if (inter_addr < start_address) {
-                    inter_addr = start_address;
-                }
-                add_download(inter_addr, inter_addr_end);
-                it++;
-            }
+            ForEachWrittenRange(start_address, range_size, add_download);
             const IntervalType subtract_interval{start_address, end_address};
             common_ranges.subtract(subtract_interval);
         });
@@ -737,7 +808,9 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        runtime.BindIndexBuffer(buffer, offset, size);
+        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
+                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
         runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
                                 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
@@ -951,7 +1024,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    const u32 draw_size = index_array.count * index_array.FormatSizeInBytes();
+    const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
     if (size == 0 || !cpu_addr) {
         index_buffer = NULL_BINDING;
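
Both index-buffer changes compensate for index_array.first: BindHostIndexBuffer now starts the binding after the skipped indices, and UpdateIndexBuffer sizes the range as (count + first) elements so those skipped indices are still backed and std::min(address_size, draw_size) does not truncate the draw. A worked example with made-up values:

#include <cstdint>
using u32 = std::uint32_t;

// Illustrative values: index_array.first = 3, index_array.count = 100,
// 16-bit indices (FormatSizeInBytes() == 2); offset is the buffer-relative
// offset returned by the cache.
constexpr u32 first = 3;
constexpr u32 count = 100;
constexpr u32 format_size = 2;
constexpr u32 offset = 0;

// BindHostIndexBuffer: start the host binding after the skipped indices.
constexpr u32 new_offset = offset + first * format_size; // 6 bytes in

// UpdateIndexBuffer: size the range over (count + first) elements so the
// skipped leading indices still lie inside the cached buffer.
constexpr u32 draw_size = (count + first) * format_size; // 206 bytes, not 200

static_assert(new_offset == 6 && draw_size == 206);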
@@ -1350,30 +1423,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
 
         const VAddr start_address = buffer_addr + range_offset;
         const VAddr end_address = start_address + range_size;
-        const IntervalType search_interval{start_address - range_size, 1};
-        auto it = common_ranges.lower_bound(search_interval);
-        if (it == common_ranges.end()) {
-            it = common_ranges.begin();
-        }
-        while (it != common_ranges.end()) {
-            VAddr inter_addr_end = it->upper();
-            VAddr inter_addr = it->lower();
-            if (inter_addr >= end_address) {
-                break;
-            }
-            if (inter_addr_end <= start_address) {
-                it++;
-                continue;
-            }
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            add_download(inter_addr, inter_addr_end);
-            it++;
-        }
+        ForEachWrittenRange(start_address, range_size, add_download);
         const IntervalType subtract_interval{start_address, end_address};
         common_ranges.subtract(subtract_interval);
     });