diff options
| author | 2021-07-12 12:49:11 -0400 | |
|---|---|---|
| committer | 2021-07-12 12:49:11 -0400 | |
| commit | 776f391ff6cb37e63241d3513a140662c5a69f08 (patch) | |
| tree | 9cf19af1b1826261a27a75a744c6415271dadd2d /src/video_core/buffer_cache | |
| parent | Merge pull request #6577 from ReinUsesLisp/precommit (diff) | |
| parent | accelerateDMA: Fixes and feedback. (diff) | |
| download | yuzu-776f391ff6cb37e63241d3513a140662c5a69f08.tar.gz yuzu-776f391ff6cb37e63241d3513a140662c5a69f08.tar.xz yuzu-776f391ff6cb37e63241d3513a140662c5a69f08.zip | |
Merge pull request #6597 from FernandoS27/accelerate-dma
DMAEngine: Introduce Accelerate DMA.
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 150 |
1 files changed, 100 insertions, 50 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 502feddba..2871682f6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -164,6 +164,8 @@ public: | |||
| 164 | /// Pop asynchronous downloads | 164 | /// Pop asynchronous downloads |
| 165 | void PopAsyncFlushes(); | 165 | void PopAsyncFlushes(); |
| 166 | 166 | ||
| 167 | [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 168 | |||
| 167 | /// Return true when a CPU region is modified from the GPU | 169 | /// Return true when a CPU region is modified from the GPU |
| 168 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 170 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 169 | 171 | ||
| @@ -200,6 +202,36 @@ private: | |||
| 200 | } | 202 | } |
| 201 | } | 203 | } |
| 202 | 204 | ||
| 205 | template <typename Func> | ||
| 206 | void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 207 | const VAddr start_address = cpu_addr; | ||
| 208 | const VAddr end_address = start_address + size; | ||
| 209 | const VAddr search_base = | ||
| 210 | static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size))); | ||
| 211 | const IntervalType search_interval{search_base, search_base + 1}; | ||
| 212 | auto it = common_ranges.lower_bound(search_interval); | ||
| 213 | if (it == common_ranges.end()) { | ||
| 214 | it = common_ranges.begin(); | ||
| 215 | } | ||
| 216 | for (; it != common_ranges.end(); it++) { | ||
| 217 | VAddr inter_addr_end = it->upper(); | ||
| 218 | VAddr inter_addr = it->lower(); | ||
| 219 | if (inter_addr >= end_address) { | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | if (inter_addr_end <= start_address) { | ||
| 223 | continue; | ||
| 224 | } | ||
| 225 | if (inter_addr_end > end_address) { | ||
| 226 | inter_addr_end = end_address; | ||
| 227 | } | ||
| 228 | if (inter_addr < start_address) { | ||
| 229 | inter_addr = start_address; | ||
| 230 | } | ||
| 231 | func(inter_addr, inter_addr_end); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 203 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | 235 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |
| 204 | return (cpu_addr & ~Core::Memory::PAGE_MASK) == | 236 | return (cpu_addr & ~Core::Memory::PAGE_MASK) == |
| 205 | ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); | 237 | ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); |
| @@ -431,6 +463,68 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | |||
| 431 | } | 463 | } |
| 432 | 464 | ||
| 433 | template <class P> | 465 | template <class P> |
| 466 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||
| 467 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); | ||
| 468 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); | ||
| 469 | if (!cpu_src_address || !cpu_dest_address) { | ||
| 470 | return false; | ||
| 471 | } | ||
| 472 | const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | ||
| 473 | const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | ||
| 474 | if (!source_dirty && !dest_dirty) { | ||
| 475 | return false; | ||
| 476 | } | ||
| 477 | |||
| 478 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | ||
| 479 | uncommitted_ranges.subtract(subtract_interval); | ||
| 480 | for (auto& interval_set : committed_ranges) { | ||
| 481 | interval_set.subtract(subtract_interval); | ||
| 482 | } | ||
| 483 | |||
| 484 | BufferId buffer_a; | ||
| 485 | BufferId buffer_b; | ||
| 486 | do { | ||
| 487 | has_deleted_buffers = false; | ||
| 488 | buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); | ||
| 489 | buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); | ||
| 490 | } while (has_deleted_buffers); | ||
| 491 | auto& src_buffer = slot_buffers[buffer_a]; | ||
| 492 | auto& dest_buffer = slot_buffers[buffer_b]; | ||
| 493 | SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); | ||
| 494 | SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount)); | ||
| 495 | std::array copies{BufferCopy{ | ||
| 496 | .src_offset = src_buffer.Offset(*cpu_src_address), | ||
| 497 | .dst_offset = dest_buffer.Offset(*cpu_dest_address), | ||
| 498 | .size = amount, | ||
| 499 | }}; | ||
| 500 | |||
| 501 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | ||
| 502 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | ||
| 503 | const u64 size = base_address_end - base_address; | ||
| 504 | const VAddr diff = base_address - *cpu_src_address; | ||
| 505 | const VAddr new_base_address = *cpu_dest_address + diff; | ||
| 506 | const IntervalType add_interval{new_base_address, new_base_address + size}; | ||
| 507 | uncommitted_ranges.add(add_interval); | ||
| 508 | tmp_intervals.push_back(add_interval); | ||
| 509 | }; | ||
| 510 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | ||
| 511 | // This subtraction in this order is important for overlapping copies. | ||
| 512 | common_ranges.subtract(subtract_interval); | ||
| 513 | for (const IntervalType add_interval : tmp_intervals) { | ||
| 514 | common_ranges.add(add_interval); | ||
| 515 | } | ||
| 516 | |||
| 517 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | ||
| 518 | if (source_dirty) { | ||
| 519 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | ||
| 520 | } | ||
| 521 | std::vector<u8> tmp_buffer(amount); | ||
| 522 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | ||
| 523 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | ||
| 524 | return true; | ||
| 525 | } | ||
| 526 | |||
| 527 | template <class P> | ||
| 434 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 528 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 435 | u32 size) { | 529 | u32 size) { |
| 436 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 530 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| @@ -616,30 +710,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 616 | 710 | ||
| 617 | const VAddr start_address = buffer_addr + range_offset; | 711 | const VAddr start_address = buffer_addr + range_offset; |
| 618 | const VAddr end_address = start_address + range_size; | 712 | const VAddr end_address = start_address + range_size; |
| 619 | const IntervalType search_interval{cpu_addr, 1}; | 713 | ForEachWrittenRange(start_address, range_size, add_download); |
| 620 | auto it = common_ranges.lower_bound(search_interval); | ||
| 621 | if (it == common_ranges.end()) { | ||
| 622 | it = common_ranges.begin(); | ||
| 623 | } | ||
| 624 | while (it != common_ranges.end()) { | ||
| 625 | VAddr inter_addr_end = it->upper(); | ||
| 626 | VAddr inter_addr = it->lower(); | ||
| 627 | if (inter_addr >= end_address) { | ||
| 628 | break; | ||
| 629 | } | ||
| 630 | if (inter_addr_end <= start_address) { | ||
| 631 | it++; | ||
| 632 | continue; | ||
| 633 | } | ||
| 634 | if (inter_addr_end > end_address) { | ||
| 635 | inter_addr_end = end_address; | ||
| 636 | } | ||
| 637 | if (inter_addr < start_address) { | ||
| 638 | inter_addr = start_address; | ||
| 639 | } | ||
| 640 | add_download(inter_addr, inter_addr_end); | ||
| 641 | it++; | ||
| 642 | } | ||
| 643 | const IntervalType subtract_interval{start_address, end_address}; | 714 | const IntervalType subtract_interval{start_address, end_address}; |
| 644 | common_ranges.subtract(subtract_interval); | 715 | common_ranges.subtract(subtract_interval); |
| 645 | }); | 716 | }); |
| @@ -737,7 +808,9 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 737 | const u32 size = index_buffer.size; | 808 | const u32 size = index_buffer.size; |
| 738 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 809 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| 739 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | 810 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { |
| 740 | runtime.BindIndexBuffer(buffer, offset, size); | 811 | const u32 new_offset = offset + maxwell3d.regs.index_array.first * |
| 812 | maxwell3d.regs.index_array.FormatSizeInBytes(); | ||
| 813 | runtime.BindIndexBuffer(buffer, new_offset, size); | ||
| 741 | } else { | 814 | } else { |
| 742 | runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, | 815 | runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, |
| 743 | maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, | 816 | maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, |
| @@ -951,7 +1024,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 951 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | 1024 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); |
| 952 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | 1025 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); |
| 953 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1026 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 954 | const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); | 1027 | const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); |
| 955 | const u32 size = std::min(address_size, draw_size); | 1028 | const u32 size = std::min(address_size, draw_size); |
| 956 | if (size == 0 || !cpu_addr) { | 1029 | if (size == 0 || !cpu_addr) { |
| 957 | index_buffer = NULL_BINDING; | 1030 | index_buffer = NULL_BINDING; |
| @@ -1350,30 +1423,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1350 | 1423 | ||
| 1351 | const VAddr start_address = buffer_addr + range_offset; | 1424 | const VAddr start_address = buffer_addr + range_offset; |
| 1352 | const VAddr end_address = start_address + range_size; | 1425 | const VAddr end_address = start_address + range_size; |
| 1353 | const IntervalType search_interval{start_address - range_size, 1}; | 1426 | ForEachWrittenRange(start_address, range_size, add_download); |
| 1354 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1355 | if (it == common_ranges.end()) { | ||
| 1356 | it = common_ranges.begin(); | ||
| 1357 | } | ||
| 1358 | while (it != common_ranges.end()) { | ||
| 1359 | VAddr inter_addr_end = it->upper(); | ||
| 1360 | VAddr inter_addr = it->lower(); | ||
| 1361 | if (inter_addr >= end_address) { | ||
| 1362 | break; | ||
| 1363 | } | ||
| 1364 | if (inter_addr_end <= start_address) { | ||
| 1365 | it++; | ||
| 1366 | continue; | ||
| 1367 | } | ||
| 1368 | if (inter_addr_end > end_address) { | ||
| 1369 | inter_addr_end = end_address; | ||
| 1370 | } | ||
| 1371 | if (inter_addr < start_address) { | ||
| 1372 | inter_addr = start_address; | ||
| 1373 | } | ||
| 1374 | add_download(inter_addr, inter_addr_end); | ||
| 1375 | it++; | ||
| 1376 | } | ||
| 1377 | const IntervalType subtract_interval{start_address, end_address}; | 1427 | const IntervalType subtract_interval{start_address, end_address}; |
| 1378 | common_ranges.subtract(subtract_interval); | 1428 | common_ranges.subtract(subtract_interval); |
| 1379 | }); | 1429 | }); |