diff options
| author | 2022-01-29 22:00:49 +0100 | |
|---|---|---|
| committer | 2022-01-29 22:53:27 +0100 | |
| commit | 4258d515e69d5f2d40301a1d1592304c12fd8126 (patch) | |
| tree | 7b9ac154bb75d9ea79e0425fda2ee3db587d6d2e /src/video_core/buffer_cache | |
| parent | Inline2Memory: Flush before writing buffer. (diff) | |
| download | yuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.tar.gz yuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.tar.xz yuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.zip | |
Rasterizer: Implement Inline2Memory Acceleration.
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 048dba4f3..792c3820a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -131,6 +131,8 @@ public: | |||
| 131 | 131 | ||
| 132 | void DownloadMemory(VAddr cpu_addr, u64 size); | 132 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 133 | 133 | ||
| 134 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer); | ||
| 135 | |||
| 134 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | 136 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 135 | 137 | ||
| 136 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | 138 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); |
| @@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 808 | return; | 810 | return; |
| 809 | } | 811 | } |
| 810 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 812 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 813 | const bool is_accuracy_normal = | ||
| 814 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 811 | 815 | ||
| 812 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | 816 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; |
| 813 | u64 total_size_bytes = 0; | 817 | u64 total_size_bytes = 0; |
| @@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 819 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 823 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 820 | buffer.ForEachDownloadRangeAndClear( | 824 | buffer.ForEachDownloadRangeAndClear( |
| 821 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 825 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
| 826 | if (is_accuracy_normal) { | ||
| 827 | return; | ||
| 828 | } | ||
| 822 | const VAddr buffer_addr = buffer.CpuAddr(); | 829 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 823 | const auto add_download = [&](VAddr start, VAddr end) { | 830 | const auto add_download = [&](VAddr start, VAddr end) { |
| 824 | const u64 new_offset = start - buffer_addr; | 831 | const u64 new_offset = start - buffer_addr; |
| @@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
| 1417 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1424 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; |
| 1418 | common_ranges.add(base_interval); | 1425 | common_ranges.add(base_interval); |
| 1419 | 1426 | ||
| 1420 | const bool is_accuracy_high = | ||
| 1421 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 1422 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | 1427 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
| 1423 | if (!is_async && !is_accuracy_high) { | 1428 | if (!is_async) { |
| 1424 | return; | 1429 | return; |
| 1425 | } | 1430 | } |
| 1426 | uncommitted_ranges.add(base_interval); | 1431 | uncommitted_ranges.add(base_interval); |
| @@ -1644,6 +1649,41 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1644 | } | 1649 | } |
| 1645 | 1650 | ||
| 1646 | template <class P> | 1651 | template <class P> |
| | 1652 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |
| | 1653 | std::span<u8> inlined_buffer) { // Uploads the first copy_size bytes of inlined_buffer into the cached buffer covering dest_address. Returns false without uploading anything unless the range is both registered and GPU-modified. Precondition: inlined_buffer.size() >= copy_size. | |
| | 1654 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); // despite the name, this holds the "is registered" result | |
| | 1655 | if (!is_dirty) { | |
| | 1656 | return false; | |
| | 1657 | } | |
| | 1658 | if (!IsRegionGpuModified(dest_address, copy_size)) { | |
| | 1659 | return false; | |
| | 1660 | } | |
| | 1661 | // NOTE(review): ClearDownload presumably cancels pending downloads of this range so stale GPU data cannot overwrite the inlined bytes - confirm. | |
| | 1662 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | |
| | 1663 | ClearDownload(subtract_interval); | |
| | 1664 | // Look up the buffer for dest_address and synchronize the target range before the upload. | |
| | 1665 | BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size)); | |
| | 1666 | auto& buffer = slot_buffers[buffer_id]; | |
| | 1667 | SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); | |
| | 1668 | // Either path below writes exactly copy_size bytes at buffer.Offset(dest_address). | |
| | 1669 | if constexpr (USE_MEMORY_MAPS) { | |
| | 1670 | std::array copies{BufferCopy{ | |
| | 1671 | .src_offset = 0, | |
| | 1672 | .dst_offset = buffer.Offset(dest_address), | |
| | 1673 | .size = copy_size, | |
| | 1674 | }}; | |
| | 1675 | auto upload_staging = runtime.UploadStagingBuffer(copy_size); | |
| | 1676 | u8* const src_pointer = upload_staging.mapped_span.data(); | |
| | 1677 | std::memcpy(src_pointer, inlined_buffer.data(), copy_size); | |
| | 1678 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | |
| | 1679 | } else { | |
| | 1680 | buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); // bound the span: it may be longer than copy_size, and only copy_size bytes were cleared and synchronized above | |
| | 1681 | } | |
| | 1682 | | |
| | 1683 | return true; | |
| | 1684 | } | |
| 1685 | |||
| 1686 | template <class P> | ||
| 1647 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | 1687 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { |
| 1648 | DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); | 1688 | DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); |
| 1649 | } | 1689 | } |