summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2022-01-29 22:00:49 +0100
committerGravatar Fernando Sahmkow2022-01-29 22:53:27 +0100
commit4258d515e69d5f2d40301a1d1592304c12fd8126 (patch)
tree7b9ac154bb75d9ea79e0425fda2ee3db587d6d2e /src/video_core/buffer_cache
parentInline2Memory: Flush before writing buffer. (diff)
downloadyuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.tar.gz
yuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.tar.xz
yuzu-4258d515e69d5f2d40301a1d1592304c12fd8126.zip
Rasterizer: Implement Inline2Memory Acceleration.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h46
1 files changed, 43 insertions, 3 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 048dba4f3..792c3820a 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -131,6 +131,8 @@ public:
131 131
132 void DownloadMemory(VAddr cpu_addr, u64 size); 132 void DownloadMemory(VAddr cpu_addr, u64 size);
133 133
134 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
135
134 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); 136 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
135 137
136 void DisableGraphicsUniformBuffer(size_t stage, u32 index); 138 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
@@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
808 return; 810 return;
809 } 811 }
810 MICROPROFILE_SCOPE(GPU_DownloadMemory); 812 MICROPROFILE_SCOPE(GPU_DownloadMemory);
813 const bool is_accuracy_normal =
814 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
811 815
812 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; 816 boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
813 u64 total_size_bytes = 0; 817 u64 total_size_bytes = 0;
@@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
819 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 823 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
820 buffer.ForEachDownloadRangeAndClear( 824 buffer.ForEachDownloadRangeAndClear(
821 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 825 cpu_addr, size, [&](u64 range_offset, u64 range_size) {
826 if (is_accuracy_normal) {
827 return;
828 }
822 const VAddr buffer_addr = buffer.CpuAddr(); 829 const VAddr buffer_addr = buffer.CpuAddr();
823 const auto add_download = [&](VAddr start, VAddr end) { 830 const auto add_download = [&](VAddr start, VAddr end) {
824 const u64 new_offset = start - buffer_addr; 831 const u64 new_offset = start - buffer_addr;
@@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1417 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1424 const IntervalType base_interval{cpu_addr, cpu_addr + size};
1418 common_ranges.add(base_interval); 1425 common_ranges.add(base_interval);
1419 1426
1420 const bool is_accuracy_high =
1421 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
1422 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); 1427 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
1423 if (!is_async && !is_accuracy_high) { 1428 if (!is_async) {
1424 return; 1429 return;
1425 } 1430 }
1426 uncommitted_ranges.add(base_interval); 1431 uncommitted_ranges.add(base_interval);
@@ -1644,6 +1649,41 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
1644} 1649}
1645 1650
1646template <class P> 1651template <class P>
1652bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1653 std::span<u8> inlined_buffer) {
1654 const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
1655 if (!is_dirty) {
1656 return false;
1657 }
1658 if (!IsRegionGpuModified(dest_address, copy_size)) {
1659 return false;
1660 }
1661
1662 const IntervalType subtract_interval{dest_address, dest_address + copy_size};
1663 ClearDownload(subtract_interval);
1664
1665 BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
1666 auto& buffer = slot_buffers[buffer_id];
1667 SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
1668
1669 if constexpr (USE_MEMORY_MAPS) {
1670 std::array copies{BufferCopy{
1671 .src_offset = 0,
1672 .dst_offset = buffer.Offset(dest_address),
1673 .size = copy_size,
1674 }};
1675 auto upload_staging = runtime.UploadStagingBuffer(copy_size);
1676 u8* const src_pointer = upload_staging.mapped_span.data();
1677 std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
1678 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1679 } else {
1680 buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer);
1681 }
1682
1683 return true;
1684}
1685
1686template <class P>
1647void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { 1687void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
1648 DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); 1688 DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
1649} 1689}