path: root/src/video_core/buffer_cache
author      ReinUsesLisp    2021-01-16 16:20:18 -0300
committer   ReinUsesLisp    2021-02-13 02:17:24 -0300
commit      35df1d1864ba721ea7b1cebf9a106dd771cde4f5 (patch)
tree        034a8281294246e2a8eea92d1937607ad00ed428 /src/video_core/buffer_cache
parent      vulkan_device: Enable robustBufferAccess (diff)
download    yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.gz
            yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.tar.xz
            yuzu-35df1d1864ba721ea7b1cebf9a106dd771cde4f5.zip
vk_staging_buffer_pool: Add stream buffer for small uploads
This uses a ring buffer, similar to OpenGL's stream buffer, for small uploads. It stops us from allocating several small buffers, reducing memory fragmentation and improving cache locality. It uses dedicated allocations when possible.
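For context: a stream buffer of this kind is a single large, persistently mapped buffer that is sub-allocated with a cursor which wraps back to the start when it runs out of space. The sketch below is only a simplified illustration of that idea, not yuzu's vk_staging_buffer_pool API; the names (StreamBufferSketch, Allocation, Request, head) are assumptions made for the example, and a real implementation would also wait on GPU fences before reusing wrapped space.

// Illustrative sketch of a ring-buffer style sub-allocator for small uploads.
// Names are hypothetical and do not mirror yuzu's staging buffer pool.
#include <cstddef>
#include <cstdint>
#include <span>
#include <vector>

class StreamBufferSketch {
public:
    struct Allocation {
        std::span<std::uint8_t> mapped_span; // CPU-visible window for this request
        std::size_t offset;                  // where the window sits inside the ring
    };

    explicit StreamBufferSketch(std::size_t size) : memory(size) {}

    // Hand out a contiguous region, wrapping to the start when the tail is too small.
    Allocation Request(std::size_t size) {
        if (head + size > memory.size()) {
            head = 0; // wrap around; in-flight GPU work must be fenced before reuse
        }
        const std::size_t offset = head;
        head += size;
        return Allocation{
            .mapped_span = std::span<std::uint8_t>(memory.data() + offset, size),
            .offset = offset,
        };
    }

private:
    std::vector<std::uint8_t> memory; // stands in for a persistently mapped VkBuffer
    std::size_t head = 0;             // next free byte in the ring
};

Every Request hands back both a CPU-visible span and the allocation's offset inside the shared buffer; that offset is exactly what the diff below threads through the BufferCopy structures.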
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 28
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index e4f3c8e35..d6399bf24 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -239,8 +239,7 @@ private:
     void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
                                std::span<const BufferCopy> copies);
 
-    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
-                            std::span<const BufferCopy> copies);
+    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
 
     void DeleteBuffer(BufferId buffer_id);
 
@@ -362,11 +361,17 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
             auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
             const u8* const mapped_memory = download_staging.mapped_span.data();
             const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+            for (BufferCopy& copy : copies) {
+                // Modify copies to have the staging offset in mind
+                copy.dst_offset += download_staging.offset;
+            }
             runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
             runtime.Finish();
             for (const BufferCopy& copy : copies) {
                 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
-                const u8* copy_mapped_memory = mapped_memory + copy.dst_offset;
+                // Undo the modified offset
+                const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                const u8* copy_mapped_memory = mapped_memory + dst_offset;
                 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
             }
         } else {
@@ -554,7 +559,9 @@ void BufferCache<P>::PopAsyncFlushes() {
     }
     if constexpr (USE_MEMORY_MAPS) {
         auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-        for (const auto [copy, buffer_id] : downloads) {
+        for (auto& [copy, buffer_id] : downloads) {
+            // Have in mind the staging buffer offset for the copy
+            copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
             runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
         }
@@ -562,7 +569,9 @@ void BufferCache<P>::PopAsyncFlushes() {
         for (const auto [copy, buffer_id] : downloads) {
             const Buffer& buffer = slot_buffers[buffer_id];
             const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset;
+            // Undo the modified offset
+            const u64 dst_offset = copy.dst_offset - download_staging.offset;
+            const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
             cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
         }
     } else {
@@ -1117,13 +1126,16 @@ void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
 
 template <class P>
 void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
-                                        std::span<const BufferCopy> copies) {
+                                        std::span<BufferCopy> copies) {
     auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
     const std::span<u8> staging_pointer = upload_staging.mapped_span;
-    for (const BufferCopy& copy : copies) {
-        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+    for (BufferCopy& copy : copies) {
         u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
         cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+        // Apply the staging offset
+        copy.src_offset += upload_staging.offset;
     }
     runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
 }