author     2023-11-26 18:33:05 +0100
committer  2023-11-26 18:33:05 +0100
commit     f21340f7aa60296c7b56e293b5f870b7efd3807d (patch)
tree       9e2a8f9ba77be42bf7695b051f515e52481dd490 /src/video_core/buffer_cache
parent     Merge pull request #12180 from german77/cabinetmii (diff)
parent     Vulkan: Add a final barrier to the upload command buffer (diff)
Merge pull request #11535 from GPUCode/upload_cmdbuf
renderer_vulkan: Introduce separate cmd buffer for uploads
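
This merge gives the Vulkan backend a dedicated command buffer for buffer uploads, so uploads that cannot clash with work already recorded for the frame can be submitted ahead of it. To make that decision safely, the bind, clear, and copy paths below now call Buffer::MarkUsage to record which regions of each buffer the current frame has touched, and upload sites consult runtime.CanReorderUpload before copying. The runtime side is not part of this diff (it is limited to src/video_core/buffer_cache); the following is a minimal sketch of the reorder check the new calls imply, where Buffer::IsRegionUsed is an assumed accessor over the per-buffer usage tracker introduced below:

#include <algorithm>
#include <cstdint>
#include <span>

// Mirrors the BufferCopy fields used throughout this diff.
struct BufferCopy {
    std::uint64_t src_offset;
    std::uint64_t dst_offset;
    std::uint64_t size;
};

// Sketch only: yuzu's real check lives in the Vulkan runtime, outside this
// diff. Buffer::IsRegionUsed is a hypothetical query over the usage tracker.
template <typename Buffer>
bool CanReorderUpload(const Buffer& buffer, std::span<const BufferCopy> copies) {
    // Reordering onto the upload command buffer is safe only if no
    // destination range was already touched by the current frame.
    return std::ranges::none_of(copies, [&](const BufferCopy& copy) {
        return buffer.IsRegionUsed(copy.dst_offset, copy.size);
    });
}

When the check fails, the copy stays on the main command buffer, which is what the extra boolean arguments now passed to runtime.CopyBuffer communicate.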
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h       | 97
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h  |  4
-rw-r--r--  src/video_core/buffer_cache/usage_tracker.h      | 79
3 files changed, 114 insertions(+), 66 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f5b10411b..90dbd352f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
     if (!channel_state) {
         return;
     }
+    runtime.TickFrame(slot_buffers);
 
     // Calculate hits and shots and move hit bits to the right
     const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     for (const IntervalType& add_interval : tmp_intervals) {
         common_ranges.add(add_interval);
     }
-    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+    const auto& copy = copies[0];
+    src_buffer.MarkUsage(copy.src_offset, copy.size);
+    dest_buffer.MarkUsage(copy.dst_offset, copy.size);
+    runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
     if (has_new_downloads) {
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
@@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     common_ranges.subtract(subtract_interval);
 
     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
-    auto& dest_buffer = slot_buffers[buffer];
+    Buffer& dest_buffer = slot_buffers[buffer];
     const u32 offset = dest_buffer.Offset(*cpu_dst_address);
     runtime.ClearBuffer(dest_buffer, offset, size, value);
+    dest_buffer.MarkUsage(offset, size);
     return true;
 }
 
@@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
             const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
             async_downloads += std::make_pair(base_interval, 1);
+            buffer.MarkUsage(copy.src_offset, copy.size);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
             normalized_copies.push_back(second_copy);
         }
@@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             // Have in mind the staging buffer offset for the copy
             copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
-            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
-                               false);
+            Buffer& buffer = slot_buffers[buffer_id];
+            buffer.MarkUsage(copy.src_offset, copy.size);
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
         }
         runtime.PostCopyBarrier();
         runtime.Finish();
@@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
         std::memcpy(upload_staging.mapped_span.data(),
                     draw_state.inline_index_draw_indexes.data(), size);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
     } else {
         buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
     }
@@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
+        buffer.MarkUsage(offset, size);
         runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
                                 draw_state.index_buffer.first, draw_state.index_buffer.count,
                                 buffer, offset, size);
@@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
 
         const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, binding.size);
 
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
+    buffer.MarkUsage(offset, size);
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
     } else {
@@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
 
         if (is_written) {
@@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
-        host_bindings.sizes.push_back(binding.size);
+        host_bindings.sizes.push_back(size);
     }
     if (host_bindings.buffers.size() > 0) {
         runtime.BindTransformFeedbackBuffers(host_bindings);
@@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
             runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
             ++binding_index;
@@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
 
@@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
     }
+    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
     channel_state->vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
-        .buffer_id = FindBuffer(*cpu_addr, size),
+        .buffer_id = buffer_id,
     };
 }
 
@@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
         .dst_offset = dst_base_offset,
         .size = overlap.SizeBytes(),
     });
-    runtime.CopyBuffer(new_buffer, overlap, copies);
+    new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
+    runtime.CopyBuffer(new_buffer, overlap, copies, true);
     DeleteBuffer(overlap_id, true);
 }
 
@@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
     auto& new_buffer = slot_buffers[new_buffer_id];
-    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
+    const size_t size_bytes = new_buffer.SizeBytes();
+    runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
+    new_buffer.MarkUsage(0, size_bytes);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
 
 template <class P>
 bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    return SynchronizeBufferImpl(buffer, cpu_addr, size);
-}
-
-template <class P>
-bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
     boost::container::small_vector<BufferCopy, 4> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
@@ -1494,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
 }
 
 template <class P>
-bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    boost::container::small_vector<BufferCopy, 4> copies;
-    u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
-    IntervalSet found_sets{};
-    auto make_copies = [&] {
-        for (auto& interval : found_sets) {
-            const std::size_t sub_size = interval.upper() - interval.lower();
-            const VAddr cpu_addr_ = interval.lower();
-            copies.push_back(BufferCopy{
-                .src_offset = total_size_bytes,
-                .dst_offset = cpu_addr_ - buffer.CpuAddr(),
-                .size = sub_size,
-            });
-            total_size_bytes += sub_size;
-            largest_copy = std::max<u64>(largest_copy, sub_size);
-        }
-        const std::span<BufferCopy> copies_span(copies.data(), copies.size());
-        UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
-    };
-    memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
-        const VAddr base_adr = cpu_addr_out;
-        const VAddr end_adr = base_adr + range_size;
-        const IntervalType add_interval{base_adr, end_adr};
-        found_sets.add(add_interval);
-    });
-    if (found_sets.empty()) {
-        return true;
-    }
-    const IntervalType search_interval{cpu_addr, cpu_addr + size};
-    auto it = common_ranges.lower_bound(search_interval);
-    auto it_end = common_ranges.upper_bound(search_interval);
-    if (it == common_ranges.end()) {
-        make_copies();
-        return false;
-    }
-    while (it != it_end) {
-        found_sets.subtract(*it);
-        it++;
-    }
-    make_copies();
-    return false;
-}
-
-template <class P>
 void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                   std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
@@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }
 }
 
@@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }
@@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
         for (BufferCopy& copy : copies) {
             // Modify copies to have the staging offset in mind
            copy.dst_offset += download_staging.offset;
+            buffer.MarkUsage(copy.src_offset, copy.size);
         }
-        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
         runtime.Finish();
         for (const BufferCopy& copy : copies) {
             const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
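
The MarkUsage calls threaded through the bind, clear, and copy paths above feed the usage tracker added below. The per-buffer member itself lives in the backend Buffer classes, which are outside this diff; a plausible sketch of the glue, with IsRegionUsed as an assumed query consumed by the runtime's reorder check:

#include "common/common_types.h"
#include "video_core/buffer_cache/usage_tracker.h"

// Hypothetical backend buffer; only the tracker-related members are shown.
class Buffer {
public:
    explicit Buffer(size_t size_bytes) : usage_tracker{size_bytes} {}

    // Record that [offset, offset + size) is referenced by the current frame.
    void MarkUsage(u64 offset, u64 size) noexcept {
        usage_tracker.Track(offset, size);
    }

    // Assumed query: has the current frame already touched this region?
    bool IsRegionUsed(u64 offset, u64 size) const noexcept {
        return usage_tracker.IsUsed(offset, size);
    }

private:
    VideoCommon::UsageTracker usage_tracker;
};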
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index eed267361..d6d696d8c 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -529,10 +529,6 @@ private:
 
     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
 
-    bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
-
-    bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
-
     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                       std::span<BufferCopy> copies);
 
diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h
new file mode 100644
index 000000000..ab05fe415
--- /dev/null
+++ b/src/video_core/buffer_cache/usage_tracker.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class UsageTracker {
+    static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
+    static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
+    static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
+
+public:
+    explicit UsageTracker(size_t size) {
+        const size_t num_pages = (size >> PAGE_SHIFT) + 1;
+        pages.resize(num_pages, 0ULL);
+    }
+
+    void Reset() noexcept {
+        std::ranges::fill(pages, 0ULL);
+    }
+
+    void Track(u64 offset, u64 size) noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        TrackPage(page, offset, size);
+        if (page == page_end) {
+            return;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            pages[i] = ~u64{0};
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+    [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        if (IsPageUsed(page, offset, size)) {
+            return true;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            if (pages[i] != 0) {
+                return true;
+            }
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+private:
+    void TrackPage(u64 page, u64 offset, u64 size) noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        pages[page] |= (~u64{0} & mask) << first_bit;
+    }
+
+    bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        const size_t mask2 = (~u64{0} & mask) << first_bit;
+        return (pages[page] & mask2) != 0;
+    }
+
+private:
+    std::vector<u64> pages;
+};
+
+} // namespace VideoCommon
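
With BYTES_PER_BIT_SHIFT = 6 and PAGE_SHIFT = 12, the tracker keeps one bit per 64 bytes of buffer and one u64 word per 4 KiB page, so both Track and IsUsed are a handful of shifts and masks. A standalone illustration of the semantics (a hypothetical test harness, not part of the commit); Reset corresponds to the per-frame runtime.TickFrame(slot_buffers) call added to BufferCache::TickFrame above:

#include <cassert>

#include "video_core/buffer_cache/usage_tracker.h"

int main() {
    VideoCommon::UsageTracker tracker(64 * 1024); // tracker for a 64 KiB buffer

    tracker.Track(128, 256);           // mark bytes [128, 384) as used
    assert(tracker.IsUsed(128, 64));   // exact sub-range -> used
    assert(tracker.IsUsed(300, 512));  // partial overlap -> used
    assert(!tracker.IsUsed(4096, 64)); // untouched page -> not used

    tracker.Reset();                   // cleared once per frame
    assert(!tracker.IsUsed(128, 256));
    return 0;
}

Tracking is conservative at 64-byte granularity: touching any byte of a 64-byte chunk marks the whole chunk, which can only turn a reorderable upload into a non-reorderable one, never the reverse.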