Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/CMakeLists.txt                            |   1
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h               | 117
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h          |   4
-rw-r--r--  src/video_core/buffer_cache/usage_tracker.h              |  79
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp       |   7
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h         |  21
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp       |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp       |  40
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h         |  23
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.cpp   |  29
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h     |  14
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp     |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp          |  28
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h            |  21
-rw-r--r--  src/video_core/renderer_vulkan/vk_smaa.cpp               |   4
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h  |   4
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp      |  18
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h        |   1
-rw-r--r--  src/video_core/texture_cache/slot_vector.h               |   4
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp           |   3
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h            |   4
21 files changed, 312 insertions, 114 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index b65b9f2a2..c22c7631c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(video_core STATIC
     buffer_cache/buffer_cache.cpp
     buffer_cache/buffer_cache.h
     buffer_cache/memory_tracker_base.h
+    buffer_cache/usage_tracker.h
     buffer_cache/word_manager.h
     cache_types.h
     cdma_pusher.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f5b10411b..6d1fc3887 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
     if (!channel_state) {
         return;
     }
+    runtime.TickFrame(slot_buffers);

     // Calculate hits and shots and move hit bits to the right
     const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     for (const IntervalType& add_interval : tmp_intervals) {
         common_ranges.add(add_interval);
     }
-    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+    const auto& copy = copies[0];
+    src_buffer.MarkUsage(copy.src_offset, copy.size);
+    dest_buffer.MarkUsage(copy.dst_offset, copy.size);
+    runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
     if (has_new_downloads) {
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
@@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     common_ranges.subtract(subtract_interval);

     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
-    auto& dest_buffer = slot_buffers[buffer];
+    Buffer& dest_buffer = slot_buffers[buffer];
     const u32 offset = dest_buffer.Offset(*cpu_dst_address);
     runtime.ClearBuffer(dest_buffer, offset, size, value);
+    dest_buffer.MarkUsage(offset, size);
     return true;
 }

@@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
             const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
             async_downloads += std::make_pair(base_interval, 1);
+            buffer.MarkUsage(copy.src_offset, copy.size);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
             normalized_copies.push_back(second_copy);
         }
@@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             // Have in mind the staging buffer offset for the copy
             copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
-            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
-                               false);
+            Buffer& buffer = slot_buffers[buffer_id];
+            buffer.MarkUsage(copy.src_offset, copy.size);
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
         }
         runtime.PostCopyBarrier();
         runtime.Finish();
@@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
         std::memcpy(upload_staging.mapped_span.data(),
                     draw_state.inline_index_draw_indexes.data(), size);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
     } else {
         buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
     }
@@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
+        buffer.MarkUsage(offset, size);
         runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
                                 draw_state.index_buffer.first, draw_state.index_buffer.count,
                                 buffer, offset, size);
@@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {

         const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, binding.size);

         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
+    buffer.MarkUsage(offset, size);
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
     } else {
@@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;

         if (is_written) {
@@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {

         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
-        host_bindings.sizes.push_back(binding.size);
+        host_bindings.sizes.push_back(size);
     }
     if (host_bindings.buffers.size() > 0) {
         runtime.BindTransformFeedbackBuffers(host_bindings);
@@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
             runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
             ++binding_index;
@@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;

@@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {

         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
     }
+    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
     channel_state->vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
-        .buffer_id = FindBuffer(*cpu_addr, size),
+        .buffer_id = buffer_id,
     };
 }

@@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
         .dst_offset = dst_base_offset,
         .size = overlap.SizeBytes(),
     });
-    runtime.CopyBuffer(new_buffer, overlap, copies);
+    new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
+    runtime.CopyBuffer(new_buffer, overlap, copies, true);
     DeleteBuffer(overlap_id, true);
 }

@@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
     auto& new_buffer = slot_buffers[new_buffer_id];
-    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
+    const size_t size_bytes = new_buffer.SizeBytes();
+    runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
+    new_buffer.MarkUsage(0, size_bytes);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {

 template <class P>
 bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    return SynchronizeBufferImpl(buffer, cpu_addr, size);
-}
-
-template <class P>
-bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
     boost::container::small_vector<BufferCopy, 4> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
@@ -1494,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
 }

 template <class P>
-bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    boost::container::small_vector<BufferCopy, 4> copies;
-    u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
-    IntervalSet found_sets{};
-    auto make_copies = [&] {
-        for (auto& interval : found_sets) {
-            const std::size_t sub_size = interval.upper() - interval.lower();
-            const VAddr cpu_addr_ = interval.lower();
-            copies.push_back(BufferCopy{
-                .src_offset = total_size_bytes,
-                .dst_offset = cpu_addr_ - buffer.CpuAddr(),
-                .size = sub_size,
-            });
-            total_size_bytes += sub_size;
-            largest_copy = std::max<u64>(largest_copy, sub_size);
-        }
-        const std::span<BufferCopy> copies_span(copies.data(), copies.size());
-        UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
-    };
-    memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
-        const VAddr base_adr = cpu_addr_out;
-        const VAddr end_adr = base_adr + range_size;
-        const IntervalType add_interval{base_adr, end_adr};
-        found_sets.add(add_interval);
-    });
-    if (found_sets.empty()) {
-        return true;
-    }
-    const IntervalType search_interval{cpu_addr, cpu_addr + size};
-    auto it = common_ranges.lower_bound(search_interval);
-    auto it_end = common_ranges.upper_bound(search_interval);
-    if (it == common_ranges.end()) {
-        make_copies();
-        return false;
-    }
-    while (it != it_end) {
-        found_sets.subtract(*it);
-        it++;
-    }
-    make_copies();
-    return false;
-}
-
-template <class P>
 void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                   std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
@@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }
 }

@@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }
@@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
     for (BufferCopy& copy : copies) {
         // Modify copies to have the staging offset in mind
         copy.dst_offset += download_staging.offset;
+        buffer.MarkUsage(copy.src_offset, copy.size);
     }
-    runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+    runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
     runtime.Finish();
     for (const BufferCopy& copy : copies) {
         const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
@@ -1780,15 +1753,25 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
         const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
         return std::min(memory_layout_size, static_cast<u32>(8_MiB));
     }();
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
-    if (!cpu_addr || size == 0) {
+    // Alignment only applies to the offset of the buffer
+    const u32 alignment = runtime.GetStorageBufferAlignment();
+    const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
+    const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
+
+    const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
+    if (!aligned_cpu_addr || size == 0) {
         LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
         return NULL_BINDING;
     }
-    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}",
+               cbuf_index);
+    // The end address used for size calculation does not need to be aligned
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
+
     const Binding binding{
-        .cpu_addr = *cpu_addr,
-        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
+        .cpu_addr = *aligned_cpu_addr,
+        .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr),
         .buffer_id = BufferId{},
     };
     return binding;
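Note on the StorageBufferBinding hunk above: the binding's GPU address is aligned down to the device's storage buffer alignment and the size grows by the displacement, so the shader still sees the original bytes while the bound offset satisfies the API requirement. A standalone sketch of the arithmetic (example values; AlignDown re-implemented here rather than taken from common/alignment.h):

#include <cassert>
#include <cstdint>

// Illustrative stand-in for Common::AlignDown (power-of-two alignment).
constexpr uint64_t AlignDown(uint64_t value, uint64_t align) {
    return value & ~(align - 1);
}

int main() {
    const uint64_t gpu_addr = 0x10000130; // hypothetical unaligned SSBO address
    const uint32_t size = 0x200;          // hypothetical binding size
    const uint32_t alignment = 0x100;     // e.g. a 256-byte min SSBO offset alignment

    const uint64_t aligned_addr = AlignDown(gpu_addr, alignment); // 0x10000100
    // Widen the size so [aligned_addr, gpu_addr + size) stays covered.
    const uint32_t aligned_size = static_cast<uint32_t>(gpu_addr - aligned_addr) + size;
    assert(aligned_addr + aligned_size == gpu_addr + size); // 0x230 bytes in total
}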
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index eed267361..d6d696d8c 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -529,10 +529,6 @@ private:

     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);

-    bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
-
-    bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
-
     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                       std::span<BufferCopy> copies);

diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h
new file mode 100644
index 000000000..ab05fe415
--- /dev/null
+++ b/src/video_core/buffer_cache/usage_tracker.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class UsageTracker {
+    static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
+    static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
+    static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
+
+public:
+    explicit UsageTracker(size_t size) {
+        const size_t num_pages = (size >> PAGE_SHIFT) + 1;
+        pages.resize(num_pages, 0ULL);
+    }
+
+    void Reset() noexcept {
+        std::ranges::fill(pages, 0ULL);
+    }
+
+    void Track(u64 offset, u64 size) noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        TrackPage(page, offset, size);
+        if (page == page_end) {
+            return;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            pages[i] = ~u64{0};
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+    [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        if (IsPageUsed(page, offset, size)) {
+            return true;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            if (pages[i] != 0) {
+                return true;
+            }
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+private:
+    void TrackPage(u64 page, u64 offset, u64 size) noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        pages[page] |= (~u64{0} & mask) << first_bit;
+    }
+
+    bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        const size_t mask2 = (~u64{0} & mask) << first_bit;
+        return (pages[page] & mask2) != 0;
+    }
+
+private:
+    std::vector<u64> pages;
+};
+
+} // namespace VideoCommon
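For orientation: UsageTracker keeps one u64 word per 4096-byte page (PAGE_SHIFT = 12), with each bit covering a 64-byte chunk (BYTES_PER_BIT_SHIFT = 6). A minimal re-derivation of the TrackPage mask for a range inside one page (example values, plain stdint types instead of the project's u64 alias):

#include <cassert>
#include <cstdint>

int main() {
    // Track(offset = 128, size = 256) staying within a single 4096-byte page.
    const uint64_t offset = 128, size = 256;
    const uint64_t first_bit = (offset % 4096) >> 6; // chunk index inside the page -> 2
    const uint64_t num_bits = size >> 6;             // chunks covered -> 4
    const uint64_t mask = (~uint64_t{0} >> (64 - num_bits)) << first_bit;
    assert(mask == 0x3c); // bits 2..5 set: bytes 128..383 marked as used
}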
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 38d553d3c..dfd696de6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
 }

 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                                    bool) {
     CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
 }

 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies) {
-    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+                                    std::span<const VideoCommon::BufferCopy> copies, bool) {
+    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
 }

 void BufferCacheRuntime::PreCopyBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 41b746f3b..000f29a82 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -30,6 +30,8 @@ public:

     void MakeResident(GLenum access) noexcept;

+    void MarkUsage(u64 offset, u64 size) {}
+
     [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);

     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@@ -66,22 +68,29 @@ public:

     [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);

+    bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
+        return false;
+    }
+
     void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

     void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

     void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                    bool can_reorder_upload = false);

     void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies);
+                    std::span<const VideoCommon::BufferCopy> copies, bool);

     void PreCopyBarrier();
     void PostCopyBarrier();
     void Finish();

+    void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
+
     void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);

     void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -182,6 +191,10 @@ public:
         return device.CanReportMemoryUsage();
     }

+    u32 GetStorageBufferAlignment() const {
+        return static_cast<u32>(device.GetShaderStorageBufferAlignment());
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2888e0238..26f2d0ea7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_gl_bool_ref_bug = device.HasBoolRefBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+          .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(),
       },
       host_info{
           .support_float64 = true,
@@ -240,6 +241,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .needs_demote_reorder = device.IsAmd(),
           .support_snorm_render_buffer = false,
           .support_viewport_index_layer = device.HasVertexViewportLayer(),
+          .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
           .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
           .support_conditional_barrier = device.SupportsConditionalBarriers(),
       } {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8148e89a..5958f52f7 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
 } // Anonymous namespace

 Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}

 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
                VAddr cpu_addr_, u64 size_bytes_)
     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
-      device{&runtime.device}, buffer{
-                                   CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
+      device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
+      tracker{SizeBytes()} {
     if (runtime.device.HasDebuggingToolAttached()) {
         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
     }
@@ -355,12 +355,35 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
     return device.CanReportMemoryUsage();
 }

+u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
+    return static_cast<u32>(device.GetStorageBufferAlignment());
+}
+
+void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
+    for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
+        it->ResetUsageTracking();
+    }
+}
+
 void BufferCacheRuntime::Finish() {
     scheduler.Finish();
 }

+bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
+                                          std::span<const VideoCommon::BufferCopy> copies) {
+    if (Settings::values.disable_buffer_reorder) {
+        return false;
+    }
+    const bool can_use_upload_cmdbuf =
+        std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
+            return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
+        });
+    return can_use_upload_cmdbuf;
+}
+
 void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                                    bool can_reorder_upload) {
     if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
         return;
     }
@@ -376,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
     };
+
     // Measuring a popular game, this number never exceeds the specified size once data is warmed up
     boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
     std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+    if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
+        scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
+                                             vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
+            upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+        });
+        return;
+    }
+
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
         if (barrier) {
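Net effect of the new path: a staging upload whose destination bytes have not been touched this frame is replayed on the dedicated upload command buffer, which the master semaphore submits ahead of the main one (see vk_master_semaphore.cpp below), so the copy never interrupts a render pass. A rough standalone model of the decision (hypothetical types, not yuzu's API):

#include <algorithm>
#include <cstdint>
#include <span>
#include <vector>

struct Copy {
    uint64_t dst_offset;
    uint64_t size;
};

// Stand-in for Buffer::IsRegionUsed, backed by a UsageTracker-style bitmap.
struct FrameUsage {
    std::vector<bool> chunk_used; // one flag per 64-byte chunk
    bool IsRegionUsed(uint64_t offset, uint64_t size) const {
        for (uint64_t i = offset / 64; i <= (offset + size - 1) / 64; ++i) {
            if (chunk_used[i]) {
                return true;
            }
        }
        return false;
    }
};

// Mirrors CanReorderUpload: reordering is safe only when no copy lands on
// bytes already read or written by commands recorded earlier this frame.
bool CanReorder(const FrameUsage& usage, std::span<const Copy> copies) {
    return std::ranges::all_of(copies, [&](const Copy& copy) {
        return !usage.IsRegionUsed(copy.dst_offset, copy.size);
    });
}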
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 95446c732..0b3fbd6d0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,6 +5,7 @@

 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
+#include "video_core/buffer_cache/usage_tracker.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -34,6 +35,18 @@ public:
         return *buffer;
     }

+    [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
+        return tracker.IsUsed(offset, size);
+    }
+
+    void MarkUsage(u64 offset, u64 size) noexcept {
+        tracker.Track(offset, size);
+    }
+
+    void ResetUsageTracking() noexcept {
+        tracker.Reset();
+    }
+
     operator VkBuffer() const noexcept {
         return *buffer;
     }
@@ -49,6 +62,7 @@ private:
     const Device* device{};
     vk::Buffer buffer;
     std::vector<BufferView> views;
+    VideoCommon::UsageTracker tracker;
 };

 class QuadArrayIndexBuffer;
@@ -67,6 +81,8 @@ public:
                        ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                        DescriptorPool& descriptor_pool);

+    void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
+
     void Finish();

     u64 GetDeviceLocalMemory() const;
@@ -75,16 +91,21 @@ public:

     bool CanReportMemoryUsage() const;

+    u32 GetStorageBufferAlignment() const;
+
     [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);

     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);

+    bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
+
     void FreeDeferredStagingBuffer(StagingBufferRef& ref);

     void PreCopyBarrier();

     void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                    bool can_reorder_upload = false);

     void PostCopyBarrier();

diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 6b288b994..ac8b6e838 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
100 Refresh(); 100 Refresh();
101} 101}
102 102
103VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 103VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
104 VkSemaphore wait_semaphore, u64 host_tick) { 104 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
105 u64 host_tick) {
105 if (semaphore) { 106 if (semaphore) {
106 return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); 107 return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
108 host_tick);
107 } else { 109 } else {
108 return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); 110 return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
109 } 111 }
110} 112}
111 113
@@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
115}; 117};
116 118
117VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, 119VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
120 vk::CommandBuffer& upload_cmdbuf,
118 VkSemaphore signal_semaphore, 121 VkSemaphore signal_semaphore,
119 VkSemaphore wait_semaphore, u64 host_tick) { 122 VkSemaphore wait_semaphore, u64 host_tick) {
120 const VkSemaphore timeline_semaphore = *semaphore; 123 const VkSemaphore timeline_semaphore = *semaphore;
@@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
     const std::array signal_values{host_tick, u64(0)};
     const std::array signal_semaphores{timeline_semaphore, signal_semaphore};

+    const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
     const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
     const VkTimelineSemaphoreSubmitInfo timeline_si{
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
         .waitSemaphoreCount = num_wait_semaphores,
         .pWaitSemaphores = &wait_semaphore,
         .pWaitDstStageMask = wait_stage_masks.data(),
-        .commandBufferCount = 1,
-        .pCommandBuffers = cmdbuf.address(),
+        .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+        .pCommandBuffers = cmdbuffers.data(),
         .signalSemaphoreCount = num_signal_semaphores,
         .pSignalSemaphores = signal_semaphores.data(),
     };
@@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
     return device.GetGraphicsQueue().Submit(submit_info);
 }

-VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                                           VkSemaphore wait_semaphore, u64 host_tick) {
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
+                                           vk::CommandBuffer& upload_cmdbuf,
+                                           VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+                                           u64 host_tick) {
     const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
     const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;

+    const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
     const VkSubmitInfo submit_info{
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .pNext = nullptr,
         .waitSemaphoreCount = num_wait_semaphores,
         .pWaitSemaphores = &wait_semaphore,
         .pWaitDstStageMask = wait_stage_masks.data(),
-        .commandBufferCount = 1,
-        .pCommandBuffers = cmdbuf.address(),
+        .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+        .pCommandBuffers = cmdbuffers.data(),
         .signalSemaphoreCount = num_signal_semaphores,
         .pSignalSemaphores = &signal_semaphore,
     };
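The two-element pCommandBuffers array is what makes the reordering safe: command buffers in a single VkSubmitInfo enter the queue in array order, so upload_cmdbuf's transfers sit in submission order before everything in cmdbuf, and the barrier recorded at the end of the upload buffer (see vk_scheduler.cpp below) makes those writes visible. Raw-Vulkan sketch of the pattern, outside yuzu's wrappers (handles assumed valid and fully recorded):

const VkCommandBuffer cmdbuffers[] = {upload_cmdbuf, frame_cmdbuf};
const VkSubmitInfo submit_info{
    .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .commandBufferCount = 2,
    .pCommandBuffers = cmdbuffers, // submission order: uploads first, then the frame
};
vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);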
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 3f599d7bd..7dfb93ffb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -52,14 +52,16 @@ public:
52 void Wait(u64 tick); 52 void Wait(u64 tick);
53 53
54 /// Submits the device graphics queue, updating the tick as necessary 54 /// Submits the device graphics queue, updating the tick as necessary
55 VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 55 VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
56 VkSemaphore wait_semaphore, u64 host_tick); 56 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
57 57
58private: 58private:
59 VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 59 VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
60 VkSemaphore wait_semaphore, u64 host_tick); 60 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
61 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 61 u64 host_tick);
62 VkSemaphore wait_semaphore, u64 host_tick); 62 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
63 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
64 u64 host_tick);
63 65
64 void WaitThread(std::stop_token token); 66 void WaitThread(std::stop_token token);
65 67
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 89b455bff..2a13b2a72 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -373,6 +373,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
         .has_broken_robust =
             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
+        .min_ssbo_alignment = device.GetStorageBufferAlignment(),
     };

     host_info = Shader::HostTranslateInfo{
@@ -383,6 +384,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
             driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
         .support_snorm_render_buffer = true,
         .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
+        .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
         .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
         .support_conditional_barrier = device.SupportsConditionalBarriers(),
     };
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 3be7837f4..146923db4 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -22,11 +22,12 @@ namespace Vulkan {

 MICROPROFILE_DECLARE(Vulkan_WaitForWorker);

-void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
+void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                         vk::CommandBuffer upload_cmdbuf) {
     auto command = first;
     while (command != nullptr) {
         auto next = command->GetNext();
-        command->Execute(cmdbuf);
+        command->Execute(cmdbuf, upload_cmdbuf);
         command->~Command();
         command = next;
     }
@@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
             // Perform the work, tracking whether the chunk was a submission
             // before executing.
             const bool has_submit = work->HasSubmit();
-            work->ExecuteAll(current_cmdbuf);
+            work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);

             // If the chunk was a submission, reallocate the command buffer.
             if (has_submit) {
@@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
         .pInheritanceInfo = nullptr,
     });
+    current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+    current_upload_cmdbuf.Begin({
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .pNext = nullptr,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+        .pInheritanceInfo = nullptr,
+    });
 }

 u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
     InvalidateState();

     const u64 signal_value = master_semaphore->NextTick();
-    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+    RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
+                            this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        };
+        upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+        upload_cmdbuf.End();
         cmdbuf.End();

         if (on_submit) {
@@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se

         std::scoped_lock lock{submit_mutex};
         switch (const VkResult result = master_semaphore->SubmitQueue(
-                cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
+                cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
         case VK_SUCCESS:
             break;
         case VK_ERROR_DEVICE_LOST:
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index da03803aa..f8d8ca80a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -80,7 +80,8 @@ public:

     /// Send work to a separate thread.
     template <typename T>
-    void Record(T&& command) {
+        requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
+    void RecordWithUploadBuffer(T&& command) {
         if (chunk->Record(command)) {
             return;
         }
@@ -88,6 +89,15 @@ public:
         (void)chunk->Record(command);
     }

+    template <typename T>
+        requires std::is_invocable_v<T, vk::CommandBuffer>
+    void Record(T&& c) {
+        this->RecordWithUploadBuffer(
+            [command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
+                command(cmdbuf);
+            });
+    }
+
     /// Returns the current command buffer tick.
     [[nodiscard]] u64 CurrentTick() const noexcept {
         return master_semaphore->CurrentTick();
@@ -119,7 +129,7 @@ private:
     public:
         virtual ~Command() = default;

-        virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
+        virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;

         Command* GetNext() const {
             return next;
@@ -142,8 +152,8 @@ private:
         TypedCommand(TypedCommand&&) = delete;
         TypedCommand& operator=(TypedCommand&&) = delete;

-        void Execute(vk::CommandBuffer cmdbuf) const override {
-            command(cmdbuf);
+        void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
+            command(cmdbuf, upload_cmdbuf);
         }

     private:
@@ -152,7 +162,7 @@ private:

     class CommandChunk final {
     public:
-        void ExecuteAll(vk::CommandBuffer cmdbuf);
+        void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);

         template <typename T>
         bool Record(T& command) {
@@ -228,6 +238,7 @@ private:
     VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;

     vk::CommandBuffer current_cmdbuf;
+    vk::CommandBuffer current_upload_cmdbuf;

     std::unique_ptr<CommandChunk> chunk;
     std::function<void()> on_submit;
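The requires-constrained overloads keep every existing Record call site compiling: a one-argument lambda is wrapped into a two-argument one that discards the upload buffer. Hypothetical call sites showing both shapes (illustrative lambdas, not taken from the diff):

// Old-style callers are unchanged; the adapter drops the upload buffer.
scheduler.Record([](vk::CommandBuffer cmdbuf) {
    cmdbuf.EndRenderPass();
});

// Work that must execute before the frame targets the upload buffer instead.
scheduler.RecordWithUploadBuffer([](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
    // record transfer commands on upload_cmdbuf here
});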
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index 5efd7d66e..70644ea82 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
     UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
                 VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));

-    scheduler.Record([&](vk::CommandBuffer& cmdbuf) {
+    scheduler.Record([&](vk::CommandBuffer cmdbuf) {
         for (auto& images : m_dynamic_images) {
             for (size_t i = 0; i < MaxDynamicImage; i++) {
                 ClearColorImage(cmdbuf, *images.images[i]);
@@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
     UpdateDescriptorSets(source_image_view, image_index);

     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) {
+    scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
         TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
         TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
         BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d3deb9072..f63a20327 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -36,6 +36,10 @@ public:
     StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
     void FreeDeferred(StagingBufferRef& ref);

+    [[nodiscard]] VkBuffer StreamBuf() const noexcept {
+        return *stream_buffer;
+    }
+
     void TickFrame();

 private:
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index de34f6d49..5dbec2e62 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
     : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
       buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}

-ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params)
-    : VideoCommon::ImageViewBase{params} {}
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
+    : VideoCommon::ImageViewBase{params}, device{&runtime.device} {
+    if (device->HasNullDescriptor()) {
+        return;
+    }
+
+    // Handle fallback for devices without nullDescriptor
+    ImageInfo info{};
+    info.format = PixelFormat::A8B8G8R8_UNORM;
+
+    null_image = MakeImage(*device, runtime.memory_allocator, info, {});
+    image_handle = *null_image;
+    for (u32 i = 0; i < Shader::NUM_TEXTURE_TYPES; i++) {
+        image_views[i] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT);
+    }
+}

 ImageView::~ImageView() = default;

diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 7a0807709..edf5d7635 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -267,6 +267,7 @@ private:
     vk::ImageView depth_view;
     vk::ImageView stencil_view;
     vk::ImageView color_view;
+    vk::Image null_image;
     VkImage image_handle = VK_NULL_HANDLE;
     VkImageView render_target = VK_NULL_HANDLE;
     VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 9df6a2903..3ffa2a661 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -138,6 +138,10 @@ public:
         return Iterator(this, SlotId{SlotId::INVALID_INDEX});
     }

+    [[nodiscard]] size_t size() const noexcept {
+        return values_capacity - free_list.size();
+    }
+
 private:
     struct NonTrivialDummy {
         NonTrivialDummy() noexcept {}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 4ffd122fc..188ceeed7 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -899,7 +899,8 @@ bool Device::ShouldBoostClocks() const {
         driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA ||
         driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP;

-    const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
+    const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) ||
+                               (vendor_id == 0x1002 && device_id == 0x1435);

     const bool is_debugging = this->HasDebuggingToolAttached();

diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 0487cd3b6..a0c70797f 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -1101,6 +1101,10 @@ public:
         return &handle;
     }

+    VkCommandBuffer operator*() const noexcept {
+        return handle;
+    }
+
     void Begin(const VkCommandBufferBeginInfo& begin_info) const {
         Check(dld->vkBeginCommandBuffer(handle, &begin_info));
     }