Diffstat (limited to 'src/video_core')
21 files changed, 312 insertions, 114 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index b65b9f2a2..c22c7631c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,6 +15,7 @@ add_library(video_core STATIC
     buffer_cache/buffer_cache.cpp
     buffer_cache/buffer_cache.h
     buffer_cache/memory_tracker_base.h
+    buffer_cache/usage_tracker.h
     buffer_cache/word_manager.h
     cache_types.h
     cdma_pusher.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f5b10411b..6d1fc3887 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
     if (!channel_state) {
         return;
     }
+    runtime.TickFrame(slot_buffers);

     // Calculate hits and shots and move hit bits to the right
     const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     for (const IntervalType& add_interval : tmp_intervals) {
         common_ranges.add(add_interval);
     }
-    runtime.CopyBuffer(dest_buffer, src_buffer, copies);
+    const auto& copy = copies[0];
+    src_buffer.MarkUsage(copy.src_offset, copy.size);
+    dest_buffer.MarkUsage(copy.dst_offset, copy.size);
+    runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
     if (has_new_downloads) {
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
@@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     common_ranges.subtract(subtract_interval);

     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
-    auto& dest_buffer = slot_buffers[buffer];
+    Buffer& dest_buffer = slot_buffers[buffer];
     const u32 offset = dest_buffer.Offset(*cpu_dst_address);
     runtime.ClearBuffer(dest_buffer, offset, size, value);
+    dest_buffer.MarkUsage(offset, size);
     return true;
 }

@@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
             const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
             async_downloads += std::make_pair(base_interval, 1);
+            buffer.MarkUsage(copy.src_offset, copy.size);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
             normalized_copies.push_back(second_copy);
         }
@@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             // Have in mind the staging buffer offset for the copy
             copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
-            runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
-                               false);
+            Buffer& buffer = slot_buffers[buffer_id];
+            buffer.MarkUsage(copy.src_offset, copy.size);
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
         }
         runtime.PostCopyBarrier();
         runtime.Finish();
@@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
                 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
             std::memcpy(upload_staging.mapped_span.data(),
                         draw_state.inline_index_draw_indexes.data(), size);
-            runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+            runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
         } else {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
         }
@@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
+        buffer.MarkUsage(offset, size);
         runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
                                 draw_state.index_buffer.first, draw_state.index_buffer.count,
                                 buffer, offset, size);
@@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {

         const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, binding.size);

         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
+    buffer.MarkUsage(offset, size);
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
     } else {
@@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;

         if (is_written) {
@@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {

         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
-        host_bindings.sizes.push_back(binding.size);
+        host_bindings.sizes.push_back(size);
     }
     if (host_bindings.buffers.size() > 0) {
         runtime.BindTransformFeedbackBuffers(host_bindings);
@@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
             runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
             ++binding_index;
@@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, size);

         const u32 offset = buffer.Offset(binding.cpu_addr);
+        buffer.MarkUsage(offset, size);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;

@@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {

         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
+        buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
             if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
     }
+    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
     channel_state->vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
-        .buffer_id = FindBuffer(*cpu_addr, size),
+        .buffer_id = buffer_id,
     };
 }

@@ -1401,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
         .dst_offset = dst_base_offset,
         .size = overlap.SizeBytes(),
     });
-    runtime.CopyBuffer(new_buffer, overlap, copies);
+    new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
+    runtime.CopyBuffer(new_buffer, overlap, copies, true);
     DeleteBuffer(overlap_id, true);
 }

@@ -1414,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
     auto& new_buffer = slot_buffers[new_buffer_id];
-    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
+    const size_t size_bytes = new_buffer.SizeBytes();
+    runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
+    new_buffer.MarkUsage(0, size_bytes);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -1467,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {

 template <class P>
 bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    return SynchronizeBufferImpl(buffer, cpu_addr, size);
-}
-
-template <class P>
-bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
     boost::container::small_vector<BufferCopy, 4> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
@@ -1494,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
 }

 template <class P>
-bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
-    boost::container::small_vector<BufferCopy, 4> copies;
-    u64 total_size_bytes = 0;
-    u64 largest_copy = 0;
-    IntervalSet found_sets{};
-    auto make_copies = [&] {
-        for (auto& interval : found_sets) {
-            const std::size_t sub_size = interval.upper() - interval.lower();
-            const VAddr cpu_addr_ = interval.lower();
-            copies.push_back(BufferCopy{
-                .src_offset = total_size_bytes,
-                .dst_offset = cpu_addr_ - buffer.CpuAddr(),
-                .size = sub_size,
-            });
-            total_size_bytes += sub_size;
-            largest_copy = std::max<u64>(largest_copy, sub_size);
-        }
-        const std::span<BufferCopy> copies_span(copies.data(), copies.size());
-        UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
-    };
-    memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
-        const VAddr base_adr = cpu_addr_out;
-        const VAddr end_adr = base_adr + range_size;
-        const IntervalType add_interval{base_adr, end_adr};
-        found_sets.add(add_interval);
-    });
-    if (found_sets.empty()) {
-        return true;
-    }
-    const IntervalType search_interval{cpu_addr, cpu_addr + size};
-    auto it = common_ranges.lower_bound(search_interval);
-    auto it_end = common_ranges.upper_bound(search_interval);
-    if (it == common_ranges.end()) {
-        make_copies();
-        return false;
-    }
-    while (it != it_end) {
-        found_sets.subtract(*it);
-        it++;
-    }
-    make_copies();
-    return false;
-}
-
-template <class P>
 void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                                   std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
@@ -1586,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }
 }

@@ -1628,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }
@@ -1681,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
         for (BufferCopy& copy : copies) {
             // Modify copies to have the staging offset in mind
             copy.dst_offset += download_staging.offset;
+            buffer.MarkUsage(copy.src_offset, copy.size);
         }
-        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
+        runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
         runtime.Finish();
         for (const BufferCopy& copy : copies) {
             const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
@@ -1780,15 +1753,25 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
         const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
         return std::min(memory_layout_size, static_cast<u32>(8_MiB));
     }();
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
-    if (!cpu_addr || size == 0) {
+    // Alignment only applies to the offset of the buffer
+    const u32 alignment = runtime.GetStorageBufferAlignment();
+    const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
+    const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
+
+    const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
+    if (!aligned_cpu_addr || size == 0) {
         LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
         return NULL_BINDING;
     }
-    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}",
+               cbuf_index);
+    // The end address used for size calculation does not need to be aligned
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
+
     const Binding binding{
-        .cpu_addr = *cpu_addr,
-        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
+        .cpu_addr = *aligned_cpu_addr,
+        .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr),
         .buffer_id = BufferId{},
     };
     return binding;
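The StorageBufferBinding change aligns the binding's base down to the device's storage-buffer alignment and grows the size by the distance the base moved, so only the offset changes while the guest's original byte range stays inside the binding. A minimal sketch of that arithmetic, using made-up values (the 0x1234 address and 0x100 alignment are illustrative, not taken from any device):

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint64_t gpu_addr = 0x1234; // guest SSBO base (illustrative)
        const std::uint32_t size = 0x40;       // guest data size (illustrative)
        const std::uint32_t alignment = 0x100; // device min SSBO alignment (illustrative)

        // Common::AlignDown(gpu_addr, alignment) for a power-of-two alignment:
        const std::uint64_t aligned_gpu_addr = gpu_addr & ~std::uint64_t{alignment - 1};
        // Grow the size by however far the base moved back, so the original
        // range [gpu_addr, gpu_addr + size) is still covered by the binding.
        const std::uint32_t aligned_size =
            static_cast<std::uint32_t>(gpu_addr - aligned_gpu_addr) + size;

        assert(aligned_gpu_addr == 0x1200);
        assert(aligned_size == 0x34 + 0x40);
        return 0;
    }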
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index eed267361..d6d696d8c 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -529,10 +529,6 @@ private:

     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);

-    bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
-
-    bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
-
     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
                       std::span<BufferCopy> copies);

diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h
new file mode 100644
index 000000000..ab05fe415
--- /dev/null
+++ b/src/video_core/buffer_cache/usage_tracker.h
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+
+namespace VideoCommon {
+
+class UsageTracker {
+    static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
+    static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
+    static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
+
+public:
+    explicit UsageTracker(size_t size) {
+        const size_t num_pages = (size >> PAGE_SHIFT) + 1;
+        pages.resize(num_pages, 0ULL);
+    }
+
+    void Reset() noexcept {
+        std::ranges::fill(pages, 0ULL);
+    }
+
+    void Track(u64 offset, u64 size) noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        TrackPage(page, offset, size);
+        if (page == page_end) {
+            return;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            pages[i] = ~u64{0};
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+    [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
+        const size_t page = offset >> PAGE_SHIFT;
+        const size_t page_end = (offset + size) >> PAGE_SHIFT;
+        if (IsPageUsed(page, offset, size)) {
+            return true;
+        }
+        for (size_t i = page + 1; i < page_end; i++) {
+            if (pages[i] != 0) {
+                return true;
+            }
+        }
+        const size_t offset_end = offset + size;
+        const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
+        return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
+    }
+
+private:
+    void TrackPage(u64 page, u64 offset, u64 size) noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        pages[page] |= (~u64{0} & mask) << first_bit;
+    }
+
+    bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
+        const size_t offset_in_page = offset % PAGE_BYTES;
+        const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
+        const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
+        const size_t mask = ~u64{0} >> (64 - num_bits);
+        const size_t mask2 = (~u64{0} & mask) << first_bit;
+        return (pages[page] & mask2) != 0;
+    }
+
+private:
+    std::vector<u64> pages;
+};
+
+} // namespace VideoCommon
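The new UsageTracker is a two-level bitmap: one bit covers 64 bytes (BYTES_PER_BIT_SHIFT = 6) and one u64 word covers a 4096-byte page (PAGE_SHIFT = 12). Track sets the bits for a byte range, IsUsed asks whether any bit in a range is set, and that answer is what lets the Vulkan backend detect whether an upload would collide with memory the frame already touched. A self-contained sketch of the single-page bit math under those constants (illustrative values, not the emulator's API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
        constexpr std::uint64_t bytes_per_bit = 64;               // 1 << BYTES_PER_BIT_SHIFT
        constexpr std::uint64_t page_bytes = 64 * bytes_per_bit;  // 4096: 64 bits per word
        std::vector<std::uint64_t> pages(4, 0);

        // Track 256 bytes at offset 128: sets bits 2..5 of word 0.
        const std::uint64_t first_bit = (128 % page_bytes) / bytes_per_bit; // 2
        const std::uint64_t num_bits = 256 / bytes_per_bit;                 // 4
        const std::uint64_t mask = (~std::uint64_t{0} >> (64 - num_bits)) << first_bit;
        pages[128 / page_bytes] |= mask;

        // Any later query overlapping [128, 384) now sees at least one set bit.
        assert((pages[0] & mask) != 0);
        return 0;
    }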
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 38d553d3c..dfd696de6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
 }

 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                                    bool) {
     CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
 }

 void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies) {
-    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
+                                    std::span<const VideoCommon::BufferCopy> copies, bool) {
+    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
 }

 void BufferCacheRuntime::PreCopyBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 41b746f3b..000f29a82 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -30,6 +30,8 @@ public:

     void MakeResident(GLenum access) noexcept;

+    void MarkUsage(u64 offset, u64 size) {}
+
     [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);

     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@@ -66,22 +68,29 @@ public:

     [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);

+    bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
+        return false;
+    }
+
     void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

     void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier);

     void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                    bool can_reorder_upload = false);

     void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies);
+                    std::span<const VideoCommon::BufferCopy> copies, bool);

     void PreCopyBarrier();
     void PostCopyBarrier();
     void Finish();

+    void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
+
     void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);

     void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@@ -182,6 +191,10 @@ public:
         return device.CanReportMemoryUsage();
     }

+    u32 GetStorageBufferAlignment() const {
+        return static_cast<u32>(device.GetShaderStorageBufferAlignment());
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2888e0238..26f2d0ea7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_gl_bool_ref_bug = device.HasBoolRefBug(),
           .ignore_nan_fp_comparisons = true,
           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+          .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(),
       },
       host_info{
           .support_float64 = true,
@@ -240,6 +241,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .needs_demote_reorder = device.IsAmd(),
           .support_snorm_render_buffer = false,
           .support_viewport_index_layer = device.HasVertexViewportLayer(),
+          .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
           .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
           .support_conditional_barrier = device.SupportsConditionalBarriers(),
       } {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8148e89a..5958f52f7 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
 } // Anonymous namespace

 Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}

 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
                VAddr cpu_addr_, u64 size_bytes_)
     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
-      device{&runtime.device}, buffer{
-                                   CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
+      device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
+      tracker{SizeBytes()} {
     if (runtime.device.HasDebuggingToolAttached()) {
         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
     }
@@ -355,12 +355,35 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
     return device.CanReportMemoryUsage();
 }

+u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
+    return static_cast<u32>(device.GetStorageBufferAlignment());
+}
+
+void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
+    for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
+        it->ResetUsageTracking();
+    }
+}
+
 void BufferCacheRuntime::Finish() {
     scheduler.Finish();
 }

+bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
+                                          std::span<const VideoCommon::BufferCopy> copies) {
+    if (Settings::values.disable_buffer_reorder) {
+        return false;
+    }
+    const bool can_use_upload_cmdbuf =
+        std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
+            return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
+        });
+    return can_use_upload_cmdbuf;
+}
+
 void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
-                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
+                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                                    bool can_reorder_upload) {
     if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
         return;
     }
@@ -376,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
     };
+
     // Measuring a popular game, this number never exceeds the specified size once data is warmed up
     boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
     std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
+    if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
+        scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
+                                             vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
+            upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
+        });
+        return;
+    }
+
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
         if (barrier) {
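CanReorderUpload is the gate for the new fast path: a staging copy may move into the per-frame upload command buffer, which is submitted ahead of the frame's main commands, only when every destination range is still untracked, i.e. nothing recorded so far this frame has touched it. A compact sketch of that decision with stand-in types (BufferCopy's fields mirror the shapes used above; MockBuffer and its interval list are hypothetical):

    #include <algorithm>
    #include <cstdint>
    #include <span>
    #include <utility>
    #include <vector>

    struct BufferCopy {
        std::uint64_t src_offset;
        std::uint64_t dst_offset;
        std::uint64_t size;
    };

    struct MockBuffer {
        std::vector<std::pair<std::uint64_t, std::uint64_t>> used; // (offset, size) this frame
        bool IsRegionUsed(std::uint64_t offset, std::uint64_t size) const {
            return std::ranges::any_of(used, [&](const auto& r) {
                return offset < r.first + r.second && r.first < offset + size;
            });
        }
    };

    // Reorder only when no copy lands on a region the frame already touched.
    bool CanReorderUpload(const MockBuffer& buffer, std::span<const BufferCopy> copies) {
        return std::ranges::all_of(copies, [&](const BufferCopy& copy) {
            return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
        });
    }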
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 95446c732..0b3fbd6d0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,6 +5,7 @@

 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
+#include "video_core/buffer_cache/usage_tracker.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -34,6 +35,18 @@ public:
         return *buffer;
     }

+    [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
+        return tracker.IsUsed(offset, size);
+    }
+
+    void MarkUsage(u64 offset, u64 size) noexcept {
+        tracker.Track(offset, size);
+    }
+
+    void ResetUsageTracking() noexcept {
+        tracker.Reset();
+    }
+
     operator VkBuffer() const noexcept {
         return *buffer;
     }
@@ -49,6 +62,7 @@ private:
     const Device* device{};
     vk::Buffer buffer;
     std::vector<BufferView> views;
+    VideoCommon::UsageTracker tracker;
 };

 class QuadArrayIndexBuffer;
@@ -67,6 +81,8 @@ public:
                                 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                                 DescriptorPool& descriptor_pool);

+    void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
+
     void Finish();

     u64 GetDeviceLocalMemory() const;
@@ -75,16 +91,21 @@ public:

     bool CanReportMemoryUsage() const;

+    u32 GetStorageBufferAlignment() const;
+
     [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);

     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);

+    bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
+
     void FreeDeferredStagingBuffer(StagingBufferRef& ref);

     void PreCopyBarrier();

     void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
-                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
+                    std::span<const VideoCommon::BufferCopy> copies, bool barrier,
+                    bool can_reorder_upload = false);

     void PostCopyBarrier();

diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 6b288b994..ac8b6e838 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
     Refresh();
 }

-VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                                      VkSemaphore wait_semaphore, u64 host_tick) {
+VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+                                      VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+                                      u64 host_tick) {
     if (semaphore) {
-        return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+        return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
+                                   host_tick);
     } else {
-        return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick);
+        return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
     }
 }

@@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
 };

 VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
+                                              vk::CommandBuffer& upload_cmdbuf,
                                               VkSemaphore signal_semaphore,
                                               VkSemaphore wait_semaphore, u64 host_tick) {
     const VkSemaphore timeline_semaphore = *semaphore;
@@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
     const std::array signal_values{host_tick, u64(0)};
     const std::array signal_semaphores{timeline_semaphore, signal_semaphore};

+    const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
     const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
     const VkTimelineSemaphoreSubmitInfo timeline_si{
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
         .waitSemaphoreCount = num_wait_semaphores,
         .pWaitSemaphores = &wait_semaphore,
         .pWaitDstStageMask = wait_stage_masks.data(),
-        .commandBufferCount = 1,
-        .pCommandBuffers = cmdbuf.address(),
+        .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+        .pCommandBuffers = cmdbuffers.data(),
         .signalSemaphoreCount = num_signal_semaphores,
         .pSignalSemaphores = signal_semaphores.data(),
     };
@@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
     return device.GetGraphicsQueue().Submit(submit_info);
 }

-VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                                           VkSemaphore wait_semaphore, u64 host_tick) {
+VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
+                                           vk::CommandBuffer& upload_cmdbuf,
+                                           VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+                                           u64 host_tick) {
     const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
     const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;

+    const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
+
     const VkSubmitInfo submit_info{
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .pNext = nullptr,
         .waitSemaphoreCount = num_wait_semaphores,
         .pWaitSemaphores = &wait_semaphore,
         .pWaitDstStageMask = wait_stage_masks.data(),
-        .commandBufferCount = 1,
-        .pCommandBuffers = cmdbuf.address(),
+        .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
+        .pCommandBuffers = cmdbuffers.data(),
         .signalSemaphoreCount = num_signal_semaphores,
         .pSignalSemaphores = &signal_semaphore,
     };
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 3f599d7bd..7dfb93ffb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -52,14 +52,16 @@ public:
     void Wait(u64 tick);

     /// Submits the device graphics queue, updating the tick as necessary
-    VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                         VkSemaphore wait_semaphore, u64 host_tick);
+    VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+                         VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);

 private:
-    VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                                 VkSemaphore wait_semaphore, u64 host_tick);
-    VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
-                              VkSemaphore wait_semaphore, u64 host_tick);
+    VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+                                 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+                                 u64 host_tick);
+    VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
+                              VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
+                              u64 host_tick);

     void WaitThread(std::stop_token token);

diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 89b455bff..2a13b2a72 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -373,6 +373,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
         .has_broken_robust =
             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal,
+        .min_ssbo_alignment = device.GetStorageBufferAlignment(),
     };

     host_info = Shader::HostTranslateInfo{
@@ -383,6 +384,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
             driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
         .support_snorm_render_buffer = true,
         .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
+        .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
         .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
         .support_conditional_barrier = device.SupportsConditionalBarriers(),
     };
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3be7837f4..146923db4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -22,11 +22,12 @@ namespace Vulkan { | |||
| 22 | 22 | ||
| 23 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); | 23 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); |
| 24 | 24 | ||
| 25 | void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { | 25 | void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, |
| 26 | vk::CommandBuffer upload_cmdbuf) { | ||
| 26 | auto command = first; | 27 | auto command = first; |
| 27 | while (command != nullptr) { | 28 | while (command != nullptr) { |
| 28 | auto next = command->GetNext(); | 29 | auto next = command->GetNext(); |
| 29 | command->Execute(cmdbuf); | 30 | command->Execute(cmdbuf, upload_cmdbuf); |
| 30 | command->~Command(); | 31 | command->~Command(); |
| 31 | command = next; | 32 | command = next; |
| 32 | } | 33 | } |
| @@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) { | |||
| 180 | // Perform the work, tracking whether the chunk was a submission | 181 | // Perform the work, tracking whether the chunk was a submission |
| 181 | // before executing. | 182 | // before executing. |
| 182 | const bool has_submit = work->HasSubmit(); | 183 | const bool has_submit = work->HasSubmit(); |
| 183 | work->ExecuteAll(current_cmdbuf); | 184 | work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf); |
| 184 | 185 | ||
| 185 | // If the chunk was a submission, reallocate the command buffer. | 186 | // If the chunk was a submission, reallocate the command buffer. |
| 186 | if (has_submit) { | 187 | if (has_submit) { |
| @@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() { | |||
| 205 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | 206 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, |
| 206 | .pInheritanceInfo = nullptr, | 207 | .pInheritanceInfo = nullptr, |
| 207 | }); | 208 | }); |
| 209 | current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 210 | current_upload_cmdbuf.Begin({ | ||
| 211 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 212 | .pNext = nullptr, | ||
| 213 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 214 | .pInheritanceInfo = nullptr, | ||
| 215 | }); | ||
| 208 | } | 216 | } |
| 209 | 217 | ||
| 210 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 218 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| @@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se | |||
| 212 | InvalidateState(); | 220 | InvalidateState(); |
| 213 | 221 | ||
| 214 | const u64 signal_value = master_semaphore->NextTick(); | 222 | const u64 signal_value = master_semaphore->NextTick(); |
| 215 | Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { | 223 | RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value, |
| 224 | this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) { | ||
| 225 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 226 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 227 | .pNext = nullptr, | ||
| 228 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 229 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 230 | }; | ||
| 231 | upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 232 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 233 | upload_cmdbuf.End(); | ||
| 216 | cmdbuf.End(); | 234 | cmdbuf.End(); |
| 217 | 235 | ||
| 218 | if (on_submit) { | 236 | if (on_submit) { |
| @@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se | |||
| 221 | 239 | ||
| 222 | std::scoped_lock lock{submit_mutex}; | 240 | std::scoped_lock lock{submit_mutex}; |
| 223 | switch (const VkResult result = master_semaphore->SubmitQueue( | 241 | switch (const VkResult result = master_semaphore->SubmitQueue( |
| 224 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { | 242 | cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { |
| 225 | case VK_SUCCESS: | 243 | case VK_SUCCESS: |
| 226 | break; | 244 | break; |
| 227 | case VK_ERROR_DEVICE_LOST: | 245 | case VK_ERROR_DEVICE_LOST: |
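Note: the scheduler now records uploads on a dedicated second command buffer. At submit time it seals that buffer with a transfer-to-all-commands memory barrier and hands both buffers to `MasterSemaphore::SubmitQueue`. A sketch of the resulting submission in raw Vulkan (not the project's wrapper types, and assuming the semaphore submits both buffers in one batch): queue barriers are not scoped to a single command buffer, so the barrier at the tail of `upload_cmdbuf` orders its copies against every later command in the queue, including all of the main buffer.

```cpp
#include <vulkan/vulkan.h>

// Sketch: the upload buffer goes ahead of the main buffer in a single
// submit; pCommandBuffers entries begin execution in array order, and
// the recorded transfer barrier makes the staged writes visible to the
// commands that follow.
VkResult SubmitPair(VkQueue queue, VkCommandBuffer upload_cmdbuf,
                    VkCommandBuffer main_cmdbuf) {
    const VkCommandBuffer cmdbufs[] = {upload_cmdbuf, main_cmdbuf};
    const VkSubmitInfo submit_info{
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext = nullptr,
        .waitSemaphoreCount = 0,
        .pWaitSemaphores = nullptr,
        .pWaitDstStageMask = nullptr,
        .commandBufferCount = 2,
        .pCommandBuffers = cmdbufs,
        .signalSemaphoreCount = 0,
        .pSignalSemaphores = nullptr,
    };
    return vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
}
```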
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index da03803aa..f8d8ca80a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -80,7 +80,8 @@ public: | |||
| 80 | 80 | ||
| 81 | /// Send work to a separate thread. | 81 | /// Send work to a separate thread. |
| 82 | template <typename T> | 82 | template <typename T> |
| 83 | void Record(T&& command) { | 83 | requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer> |
| 84 | void RecordWithUploadBuffer(T&& command) { | ||
| 84 | if (chunk->Record(command)) { | 85 | if (chunk->Record(command)) { |
| 85 | return; | 86 | return; |
| 86 | } | 87 | } |
| @@ -88,6 +89,15 @@ public: | |||
| 88 | (void)chunk->Record(command); | 89 | (void)chunk->Record(command); |
| 89 | } | 90 | } |
| 90 | 91 | ||
| 92 | template <typename T> | ||
| 93 | requires std::is_invocable_v<T, vk::CommandBuffer> | ||
| 94 | void Record(T&& c) { | ||
| 95 | this->RecordWithUploadBuffer( | ||
| 96 | [command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { | ||
| 97 | command(cmdbuf); | ||
| 98 | }); | ||
| 99 | } | ||
| 100 | |||
| 91 | /// Returns the current command buffer tick. | 101 | /// Returns the current command buffer tick. |
| 92 | [[nodiscard]] u64 CurrentTick() const noexcept { | 102 | [[nodiscard]] u64 CurrentTick() const noexcept { |
| 93 | return master_semaphore->CurrentTick(); | 103 | return master_semaphore->CurrentTick(); |
| @@ -119,7 +129,7 @@ private: | |||
| 119 | public: | 129 | public: |
| 120 | virtual ~Command() = default; | 130 | virtual ~Command() = default; |
| 121 | 131 | ||
| 122 | virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; | 132 | virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0; |
| 123 | 133 | ||
| 124 | Command* GetNext() const { | 134 | Command* GetNext() const { |
| 125 | return next; | 135 | return next; |
| @@ -142,8 +152,8 @@ private: | |||
| 142 | TypedCommand(TypedCommand&&) = delete; | 152 | TypedCommand(TypedCommand&&) = delete; |
| 143 | TypedCommand& operator=(TypedCommand&&) = delete; | 153 | TypedCommand& operator=(TypedCommand&&) = delete; |
| 144 | 154 | ||
| 145 | void Execute(vk::CommandBuffer cmdbuf) const override { | 155 | void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override { |
| 146 | command(cmdbuf); | 156 | command(cmdbuf, upload_cmdbuf); |
| 147 | } | 157 | } |
| 148 | 158 | ||
| 149 | private: | 159 | private: |
| @@ -152,7 +162,7 @@ private: | |||
| 152 | 162 | ||
| 153 | class CommandChunk final { | 163 | class CommandChunk final { |
| 154 | public: | 164 | public: |
| 155 | void ExecuteAll(vk::CommandBuffer cmdbuf); | 165 | void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf); |
| 156 | 166 | ||
| 157 | template <typename T> | 167 | template <typename T> |
| 158 | bool Record(T& command) { | 168 | bool Record(T& command) { |
| @@ -228,6 +238,7 @@ private: | |||
| 228 | VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; | 238 | VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; |
| 229 | 239 | ||
| 230 | vk::CommandBuffer current_cmdbuf; | 240 | vk::CommandBuffer current_cmdbuf; |
| 241 | vk::CommandBuffer current_upload_cmdbuf; | ||
| 231 | 242 | ||
| 232 | std::unique_ptr<CommandChunk> chunk; | 243 | std::unique_ptr<CommandChunk> chunk; |
| 233 | std::function<void()> on_submit; | 244 | std::function<void()> on_submit; |
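Note: existing call sites keep the single-parameter `Record`, which now forwards to `RecordWithUploadBuffer` and discards the upload buffer; code that stages data opts into the two-parameter form. From a caller's perspective:

```cpp
// Single-parameter form for plain GPU work (note: the command buffer is
// taken by value, as required by the new is_invocable constraint):
scheduler.Record([](vk::CommandBuffer cmdbuf) {
    // draw/dispatch commands recorded on the frame's main command buffer
});

// Two-parameter form for work that also stages data; the upload buffer
// is submitted ahead of the main one:
scheduler.RecordWithUploadBuffer(
    [](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
        // copies on upload_cmdbuf, consuming commands on cmdbuf
    });
```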
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 5efd7d66e..70644ea82 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp | |||
| @@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) { | |||
| 672 | UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, | 672 | UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, |
| 673 | VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); | 673 | VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); |
| 674 | 674 | ||
| 675 | scheduler.Record([&](vk::CommandBuffer& cmdbuf) { | 675 | scheduler.Record([&](vk::CommandBuffer cmdbuf) { |
| 676 | for (auto& images : m_dynamic_images) { | 676 | for (auto& images : m_dynamic_images) { |
| 677 | for (size_t i = 0; i < MaxDynamicImage; i++) { | 677 | for (size_t i = 0; i < MaxDynamicImage; i++) { |
| 678 | ClearColorImage(cmdbuf, *images.images[i]); | 678 | ClearColorImage(cmdbuf, *images.images[i]); |
| @@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ | |||
| 707 | UpdateDescriptorSets(source_image_view, image_index); | 707 | UpdateDescriptorSets(source_image_view, image_index); |
| 708 | 708 | ||
| 709 | scheduler.RequestOutsideRenderPassOperationContext(); | 709 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 710 | scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { | 710 | scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { |
| 711 | TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); | 711 | TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); |
| 712 | TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); | 712 | TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); |
| 713 | BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, | 713 | BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, |
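Note: the SMAA lambdas switch from `vk::CommandBuffer&` to by-value `vk::CommandBuffer` because the new `requires std::is_invocable_v<T, vk::CommandBuffer>` constraint on `Record` rejects a non-const lvalue-reference parameter: a prvalue argument cannot bind to it. A standalone illustration (with a stand-in type, not the real wrapper):

```cpp
#include <type_traits>

struct CommandBuffer {};  // stand-in for vk::CommandBuffer

auto by_value = [](CommandBuffer) {};
auto by_ref = [](CommandBuffer&) {};

// Invoking with a prvalue CommandBuffer works for the by-value lambda
// but not the by-reference one, so only the former satisfies the
// scheduler's new constraint.
static_assert(std::is_invocable_v<decltype(by_value), CommandBuffer>);
static_assert(!std::is_invocable_v<decltype(by_ref), CommandBuffer>);

int main() {}
```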
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d3deb9072..f63a20327 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -36,6 +36,10 @@ public: | |||
| 36 | StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); | 36 | StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); |
| 37 | void FreeDeferred(StagingBufferRef& ref); | 37 | void FreeDeferred(StagingBufferRef& ref); |
| 38 | 38 | ||
| 39 | [[nodiscard]] VkBuffer StreamBuf() const noexcept { | ||
| 40 | return *stream_buffer; | ||
| 41 | } | ||
| 42 | |||
| 39 | void TickFrame(); | 43 | void TickFrame(); |
| 40 | 44 | ||
| 41 | private: | 45 | private: |
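Note: `StreamBuf()` exposes the raw handle of the pool's stream buffer. One plausible use, assuming `StagingBufferRef` carries its backing handle in a `buffer` member, is telling stream-buffer suballocations apart from dedicated staging buffers:

```cpp
// Illustrative only: distinguish a stream-buffer allocation by handle.
const bool is_stream = (ref.buffer == staging_pool.StreamBuf());
```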
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index de34f6d49..5dbec2e62 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | |||
| 1785 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, | 1785 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, |
| 1786 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | 1786 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} |
| 1787 | 1787 | ||
| 1788 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) | 1788 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) |
| 1789 | : VideoCommon::ImageViewBase{params} {} | 1789 | : VideoCommon::ImageViewBase{params}, device{&runtime.device} { |
| 1790 | if (device->HasNullDescriptor()) { | ||
| 1791 | return; | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | // Handle fallback for devices without nullDescriptor | ||
| 1795 | ImageInfo info{}; | ||
| 1796 | info.format = PixelFormat::A8B8G8R8_UNORM; | ||
| 1797 | |||
| 1798 | null_image = MakeImage(*device, runtime.memory_allocator, info, {}); | ||
| 1799 | image_handle = *null_image; | ||
| 1800 | for (u32 i = 0; i < Shader::NUM_TEXTURE_TYPES; i++) { | ||
| 1801 | image_views[i] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT); | ||
| 1802 | } | ||
| 1803 | } | ||
| 1790 | 1804 | ||
| 1791 | ImageView::~ImageView() = default; | 1805 | ImageView::~ImageView() = default; |
| 1792 | 1806 | ||
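Note: on devices lacking the `nullDescriptor` feature, an image descriptor may not be `VK_NULL_HANDLE`, so the null image view now materializes a dummy `A8B8G8R8_UNORM` image and builds a view for every texture type. A sketch of the underlying feature query in raw Vulkan (the project exposes the result through `Device::HasNullDescriptor()`):

```cpp
#include <vulkan/vulkan.h>

// Query the VK_EXT_robustness2 nullDescriptor feature at device
// selection time; omitted struct members are zero-initialized.
bool QueryNullDescriptor(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &robustness2,
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    return robustness2.nullDescriptor == VK_TRUE;
}
```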
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 7a0807709..edf5d7635 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -267,6 +267,7 @@ private: | |||
| 267 | vk::ImageView depth_view; | 267 | vk::ImageView depth_view; |
| 268 | vk::ImageView stencil_view; | 268 | vk::ImageView stencil_view; |
| 269 | vk::ImageView color_view; | 269 | vk::ImageView color_view; |
| 270 | vk::Image null_image; | ||
| 270 | VkImage image_handle = VK_NULL_HANDLE; | 271 | VkImage image_handle = VK_NULL_HANDLE; |
| 271 | VkImageView render_target = VK_NULL_HANDLE; | 272 | VkImageView render_target = VK_NULL_HANDLE; |
| 272 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 273 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 9df6a2903..3ffa2a661 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -138,6 +138,10 @@ public: | |||
| 138 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); | 138 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); |
| 139 | } | 139 | } |
| 140 | 140 | ||
| 141 | [[nodiscard]] size_t size() const noexcept { | ||
| 142 | return values_capacity - free_list.size(); | ||
| 143 | } | ||
| 144 | |||
| 141 | private: | 145 | private: |
| 142 | struct NonTrivialDummy { | 146 | struct NonTrivialDummy { |
| 143 | NonTrivialDummy() noexcept {} | 147 | NonTrivialDummy() noexcept {} |
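Note: `size()` counts live slots as total capacity minus the free list, which is correct because every erased slot is pushed onto `free_list` and every insert either pops it or grows the capacity. A minimal standalone model of that counting scheme (not the real container):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Live objects are whatever capacity is not sitting on the free list.
struct SlotCounter {
    std::size_t values_capacity = 0;
    std::vector<std::size_t> free_list;

    std::size_t insert() {
        if (!free_list.empty()) {
            const std::size_t id = free_list.back();
            free_list.pop_back();
            return id;
        }
        return values_capacity++;
    }
    void erase(std::size_t id) {
        free_list.push_back(id);
    }
    std::size_t size() const {
        return values_capacity - free_list.size();
    }
};

int main() {
    SlotCounter slots;
    const std::size_t a = slots.insert();
    slots.insert();
    slots.erase(a);
    assert(slots.size() == 1);  // two allocated, one freed
}
```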
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4ffd122fc..188ceeed7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -899,7 +899,8 @@ bool Device::ShouldBoostClocks() const { | |||
| 899 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || | 899 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || |
| 900 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; | 900 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; |
| 901 | 901 | ||
| 902 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; | 902 | const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) || |
| 903 | (vendor_id == 0x1002 && device_id == 0x1435); | ||
| 903 | 904 | ||
| 904 | const bool is_debugging = this->HasDebuggingToolAttached(); | 905 | const bool is_debugging = this->HasDebuggingToolAttached(); |
| 905 | 906 | ||
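Note: `0x1002/0x163F` is the original Steam Deck APU ("Van Gogh"); the added `0x1002/0x1435` appears to correspond to the Steam Deck OLED's revised APU, so both models now receive the clock-boost behavior.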
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 0487cd3b6..a0c70797f 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -1101,6 +1101,10 @@ public: | |||
| 1101 | return &handle; | 1101 | return &handle; |
| 1102 | } | 1102 | } |
| 1103 | 1103 | ||
| 1104 | VkCommandBuffer operator*() const noexcept { | ||
| 1105 | return handle; | ||
| 1106 | } | ||
| 1107 | |||
| 1104 | void Begin(const VkCommandBufferBeginInfo& begin_info) const { | 1108 | void Begin(const VkCommandBufferBeginInfo& begin_info) const { |
| 1105 | Check(dld->vkBeginCommandBuffer(handle, &begin_info)); | 1109 | Check(dld->vkBeginCommandBuffer(handle, &begin_info)); |
| 1106 | } | 1110 | } |
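Note: the new dereference operator yields the raw `VkCommandBuffer`, matching the convention of the other handle wrappers, and is what lets the scheduler hand `current_upload_cmdbuf` to code expecting a native handle:

```cpp
// Unwrap the handle where a raw VkCommandBuffer is needed, e.g. when
// filling VkSubmitInfo::pCommandBuffers.
const VkCommandBuffer raw = *current_upload_cmdbuf;
```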