summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
authorGravatar Ameer J2023-11-26 21:08:53 -0500
committerGravatar GitHub2023-11-26 21:08:53 -0500
commit1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a (patch)
treec219aacab776c0a1e3956614b60a01fa2f6164cb /src/video_core
parentshader_recompiler: Align SSBO offsets in GlobalMemory functions (diff)
parentMerge pull request #11535 from GPUCode/upload_cmdbuf (diff)
downloadyuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.gz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.tar.xz
yuzu-1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a.zip
Merge branch 'master' into ssbo-align
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h102
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/buffer_cache/usage_tracker.h79
-rw-r--r--src/video_core/engines/fermi_2d.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/fence_manager.h5
-rw-r--r--src/video_core/host1x/codecs/codec.cpp329
-rw-r--r--src/video_core/host1x/codecs/codec.h39
-rw-r--r--src/video_core/host1x/codecs/h264.cpp4
-rw-r--r--src/video_core/host1x/codecs/h264.h1
-rw-r--r--src/video_core/host1x/ffmpeg/ffmpeg.cpp419
-rw-r--r--src/video_core/host1x/ffmpeg/ffmpeg.h213
-rw-r--r--src/video_core/host1x/nvdec.cpp2
-rw-r--r--src/video_core/host1x/nvdec.h2
-rw-r--r--src/video_core/host1x/vic.cpp62
-rw-r--r--src/video_core/host1x/vic.h4
-rw-r--r--src/video_core/query_cache/query_cache.h2
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h17
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp135
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp36
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h21
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_fsr.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp29
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp20
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp28
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h21
-rw-r--r--src/video_core/renderer_vulkan/vk_smaa.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h1
-rw-r--r--src/video_core/texture_cache/slot_vector.h4
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp9
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h11
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h4
46 files changed, 1168 insertions, 596 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cf9266d54..c22c7631c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -4,7 +4,7 @@
4add_subdirectory(host_shaders) 4add_subdirectory(host_shaders)
5 5
6if(LIBVA_FOUND) 6if(LIBVA_FOUND)
7 set_source_files_properties(host1x/codecs/codec.cpp 7 set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp
8 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) 8 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
9 list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) 9 list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
10endif() 10endif()
@@ -15,6 +15,7 @@ add_library(video_core STATIC
15 buffer_cache/buffer_cache.cpp 15 buffer_cache/buffer_cache.cpp
16 buffer_cache/buffer_cache.h 16 buffer_cache/buffer_cache.h
17 buffer_cache/memory_tracker_base.h 17 buffer_cache/memory_tracker_base.h
18 buffer_cache/usage_tracker.h
18 buffer_cache/word_manager.h 19 buffer_cache/word_manager.h
19 cache_types.h 20 cache_types.h
20 cdma_pusher.cpp 21 cdma_pusher.cpp
@@ -66,6 +67,8 @@ add_library(video_core STATIC
66 host1x/codecs/vp9.cpp 67 host1x/codecs/vp9.cpp
67 host1x/codecs/vp9.h 68 host1x/codecs/vp9.h
68 host1x/codecs/vp9_types.h 69 host1x/codecs/vp9_types.h
70 host1x/ffmpeg/ffmpeg.cpp
71 host1x/ffmpeg/ffmpeg.h
69 host1x/control.cpp 72 host1x/control.cpp
70 host1x/control.h 73 host1x/control.h
71 host1x/host1x.cpp 74 host1x/host1x.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2648970b6..6d1fc3887 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() {
67 if (!channel_state) { 67 if (!channel_state) {
68 return; 68 return;
69 } 69 }
70 runtime.TickFrame(slot_buffers);
70 71
71 // Calculate hits and shots and move hit bits to the right 72 // Calculate hits and shots and move hit bits to the right
72 const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), 73 const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
@@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
230 for (const IntervalType& add_interval : tmp_intervals) { 231 for (const IntervalType& add_interval : tmp_intervals) {
231 common_ranges.add(add_interval); 232 common_ranges.add(add_interval);
232 } 233 }
233 runtime.CopyBuffer(dest_buffer, src_buffer, copies); 234 const auto& copy = copies[0];
235 src_buffer.MarkUsage(copy.src_offset, copy.size);
236 dest_buffer.MarkUsage(copy.dst_offset, copy.size);
237 runtime.CopyBuffer(dest_buffer, src_buffer, copies, true);
234 if (has_new_downloads) { 238 if (has_new_downloads) {
235 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 239 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
236 } 240 }
@@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
258 common_ranges.subtract(subtract_interval); 262 common_ranges.subtract(subtract_interval);
259 263
260 const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); 264 const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
261 auto& dest_buffer = slot_buffers[buffer]; 265 Buffer& dest_buffer = slot_buffers[buffer];
262 const u32 offset = dest_buffer.Offset(*cpu_dst_address); 266 const u32 offset = dest_buffer.Offset(*cpu_dst_address);
263 runtime.ClearBuffer(dest_buffer, offset, size, value); 267 runtime.ClearBuffer(dest_buffer, offset, size, value);
268 dest_buffer.MarkUsage(offset, size);
264 return true; 269 return true;
265} 270}
266 271
@@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
603 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); 608 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
604 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; 609 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
605 async_downloads += std::make_pair(base_interval, 1); 610 async_downloads += std::make_pair(base_interval, 1);
611 buffer.MarkUsage(copy.src_offset, copy.size);
606 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); 612 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
607 normalized_copies.push_back(second_copy); 613 normalized_copies.push_back(second_copy);
608 } 614 }
@@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
621 // Have in mind the staging buffer offset for the copy 627 // Have in mind the staging buffer offset for the copy
622 copy.dst_offset += download_staging.offset; 628 copy.dst_offset += download_staging.offset;
623 const std::array copies{copy}; 629 const std::array copies{copy};
624 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, 630 Buffer& buffer = slot_buffers[buffer_id];
625 false); 631 buffer.MarkUsage(copy.src_offset, copy.size);
632 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
626 } 633 }
627 runtime.PostCopyBarrier(); 634 runtime.PostCopyBarrier();
628 runtime.Finish(); 635 runtime.Finish();
@@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
742 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; 749 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
743 std::memcpy(upload_staging.mapped_span.data(), 750 std::memcpy(upload_staging.mapped_span.data(),
744 draw_state.inline_index_draw_indexes.data(), size); 751 draw_state.inline_index_draw_indexes.data(), size);
745 runtime.CopyBuffer(buffer, upload_staging.buffer, copies); 752 runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
746 } else { 753 } else {
747 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); 754 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
748 } 755 }
@@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
754 offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); 761 offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes();
755 runtime.BindIndexBuffer(buffer, new_offset, size); 762 runtime.BindIndexBuffer(buffer, new_offset, size);
756 } else { 763 } else {
764 buffer.MarkUsage(offset, size);
757 runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, 765 runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format,
758 draw_state.index_buffer.first, draw_state.index_buffer.count, 766 draw_state.index_buffer.first, draw_state.index_buffer.count,
759 buffer, offset, size); 767 buffer, offset, size);
@@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
790 798
791 const u32 stride = maxwell3d->regs.vertex_streams[index].stride; 799 const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
792 const u32 offset = buffer.Offset(binding.cpu_addr); 800 const u32 offset = buffer.Offset(binding.cpu_addr);
801 buffer.MarkUsage(offset, binding.size);
793 802
794 host_bindings.buffers.push_back(&buffer); 803 host_bindings.buffers.push_back(&buffer);
795 host_bindings.offsets.push_back(offset); 804 host_bindings.offsets.push_back(offset);
@@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
895 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 904 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
896 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; 905 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
897 } 906 }
907 buffer.MarkUsage(offset, size);
898 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 908 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
899 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); 909 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
900 } else { 910 } else {
@@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
913 SynchronizeBuffer(buffer, binding.cpu_addr, size); 923 SynchronizeBuffer(buffer, binding.cpu_addr, size);
914 924
915 const u32 offset = buffer.Offset(binding.cpu_addr); 925 const u32 offset = buffer.Offset(binding.cpu_addr);
926 buffer.MarkUsage(offset, size);
916 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; 927 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
917 928
918 if (is_written) { 929 if (is_written) {
@@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
943 954
944 const u32 offset = buffer.Offset(binding.cpu_addr); 955 const u32 offset = buffer.Offset(binding.cpu_addr);
945 const PixelFormat format = binding.format; 956 const PixelFormat format = binding.format;
957 buffer.MarkUsage(offset, size);
946 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 958 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
947 if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { 959 if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
948 runtime.BindImageBuffer(buffer, offset, size, format); 960 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
975 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 987 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
976 988
977 const u32 offset = buffer.Offset(binding.cpu_addr); 989 const u32 offset = buffer.Offset(binding.cpu_addr);
990 buffer.MarkUsage(offset, size);
978 host_bindings.buffers.push_back(&buffer); 991 host_bindings.buffers.push_back(&buffer);
979 host_bindings.offsets.push_back(offset); 992 host_bindings.offsets.push_back(offset);
980 host_bindings.sizes.push_back(binding.size); 993 host_bindings.sizes.push_back(size);
981 } 994 }
982 if (host_bindings.buffers.size() > 0) { 995 if (host_bindings.buffers.size() > 0) {
983 runtime.BindTransformFeedbackBuffers(host_bindings); 996 runtime.BindTransformFeedbackBuffers(host_bindings);
@@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1001 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1014 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1002 1015
1003 const u32 offset = buffer.Offset(binding.cpu_addr); 1016 const u32 offset = buffer.Offset(binding.cpu_addr);
1017 buffer.MarkUsage(offset, size);
1004 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1018 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
1005 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); 1019 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
1006 ++binding_index; 1020 ++binding_index;
@@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1021 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1035 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1022 1036
1023 const u32 offset = buffer.Offset(binding.cpu_addr); 1037 const u32 offset = buffer.Offset(binding.cpu_addr);
1038 buffer.MarkUsage(offset, size);
1024 const bool is_written = 1039 const bool is_written =
1025 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; 1040 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
1026 1041
@@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
1053 1068
1054 const u32 offset = buffer.Offset(binding.cpu_addr); 1069 const u32 offset = buffer.Offset(binding.cpu_addr);
1055 const PixelFormat format = binding.format; 1070 const PixelFormat format = binding.format;
1071 buffer.MarkUsage(offset, size);
1056 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 1072 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1057 if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { 1073 if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
1058 runtime.BindImageBuffer(buffer, offset, size, format); 1074 runtime.BindImageBuffer(buffer, offset, size, format);
@@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1172 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1188 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1173 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); 1189 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1174 } 1190 }
1191 const BufferId buffer_id = FindBuffer(*cpu_addr, size);
1175 channel_state->vertex_buffers[index] = Binding{ 1192 channel_state->vertex_buffers[index] = Binding{
1176 .cpu_addr = *cpu_addr, 1193 .cpu_addr = *cpu_addr,
1177 .size = size, 1194 .size = size,
1178 .buffer_id = FindBuffer(*cpu_addr, size), 1195 .buffer_id = buffer_id,
1179 }; 1196 };
1180} 1197}
1181 1198
@@ -1192,11 +1209,6 @@ void BufferCache<P>::UpdateDrawIndirect() {
1192 .size = static_cast<u32>(size), 1209 .size = static_cast<u32>(size),
1193 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), 1210 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
1194 }; 1211 };
1195 VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64);
1196 VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64);
1197 IntervalType interval{cpu_addr_start, cpu_addr_end};
1198 ClearDownload(interval);
1199 common_ranges.subtract(interval);
1200 }; 1212 };
1201 if (current_draw_indirect->include_count) { 1213 if (current_draw_indirect->include_count) {
1202 update(current_draw_indirect->count_start_address, sizeof(u32), 1214 update(current_draw_indirect->count_start_address, sizeof(u32),
@@ -1406,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1406 .dst_offset = dst_base_offset, 1418 .dst_offset = dst_base_offset,
1407 .size = overlap.SizeBytes(), 1419 .size = overlap.SizeBytes(),
1408 }); 1420 });
1409 runtime.CopyBuffer(new_buffer, overlap, copies); 1421 new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
1422 runtime.CopyBuffer(new_buffer, overlap, copies, true);
1410 DeleteBuffer(overlap_id, true); 1423 DeleteBuffer(overlap_id, true);
1411} 1424}
1412 1425
@@ -1419,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1419 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1432 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1420 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1433 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1421 auto& new_buffer = slot_buffers[new_buffer_id]; 1434 auto& new_buffer = slot_buffers[new_buffer_id];
1422 runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0); 1435 const size_t size_bytes = new_buffer.SizeBytes();
1436 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
1437 new_buffer.MarkUsage(0, size_bytes);
1423 for (const BufferId overlap_id : overlap.ids) { 1438 for (const BufferId overlap_id : overlap.ids) {
1424 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1439 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1425 } 1440 }
@@ -1472,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1472 1487
1473template <class P> 1488template <class P>
1474bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1489bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1475 return SynchronizeBufferImpl(buffer, cpu_addr, size);
1476}
1477
1478template <class P>
1479bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
1480 boost::container::small_vector<BufferCopy, 4> copies; 1490 boost::container::small_vector<BufferCopy, 4> copies;
1481 u64 total_size_bytes = 0; 1491 u64 total_size_bytes = 0;
1482 u64 largest_copy = 0; 1492 u64 largest_copy = 0;
@@ -1499,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1499} 1509}
1500 1510
1501template <class P> 1511template <class P>
1502bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1503 boost::container::small_vector<BufferCopy, 4> copies;
1504 u64 total_size_bytes = 0;
1505 u64 largest_copy = 0;
1506 IntervalSet found_sets{};
1507 auto make_copies = [&] {
1508 for (auto& interval : found_sets) {
1509 const std::size_t sub_size = interval.upper() - interval.lower();
1510 const VAddr cpu_addr_ = interval.lower();
1511 copies.push_back(BufferCopy{
1512 .src_offset = total_size_bytes,
1513 .dst_offset = cpu_addr_ - buffer.CpuAddr(),
1514 .size = sub_size,
1515 });
1516 total_size_bytes += sub_size;
1517 largest_copy = std::max<u64>(largest_copy, sub_size);
1518 }
1519 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1520 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1521 };
1522 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1523 const VAddr base_adr = cpu_addr_out;
1524 const VAddr end_adr = base_adr + range_size;
1525 const IntervalType add_interval{base_adr, end_adr};
1526 found_sets.add(add_interval);
1527 });
1528 if (found_sets.empty()) {
1529 return true;
1530 }
1531 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1532 auto it = common_ranges.lower_bound(search_interval);
1533 auto it_end = common_ranges.upper_bound(search_interval);
1534 if (it == common_ranges.end()) {
1535 make_copies();
1536 return false;
1537 }
1538 while (it != it_end) {
1539 found_sets.subtract(*it);
1540 it++;
1541 }
1542 make_copies();
1543 return false;
1544}
1545
1546template <class P>
1547void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1512void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1548 std::span<BufferCopy> copies) { 1513 std::span<BufferCopy> copies) {
1549 if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { 1514 if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
@@ -1591,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1591 // Apply the staging offset 1556 // Apply the staging offset
1592 copy.src_offset += upload_staging.offset; 1557 copy.src_offset += upload_staging.offset;
1593 } 1558 }
1594 runtime.CopyBuffer(buffer, upload_staging.buffer, copies); 1559 const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
1560 runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
1595 } 1561 }
1596} 1562}
1597 1563
@@ -1633,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_
1633 }}; 1599 }};
1634 u8* const src_pointer = upload_staging.mapped_span.data(); 1600 u8* const src_pointer = upload_staging.mapped_span.data();
1635 std::memcpy(src_pointer, inlined_buffer.data(), copy_size); 1601 std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
1636 runtime.CopyBuffer(buffer, upload_staging.buffer, copies); 1602 const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
1603 runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
1637 } else { 1604 } else {
1638 buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); 1605 buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
1639 } 1606 }
@@ -1686,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1686 for (BufferCopy& copy : copies) { 1653 for (BufferCopy& copy : copies) {
1687 // Modify copies to have the staging offset in mind 1654 // Modify copies to have the staging offset in mind
1688 copy.dst_offset += download_staging.offset; 1655 copy.dst_offset += download_staging.offset;
1656 buffer.MarkUsage(copy.src_offset, copy.size);
1689 } 1657 }
1690 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); 1658 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
1691 runtime.Finish(); 1659 runtime.Finish();
1692 for (const BufferCopy& copy : copies) { 1660 for (const BufferCopy& copy : copies) {
1693 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1661 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index eed267361..d6d696d8c 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -529,10 +529,6 @@ private:
529 529
530 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 530 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
531 531
532 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
533
534 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
535
536 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 532 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
537 std::span<BufferCopy> copies); 533 std::span<BufferCopy> copies);
538 534
diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h
new file mode 100644
index 000000000..ab05fe415
--- /dev/null
+++ b/src/video_core/buffer_cache/usage_tracker.h
@@ -0,0 +1,79 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "common/alignment.h"
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11class UsageTracker {
12 static constexpr size_t BYTES_PER_BIT_SHIFT = 6;
13 static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT;
14 static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT;
15
16public:
17 explicit UsageTracker(size_t size) {
18 const size_t num_pages = (size >> PAGE_SHIFT) + 1;
19 pages.resize(num_pages, 0ULL);
20 }
21
22 void Reset() noexcept {
23 std::ranges::fill(pages, 0ULL);
24 }
25
26 void Track(u64 offset, u64 size) noexcept {
27 const size_t page = offset >> PAGE_SHIFT;
28 const size_t page_end = (offset + size) >> PAGE_SHIFT;
29 TrackPage(page, offset, size);
30 if (page == page_end) {
31 return;
32 }
33 for (size_t i = page + 1; i < page_end; i++) {
34 pages[i] = ~u64{0};
35 }
36 const size_t offset_end = offset + size;
37 const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
38 TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
39 }
40
41 [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept {
42 const size_t page = offset >> PAGE_SHIFT;
43 const size_t page_end = (offset + size) >> PAGE_SHIFT;
44 if (IsPageUsed(page, offset, size)) {
45 return true;
46 }
47 for (size_t i = page + 1; i < page_end; i++) {
48 if (pages[i] != 0) {
49 return true;
50 }
51 }
52 const size_t offset_end = offset + size;
53 const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES);
54 return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned);
55 }
56
57private:
58 void TrackPage(u64 page, u64 offset, u64 size) noexcept {
59 const size_t offset_in_page = offset % PAGE_BYTES;
60 const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
61 const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
62 const size_t mask = ~u64{0} >> (64 - num_bits);
63 pages[page] |= (~u64{0} & mask) << first_bit;
64 }
65
66 bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept {
67 const size_t offset_in_page = offset % PAGE_BYTES;
68 const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT;
69 const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT;
70 const size_t mask = ~u64{0} >> (64 - num_bits);
71 const size_t mask2 = (~u64{0} & mask) << first_bit;
72 return (pages[page] & mask2) != 0;
73 }
74
75private:
76 std::vector<u64> pages;
77};
78
79} // namespace VideoCommon
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 02e161270..91f10aec2 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -72,7 +72,7 @@ void Fermi2D::Blit() {
72 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); 72 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
73 73
74 const auto& args = regs.pixels_from_memory; 74 const auto& args = regs.pixels_from_memory;
75 constexpr s64 null_derivate = 1ULL << 32; 75 constexpr s64 null_derivative = 1ULL << 32;
76 Surface src = regs.src; 76 Surface src = regs.src;
77 const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); 77 const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
78 const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && 78 const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 &&
@@ -89,7 +89,7 @@ void Fermi2D::Blit() {
89 .operation = regs.operation, 89 .operation = regs.operation,
90 .filter = args.sample_mode.filter, 90 .filter = args.sample_mode.filter,
91 .must_accelerate = 91 .must_accelerate =
92 args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, 92 args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu,
93 .dst_x0 = args.dst_x0, 93 .dst_x0 = args.dst_x0,
94 .dst_y0 = args.dst_y0, 94 .dst_y0 = args.dst_y0,
95 .dst_x1 = args.dst_x0 + args.dst_width, 95 .dst_x1 = args.dst_x0 + args.dst_width,
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 32d767d85..592c28ba3 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -268,7 +268,7 @@ size_t Maxwell3D::EstimateIndexBufferSize() {
268 std::numeric_limits<u32>::max()}; 268 std::numeric_limits<u32>::max()};
269 const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); 269 const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
270 const size_t log2_byte_size = Common::Log2Ceil64(byte_size); 270 const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
271 const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; 271 const size_t cap{GetMaxCurrentVertices() * 4 * byte_size};
272 const size_t lower_cap = 272 const size_t lower_cap =
273 std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); 273 std::min<size_t>(static_cast<size_t>(end_address - start_address), cap);
274 return std::min<size_t>( 274 return std::min<size_t>(
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c0e6471fe..805a89900 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -86,10 +86,7 @@ public:
86 uncommitted_operations.emplace_back(std::move(func)); 86 uncommitted_operations.emplace_back(std::move(func));
87 } 87 }
88 pending_operations.emplace_back(std::move(uncommitted_operations)); 88 pending_operations.emplace_back(std::move(uncommitted_operations));
89 { 89 QueueFence(new_fence);
90 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
91 QueueFence(new_fence);
92 }
93 if (!delay_fence) { 90 if (!delay_fence) {
94 func(); 91 func();
95 } 92 }
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index dbcf508e5..1030db681 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -1,11 +1,7 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <algorithm>
5#include <fstream>
6#include <vector>
7#include "common/assert.h" 4#include "common/assert.h"
8#include "common/scope_exit.h"
9#include "common/settings.h" 5#include "common/settings.h"
10#include "video_core/host1x/codecs/codec.h" 6#include "video_core/host1x/codecs/codec.h"
11#include "video_core/host1x/codecs/h264.h" 7#include "video_core/host1x/codecs/h264.h"
@@ -14,242 +10,17 @@
14#include "video_core/host1x/host1x.h" 10#include "video_core/host1x/host1x.h"
15#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
16 12
17extern "C" {
18#include <libavfilter/buffersink.h>
19#include <libavfilter/buffersrc.h>
20#include <libavutil/opt.h>
21#ifdef LIBVA_FOUND
22// for querying VAAPI driver information
23#include <libavutil/hwcontext_vaapi.h>
24#endif
25}
26
27namespace Tegra { 13namespace Tegra {
28namespace {
29constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
30constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
31constexpr std::array PREFERRED_GPU_DECODERS = {
32 AV_HWDEVICE_TYPE_CUDA,
33#ifdef _WIN32
34 AV_HWDEVICE_TYPE_D3D11VA,
35 AV_HWDEVICE_TYPE_DXVA2,
36#elif defined(__unix__)
37 AV_HWDEVICE_TYPE_VAAPI,
38 AV_HWDEVICE_TYPE_VDPAU,
39#endif
40 // last resort for Linux Flatpak (w/ NVIDIA)
41 AV_HWDEVICE_TYPE_VULKAN,
42};
43
44void AVPacketDeleter(AVPacket* ptr) {
45 av_packet_free(&ptr);
46}
47
48using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
49
50AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
51 for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
52 if (*p == av_codec_ctx->pix_fmt) {
53 return av_codec_ctx->pix_fmt;
54 }
55 }
56 LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
57 av_buffer_unref(&av_codec_ctx->hw_device_ctx);
58 av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
59 return PREFERRED_CPU_FMT;
60}
61
62// List all the currently available hwcontext in ffmpeg
63std::vector<AVHWDeviceType> ListSupportedContexts() {
64 std::vector<AVHWDeviceType> contexts{};
65 AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
66 do {
67 current_device_type = av_hwdevice_iterate_types(current_device_type);
68 contexts.push_back(current_device_type);
69 } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
70 return contexts;
71}
72
73} // namespace
74
75void AVFrameDeleter(AVFrame* ptr) {
76 av_frame_free(&ptr);
77}
78 14
79Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) 15Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
80 : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)), 16 : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
81 vp8_decoder(std::make_unique<Decoder::VP8>(host1x)), 17 vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
82 vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {} 18 vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
83 19
84Codec::~Codec() { 20Codec::~Codec() = default;
85 if (!initialized) {
86 return;
87 }
88 // Free libav memory
89 avcodec_free_context(&av_codec_ctx);
90 av_buffer_unref(&av_gpu_decoder);
91
92 if (filters_initialized) {
93 avfilter_graph_free(&av_filter_graph);
94 }
95}
96
97bool Codec::CreateGpuAvDevice() {
98 static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
99 static const auto supported_contexts = ListSupportedContexts();
100 for (const auto& type : PREFERRED_GPU_DECODERS) {
101 if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
102 [&type](const auto& context) { return context == type; })) {
103 LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
104 continue;
105 }
106 // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
107 av_buffer_unref(&av_gpu_decoder);
108 const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
109 if (hwdevice_res < 0) {
110 LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
111 av_hwdevice_get_type_name(type), hwdevice_res);
112 continue;
113 }
114#ifdef LIBVA_FOUND
115 if (type == AV_HWDEVICE_TYPE_VAAPI) {
116 // we need to determine if this is an impersonated VAAPI driver
117 AVHWDeviceContext* hwctx =
118 static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
119 AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
120 const char* vendor_name = vaQueryVendorString(vactx->display);
121 if (strstr(vendor_name, "VDPAU backend")) {
122 // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
123 LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
124 continue;
125 } else {
126 // according to some user testing, certain vaapi driver (Intel?) could be buggy
127 // so let's log the driver name which may help the developers/supporters
128 LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
129 }
130 }
131#endif
132 for (int i = 0;; i++) {
133 const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
134 if (!config) {
135 LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
136 av_codec->name, av_hwdevice_get_type_name(type));
137 break;
138 }
139 if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
140 LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
141 av_codec_ctx->pix_fmt = config->pix_fmt;
142 return true;
143 }
144 }
145 }
146 return false;
147}
148
149void Codec::InitializeAvCodecContext() {
150 av_codec_ctx = avcodec_alloc_context3(av_codec);
151 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
152 av_codec_ctx->thread_count = 0;
153 av_codec_ctx->thread_type &= ~FF_THREAD_FRAME;
154}
155
156void Codec::InitializeGpuDecoder() {
157 if (!CreateGpuAvDevice()) {
158 av_buffer_unref(&av_gpu_decoder);
159 return;
160 }
161 auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
162 ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
163 av_codec_ctx->hw_device_ctx = hw_device_ctx;
164 av_codec_ctx->get_format = GetGpuFormat;
165}
166
167void Codec::InitializeAvFilters(AVFrame* frame) {
168 const AVFilter* buffer_src = avfilter_get_by_name("buffer");
169 const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
170 AVFilterInOut* inputs = avfilter_inout_alloc();
171 AVFilterInOut* outputs = avfilter_inout_alloc();
172 SCOPE_EXIT({
173 avfilter_inout_free(&inputs);
174 avfilter_inout_free(&outputs);
175 });
176
177 // Don't know how to get the accurate time_base but it doesn't matter for yadif filter
178 // so just use 1/1 to make buffer filter happy
179 std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width,
180 frame->height, frame->format);
181
182 av_filter_graph = avfilter_graph_alloc();
183 int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(),
184 nullptr, av_filter_graph);
185 if (ret < 0) {
186 LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret);
187 return;
188 }
189
190 ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr,
191 av_filter_graph);
192 if (ret < 0) {
193 LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret);
194 return;
195 }
196
197 inputs->name = av_strdup("out");
198 inputs->filter_ctx = av_filter_sink_ctx;
199 inputs->pad_idx = 0;
200 inputs->next = nullptr;
201
202 outputs->name = av_strdup("in");
203 outputs->filter_ctx = av_filter_src_ctx;
204 outputs->pad_idx = 0;
205 outputs->next = nullptr;
206
207 const char* description = "yadif=1:-1:0";
208 ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr);
209 if (ret < 0) {
210 LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret);
211 return;
212 }
213
214 ret = avfilter_graph_config(av_filter_graph, nullptr);
215 if (ret < 0) {
216 LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret);
217 return;
218 }
219
220 filters_initialized = true;
221}
222 21
223void Codec::Initialize() { 22void Codec::Initialize() {
224 const AVCodecID codec = [&] { 23 initialized = decode_api.Initialize(current_codec);
225 switch (current_codec) {
226 case Host1x::NvdecCommon::VideoCodec::H264:
227 return AV_CODEC_ID_H264;
228 case Host1x::NvdecCommon::VideoCodec::VP8:
229 return AV_CODEC_ID_VP8;
230 case Host1x::NvdecCommon::VideoCodec::VP9:
231 return AV_CODEC_ID_VP9;
232 default:
233 UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
234 return AV_CODEC_ID_NONE;
235 }
236 }();
237 av_codec = avcodec_find_decoder(codec);
238
239 InitializeAvCodecContext();
240 if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
241 InitializeGpuDecoder();
242 }
243 if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
244 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
245 avcodec_free_context(&av_codec_ctx);
246 av_buffer_unref(&av_gpu_decoder);
247 return;
248 }
249 if (!av_codec_ctx->hw_device_ctx) {
250 LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
251 }
252 initialized = true;
253} 24}
254 25
255void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { 26void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
@@ -264,14 +35,18 @@ void Codec::Decode() {
264 if (is_first_frame) { 35 if (is_first_frame) {
265 Initialize(); 36 Initialize();
266 } 37 }
38
267 if (!initialized) { 39 if (!initialized) {
268 return; 40 return;
269 } 41 }
42
43 // Assemble bitstream.
270 bool vp9_hidden_frame = false; 44 bool vp9_hidden_frame = false;
271 const auto& frame_data = [&]() { 45 size_t configuration_size = 0;
46 const auto packet_data = [&]() {
272 switch (current_codec) { 47 switch (current_codec) {
273 case Tegra::Host1x::NvdecCommon::VideoCodec::H264: 48 case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
274 return h264_decoder->ComposeFrame(state, is_first_frame); 49 return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame);
275 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: 50 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
276 return vp8_decoder->ComposeFrame(state); 51 return vp8_decoder->ComposeFrame(state);
277 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: 52 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
@@ -283,89 +58,35 @@ void Codec::Decode() {
283 return std::span<const u8>{}; 58 return std::span<const u8>{};
284 } 59 }
285 }(); 60 }();
286 AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; 61
287 if (!packet) { 62 // Send assembled bitstream to decoder.
288 LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); 63 if (!decode_api.SendPacket(packet_data, configuration_size)) {
289 return;
290 }
291 packet->data = const_cast<u8*>(frame_data.data());
292 packet->size = static_cast<s32>(frame_data.size());
293 if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
294 LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
295 return; 64 return;
296 } 65 }
297 // Only receive/store visible frames 66
67 // Only receive/store visible frames.
298 if (vp9_hidden_frame) { 68 if (vp9_hidden_frame) {
299 return; 69 return;
300 } 70 }
301 AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
302 AVFramePtr final_frame{nullptr, AVFrameDeleter};
303 ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
304 if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
305 LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
306 return;
307 }
308 if (initial_frame->width == 0 || initial_frame->height == 0) {
309 LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
310 return;
311 }
312 bool is_interlaced = initial_frame->interlaced_frame != 0;
313 if (av_codec_ctx->hw_device_ctx) {
314 final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
315 ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
316 // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
317 // because Intel drivers crash unless using AV_PIX_FMT_NV12
318 final_frame->format = PREFERRED_GPU_FMT;
319 const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
320 ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
321 } else {
322 final_frame = std::move(initial_frame);
323 }
324 if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
325 UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
326 return;
327 }
328 if (!is_interlaced) {
329 av_frames.push(std::move(final_frame));
330 } else {
331 if (!filters_initialized) {
332 InitializeAvFilters(final_frame.get());
333 }
334 if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(),
335 AV_BUFFERSRC_FLAG_KEEP_REF);
336 ret) {
337 LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret);
338 return;
339 }
340 while (true) {
341 auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
342 71
343 int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); 72 // Receive output frames from decoder.
73 decode_api.ReceiveFrames(frames);
344 74
345 if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) 75 while (frames.size() > 10) {
346 break; 76 LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame");
347 if (ret < 0) { 77 frames.pop();
348 LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret);
349 return;
350 }
351
352 av_frames.push(std::move(filter_frame));
353 }
354 }
355 while (av_frames.size() > 10) {
356 LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
357 av_frames.pop();
358 } 78 }
359} 79}
360 80
361AVFramePtr Codec::GetCurrentFrame() { 81std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() {
362 // Sometimes VIC will request more frames than have been decoded. 82 // Sometimes VIC will request more frames than have been decoded.
363 // in this case, return a nullptr and don't overwrite previous frame data 83 // in this case, return a blank frame and don't overwrite previous data.
364 if (av_frames.empty()) { 84 if (frames.empty()) {
365 return AVFramePtr{nullptr, AVFrameDeleter}; 85 return {};
366 } 86 }
367 AVFramePtr frame = std::move(av_frames.front()); 87
368 av_frames.pop(); 88 auto frame = std::move(frames.front());
89 frames.pop();
369 return frame; 90 return frame;
370} 91}
371 92
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
index 06fe00a4b..f700ae129 100644
--- a/src/video_core/host1x/codecs/codec.h
+++ b/src/video_core/host1x/codecs/codec.h
@@ -4,28 +4,15 @@
4#pragma once 4#pragma once
5 5
6#include <memory> 6#include <memory>
7#include <optional>
7#include <string_view> 8#include <string_view>
8#include <queue> 9#include <queue>
9#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/host1x/ffmpeg/ffmpeg.h"
10#include "video_core/host1x/nvdec_common.h" 12#include "video_core/host1x/nvdec_common.h"
11 13
12extern "C" {
13#if defined(__GNUC__) || defined(__clang__)
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#include <libavfilter/avfilter.h>
19#if defined(__GNUC__) || defined(__clang__)
20#pragma GCC diagnostic pop
21#endif
22}
23
24namespace Tegra { 14namespace Tegra {
25 15
26void AVFrameDeleter(AVFrame* ptr);
27using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
28
29namespace Decoder { 16namespace Decoder {
30class H264; 17class H264;
31class VP8; 18class VP8;
@@ -51,7 +38,7 @@ public:
51 void Decode(); 38 void Decode();
52 39
53 /// Returns next decoded frame 40 /// Returns next decoded frame
54 [[nodiscard]] AVFramePtr GetCurrentFrame(); 41 [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame();
55 42
56 /// Returns the value of current_codec 43 /// Returns the value of current_codec
57 [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; 44 [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
@@ -60,25 +47,9 @@ public:
60 [[nodiscard]] std::string_view GetCurrentCodecName() const; 47 [[nodiscard]] std::string_view GetCurrentCodecName() const;
61 48
62private: 49private:
63 void InitializeAvCodecContext();
64
65 void InitializeAvFilters(AVFrame* frame);
66
67 void InitializeGpuDecoder();
68
69 bool CreateGpuAvDevice();
70
71 bool initialized{}; 50 bool initialized{};
72 bool filters_initialized{};
73 Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; 51 Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
74 52 FFmpeg::DecodeApi decode_api;
75 const AVCodec* av_codec{nullptr};
76 AVCodecContext* av_codec_ctx{nullptr};
77 AVBufferRef* av_gpu_decoder{nullptr};
78
79 AVFilterContext* av_filter_src_ctx{nullptr};
80 AVFilterContext* av_filter_sink_ctx{nullptr};
81 AVFilterGraph* av_filter_graph{nullptr};
82 53
83 Host1x::Host1x& host1x; 54 Host1x::Host1x& host1x;
84 const Host1x::NvdecCommon::NvdecRegisters& state; 55 const Host1x::NvdecCommon::NvdecRegisters& state;
@@ -86,7 +57,7 @@ private:
86 std::unique_ptr<Decoder::VP8> vp8_decoder; 57 std::unique_ptr<Decoder::VP8> vp8_decoder;
87 std::unique_ptr<Decoder::VP9> vp9_decoder; 58 std::unique_ptr<Decoder::VP9> vp9_decoder;
88 59
89 std::queue<AVFramePtr> av_frames{}; 60 std::queue<std::unique_ptr<FFmpeg::Frame>> frames{};
90}; 61};
91 62
92} // namespace Tegra 63} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index ece79b1e2..309a7f1d5 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -30,7 +30,7 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
30H264::~H264() = default; 30H264::~H264() = default;
31 31
32std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, 32std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
33 bool is_first_frame) { 33 size_t* out_configuration_size, bool is_first_frame) {
34 H264DecoderContext context; 34 H264DecoderContext context;
35 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, 35 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
36 sizeof(H264DecoderContext)); 36 sizeof(H264DecoderContext));
@@ -39,6 +39,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
39 if (!is_first_frame && frame_number != 0) { 39 if (!is_first_frame && frame_number != 0) {
40 frame.resize_destructive(context.stream_len); 40 frame.resize_destructive(context.stream_len);
41 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); 41 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
42 *out_configuration_size = 0;
42 return frame; 43 return frame;
43 } 44 }
44 45
@@ -157,6 +158,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
157 frame.resize(encoded_header.size() + context.stream_len); 158 frame.resize(encoded_header.size() + context.stream_len);
158 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); 159 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
159 160
161 *out_configuration_size = encoded_header.size();
160 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, 162 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
161 frame.data() + encoded_header.size(), context.stream_len); 163 frame.data() + encoded_header.size(), context.stream_len);
162 164
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index d6b556322..1deaf4632 100644
--- a/src/video_core/host1x/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -67,6 +67,7 @@ public:
67 67
68 /// Compose the H264 frame for FFmpeg decoding 68 /// Compose the H264 frame for FFmpeg decoding
69 [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, 69 [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
70 size_t* out_configuration_size,
70 bool is_first_frame = false); 71 bool is_first_frame = false);
71 72
72private: 73private:
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp
new file mode 100644
index 000000000..dcd07e6d2
--- /dev/null
+++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp
@@ -0,0 +1,419 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/assert.h"
5#include "common/logging/log.h"
6#include "common/scope_exit.h"
7#include "common/settings.h"
8#include "video_core/host1x/ffmpeg/ffmpeg.h"
9
10extern "C" {
11#ifdef LIBVA_FOUND
12// for querying VAAPI driver information
13#include <libavutil/hwcontext_vaapi.h>
14#endif
15}
16
17namespace FFmpeg {
18
19namespace {
20
21constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
22constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
23constexpr std::array PreferredGpuDecoders = {
24 AV_HWDEVICE_TYPE_CUDA,
25#ifdef _WIN32
26 AV_HWDEVICE_TYPE_D3D11VA,
27 AV_HWDEVICE_TYPE_DXVA2,
28#elif defined(__unix__)
29 AV_HWDEVICE_TYPE_VAAPI,
30 AV_HWDEVICE_TYPE_VDPAU,
31#endif
32 // last resort for Linux Flatpak (w/ NVIDIA)
33 AV_HWDEVICE_TYPE_VULKAN,
34};
35
36AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
37 for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
38 if (*p == codec_context->pix_fmt) {
39 return codec_context->pix_fmt;
40 }
41 }
42
43 LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
44 av_buffer_unref(&codec_context->hw_device_ctx);
45
46 codec_context->pix_fmt = PreferredCpuFormat;
47 return codec_context->pix_fmt;
48}
49
50std::string AVError(int errnum) {
51 char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
52 av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
53 return errbuf;
54}
55
56} // namespace
57
58Packet::Packet(std::span<const u8> data) {
59 m_packet = av_packet_alloc();
60 m_packet->data = const_cast<u8*>(data.data());
61 m_packet->size = static_cast<s32>(data.size());
62}
63
64Packet::~Packet() {
65 av_packet_free(&m_packet);
66}
67
68Frame::Frame() {
69 m_frame = av_frame_alloc();
70}
71
72Frame::~Frame() {
73 av_frame_free(&m_frame);
74}
75
76Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
77 const AVCodecID av_codec = [&] {
78 switch (codec) {
79 case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
80 return AV_CODEC_ID_H264;
81 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
82 return AV_CODEC_ID_VP8;
83 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
84 return AV_CODEC_ID_VP9;
85 default:
86 UNIMPLEMENTED_MSG("Unknown codec {}", codec);
87 return AV_CODEC_ID_NONE;
88 }
89 }();
90
91 m_codec = avcodec_find_decoder(av_codec);
92}
93
94bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
95 for (int i = 0;; i++) {
96 const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
97 if (!config) {
98 LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
99 av_hwdevice_get_type_name(type));
100 break;
101 }
102 if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
103 config->device_type == type) {
104 LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
105 *out_pix_fmt = config->pix_fmt;
106 return true;
107 }
108 }
109
110 return false;
111}
112
113std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
114 std::vector<AVHWDeviceType> types;
115 AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
116
117 while (true) {
118 current_device_type = av_hwdevice_iterate_types(current_device_type);
119 if (current_device_type == AV_HWDEVICE_TYPE_NONE) {
120 return types;
121 }
122
123 types.push_back(current_device_type);
124 }
125}
126
127HardwareContext::~HardwareContext() {
128 av_buffer_unref(&m_gpu_decoder);
129}
130
131bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
132 const Decoder& decoder) {
133 const auto supported_types = GetSupportedDeviceTypes();
134 for (const auto type : PreferredGpuDecoders) {
135 AVPixelFormat hw_pix_fmt;
136
137 if (std::ranges::find(supported_types, type) == supported_types.end()) {
138 LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
139 continue;
140 }
141
142 if (!this->InitializeWithType(type)) {
143 continue;
144 }
145
146 if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
147 decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
148 return true;
149 }
150 }
151
152 return false;
153}
154
155bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
156 av_buffer_unref(&m_gpu_decoder);
157
158 if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
159 ret < 0) {
160 LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
161 AVError(ret));
162 return false;
163 }
164
165#ifdef LIBVA_FOUND
166 if (type == AV_HWDEVICE_TYPE_VAAPI) {
167 // We need to determine if this is an impersonated VAAPI driver.
168 auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
169 auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
170 const char* vendor_name = vaQueryVendorString(vactx->display);
171 if (strstr(vendor_name, "VDPAU backend")) {
172 // VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
173 LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
174 return false;
175 } else {
176 // According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
177 // Log the driver name just in case.
178 LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
179 }
180 }
181#endif
182
183 return true;
184}
185
186DecoderContext::DecoderContext(const Decoder& decoder) {
187 m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
188 av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
189 m_codec_context->thread_count = 0;
190 m_codec_context->thread_type &= ~FF_THREAD_FRAME;
191}
192
193DecoderContext::~DecoderContext() {
194 av_buffer_unref(&m_codec_context->hw_device_ctx);
195 avcodec_free_context(&m_codec_context);
196}
197
198void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
199 AVPixelFormat hw_pix_fmt) {
200 m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
201 m_codec_context->get_format = GetGpuFormat;
202 m_codec_context->pix_fmt = hw_pix_fmt;
203}
204
205bool DecoderContext::OpenContext(const Decoder& decoder) {
206 if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
207 LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
208 return false;
209 }
210
211 if (!m_codec_context->hw_device_ctx) {
212 LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
213 }
214
215 return true;
216}
217
218bool DecoderContext::SendPacket(const Packet& packet) {
219 if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
220 LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
221 return false;
222 }
223
224 return true;
225}
226
227std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
228 auto dst_frame = std::make_unique<Frame>();
229
230 const auto ReceiveImpl = [&](AVFrame* frame) {
231 if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
232 LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
233 return false;
234 }
235
236 *out_is_interlaced = frame->interlaced_frame != 0;
237 return true;
238 };
239
240 if (m_codec_context->hw_device_ctx) {
241 // If we have a hardware context, make a separate frame here to receive the
242 // hardware result before sending it to the output.
243 Frame intermediate_frame;
244
245 if (!ReceiveImpl(intermediate_frame.GetFrame())) {
246 return {};
247 }
248
249 dst_frame->SetFormat(PreferredGpuFormat);
250 if (const int ret =
251 av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
252 ret < 0) {
253 LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
254 return {};
255 }
256 } else {
257 // Otherwise, decode the frame as normal.
258 if (!ReceiveImpl(dst_frame->GetFrame())) {
259 return {};
260 }
261 }
262
263 return dst_frame;
264}
265
266DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
267 const AVFilter* buffer_src = avfilter_get_by_name("buffer");
268 const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
269 AVFilterInOut* inputs = avfilter_inout_alloc();
270 AVFilterInOut* outputs = avfilter_inout_alloc();
271 SCOPE_EXIT({
272 avfilter_inout_free(&inputs);
273 avfilter_inout_free(&outputs);
274 });
275
276 // Don't know how to get the accurate time_base but it doesn't matter for yadif filter
277 // so just use 1/1 to make buffer filter happy
278 std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
279 frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
280
281 m_filter_graph = avfilter_graph_alloc();
282 int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
283 nullptr, m_filter_graph);
284 if (ret < 0) {
285 LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
286 return;
287 }
288
289 ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
290 m_filter_graph);
291 if (ret < 0) {
292 LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
293 return;
294 }
295
296 inputs->name = av_strdup("out");
297 inputs->filter_ctx = m_sink_context;
298 inputs->pad_idx = 0;
299 inputs->next = nullptr;
300
301 outputs->name = av_strdup("in");
302 outputs->filter_ctx = m_source_context;
303 outputs->pad_idx = 0;
304 outputs->next = nullptr;
305
306 const char* description = "yadif=1:-1:0";
307 ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
308 if (ret < 0) {
309 LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
310 return;
311 }
312
313 ret = avfilter_graph_config(m_filter_graph, nullptr);
314 if (ret < 0) {
315 LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
316 return;
317 }
318
319 m_initialized = true;
320}
321
322bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
323 if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
324 AV_BUFFERSRC_FLAG_KEEP_REF);
325 ret < 0) {
326 LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
327 return false;
328 }
329
330 return true;
331}
332
333std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
334 auto dst_frame = std::make_unique<Frame>();
335 const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
336
337 if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
338 return {};
339 }
340
341 if (ret < 0) {
342 LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
343 return {};
344 }
345
346 return dst_frame;
347}
348
349DeinterlaceFilter::~DeinterlaceFilter() {
350 avfilter_graph_free(&m_filter_graph);
351}
352
353void DecodeApi::Reset() {
354 m_deinterlace_filter.reset();
355 m_hardware_context.reset();
356 m_decoder_context.reset();
357 m_decoder.reset();
358}
359
360bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
361 this->Reset();
362 m_decoder.emplace(codec);
363 m_decoder_context.emplace(*m_decoder);
364
365 // Enable GPU decoding if requested.
366 if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
367 m_hardware_context.emplace();
368 m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
369 }
370
371 // Open the decoder context.
372 if (!m_decoder_context->OpenContext(*m_decoder)) {
373 this->Reset();
374 return false;
375 }
376
377 return true;
378}
379
380bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
381 FFmpeg::Packet packet(packet_data);
382 return m_decoder_context->SendPacket(packet);
383}
384
385void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
386 // Receive raw frame from decoder.
387 bool is_interlaced;
388 auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
389 if (!frame) {
390 return;
391 }
392
393 if (!is_interlaced) {
394 // If the frame is not interlaced, we can pend it now.
395 frame_queue.push(std::move(frame));
396 } else {
397 // Create the deinterlacer if needed.
398 if (!m_deinterlace_filter) {
399 m_deinterlace_filter.emplace(*frame);
400 }
401
402 // Add the frame we just received.
403 if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
404 return;
405 }
406
407 // Pend output fields.
408 while (true) {
409 auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
410 if (!filter_frame) {
411 break;
412 }
413
414 frame_queue.push(std::move(filter_frame));
415 }
416 }
417}
418
419} // namespace FFmpeg
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h
new file mode 100644
index 000000000..1de0bbd83
--- /dev/null
+++ b/src/video_core/host1x/ffmpeg/ffmpeg.h
@@ -0,0 +1,213 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <optional>
8#include <span>
9#include <vector>
10#include <queue>
11
12#include "common/common_funcs.h"
13#include "common/common_types.h"
14#include "video_core/host1x/nvdec_common.h"
15
16extern "C" {
17#if defined(__GNUC__) || defined(__clang__)
18#pragma GCC diagnostic push
19#pragma GCC diagnostic ignored "-Wconversion"
20#endif
21
22#include <libavcodec/avcodec.h>
23#include <libavfilter/avfilter.h>
24#include <libavfilter/buffersink.h>
25#include <libavfilter/buffersrc.h>
26#include <libavutil/avutil.h>
27#include <libavutil/opt.h>
28
29#if defined(__GNUC__) || defined(__clang__)
30#pragma GCC diagnostic pop
31#endif
32}
33
34namespace FFmpeg {
35
36class Packet;
37class Frame;
38class Decoder;
39class HardwareContext;
40class DecoderContext;
41class DeinterlaceFilter;
42
43// Wraps an AVPacket, a container for compressed bitstream data.
44class Packet {
45public:
46 YUZU_NON_COPYABLE(Packet);
47 YUZU_NON_MOVEABLE(Packet);
48
// Constructs an AVPacket for the given bitstream bytes.
// NOTE(review): whether the bytes are copied or referenced is determined by
// the definition in ffmpeg.cpp — confirm lifetime requirements of `data`.
49 explicit Packet(std::span<const u8> data);
50 ~Packet();
51
// Raw FFmpeg packet handle; ownership stays with this object.
52 AVPacket* GetPacket() const {
53 return m_packet;
54 }
55
56private:
57 AVPacket* m_packet{};
58};
59
60// Wraps an AVFrame, a container for audio and video stream data.
// Accessors below expose the underlying AVFrame fields (width/height/format,
// per-plane data pointers and line sizes) without transferring ownership.
61class Frame {
62public:
63 YUZU_NON_COPYABLE(Frame);
64 YUZU_NON_MOVEABLE(Frame);
65
66 explicit Frame();
67 ~Frame();
68
69 int GetWidth() const {
70 return m_frame->width;
71 }
72
73 int GetHeight() const {
74 return m_frame->height;
75 }
76
77 AVPixelFormat GetPixelFormat() const {
78 return static_cast<AVPixelFormat>(m_frame->format);
79 }
80
// Line size (bytes per row) of the given plane.
81 int GetStride(int plane) const {
82 return m_frame->linesize[plane];
83 }
84
// Pointer to the full linesize array (one entry per plane).
85 int* GetStrides() const {
86 return m_frame->linesize;
87 }
88
// Pixel data pointer for the given plane; owned by the AVFrame.
89 u8* GetData(int plane) const {
90 return m_frame->data[plane];
91 }
92
// Pointer to the full data-plane pointer array; owned by the AVFrame.
93 u8** GetPlanes() const {
94 return m_frame->data;
95 }
96
97 void SetFormat(int format) {
98 m_frame->format = format;
99 }
100
// Raw FFmpeg frame handle; ownership stays with this object.
101 AVFrame* GetFrame() const {
102 return m_frame;
103 }
104
105private:
106 AVFrame* m_frame{};
107};
108
109// Wraps an AVCodec, a type containing information about a codec.
110class Decoder {
111public:
112 YUZU_NON_COPYABLE(Decoder);
113 YUZU_NON_MOVEABLE(Decoder);
114
// Looks up the AVCodec matching the given NVDEC codec (see ffmpeg.cpp).
115 explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
116 ~Decoder() = default;
117
// Queries whether this codec can decode on the given hardware device type;
// on success the matching hardware pixel format is written to out_pix_fmt.
118 bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
119
120 const AVCodec* GetCodec() const {
121 return m_codec;
122 }
123
124private:
125 const AVCodec* m_codec{};
126};
127
128// Wraps AVBufferRef for an accelerated decoder.
129class HardwareContext {
130public:
131 YUZU_NON_COPYABLE(HardwareContext);
132 YUZU_NON_MOVEABLE(HardwareContext);
133
// Enumerates hardware device types usable for accelerated decoding.
134 static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
135
136 explicit HardwareContext() = default;
137 ~HardwareContext();
138
// Attempts to set up hardware decoding for the given decoder/context pair;
// returns false when no suitable hardware device could be initialized.
139 bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
140
141 AVBufferRef* GetBufferRef() const {
142 return m_gpu_decoder;
143 }
144
145private:
// Attempts initialization with one specific device type (helper for
// InitializeForDecoder).
146 bool InitializeWithType(AVHWDeviceType type);
147
148 AVBufferRef* m_gpu_decoder{};
149};
150
151// Wraps an AVCodecContext.
152class DecoderContext {
153public:
154 YUZU_NON_COPYABLE(DecoderContext);
155 YUZU_NON_MOVEABLE(DecoderContext);
156
157 explicit DecoderContext(const Decoder& decoder);
158 ~DecoderContext();
159
// Attaches the hardware device context and pixel format for GPU decoding.
160 void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
// Opens the codec context; returns false on failure.
161 bool OpenContext(const Decoder& decoder);
// Submits one compressed packet to the decoder; returns false on failure.
162 bool SendPacket(const Packet& packet);
// Retrieves one decoded frame, or nullptr when none is available.
// out_is_interlaced is set to indicate whether the frame needs deinterlacing.
163 std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
164
165 AVCodecContext* GetCodecContext() const {
166 return m_codec_context;
167 }
168
169private:
170 AVCodecContext* m_codec_context{};
171};
172
173// Wraps an AVFilterGraph.
// Builds a filter graph configured from an initial frame's properties and
// converts interlaced input frames into progressive output frames.
174class DeinterlaceFilter {
175public:
176 YUZU_NON_COPYABLE(DeinterlaceFilter);
177 YUZU_NON_MOVEABLE(DeinterlaceFilter);
178
// Constructs the graph using `frame` as the template for input properties.
179 explicit DeinterlaceFilter(const Frame& frame);
180 ~DeinterlaceFilter();
181
// Pushes one source frame into the graph; returns false on failure.
182 bool AddSourceFrame(const Frame& frame);
// Pulls one filtered frame from the sink, or nullptr when none is pending.
183 std::unique_ptr<Frame> DrainSinkFrame();
184
185private:
186 AVFilterGraph* m_filter_graph{};
187 AVFilterContext* m_source_context{};
188 AVFilterContext* m_sink_context{};
189 bool m_initialized{};
190};
191
// High-level facade over the FFmpeg wrappers above: owns the decoder,
// codec context, optional hardware context, and optional deinterlace filter,
// and exposes a simple send-packet / receive-frames interface.
192class DecodeApi {
193public:
194 YUZU_NON_COPYABLE(DecodeApi);
195 YUZU_NON_MOVEABLE(DecodeApi);
196
197 DecodeApi() = default;
198 ~DecodeApi() = default;
199
// (Re)creates the decoding pipeline for `codec`; returns false on failure.
200 bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
// Destroys all pipeline state (safe to call repeatedly).
201 void Reset();
202
// Feeds one compressed packet; see ffmpeg.cpp for configuration_size usage.
203 bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
// Appends zero or more decoded (and, if needed, deinterlaced) frames.
204 void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
205
206private:
// Members are ordered so defaults destruct dependents before dependencies.
207 std::optional<FFmpeg::Decoder> m_decoder;
208 std::optional<FFmpeg::DecoderContext> m_decoder_context;
209 std::optional<FFmpeg::HardwareContext> m_hardware_context;
210 std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
211};
212
213} // namespace FFmpeg
diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp
index a4bd5b79f..b8f5866d3 100644
--- a/src/video_core/host1x/nvdec.cpp
+++ b/src/video_core/host1x/nvdec.cpp
@@ -28,7 +28,7 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) {
28 } 28 }
29} 29}
30 30
31AVFramePtr Nvdec::GetFrame() { 31std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() {
32 return codec->GetCurrentFrame(); 32 return codec->GetCurrentFrame();
33} 33}
34 34
diff --git a/src/video_core/host1x/nvdec.h b/src/video_core/host1x/nvdec.h
index 3949d5181..ddddb8d28 100644
--- a/src/video_core/host1x/nvdec.h
+++ b/src/video_core/host1x/nvdec.h
@@ -23,7 +23,7 @@ public:
23 void ProcessMethod(u32 method, u32 argument); 23 void ProcessMethod(u32 method, u32 argument);
24 24
25 /// Return most recently decoded frame 25 /// Return most recently decoded frame
26 [[nodiscard]] AVFramePtr GetFrame(); 26 [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame();
27 27
28private: 28private:
29 /// Invoke codec to decode a frame 29 /// Invoke codec to decode a frame
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 10d7ef884..2a5eba415 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -82,27 +82,26 @@ void Vic::Execute() {
82 return; 82 return;
83 } 83 }
84 const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; 84 const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)};
85 const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); 85 auto frame = nvdec_processor->GetFrame();
86 const auto* frame = frame_ptr.get();
87 if (!frame) { 86 if (!frame) {
88 return; 87 return;
89 } 88 }
90 const u64 surface_width = config.surface_width_minus1 + 1; 89 const u64 surface_width = config.surface_width_minus1 + 1;
91 const u64 surface_height = config.surface_height_minus1 + 1; 90 const u64 surface_height = config.surface_height_minus1 + 1;
92 if (static_cast<u64>(frame->width) != surface_width || 91 if (static_cast<u64>(frame->GetWidth()) != surface_width ||
93 static_cast<u64>(frame->height) != surface_height) { 92 static_cast<u64>(frame->GetHeight()) != surface_height) {
94 // TODO: Properly support multiple video streams with differing frame dimensions 93 // TODO: Properly support multiple video streams with differing frame dimensions
95 LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", 94 LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
96 frame->width, frame->height, surface_width, surface_height); 95 frame->GetWidth(), frame->GetHeight(), surface_width, surface_height);
97 } 96 }
98 switch (config.pixel_format) { 97 switch (config.pixel_format) {
99 case VideoPixelFormat::RGBA8: 98 case VideoPixelFormat::RGBA8:
100 case VideoPixelFormat::BGRA8: 99 case VideoPixelFormat::BGRA8:
101 case VideoPixelFormat::RGBX8: 100 case VideoPixelFormat::RGBX8:
102 WriteRGBFrame(frame, config); 101 WriteRGBFrame(std::move(frame), config);
103 break; 102 break;
104 case VideoPixelFormat::YUV420: 103 case VideoPixelFormat::YUV420:
105 WriteYUVFrame(frame, config); 104 WriteYUVFrame(std::move(frame), config);
106 break; 105 break;
107 default: 106 default:
108 UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); 107 UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
@@ -110,10 +109,14 @@ void Vic::Execute() {
110 } 109 }
111} 110}
112 111
113void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { 112void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
114 LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); 113 LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
115 114
116 if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) { 115 const auto frame_width = frame->GetWidth();
116 const auto frame_height = frame->GetHeight();
117 const auto frame_format = frame->GetPixelFormat();
118
119 if (!scaler_ctx || frame_width != scaler_width || frame_height != scaler_height) {
117 const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { 120 const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
118 switch (pixel_format) { 121 switch (pixel_format) {
119 case VideoPixelFormat::RGBA8: 122 case VideoPixelFormat::RGBA8:
@@ -129,27 +132,26 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
129 132
130 sws_freeContext(scaler_ctx); 133 sws_freeContext(scaler_ctx);
131 // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format 134 // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
132 scaler_ctx = sws_getContext(frame->width, frame->height, 135 scaler_ctx = sws_getContext(frame_width, frame_height, frame_format, frame_width,
133 static_cast<AVPixelFormat>(frame->format), frame->width, 136 frame_height, target_format, 0, nullptr, nullptr, nullptr);
134 frame->height, target_format, 0, nullptr, nullptr, nullptr); 137 scaler_width = frame_width;
135 scaler_width = frame->width; 138 scaler_height = frame_height;
136 scaler_height = frame->height;
137 converted_frame_buffer.reset(); 139 converted_frame_buffer.reset();
138 } 140 }
139 if (!converted_frame_buffer) { 141 if (!converted_frame_buffer) {
140 const size_t frame_size = frame->width * frame->height * 4; 142 const size_t frame_size = frame_width * frame_height * 4;
141 converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; 143 converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
142 } 144 }
143 const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; 145 const std::array<int, 4> converted_stride{frame_width * 4, frame_height * 4, 0, 0};
144 u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; 146 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
145 sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, 147 sws_scale(scaler_ctx, frame->GetPlanes(), frame->GetStrides(), 0, frame_height,
146 converted_stride.data()); 148 &converted_frame_buf_addr, converted_stride.data());
147 149
148 // Use the minimum of surface/frame dimensions to avoid buffer overflow. 150 // Use the minimum of surface/frame dimensions to avoid buffer overflow.
149 const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; 151 const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
150 const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; 152 const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
151 const u32 width = std::min(surface_width, static_cast<u32>(frame->width)); 153 const u32 width = std::min(surface_width, static_cast<u32>(frame_width));
152 const u32 height = std::min(surface_height, static_cast<u32>(frame->height)); 154 const u32 height = std::min(surface_height, static_cast<u32>(frame_height));
153 const u32 blk_kind = static_cast<u32>(config.block_linear_kind); 155 const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
154 if (blk_kind != 0) { 156 if (blk_kind != 0) {
155 // swizzle pitch linear to block linear 157 // swizzle pitch linear to block linear
@@ -169,23 +171,23 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
169 } 171 }
170} 172}
171 173
172void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { 174void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) {
173 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); 175 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
174 176
175 const std::size_t surface_width = config.surface_width_minus1 + 1; 177 const std::size_t surface_width = config.surface_width_minus1 + 1;
176 const std::size_t surface_height = config.surface_height_minus1 + 1; 178 const std::size_t surface_height = config.surface_height_minus1 + 1;
177 const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; 179 const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
178 // Use the minimum of surface/frame dimensions to avoid buffer overflow. 180 // Use the minimum of surface/frame dimensions to avoid buffer overflow.
179 const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); 181 const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->GetWidth()));
180 const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); 182 const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->GetHeight()));
181 183
182 const auto stride = static_cast<size_t>(frame->linesize[0]); 184 const auto stride = static_cast<size_t>(frame->GetStride(0));
183 185
184 luma_buffer.resize_destructive(aligned_width * surface_height); 186 luma_buffer.resize_destructive(aligned_width * surface_height);
185 chroma_buffer.resize_destructive(aligned_width * surface_height / 2); 187 chroma_buffer.resize_destructive(aligned_width * surface_height / 2);
186 188
187 // Populate luma buffer 189 // Populate luma buffer
188 const u8* luma_src = frame->data[0]; 190 const u8* luma_src = frame->GetData(0);
189 for (std::size_t y = 0; y < frame_height; ++y) { 191 for (std::size_t y = 0; y < frame_height; ++y) {
190 const std::size_t src = y * stride; 192 const std::size_t src = y * stride;
191 const std::size_t dst = y * aligned_width; 193 const std::size_t dst = y * aligned_width;
@@ -196,16 +198,16 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
196 198
197 // Chroma 199 // Chroma
198 const std::size_t half_height = frame_height / 2; 200 const std::size_t half_height = frame_height / 2;
199 const auto half_stride = static_cast<size_t>(frame->linesize[1]); 201 const auto half_stride = static_cast<size_t>(frame->GetStride(1));
200 202
201 switch (frame->format) { 203 switch (frame->GetPixelFormat()) {
202 case AV_PIX_FMT_YUV420P: { 204 case AV_PIX_FMT_YUV420P: {
203 // Frame from FFmpeg software 205 // Frame from FFmpeg software
204 // Populate chroma buffer from both channels with interleaving. 206 // Populate chroma buffer from both channels with interleaving.
205 const std::size_t half_width = frame_width / 2; 207 const std::size_t half_width = frame_width / 2;
206 u8* chroma_buffer_data = chroma_buffer.data(); 208 u8* chroma_buffer_data = chroma_buffer.data();
207 const u8* chroma_b_src = frame->data[1]; 209 const u8* chroma_b_src = frame->GetData(1);
208 const u8* chroma_r_src = frame->data[2]; 210 const u8* chroma_r_src = frame->GetData(2);
209 for (std::size_t y = 0; y < half_height; ++y) { 211 for (std::size_t y = 0; y < half_height; ++y) {
210 const std::size_t src = y * half_stride; 212 const std::size_t src = y * half_stride;
211 const std::size_t dst = y * aligned_width; 213 const std::size_t dst = y * aligned_width;
@@ -219,7 +221,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
219 case AV_PIX_FMT_NV12: { 221 case AV_PIX_FMT_NV12: {
220 // Frame from VA-API hardware 222 // Frame from VA-API hardware
221 // This is already interleaved so just copy 223 // This is already interleaved so just copy
222 const u8* chroma_src = frame->data[1]; 224 const u8* chroma_src = frame->GetData(1);
223 for (std::size_t y = 0; y < half_height; ++y) { 225 for (std::size_t y = 0; y < half_height; ++y) {
224 const std::size_t src = y * stride; 226 const std::size_t src = y * stride;
225 const std::size_t dst = y * aligned_width; 227 const std::size_t dst = y * aligned_width;
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h
index 3d9753047..6c868f062 100644
--- a/src/video_core/host1x/vic.h
+++ b/src/video_core/host1x/vic.h
@@ -39,9 +39,9 @@ public:
39private: 39private:
40 void Execute(); 40 void Execute();
41 41
42 void WriteRGBFrame(const AVFrame* frame, const VicConfig& config); 42 void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
43 43
44 void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); 44 void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config);
45 45
46 Host1x& host1x; 46 Host1x& host1x;
47 std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; 47 std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 78b42b518..efa9adf7a 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -266,7 +266,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
266 return; 266 return;
267 } 267 }
268 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { 268 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
269 UNREACHABLE(); 269 ASSERT(false);
270 return; 270 return;
271 } 271 }
272 query_base->value += streamer->GetAmmendValue(); 272 query_base->value += streamer->GetAmmendValue();
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 65cd5aa06..4f1d5b548 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -3,6 +3,7 @@
3 3
4#include "common/alignment.h" 4#include "common/alignment.h"
5#include "core/memory.h" 5#include "core/memory.h"
6#include "video_core/control/channel_state.h"
6#include "video_core/host1x/host1x.h" 7#include "video_core/host1x/host1x.h"
7#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
8#include "video_core/renderer_null/null_rasterizer.h" 9#include "video_core/renderer_null/null_rasterizer.h"
@@ -99,8 +100,14 @@ bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
99} 100}
100void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 101void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
101 const VideoCore::DiskResourceLoadCallback& callback) {} 102 const VideoCore::DiskResourceLoadCallback& callback) {}
102void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {} 103void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {
103void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {} 104 CreateChannel(channel);
104void RasterizerNull::ReleaseChannel(s32 channel_id) {} 105}
106void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {
107 BindToChannel(channel.bind_id);
108}
109void RasterizerNull::ReleaseChannel(s32 channel_id) {
110 EraseChannel(channel_id);
111}
105 112
106} // namespace Null 113} // namespace Null
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 38d553d3c..dfd696de6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
178} 178}
179 179
180void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, 180void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
181 std::span<const VideoCommon::BufferCopy> copies, bool barrier) { 181 std::span<const VideoCommon::BufferCopy> copies, bool barrier,
182 bool) {
182 CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); 183 CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
183} 184}
184 185
185void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, 186void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
186 std::span<const VideoCommon::BufferCopy> copies) { 187 std::span<const VideoCommon::BufferCopy> copies, bool) {
187 CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); 188 CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true);
188} 189}
189 190
190void BufferCacheRuntime::PreCopyBarrier() { 191void BufferCacheRuntime::PreCopyBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index e8dbbd3a2..000f29a82 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -30,6 +30,8 @@ public:
30 30
31 void MakeResident(GLenum access) noexcept; 31 void MakeResident(GLenum access) noexcept;
32 32
33 void MarkUsage(u64 offset, u64 size) {}
34
33 [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); 35 [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
34 36
35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { 37 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@@ -66,22 +68,29 @@ public:
66 68
67 [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); 69 [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
68 70
71 bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) {
72 return false;
73 }
74
69 void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, 75 void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
70 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); 76 std::span<const VideoCommon::BufferCopy> copies, bool barrier);
71 77
72 void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, 78 void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
73 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); 79 std::span<const VideoCommon::BufferCopy> copies, bool barrier);
74 80
75 void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, 81 void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
76 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); 82 std::span<const VideoCommon::BufferCopy> copies, bool barrier,
83 bool can_reorder_upload = false);
77 84
78 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, 85 void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
79 std::span<const VideoCommon::BufferCopy> copies); 86 std::span<const VideoCommon::BufferCopy> copies, bool);
80 87
81 void PreCopyBarrier(); 88 void PreCopyBarrier();
82 void PostCopyBarrier(); 89 void PostCopyBarrier();
83 void Finish(); 90 void Finish();
84 91
92 void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
93
85 void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); 94 void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
86 95
87 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); 96 void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 44a771d65..af0a453ee 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -559,7 +559,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
559} 559}
560 560
561void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { 561void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
562 glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS); 562 const GLenum buffer_mode =
563 num_xfb_buffers_active == 1 ? GL_INTERLEAVED_ATTRIBS : GL_SEPARATE_ATTRIBS;
564 glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), buffer_mode);
563} 565}
564 566
565void GraphicsPipeline::GenerateTransformFeedbackState() { 567void GraphicsPipeline::GenerateTransformFeedbackState() {
@@ -567,12 +569,14 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
567 // when this is required. 569 // when this is required.
568 GLint* cursor{xfb_attribs.data()}; 570 GLint* cursor{xfb_attribs.data()};
569 571
572 num_xfb_buffers_active = 0;
570 for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { 573 for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
571 const auto& layout = key.xfb_state.layouts[feedback]; 574 const auto& layout = key.xfb_state.layouts[feedback];
572 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); 575 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
573 if (layout.varying_count == 0) { 576 if (layout.varying_count == 0) {
574 continue; 577 continue;
575 } 578 }
579 num_xfb_buffers_active++;
576 580
577 const auto& locations = key.xfb_state.varyings[feedback]; 581 const auto& locations = key.xfb_state.varyings[feedback];
578 std::optional<u32> current_index; 582 std::optional<u32> current_index;
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 74fc9cc3d..2f70c1ae9 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -154,6 +154,7 @@ private:
154 154
155 static constexpr std::size_t XFB_ENTRY_STRIDE = 3; 155 static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
156 GLsizei num_xfb_attribs{}; 156 GLsizei num_xfb_attribs{};
157 u32 num_xfb_buffers_active{};
157 std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; 158 std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
158 159
159 std::mutex built_mutex; 160 std::mutex built_mutex;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 27e2de1bf..9995b6dd4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -555,7 +555,7 @@ void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
555 } 555 }
556 { 556 {
557 std::scoped_lock lock{buffer_cache.mutex}; 557 std::scoped_lock lock{buffer_cache.mutex};
558 buffer_cache.CachedWriteMemory(addr, size); 558 buffer_cache.WriteMemory(addr, size);
559 } 559 }
560 shader_cache.InvalidateRegion(addr, size); 560 shader_cache.InvalidateRegion(addr, size);
561} 561}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7e7a80740..c4c30d807 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
132 const bool use_accelerated = 132 const bool use_accelerated =
133 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 133 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
134 const bool is_srgb = use_accelerated && screen_info.is_srgb; 134 const bool is_srgb = use_accelerated && screen_info.is_srgb;
135 RenderScreenshot(*framebuffer, use_accelerated);
135 136
136 { 137 Frame* frame = present_manager.GetRenderFrame();
137 std::scoped_lock lock{rasterizer.LockCaches()}; 138 blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
138 RenderScreenshot(*framebuffer, use_accelerated); 139 scheduler.Flush(*frame->render_ready);
139 140 present_manager.Present(frame);
140 Frame* frame = present_manager.GetRenderFrame();
141 blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
142 scheduler.Flush(*frame->render_ready);
143 present_manager.Present(frame);
144 }
145 141
146 gpu.RendererFrameEndNotify(); 142 gpu.RendererFrameEndNotify();
147 rasterizer.TickFrame(); 143 rasterizer.TickFrame();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 52fc142d1..66483a900 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
137 137
138BlitScreen::~BlitScreen() = default; 138BlitScreen::~BlitScreen() = default;
139 139
140static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer,
141 const ScreenInfo& screen_info) {
142 f32 left, top, right, bottom;
143
144 if (!framebuffer.crop_rect.IsEmpty()) {
145 // If crop rectangle is not empty, apply properties from rectangle.
146 left = static_cast<f32>(framebuffer.crop_rect.left);
147 top = static_cast<f32>(framebuffer.crop_rect.top);
148 right = static_cast<f32>(framebuffer.crop_rect.right);
149 bottom = static_cast<f32>(framebuffer.crop_rect.bottom);
150 } else {
151 // Otherwise, fall back to framebuffer dimensions.
152 left = 0;
153 top = 0;
154 right = static_cast<f32>(framebuffer.width);
155 bottom = static_cast<f32>(framebuffer.height);
156 }
157
158 // Apply transformation flags.
159 auto framebuffer_transform_flags = framebuffer.transform_flags;
160
161 if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) {
162 // Switch left and right.
163 std::swap(left, right);
164 }
165 if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) {
166 // Switch top and bottom.
167 std::swap(top, bottom);
168 }
169
170 framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH;
171 framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV;
172 if (True(framebuffer_transform_flags)) {
173 UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
174 static_cast<u32>(framebuffer_transform_flags));
175 }
176
177 // Get the screen properties.
178 const f32 screen_width = static_cast<f32>(screen_info.width);
179 const f32 screen_height = static_cast<f32>(screen_info.height);
180
181 // Normalize coordinate space.
182 left /= screen_width;
183 top /= screen_height;
184 right /= screen_width;
185 bottom /= screen_height;
186
187 return Common::Rectangle<f32>(left, top, right, bottom);
188}
189
140void BlitScreen::Recreate() { 190void BlitScreen::Recreate() {
141 present_manager.WaitPresent(); 191 present_manager.WaitPresent();
142 scheduler.Finish(); 192 scheduler.Finish();
@@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
354 source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); 404 source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view);
355 } 405 }
356 if (fsr) { 406 if (fsr) {
357 auto crop_rect = framebuffer.crop_rect; 407 const auto crop_rect = NormalizeCrop(framebuffer, screen_info);
358 if (crop_rect.GetWidth() == 0) { 408 const VkExtent2D fsr_input_size{
359 crop_rect.right = framebuffer.width; 409 .width = Settings::values.resolution_info.ScaleUp(screen_info.width),
360 } 410 .height = Settings::values.resolution_info.ScaleUp(screen_info.height),
361 if (crop_rect.GetHeight() == 0) {
362 crop_rect.bottom = framebuffer.height;
363 }
364 crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
365 VkExtent2D fsr_input_size{
366 .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
367 .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
368 }; 411 };
369 VkImageView fsr_image_view = 412 VkImageView fsr_image_view =
370 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); 413 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
@@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou
1397 1440
1398void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, 1441void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
1399 const Layout::FramebufferLayout layout) const { 1442 const Layout::FramebufferLayout layout) const {
1400 const auto& framebuffer_transform_flags = framebuffer.transform_flags; 1443 f32 left, top, right, bottom;
1401 const auto& framebuffer_crop_rect = framebuffer.crop_rect;
1402
1403 static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f};
1404 auto left = texcoords.left;
1405 auto right = texcoords.right;
1406
1407 switch (framebuffer_transform_flags) {
1408 case Service::android::BufferTransformFlags::Unset:
1409 break;
1410 case Service::android::BufferTransformFlags::FlipV:
1411 // Flip the framebuffer vertically
1412 left = texcoords.right;
1413 right = texcoords.left;
1414 break;
1415 default:
1416 UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}",
1417 static_cast<u32>(framebuffer_transform_flags));
1418 break;
1419 }
1420 1444
1421 UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); 1445 if (fsr) {
1422 1446 // FSR has already applied the crop, so we just want to render the image
1423 f32 left_start{}; 1447 // it has produced.
1424 if (framebuffer_crop_rect.Top() > 0) { 1448 left = 0;
1425 left_start = static_cast<f32>(framebuffer_crop_rect.Top()) / 1449 top = 0;
1426 static_cast<f32>(framebuffer_crop_rect.Bottom()); 1450 right = 1;
1427 } 1451 bottom = 1;
1428 f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width); 1452 } else {
1429 f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height); 1453 // Get the normalized crop rectangle.
1430 // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering 1454 const auto crop = NormalizeCrop(framebuffer, screen_info);
1431 // (e.g. handheld mode) on a 1920x1080 framebuffer. 1455
1432 if (!fsr) { 1456 // Apply the crop.
1433 if (framebuffer_crop_rect.GetWidth() > 0) { 1457 left = crop.left;
1434 scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / 1458 top = crop.top;
1435 static_cast<f32>(screen_info.width); 1459 right = crop.right;
1436 } 1460 bottom = crop.bottom;
1437 if (framebuffer_crop_rect.GetHeight() > 0) {
1438 scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
1439 static_cast<f32>(screen_info.height);
1440 }
1441 } 1461 }
1442 1462
1463 // Map the coordinates to the screen.
1443 const auto& screen = layout.screen; 1464 const auto& screen = layout.screen;
1444 const auto x = static_cast<f32>(screen.left); 1465 const auto x = static_cast<f32>(screen.left);
1445 const auto y = static_cast<f32>(screen.top); 1466 const auto y = static_cast<f32>(screen.top);
1446 const auto w = static_cast<f32>(screen.GetWidth()); 1467 const auto w = static_cast<f32>(screen.GetWidth());
1447 const auto h = static_cast<f32>(screen.GetHeight()); 1468 const auto h = static_cast<f32>(screen.GetHeight());
1448 data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v); 1469
1449 data.vertices[1] = 1470 data.vertices[0] = ScreenRectVertex(x, y, left, top);
1450 ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v); 1471 data.vertices[1] = ScreenRectVertex(x + w, y, right, top);
1451 data.vertices[2] = 1472 data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
1452 ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v); 1473 data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
1453 data.vertices[3] =
1454 ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v);
1455} 1474}
1456 1475
1457void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { 1476void BlitScreen::CreateSMAA(VkExtent2D smaa_size) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 976c3f6a6..5958f52f7 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
79} // Anonymous namespace 79} // Anonymous namespace
80 80
81Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) 81Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
82 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} 82 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
83 83
84Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 84Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
85 VAddr cpu_addr_, u64 size_bytes_) 85 VAddr cpu_addr_, u64 size_bytes_)
86 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), 86 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
87 device{&runtime.device}, buffer{ 87 device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
88 CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { 88 tracker{SizeBytes()} {
89 if (runtime.device.HasDebuggingToolAttached()) { 89 if (runtime.device.HasDebuggingToolAttached()) {
90 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); 90 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
91 } 91 }
@@ -359,12 +359,31 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
359 return static_cast<u32>(device.GetStorageBufferAlignment()); 359 return static_cast<u32>(device.GetStorageBufferAlignment());
360} 360}
361 361
362void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
363 for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
364 it->ResetUsageTracking();
365 }
366}
367
362void BufferCacheRuntime::Finish() { 368void BufferCacheRuntime::Finish() {
363 scheduler.Finish(); 369 scheduler.Finish();
364} 370}
365 371
372bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer,
373 std::span<const VideoCommon::BufferCopy> copies) {
374 if (Settings::values.disable_buffer_reorder) {
375 return false;
376 }
377 const bool can_use_upload_cmdbuf =
378 std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) {
379 return !buffer.IsRegionUsed(copy.dst_offset, copy.size);
380 });
381 return can_use_upload_cmdbuf;
382}
383
366void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, 384void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
367 std::span<const VideoCommon::BufferCopy> copies, bool barrier) { 385 std::span<const VideoCommon::BufferCopy> copies, bool barrier,
386 bool can_reorder_upload) {
368 if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { 387 if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
369 return; 388 return;
370 } 389 }
@@ -380,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
380 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 399 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
381 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, 400 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
382 }; 401 };
402
383 // Measuring a popular game, this number never exceeds the specified size once data is warmed up 403 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
384 boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); 404 boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
385 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); 405 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
406 if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) {
407 scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies](
408 vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
409 upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
410 });
411 return;
412 }
413
386 scheduler.RequestOutsideRenderPassOperationContext(); 414 scheduler.RequestOutsideRenderPassOperationContext();
387 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { 415 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
388 if (barrier) { 416 if (barrier) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 833dfac45..0b3fbd6d0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,6 +5,7 @@
5 5
6#include "video_core/buffer_cache/buffer_cache_base.h" 6#include "video_core/buffer_cache/buffer_cache_base.h"
7#include "video_core/buffer_cache/memory_tracker_base.h" 7#include "video_core/buffer_cache/memory_tracker_base.h"
8#include "video_core/buffer_cache/usage_tracker.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_compute_pass.h" 10#include "video_core/renderer_vulkan/vk_compute_pass.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -34,6 +35,18 @@ public:
34 return *buffer; 35 return *buffer;
35 } 36 }
36 37
    /// Returns true when any byte of [offset, offset + size) has been marked
    /// as used since the tracker was last reset (see ResetUsageTracking).
    [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept {
        return tracker.IsUsed(offset, size);
    }

    /// Marks the region [offset, offset + size) as used in the usage tracker.
    void MarkUsage(u64 offset, u64 size) noexcept {
        tracker.Track(offset, size);
    }

    /// Clears all recorded usage information (called once per frame).
    void ResetUsageTracking() noexcept {
        tracker.Reset();
    }
49
37 operator VkBuffer() const noexcept { 50 operator VkBuffer() const noexcept {
38 return *buffer; 51 return *buffer;
39 } 52 }
@@ -49,6 +62,7 @@ private:
49 const Device* device{}; 62 const Device* device{};
50 vk::Buffer buffer; 63 vk::Buffer buffer;
51 std::vector<BufferView> views; 64 std::vector<BufferView> views;
65 VideoCommon::UsageTracker tracker;
52}; 66};
53 67
54class QuadArrayIndexBuffer; 68class QuadArrayIndexBuffer;
@@ -67,6 +81,8 @@ public:
67 ComputePassDescriptorQueue& compute_pass_descriptor_queue, 81 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
68 DescriptorPool& descriptor_pool); 82 DescriptorPool& descriptor_pool);
69 83
84 void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
85
70 void Finish(); 86 void Finish();
71 87
72 u64 GetDeviceLocalMemory() const; 88 u64 GetDeviceLocalMemory() const;
@@ -81,12 +97,15 @@ public:
81 97
82 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); 98 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
83 99
100 bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies);
101
84 void FreeDeferredStagingBuffer(StagingBufferRef& ref); 102 void FreeDeferredStagingBuffer(StagingBufferRef& ref);
85 103
86 void PreCopyBarrier(); 104 void PreCopyBarrier();
87 105
88 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, 106 void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
89 std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); 107 std::span<const VideoCommon::BufferCopy> copies, bool barrier,
108 bool can_reorder_upload = false);
90 109
91 void PostCopyBarrier(); 110 void PostCopyBarrier();
92 111
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index ce8f3f3c2..f7a05fbc0 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
34} 34}
35 35
36VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, 36VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
37 VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { 37 VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) {
38 38
39 UpdateDescriptorSet(image_index, image_view); 39 UpdateDescriptorSet(image_index, image_view);
40 40
@@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag
61 61
62 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); 62 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
63 63
64 const f32 input_image_width = static_cast<f32>(input_image_extent.width);
65 const f32 input_image_height = static_cast<f32>(input_image_extent.height);
66 const f32 output_image_width = static_cast<f32>(output_size.width);
67 const f32 output_image_height = static_cast<f32>(output_size.height);
68 const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width;
69 const f32 viewport_x = crop_rect.left * input_image_width;
70 const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height;
71 const f32 viewport_y = crop_rect.top * input_image_height;
72
64 std::array<u32, 4 * 4> push_constants; 73 std::array<u32, 4 * 4> push_constants;
65 FsrEasuConOffset( 74 FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4,
66 push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, 75 push_constants.data() + 8, push_constants.data() + 12,
67 push_constants.data() + 12, 76
68 77 viewport_width, viewport_height, input_image_width, input_image_height,
69 static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), 78 output_image_width, output_image_height, viewport_x, viewport_y);
70 static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
71 static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
72 static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
73 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); 79 cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
74 80
75 { 81 {
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 8bb9fc23a..3505c1416 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -17,7 +17,7 @@ public:
17 explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, 17 explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
18 VkExtent2D output_size); 18 VkExtent2D output_size);
19 VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, 19 VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view,
20 VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); 20 VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect);
21 21
22private: 22private:
23 void CreateDescriptorPool(); 23 void CreateDescriptorPool();
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 6b288b994..ac8b6e838 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) {
100 Refresh(); 100 Refresh();
101} 101}
102 102
103VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 103VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
104 VkSemaphore wait_semaphore, u64 host_tick) { 104 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
105 u64 host_tick) {
105 if (semaphore) { 106 if (semaphore) {
106 return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); 107 return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore,
108 host_tick);
107 } else { 109 } else {
108 return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); 110 return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick);
109 } 111 }
110} 112}
111 113
@@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
115}; 117};
116 118
117VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, 119VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
120 vk::CommandBuffer& upload_cmdbuf,
118 VkSemaphore signal_semaphore, 121 VkSemaphore signal_semaphore,
119 VkSemaphore wait_semaphore, u64 host_tick) { 122 VkSemaphore wait_semaphore, u64 host_tick) {
120 const VkSemaphore timeline_semaphore = *semaphore; 123 const VkSemaphore timeline_semaphore = *semaphore;
@@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
123 const std::array signal_values{host_tick, u64(0)}; 126 const std::array signal_values{host_tick, u64(0)};
124 const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; 127 const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
125 128
129 const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
130
126 const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; 131 const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
127 const VkTimelineSemaphoreSubmitInfo timeline_si{ 132 const VkTimelineSemaphoreSubmitInfo timeline_si{
128 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, 133 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
138 .waitSemaphoreCount = num_wait_semaphores, 143 .waitSemaphoreCount = num_wait_semaphores,
139 .pWaitSemaphores = &wait_semaphore, 144 .pWaitSemaphores = &wait_semaphore,
140 .pWaitDstStageMask = wait_stage_masks.data(), 145 .pWaitDstStageMask = wait_stage_masks.data(),
141 .commandBufferCount = 1, 146 .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
142 .pCommandBuffers = cmdbuf.address(), 147 .pCommandBuffers = cmdbuffers.data(),
143 .signalSemaphoreCount = num_signal_semaphores, 148 .signalSemaphoreCount = num_signal_semaphores,
144 .pSignalSemaphores = signal_semaphores.data(), 149 .pSignalSemaphores = signal_semaphores.data(),
145 }; 150 };
@@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf,
147 return device.GetGraphicsQueue().Submit(submit_info); 152 return device.GetGraphicsQueue().Submit(submit_info);
148} 153}
149 154
150VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 155VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf,
151 VkSemaphore wait_semaphore, u64 host_tick) { 156 vk::CommandBuffer& upload_cmdbuf,
157 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
158 u64 host_tick) {
152 const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; 159 const u32 num_signal_semaphores = signal_semaphore ? 1 : 0;
153 const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; 160 const u32 num_wait_semaphores = wait_semaphore ? 1 : 0;
154 161
162 const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf};
163
155 const VkSubmitInfo submit_info{ 164 const VkSubmitInfo submit_info{
156 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 165 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
157 .pNext = nullptr, 166 .pNext = nullptr,
158 .waitSemaphoreCount = num_wait_semaphores, 167 .waitSemaphoreCount = num_wait_semaphores,
159 .pWaitSemaphores = &wait_semaphore, 168 .pWaitSemaphores = &wait_semaphore,
160 .pWaitDstStageMask = wait_stage_masks.data(), 169 .pWaitDstStageMask = wait_stage_masks.data(),
161 .commandBufferCount = 1, 170 .commandBufferCount = static_cast<u32>(cmdbuffers.size()),
162 .pCommandBuffers = cmdbuf.address(), 171 .pCommandBuffers = cmdbuffers.data(),
163 .signalSemaphoreCount = num_signal_semaphores, 172 .signalSemaphoreCount = num_signal_semaphores,
164 .pSignalSemaphores = &signal_semaphore, 173 .pSignalSemaphores = &signal_semaphore,
165 }; 174 };
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 3f599d7bd..7dfb93ffb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -52,14 +52,16 @@ public:
52 void Wait(u64 tick); 52 void Wait(u64 tick);
53 53
54 /// Submits the device graphics queue, updating the tick as necessary 54 /// Submits the device graphics queue, updating the tick as necessary
55 VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 55 VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
56 VkSemaphore wait_semaphore, u64 host_tick); 56 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick);
57 57
58private: 58private:
59 VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 59 VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
60 VkSemaphore wait_semaphore, u64 host_tick); 60 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
61 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 61 u64 host_tick);
62 VkSemaphore wait_semaphore, u64 host_tick); 62 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf,
63 VkSemaphore signal_semaphore, VkSemaphore wait_semaphore,
64 u64 host_tick);
63 65
64 void WaitThread(std::stop_token token); 66 void WaitThread(std::stop_token token);
65 67
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 0d604eee3..2a13b2a72 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
263 info.y_negate = key.state.y_negate != 0; 263 info.y_negate = key.state.y_negate != 0;
264 return info; 264 return info;
265} 265}
266
size_t GetTotalPipelineWorkers() {
    // Reserve one thread for the rest of the emulator; hardware_concurrency()
    // may report 0, so clamp to at least 2 before subtracting to guarantee a
    // minimum of one worker.
    const size_t core_count = static_cast<size_t>(std::thread::hardware_concurrency());
    const size_t worker_threads = std::max<size_t>(core_count, 2ULL) - 1ULL;
#ifdef ANDROID
    // Leave at least a few cores free in android
    constexpr size_t free_cores = 3ULL;
    return worker_threads <= free_cores ? 1ULL : worker_threads - free_cores;
#else
    return worker_threads;
#endif
}
281
266} // Anonymous namespace 282} // Anonymous namespace
267 283
268size_t ComputePipelineCacheKey::Hash() const noexcept { 284size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
294 texture_cache{texture_cache_}, shader_notify{shader_notify_}, 310 texture_cache{texture_cache_}, shader_notify{shader_notify_},
295 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, 311 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
296 use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, 312 use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
297#ifdef ANDROID 313 workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(),
298 workers(1, "VkPipelineBuilder"), 314 "VkPipelineBuilder"),
299#else
300 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
301#endif
302 serialization_thread(1, "VkPipelineSerialization") { 315 serialization_thread(1, "VkPipelineSerialization") {
303 const auto& float_control{device.FloatControlProperties()}; 316 const auto& float_control{device.FloatControlProperties()};
304 const VkDriverId driver_id{device.GetDriverID()}; 317 const VkDriverId driver_id{device.GetDriverID()};
@@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
338 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), 351 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
339 .support_native_ndc = device.IsExtDepthClipControlSupported(), 352 .support_native_ndc = device.IsExtDepthClipControlSupported(),
340 .support_scaled_attributes = !device.MustEmulateScaledFormats(), 353 .support_scaled_attributes = !device.MustEmulateScaledFormats(),
354 .support_multi_viewport = device.SupportsMultiViewport(),
341 355
342 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), 356 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
343 357
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 66c03bf17..078777cdd 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -211,6 +211,13 @@ public:
211 return; 211 return;
212 } 212 }
213 PauseCounter(); 213 PauseCounter();
214 const auto driver_id = device.GetDriverID();
215 if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
216 driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
217 pending_sync.clear();
218 sync_values_stash.clear();
219 return;
220 }
214 sync_values_stash.clear(); 221 sync_values_stash.clear();
215 sync_values_stash.emplace_back(); 222 sync_values_stash.emplace_back();
216 std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); 223 std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
@@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
1378 return true; 1385 return true;
1379 } 1386 }
1380 1387
1388 auto driver_id = impl->device.GetDriverID();
1389 if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY ||
1390 driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) {
1391 return true;
1392 }
1393
1381 for (size_t i = 0; i < 2; i++) { 1394 for (size_t i = 0; i < 2; i++) {
1382 is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); 1395 is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
1383 } 1396 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 059b7cb40..e0ab1eaac 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in
82 } 82 }
83 83
84 if (y_negate) { 84 if (y_negate) {
85 y += height; 85 y += conv(static_cast<f32>(regs.surface_clip.height));
86 height = -height; 86 height = -height;
87 } 87 }
88 88
@@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
199 if (!pipeline) { 199 if (!pipeline) {
200 return; 200 return;
201 } 201 }
202 std::scoped_lock lock{LockCaches()}; 202 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
203 // update engine as channel may be different. 203 // update engine as channel may be different.
204 pipeline->SetEngine(maxwell3d, gpu_memory); 204 pipeline->SetEngine(maxwell3d, gpu_memory);
205 pipeline->Configure(is_indexed); 205 pipeline->Configure(is_indexed);
@@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
621 } 621 }
622 { 622 {
623 std::scoped_lock lock{buffer_cache.mutex}; 623 std::scoped_lock lock{buffer_cache.mutex};
624 buffer_cache.CachedWriteMemory(addr, size); 624 buffer_cache.WriteMemory(addr, size);
625 } 625 }
626 pipeline_cache.InvalidateRegion(addr, size); 626 pipeline_cache.InvalidateRegion(addr, size);
627} 627}
@@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() {
710} 710}
711 711
712void RasterizerVulkan::FlushCommands() { 712void RasterizerVulkan::FlushCommands() {
713 std::scoped_lock lock{LockCaches()};
714 if (draw_counter == 0) { 713 if (draw_counter == 0) {
715 return; 714 return;
716 } 715 }
@@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() {
808 if ((++draw_counter & 7) != 7) { 807 if ((++draw_counter & 7) != 7) {
809 return; 808 return;
810 } 809 }
811 std::scoped_lock lock{LockCaches()};
812 if (draw_counter < DRAWS_TO_DISPATCH) { 810 if (draw_counter < DRAWS_TO_DISPATCH) {
813 // Send recorded tasks to the worker thread 811 // Send recorded tasks to the worker thread
814 scheduler.DispatchWork(); 812 scheduler.DispatchWork();
@@ -923,9 +921,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
923} 921}
924 922
925void RasterizerVulkan::HandleTransformFeedback() { 923void RasterizerVulkan::HandleTransformFeedback() {
924 static std::once_flag warn_unsupported;
925
926 const auto& regs = maxwell3d->regs; 926 const auto& regs = maxwell3d->regs;
927 if (!device.IsExtTransformFeedbackSupported()) { 927 if (!device.IsExtTransformFeedbackSupported()) {
928 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 928 std::call_once(warn_unsupported, [&] {
929 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
930 });
929 return; 931 return;
930 } 932 }
931 query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, 933 query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
@@ -1503,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
1503void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { 1505void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
1504 CreateChannel(channel); 1506 CreateChannel(channel);
1505 { 1507 {
1506 std::scoped_lock lock{LockCaches()}; 1508 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
1507 texture_cache.CreateChannel(channel); 1509 texture_cache.CreateChannel(channel);
1508 buffer_cache.CreateChannel(channel); 1510 buffer_cache.CreateChannel(channel);
1509 } 1511 }
@@ -1516,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
1516 const s32 channel_id = channel.bind_id; 1518 const s32 channel_id = channel.bind_id;
1517 BindToChannel(channel_id); 1519 BindToChannel(channel_id);
1518 { 1520 {
1519 std::scoped_lock lock{LockCaches()}; 1521 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
1520 texture_cache.BindToChannel(channel_id); 1522 texture_cache.BindToChannel(channel_id);
1521 buffer_cache.BindToChannel(channel_id); 1523 buffer_cache.BindToChannel(channel_id);
1522 } 1524 }
@@ -1529,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
1529void RasterizerVulkan::ReleaseChannel(s32 channel_id) { 1531void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
1530 EraseChannel(channel_id); 1532 EraseChannel(channel_id);
1531 { 1533 {
1532 std::scoped_lock lock{LockCaches()}; 1534 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
1533 texture_cache.EraseChannel(channel_id); 1535 texture_cache.EraseChannel(channel_id);
1534 buffer_cache.EraseChannel(channel_id); 1536 buffer_cache.EraseChannel(channel_id);
1535 } 1537 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ce3dfbaab..ad069556c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -133,10 +133,6 @@ public:
133 133
134 void ReleaseChannel(s32 channel_id) override; 134 void ReleaseChannel(s32 channel_id) override;
135 135
136 std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() {
137 return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex};
138 }
139
140private: 136private:
141 static constexpr size_t MAX_TEXTURES = 192; 137 static constexpr size_t MAX_TEXTURES = 192;
142 static constexpr size_t MAX_IMAGES = 48; 138 static constexpr size_t MAX_IMAGES = 48;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 3be7837f4..146923db4 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -22,11 +22,12 @@ namespace Vulkan {
22 22
23MICROPROFILE_DECLARE(Vulkan_WaitForWorker); 23MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
24 24
25void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { 25void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
26 vk::CommandBuffer upload_cmdbuf) {
26 auto command = first; 27 auto command = first;
27 while (command != nullptr) { 28 while (command != nullptr) {
28 auto next = command->GetNext(); 29 auto next = command->GetNext();
29 command->Execute(cmdbuf); 30 command->Execute(cmdbuf, upload_cmdbuf);
30 command->~Command(); 31 command->~Command();
31 command = next; 32 command = next;
32 } 33 }
@@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) {
180 // Perform the work, tracking whether the chunk was a submission 181 // Perform the work, tracking whether the chunk was a submission
181 // before executing. 182 // before executing.
182 const bool has_submit = work->HasSubmit(); 183 const bool has_submit = work->HasSubmit();
183 work->ExecuteAll(current_cmdbuf); 184 work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf);
184 185
185 // If the chunk was a submission, reallocate the command buffer. 186 // If the chunk was a submission, reallocate the command buffer.
186 if (has_submit) { 187 if (has_submit) {
@@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() {
205 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 206 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
206 .pInheritanceInfo = nullptr, 207 .pInheritanceInfo = nullptr,
207 }); 208 });
209 current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
210 current_upload_cmdbuf.Begin({
211 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
212 .pNext = nullptr,
213 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
214 .pInheritanceInfo = nullptr,
215 });
208} 216}
209 217
210u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 218u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
212 InvalidateState(); 220 InvalidateState();
213 221
214 const u64 signal_value = master_semaphore->NextTick(); 222 const u64 signal_value = master_semaphore->NextTick();
215 Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { 223 RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
224 this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
225 static constexpr VkMemoryBarrier WRITE_BARRIER{
226 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
227 .pNext = nullptr,
228 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
229 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
230 };
231 upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
232 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
233 upload_cmdbuf.End();
216 cmdbuf.End(); 234 cmdbuf.End();
217 235
218 if (on_submit) { 236 if (on_submit) {
@@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
221 239
222 std::scoped_lock lock{submit_mutex}; 240 std::scoped_lock lock{submit_mutex};
223 switch (const VkResult result = master_semaphore->SubmitQueue( 241 switch (const VkResult result = master_semaphore->SubmitQueue(
224 cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { 242 cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
225 case VK_SUCCESS: 243 case VK_SUCCESS:
226 break; 244 break;
227 case VK_ERROR_DEVICE_LOST: 245 case VK_ERROR_DEVICE_LOST:
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index da03803aa..f8d8ca80a 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -80,7 +80,8 @@ public:
80 80
81 /// Send work to a separate thread. 81 /// Send work to a separate thread.
82 template <typename T> 82 template <typename T>
83 void Record(T&& command) { 83 requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer>
84 void RecordWithUploadBuffer(T&& command) {
84 if (chunk->Record(command)) { 85 if (chunk->Record(command)) {
85 return; 86 return;
86 } 87 }
@@ -88,6 +89,15 @@ public:
88 (void)chunk->Record(command); 89 (void)chunk->Record(command);
89 } 90 }
90 91
92 template <typename T>
93 requires std::is_invocable_v<T, vk::CommandBuffer>
94 void Record(T&& c) {
95 this->RecordWithUploadBuffer(
96 [command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) {
97 command(cmdbuf);
98 });
99 }
100
91 /// Returns the current command buffer tick. 101 /// Returns the current command buffer tick.
92 [[nodiscard]] u64 CurrentTick() const noexcept { 102 [[nodiscard]] u64 CurrentTick() const noexcept {
93 return master_semaphore->CurrentTick(); 103 return master_semaphore->CurrentTick();
@@ -119,7 +129,7 @@ private:
119 public: 129 public:
120 virtual ~Command() = default; 130 virtual ~Command() = default;
121 131
122 virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; 132 virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
123 133
124 Command* GetNext() const { 134 Command* GetNext() const {
125 return next; 135 return next;
@@ -142,8 +152,8 @@ private:
142 TypedCommand(TypedCommand&&) = delete; 152 TypedCommand(TypedCommand&&) = delete;
143 TypedCommand& operator=(TypedCommand&&) = delete; 153 TypedCommand& operator=(TypedCommand&&) = delete;
144 154
145 void Execute(vk::CommandBuffer cmdbuf) const override { 155 void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
146 command(cmdbuf); 156 command(cmdbuf, upload_cmdbuf);
147 } 157 }
148 158
149 private: 159 private:
@@ -152,7 +162,7 @@ private:
152 162
153 class CommandChunk final { 163 class CommandChunk final {
154 public: 164 public:
155 void ExecuteAll(vk::CommandBuffer cmdbuf); 165 void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf);
156 166
157 template <typename T> 167 template <typename T>
158 bool Record(T& command) { 168 bool Record(T& command) {
@@ -228,6 +238,7 @@ private:
228 VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; 238 VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
229 239
230 vk::CommandBuffer current_cmdbuf; 240 vk::CommandBuffer current_cmdbuf;
241 vk::CommandBuffer current_upload_cmdbuf;
231 242
232 std::unique_ptr<CommandChunk> chunk; 243 std::unique_ptr<CommandChunk> chunk;
233 std::function<void()> on_submit; 244 std::function<void()> on_submit;
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index 5efd7d66e..70644ea82 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) {
672 UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, 672 UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent,
673 VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); 673 VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes));
674 674
675 scheduler.Record([&](vk::CommandBuffer& cmdbuf) { 675 scheduler.Record([&](vk::CommandBuffer cmdbuf) {
676 for (auto& images : m_dynamic_images) { 676 for (auto& images : m_dynamic_images) {
677 for (size_t i = 0; i < MaxDynamicImage; i++) { 677 for (size_t i = 0; i < MaxDynamicImage; i++) {
678 ClearColorImage(cmdbuf, *images.images[i]); 678 ClearColorImage(cmdbuf, *images.images[i]);
@@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_
707 UpdateDescriptorSets(source_image_view, image_index); 707 UpdateDescriptorSets(source_image_view, image_index);
708 708
709 scheduler.RequestOutsideRenderPassOperationContext(); 709 scheduler.RequestOutsideRenderPassOperationContext();
710 scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { 710 scheduler.Record([=, this](vk::CommandBuffer cmdbuf) {
711 TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); 711 TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL);
712 TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); 712 TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL);
713 BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, 713 BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d3deb9072..f63a20327 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -36,6 +36,10 @@ public:
36 StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); 36 StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false);
37 void FreeDeferred(StagingBufferRef& ref); 37 void FreeDeferred(StagingBufferRef& ref);
38 38
39 [[nodiscard]] VkBuffer StreamBuf() const noexcept {
40 return *stream_buffer;
41 }
42
39 void TickFrame(); 43 void TickFrame();
40 44
41private: 45private:
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index de34f6d49..5dbec2e62 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1785 : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, 1785 : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
1786 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} 1786 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1787 1787
1788ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) 1788ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params)
1789 : VideoCommon::ImageViewBase{params} {} 1789 : VideoCommon::ImageViewBase{params}, device{&runtime.device} {
1790 if (device->HasNullDescriptor()) {
1791 return;
1792 }
1793
1794 // Handle fallback for devices without nullDescriptor
1795 ImageInfo info{};
1796 info.format = PixelFormat::A8B8G8R8_UNORM;
1797
1798 null_image = MakeImage(*device, runtime.memory_allocator, info, {});
1799 image_handle = *null_image;
1800 for (u32 i = 0; i < Shader::NUM_TEXTURE_TYPES; i++) {
1801 image_views[i] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT);
1802 }
1803}
1790 1804
1791ImageView::~ImageView() = default; 1805ImageView::~ImageView() = default;
1792 1806
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 7a0807709..edf5d7635 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -267,6 +267,7 @@ private:
267 vk::ImageView depth_view; 267 vk::ImageView depth_view;
268 vk::ImageView stencil_view; 268 vk::ImageView stencil_view;
269 vk::ImageView color_view; 269 vk::ImageView color_view;
270 vk::Image null_image;
270 VkImage image_handle = VK_NULL_HANDLE; 271 VkImage image_handle = VK_NULL_HANDLE;
271 VkImageView render_target = VK_NULL_HANDLE; 272 VkImageView render_target = VK_NULL_HANDLE;
272 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; 273 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index 9df6a2903..3ffa2a661 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -138,6 +138,10 @@ public:
138 return Iterator(this, SlotId{SlotId::INVALID_INDEX}); 138 return Iterator(this, SlotId{SlotId::INVALID_INDEX});
139 } 139 }
140 140
141 [[nodiscard]] size_t size() const noexcept {
142 return values_capacity - free_list.size();
143 }
144
141private: 145private:
142 struct NonTrivialDummy { 146 struct NonTrivialDummy {
143 NonTrivialDummy() noexcept {} 147 NonTrivialDummy() noexcept {}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index e518756d2..fde36a49c 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -635,6 +635,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
635 has_broken_cube_compatibility = true; 635 has_broken_cube_compatibility = true;
636 } 636 }
637 } 637 }
638 if (is_qualcomm) {
639 const u32 version = (properties.properties.driverVersion << 3) >> 3;
640 if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) {
641 has_broken_parallel_compiling = true;
642 }
643 }
638 if (extensions.sampler_filter_minmax && is_amd) { 644 if (extensions.sampler_filter_minmax && is_amd) {
639 // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. 645 // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
640 if (!features.shader_float16_int8.shaderFloat16) { 646 if (!features.shader_float16_int8.shaderFloat16) {
@@ -863,7 +869,8 @@ bool Device::ShouldBoostClocks() const {
863 driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || 869 driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA ||
864 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; 870 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP;
865 871
866 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; 872 const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) ||
873 (vendor_id == 0x1002 && device_id == 0x1435);
867 874
868 const bool is_debugging = this->HasDebuggingToolAttached(); 875 const bool is_debugging = this->HasDebuggingToolAttached();
869 876
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index b213ed7dd..4f3846345 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -102,6 +102,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
102 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ 102 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
103 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ 103 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \
104 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ 104 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \
105 EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \
105 EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ 106 EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \
106 EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ 107 EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \
107 EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ 108 EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \
@@ -599,6 +600,11 @@ public:
599 return has_broken_cube_compatibility; 600 return has_broken_cube_compatibility;
600 } 601 }
601 602
603 /// Returns true if parallel shader compiling has issues with the current driver.
604 bool HasBrokenParallelShaderCompiling() const {
605 return has_broken_parallel_compiling;
606 }
607
602 /// Returns the vendor name reported from Vulkan. 608 /// Returns the vendor name reported from Vulkan.
603 std::string_view GetVendorName() const { 609 std::string_view GetVendorName() const {
604 return properties.driver.driverName; 610 return properties.driver.driverName;
@@ -663,6 +669,10 @@ public:
663 return supports_conditional_barriers; 669 return supports_conditional_barriers;
664 } 670 }
665 671
672 bool SupportsMultiViewport() const {
673 return features2.features.multiViewport;
674 }
675
666 [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, 676 [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
667 u32 driver_version) { 677 u32 driver_version) {
668 if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { 678 if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
@@ -794,6 +804,7 @@ private:
794 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. 804 bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device.
795 bool has_broken_compute{}; ///< Compute shaders can cause crashes 805 bool has_broken_compute{}; ///< Compute shaders can cause crashes
796 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit 806 bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
807 bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling.
797 bool has_renderdoc{}; ///< Has RenderDoc attached 808 bool has_renderdoc{}; ///< Has RenderDoc attached
798 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 809 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
799 bool supports_d24_depth{}; ///< Supports D24 depth buffers. 810 bool supports_d24_depth{}; ///< Supports D24 depth buffers.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 0487cd3b6..a0c70797f 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -1101,6 +1101,10 @@ public:
1101 return &handle; 1101 return &handle;
1102 } 1102 }
1103 1103
1104 VkCommandBuffer operator*() const noexcept {
1105 return handle;
1106 }
1107
1104 void Begin(const VkCommandBufferBeginInfo& begin_info) const { 1108 void Begin(const VkCommandBufferBeginInfo& begin_info) const {
1105 Check(dld->vkBeginCommandBuffer(handle, &begin_info)); 1109 Check(dld->vkBeginCommandBuffer(handle, &begin_info));
1106 } 1110 }