| field | value |
|---|---|
| author | 2023-11-26 21:08:53 -0500 |
| committer | 2023-11-26 21:08:53 -0500 |
| commit | 1d11fe00a3000efbf6a0a4bb690e0d544a1b7b4a (patch) |
| tree | c219aacab776c0a1e3956614b60a01fa2f6164cb /src/video_core |
| parent | shader_recompiler: Align SSBO offsets in GlobalMemory functions (diff) |
| parent | Merge pull request #11535 from GPUCode/upload_cmdbuf (diff) |
Merge branch 'master' into ssbo-align
Diffstat (limited to 'src/video_core')
46 files changed, 1168 insertions, 596 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cf9266d54..c22c7631c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | add_subdirectory(host_shaders) | 4 | add_subdirectory(host_shaders) |
| 5 | 5 | ||
| 6 | if(LIBVA_FOUND) | 6 | if(LIBVA_FOUND) |
| 7 | set_source_files_properties(host1x/codecs/codec.cpp | 7 | set_source_files_properties(host1x/ffmpeg/ffmpeg.cpp |
| 8 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) | 8 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) |
| 9 | list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) | 9 | list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) |
| 10 | endif() | 10 | endif() |
| @@ -15,6 +15,7 @@ add_library(video_core STATIC | |||
| 15 | buffer_cache/buffer_cache.cpp | 15 | buffer_cache/buffer_cache.cpp |
| 16 | buffer_cache/buffer_cache.h | 16 | buffer_cache/buffer_cache.h |
| 17 | buffer_cache/memory_tracker_base.h | 17 | buffer_cache/memory_tracker_base.h |
| 18 | buffer_cache/usage_tracker.h | ||
| 18 | buffer_cache/word_manager.h | 19 | buffer_cache/word_manager.h |
| 19 | cache_types.h | 20 | cache_types.h |
| 20 | cdma_pusher.cpp | 21 | cdma_pusher.cpp |
| @@ -66,6 +67,8 @@ add_library(video_core STATIC | |||
| 66 | host1x/codecs/vp9.cpp | 67 | host1x/codecs/vp9.cpp |
| 67 | host1x/codecs/vp9.h | 68 | host1x/codecs/vp9.h |
| 68 | host1x/codecs/vp9_types.h | 69 | host1x/codecs/vp9_types.h |
| 70 | host1x/ffmpeg/ffmpeg.cpp | ||
| 71 | host1x/ffmpeg/ffmpeg.h | ||
| 69 | host1x/control.cpp | 72 | host1x/control.cpp |
| 70 | host1x/control.h | 73 | host1x/control.h |
| 71 | host1x/host1x.cpp | 74 | host1x/host1x.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2648970b6..6d1fc3887 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -67,6 +67,7 @@ void BufferCache<P>::TickFrame() { | |||
| 67 | if (!channel_state) { | 67 | if (!channel_state) { |
| 68 | return; | 68 | return; |
| 69 | } | 69 | } |
| 70 | runtime.TickFrame(slot_buffers); | ||
| 70 | 71 | ||
| 71 | // Calculate hits and shots and move hit bits to the right | 72 | // Calculate hits and shots and move hit bits to the right |
| 72 | const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), | 73 | const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), |
| @@ -230,7 +231,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 230 | for (const IntervalType& add_interval : tmp_intervals) { | 231 | for (const IntervalType& add_interval : tmp_intervals) { |
| 231 | common_ranges.add(add_interval); | 232 | common_ranges.add(add_interval); |
| 232 | } | 233 | } |
| 233 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 234 | const auto& copy = copies[0]; |
| 235 | src_buffer.MarkUsage(copy.src_offset, copy.size); | ||
| 236 | dest_buffer.MarkUsage(copy.dst_offset, copy.size); | ||
| 237 | runtime.CopyBuffer(dest_buffer, src_buffer, copies, true); | ||
| 234 | if (has_new_downloads) { | 238 | if (has_new_downloads) { |
| 235 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 239 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 236 | } | 240 | } |
| @@ -258,9 +262,10 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | |||
| 258 | common_ranges.subtract(subtract_interval); | 262 | common_ranges.subtract(subtract_interval); |
| 259 | 263 | ||
| 260 | const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); | 264 | const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); |
| 261 | auto& dest_buffer = slot_buffers[buffer]; | 265 | Buffer& dest_buffer = slot_buffers[buffer]; |
| 262 | const u32 offset = dest_buffer.Offset(*cpu_dst_address); | 266 | const u32 offset = dest_buffer.Offset(*cpu_dst_address); |
| 263 | runtime.ClearBuffer(dest_buffer, offset, size, value); | 267 | runtime.ClearBuffer(dest_buffer, offset, size, value); |
| 268 | dest_buffer.MarkUsage(offset, size); | ||
| 264 | return true; | 269 | return true; |
| 265 | } | 270 | } |
| 266 | 271 | ||
| @@ -603,6 +608,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 603 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); | 608 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); |
| 604 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; | 609 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; |
| 605 | async_downloads += std::make_pair(base_interval, 1); | 610 | async_downloads += std::make_pair(base_interval, 1); |
| 611 | buffer.MarkUsage(copy.src_offset, copy.size); | ||
| 606 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); | 612 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| 607 | normalized_copies.push_back(second_copy); | 613 | normalized_copies.push_back(second_copy); |
| 608 | } | 614 | } |
| @@ -621,8 +627,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 621 | // Have in mind the staging buffer offset for the copy | 627 | // Have in mind the staging buffer offset for the copy |
| 622 | copy.dst_offset += download_staging.offset; | 628 | copy.dst_offset += download_staging.offset; |
| 623 | const std::array copies{copy}; | 629 | const std::array copies{copy}; |
| 624 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, | 630 | Buffer& buffer = slot_buffers[buffer_id]; |
| 625 | false); | 631 | buffer.MarkUsage(copy.src_offset, copy.size); |
| 632 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); | ||
| 626 | } | 633 | } |
| 627 | runtime.PostCopyBarrier(); | 634 | runtime.PostCopyBarrier(); |
| 628 | runtime.Finish(); | 635 | runtime.Finish(); |
| @@ -742,7 +749,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 742 | {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; | 749 | {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; |
| 743 | std::memcpy(upload_staging.mapped_span.data(), | 750 | std::memcpy(upload_staging.mapped_span.data(), |
| 744 | draw_state.inline_index_draw_indexes.data(), size); | 751 | draw_state.inline_index_draw_indexes.data(), size); |
| 745 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | 752 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true); |
| 746 | } else { | 753 | } else { |
| 747 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); | 754 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); |
| 748 | } | 755 | } |
| @@ -754,6 +761,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 754 | offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); | 761 | offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); |
| 755 | runtime.BindIndexBuffer(buffer, new_offset, size); | 762 | runtime.BindIndexBuffer(buffer, new_offset, size); |
| 756 | } else { | 763 | } else { |
| 764 | buffer.MarkUsage(offset, size); | ||
| 757 | runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, | 765 | runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, |
| 758 | draw_state.index_buffer.first, draw_state.index_buffer.count, | 766 | draw_state.index_buffer.first, draw_state.index_buffer.count, |
| 759 | buffer, offset, size); | 767 | buffer, offset, size); |
| @@ -790,6 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 790 | 798 | ||
| 791 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; | 799 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; |
| 792 | const u32 offset = buffer.Offset(binding.cpu_addr); | 800 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 801 | buffer.MarkUsage(offset, binding.size); | ||
| 793 | 802 | ||
| 794 | host_bindings.buffers.push_back(&buffer); | 803 | host_bindings.buffers.push_back(&buffer); |
| 795 | host_bindings.offsets.push_back(offset); | 804 | host_bindings.offsets.push_back(offset); |
| @@ -895,6 +904,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 895 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 904 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 896 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; | 905 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; |
| 897 | } | 906 | } |
| 907 | buffer.MarkUsage(offset, size); | ||
| 898 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 908 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 899 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); | 909 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); |
| 900 | } else { | 910 | } else { |
| @@ -913,6 +923,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 913 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 923 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 914 | 924 | ||
| 915 | const u32 offset = buffer.Offset(binding.cpu_addr); | 925 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 926 | buffer.MarkUsage(offset, size); | ||
| 916 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; | 927 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; |
| 917 | 928 | ||
| 918 | if (is_written) { | 929 | if (is_written) { |
| @@ -943,6 +954,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | |||
| 943 | 954 | ||
| 944 | const u32 offset = buffer.Offset(binding.cpu_addr); | 955 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 945 | const PixelFormat format = binding.format; | 956 | const PixelFormat format = binding.format; |
| 957 | buffer.MarkUsage(offset, size); | ||
| 946 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 958 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| 947 | if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { | 959 | if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { |
| 948 | runtime.BindImageBuffer(buffer, offset, size, format); | 960 | runtime.BindImageBuffer(buffer, offset, size, format); |
| @@ -975,9 +987,10 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 975 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 987 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); |
| 976 | 988 | ||
| 977 | const u32 offset = buffer.Offset(binding.cpu_addr); | 989 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 990 | buffer.MarkUsage(offset, size); | ||
| 978 | host_bindings.buffers.push_back(&buffer); | 991 | host_bindings.buffers.push_back(&buffer); |
| 979 | host_bindings.offsets.push_back(offset); | 992 | host_bindings.offsets.push_back(offset); |
| 980 | host_bindings.sizes.push_back(binding.size); | 993 | host_bindings.sizes.push_back(size); |
| 981 | } | 994 | } |
| 982 | if (host_bindings.buffers.size() > 0) { | 995 | if (host_bindings.buffers.size() > 0) { |
| 983 | runtime.BindTransformFeedbackBuffers(host_bindings); | 996 | runtime.BindTransformFeedbackBuffers(host_bindings); |
| @@ -1001,6 +1014,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1001 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1014 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1002 | 1015 | ||
| 1003 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1016 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 1017 | buffer.MarkUsage(offset, size); | ||
| 1004 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1018 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 1005 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); | 1019 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); |
| 1006 | ++binding_index; | 1020 | ++binding_index; |
| @@ -1021,6 +1035,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1021 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1035 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1022 | 1036 | ||
| 1023 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1037 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 1038 | buffer.MarkUsage(offset, size); | ||
| 1024 | const bool is_written = | 1039 | const bool is_written = |
| 1025 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; | 1040 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; |
| 1026 | 1041 | ||
| @@ -1053,6 +1068,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { | |||
| 1053 | 1068 | ||
| 1054 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1069 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 1055 | const PixelFormat format = binding.format; | 1070 | const PixelFormat format = binding.format; |
| 1071 | buffer.MarkUsage(offset, size); | ||
| 1056 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 1072 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| 1057 | if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { | 1073 | if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { |
| 1058 | runtime.BindImageBuffer(buffer, offset, size, format); | 1074 | runtime.BindImageBuffer(buffer, offset, size, format); |
| @@ -1172,10 +1188,11 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1172 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1188 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1173 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); | 1189 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1174 | } | 1190 | } |
| 1191 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||
| 1175 | channel_state->vertex_buffers[index] = Binding{ | 1192 | channel_state->vertex_buffers[index] = Binding{ |
| 1176 | .cpu_addr = *cpu_addr, | 1193 | .cpu_addr = *cpu_addr, |
| 1177 | .size = size, | 1194 | .size = size, |
| 1178 | .buffer_id = FindBuffer(*cpu_addr, size), | 1195 | .buffer_id = buffer_id, |
| 1179 | }; | 1196 | }; |
| 1180 | } | 1197 | } |
| 1181 | 1198 | ||
| @@ -1192,11 +1209,6 @@ void BufferCache<P>::UpdateDrawIndirect() { | |||
| 1192 | .size = static_cast<u32>(size), | 1209 | .size = static_cast<u32>(size), |
| 1193 | .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), | 1210 | .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), |
| 1194 | }; | 1211 | }; |
| 1195 | VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); | ||
| 1196 | VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); | ||
| 1197 | IntervalType interval{cpu_addr_start, cpu_addr_end}; | ||
| 1198 | ClearDownload(interval); | ||
| 1199 | common_ranges.subtract(interval); | ||
| 1200 | }; | 1212 | }; |
| 1201 | if (current_draw_indirect->include_count) { | 1213 | if (current_draw_indirect->include_count) { |
| 1202 | update(current_draw_indirect->count_start_address, sizeof(u32), | 1214 | update(current_draw_indirect->count_start_address, sizeof(u32), |
| @@ -1406,7 +1418,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1406 | .dst_offset = dst_base_offset, | 1418 | .dst_offset = dst_base_offset, |
| 1407 | .size = overlap.SizeBytes(), | 1419 | .size = overlap.SizeBytes(), |
| 1408 | }); | 1420 | }); |
| 1409 | runtime.CopyBuffer(new_buffer, overlap, copies); | 1421 | new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size); |
| 1422 | runtime.CopyBuffer(new_buffer, overlap, copies, true); | ||
| 1410 | DeleteBuffer(overlap_id, true); | 1423 | DeleteBuffer(overlap_id, true); |
| 1411 | } | 1424 | } |
| 1412 | 1425 | ||
| @@ -1419,7 +1432,9 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1419 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1432 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1420 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1433 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1421 | auto& new_buffer = slot_buffers[new_buffer_id]; | 1434 | auto& new_buffer = slot_buffers[new_buffer_id]; |
| 1422 | runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0); | 1435 | const size_t size_bytes = new_buffer.SizeBytes(); |
| 1436 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); | ||
| 1437 | new_buffer.MarkUsage(0, size_bytes); | ||
| 1423 | for (const BufferId overlap_id : overlap.ids) { | 1438 | for (const BufferId overlap_id : overlap.ids) { |
| 1424 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1439 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1425 | } | 1440 | } |
| @@ -1472,11 +1487,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1472 | 1487 | ||
| 1473 | template <class P> | 1488 | template <class P> |
| 1474 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1489 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1475 | return SynchronizeBufferImpl(buffer, cpu_addr, size); | ||
| 1476 | } | ||
| 1477 | |||
| 1478 | template <class P> | ||
| 1479 | bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1480 | boost::container::small_vector<BufferCopy, 4> copies; | 1490 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1481 | u64 total_size_bytes = 0; | 1491 | u64 total_size_bytes = 0; |
| 1482 | u64 largest_copy = 0; | 1492 | u64 largest_copy = 0; |
| @@ -1499,51 +1509,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1499 | } | 1509 | } |
| 1500 | 1510 | ||
| 1501 | template <class P> | 1511 | template <class P> |
| 1502 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1503 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1504 | u64 total_size_bytes = 0; | ||
| 1505 | u64 largest_copy = 0; | ||
| 1506 | IntervalSet found_sets{}; | ||
| 1507 | auto make_copies = [&] { | ||
| 1508 | for (auto& interval : found_sets) { | ||
| 1509 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1510 | const VAddr cpu_addr_ = interval.lower(); | ||
| 1511 | copies.push_back(BufferCopy{ | ||
| 1512 | .src_offset = total_size_bytes, | ||
| 1513 | .dst_offset = cpu_addr_ - buffer.CpuAddr(), | ||
| 1514 | .size = sub_size, | ||
| 1515 | }); | ||
| 1516 | total_size_bytes += sub_size; | ||
| 1517 | largest_copy = std::max<u64>(largest_copy, sub_size); | ||
| 1518 | } | ||
| 1519 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1520 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1521 | }; | ||
| 1522 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1523 | const VAddr base_adr = cpu_addr_out; | ||
| 1524 | const VAddr end_adr = base_adr + range_size; | ||
| 1525 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1526 | found_sets.add(add_interval); | ||
| 1527 | }); | ||
| 1528 | if (found_sets.empty()) { | ||
| 1529 | return true; | ||
| 1530 | } | ||
| 1531 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1532 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1533 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1534 | if (it == common_ranges.end()) { | ||
| 1535 | make_copies(); | ||
| 1536 | return false; | ||
| 1537 | } | ||
| 1538 | while (it != it_end) { | ||
| 1539 | found_sets.subtract(*it); | ||
| 1540 | it++; | ||
| 1541 | } | ||
| 1542 | make_copies(); | ||
| 1543 | return false; | ||
| 1544 | } | ||
| 1545 | |||
| 1546 | template <class P> | ||
| 1547 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1512 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1548 | std::span<BufferCopy> copies) { | 1513 | std::span<BufferCopy> copies) { |
| 1549 | if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { | 1514 | if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { |
| @@ -1591,7 +1556,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1591 | // Apply the staging offset | 1556 | // Apply the staging offset |
| 1592 | copy.src_offset += upload_staging.offset; | 1557 | copy.src_offset += upload_staging.offset; |
| 1593 | } | 1558 | } |
| 1594 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | 1559 | const bool can_reorder = runtime.CanReorderUpload(buffer, copies); |
| 1560 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); | ||
| 1595 | } | 1561 | } |
| 1596 | } | 1562 | } |
| 1597 | 1563 | ||
| @@ -1633,7 +1599,8 @@ void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_ | |||
| 1633 | }}; | 1599 | }}; |
| 1634 | u8* const src_pointer = upload_staging.mapped_span.data(); | 1600 | u8* const src_pointer = upload_staging.mapped_span.data(); |
| 1635 | std::memcpy(src_pointer, inlined_buffer.data(), copy_size); | 1601 | std::memcpy(src_pointer, inlined_buffer.data(), copy_size); |
| 1636 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | 1602 | const bool can_reorder = runtime.CanReorderUpload(buffer, copies); |
| 1603 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); | ||
| 1637 | } else { | 1604 | } else { |
| 1638 | buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); | 1605 | buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); |
| 1639 | } | 1606 | } |
| @@ -1686,8 +1653,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1686 | for (BufferCopy& copy : copies) { | 1653 | for (BufferCopy& copy : copies) { |
| 1687 | // Modify copies to have the staging offset in mind | 1654 | // Modify copies to have the staging offset in mind |
| 1688 | copy.dst_offset += download_staging.offset; | 1655 | copy.dst_offset += download_staging.offset; |
| 1656 | buffer.MarkUsage(copy.src_offset, copy.size); | ||
| 1689 | } | 1657 | } |
| 1690 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | 1658 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); |
| 1691 | runtime.Finish(); | 1659 | runtime.Finish(); |
| 1692 | for (const BufferCopy& copy : copies) { | 1660 | for (const BufferCopy& copy : copies) { |
| 1693 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1661 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; |
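The buffer_cache.h hunks above share one recurring pattern: before a GPU-side copy, clear, or bind, the touched byte range of the buffer is recorded with MarkUsage(), and staging uploads additionally ask the runtime whether the copy may be reordered via the new CanReorderUpload() query. The sketch below is a composite of those call sites, not a literal function from the tree; the Buffer and runtime types, the BufferCopy stand-in, and the meaning of the new trailing arguments to CopyBuffer are read off the calls above and are defined by the backend runtimes, which this diff does not show.

```cpp
#include <span>

using u64 = unsigned long long;

// Stand-in for VideoCommon::BufferCopy; only the fields used below.
struct BufferCopy {
    u64 src_offset;
    u64 dst_offset;
    u64 size;
};

// Composite sketch of the usage-tracking copy pattern introduced above.
template <class Runtime, class Buffer>
void CopyWithUsageTracking(Runtime& runtime, Buffer& dst, Buffer& src,
                           std::span<const BufferCopy> copies) {
    for (const BufferCopy& copy : copies) {
        src.MarkUsage(copy.src_offset, copy.size); // record the bytes the GPU will touch
        dst.MarkUsage(copy.dst_offset, copy.size);
    }
    // Staging uploads also query whether the copy may be moved into the upload
    // command buffer; presumably only allowed while the destination range has not
    // been used this tick (see usage_tracker.h below).
    const bool can_reorder = runtime.CanReorderUpload(dst, copies);
    // The new trailing `true` matches the call sites above; its meaning belongs to
    // the backend runtime.
    runtime.CopyBuffer(dst, src, copies, true, can_reorder);
}
```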
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index eed267361..d6d696d8c 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -529,10 +529,6 @@ private: | |||
| 529 | 529 | ||
| 530 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 530 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 531 | 531 | ||
| 532 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 533 | |||
| 534 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 535 | |||
| 536 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 532 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 537 | std::span<BufferCopy> copies); | 533 | std::span<BufferCopy> copies); |
| 538 | 534 | ||
diff --git a/src/video_core/buffer_cache/usage_tracker.h b/src/video_core/buffer_cache/usage_tracker.h new file mode 100644 index 000000000..ab05fe415 --- /dev/null +++ b/src/video_core/buffer_cache/usage_tracker.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/alignment.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | class UsageTracker { | ||
| 12 | static constexpr size_t BYTES_PER_BIT_SHIFT = 6; | ||
| 13 | static constexpr size_t PAGE_SHIFT = 6 + BYTES_PER_BIT_SHIFT; | ||
| 14 | static constexpr size_t PAGE_BYTES = 1 << PAGE_SHIFT; | ||
| 15 | |||
| 16 | public: | ||
| 17 | explicit UsageTracker(size_t size) { | ||
| 18 | const size_t num_pages = (size >> PAGE_SHIFT) + 1; | ||
| 19 | pages.resize(num_pages, 0ULL); | ||
| 20 | } | ||
| 21 | |||
| 22 | void Reset() noexcept { | ||
| 23 | std::ranges::fill(pages, 0ULL); | ||
| 24 | } | ||
| 25 | |||
| 26 | void Track(u64 offset, u64 size) noexcept { | ||
| 27 | const size_t page = offset >> PAGE_SHIFT; | ||
| 28 | const size_t page_end = (offset + size) >> PAGE_SHIFT; | ||
| 29 | TrackPage(page, offset, size); | ||
| 30 | if (page == page_end) { | ||
| 31 | return; | ||
| 32 | } | ||
| 33 | for (size_t i = page + 1; i < page_end; i++) { | ||
| 34 | pages[i] = ~u64{0}; | ||
| 35 | } | ||
| 36 | const size_t offset_end = offset + size; | ||
| 37 | const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES); | ||
| 38 | TrackPage(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned); | ||
| 39 | } | ||
| 40 | |||
| 41 | [[nodiscard]] bool IsUsed(u64 offset, u64 size) const noexcept { | ||
| 42 | const size_t page = offset >> PAGE_SHIFT; | ||
| 43 | const size_t page_end = (offset + size) >> PAGE_SHIFT; | ||
| 44 | if (IsPageUsed(page, offset, size)) { | ||
| 45 | return true; | ||
| 46 | } | ||
| 47 | for (size_t i = page + 1; i < page_end; i++) { | ||
| 48 | if (pages[i] != 0) { | ||
| 49 | return true; | ||
| 50 | } | ||
| 51 | } | ||
| 52 | const size_t offset_end = offset + size; | ||
| 53 | const size_t offset_end_page_aligned = Common::AlignDown(offset_end, PAGE_BYTES); | ||
| 54 | return IsPageUsed(page_end, offset_end_page_aligned, offset_end - offset_end_page_aligned); | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | void TrackPage(u64 page, u64 offset, u64 size) noexcept { | ||
| 59 | const size_t offset_in_page = offset % PAGE_BYTES; | ||
| 60 | const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT; | ||
| 61 | const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT; | ||
| 62 | const size_t mask = ~u64{0} >> (64 - num_bits); | ||
| 63 | pages[page] |= (~u64{0} & mask) << first_bit; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool IsPageUsed(u64 page, u64 offset, u64 size) const noexcept { | ||
| 67 | const size_t offset_in_page = offset % PAGE_BYTES; | ||
| 68 | const size_t first_bit = offset_in_page >> BYTES_PER_BIT_SHIFT; | ||
| 69 | const size_t num_bits = std::min(size, PAGE_BYTES) >> BYTES_PER_BIT_SHIFT; | ||
| 70 | const size_t mask = ~u64{0} >> (64 - num_bits); | ||
| 71 | const size_t mask2 = (~u64{0} & mask) << first_bit; | ||
| 72 | return (pages[page] & mask2) != 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | private: | ||
| 76 | std::vector<u64> pages; | ||
| 77 | }; | ||
| 78 | |||
| 79 | } // namespace VideoCommon | ||
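usage_tracker.h is the heart of the change: one bit per 64 bytes and 64 bits per page, so each u64 entry covers 4 KiB of the buffer. Track() records a byte range as used by the GPU, IsUsed() answers whether a later range overlaps anything already tracked, and Reset() clears the bitmap, presumably once per tick via the runtime.TickFrame(slot_buffers) call added above. A standalone sketch of those queries (not part of the commit), assuming the header and its common/ includes are on the include path, with offsets and sizes kept at multiples of 64 bytes to match the tracker's granularity:

```cpp
#include <cassert>

#include "video_core/buffer_cache/usage_tracker.h"

int main() {
    VideoCommon::UsageTracker tracker(1 << 20); // track a 1 MiB buffer

    // Roughly what Buffer::MarkUsage() feeds in when a copy destination is recorded.
    tracker.Track(0x1000, 0x200);

    assert(tracker.IsUsed(0x1000, 0x40));   // overlaps the tracked range
    assert(!tracker.IsUsed(0x8000, 0x100)); // untouched, so an upload here could be reordered

    tracker.Reset(); // presumably issued once per frame via the runtime's TickFrame()
    assert(!tracker.IsUsed(0x1000, 0x40));
    return 0;
}
```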
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 02e161270..91f10aec2 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -72,7 +72,7 @@ void Fermi2D::Blit() { | |||
| 72 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); | 72 | UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); |
| 73 | 73 | ||
| 74 | const auto& args = regs.pixels_from_memory; | 74 | const auto& args = regs.pixels_from_memory; |
| 75 | constexpr s64 null_derivate = 1ULL << 32; | 75 | constexpr s64 null_derivative = 1ULL << 32; |
| 76 | Surface src = regs.src; | 76 | Surface src = regs.src; |
| 77 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | 77 | const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); |
| 78 | const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && | 78 | const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && |
| @@ -89,7 +89,7 @@ void Fermi2D::Blit() { | |||
| 89 | .operation = regs.operation, | 89 | .operation = regs.operation, |
| 90 | .filter = args.sample_mode.filter, | 90 | .filter = args.sample_mode.filter, |
| 91 | .must_accelerate = | 91 | .must_accelerate = |
| 92 | args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, | 92 | args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu, |
| 93 | .dst_x0 = args.dst_x0, | 93 | .dst_x0 = args.dst_x0, |
| 94 | .dst_y0 = args.dst_y0, | 94 | .dst_y0 = args.dst_y0, |
| 95 | .dst_x1 = args.dst_x0 + args.dst_width, | 95 | .dst_x1 = args.dst_x0 + args.dst_width, |
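For context on the rename: du_dx and dv_dy appear to be 32.32 fixed-point source-step values, so 1ULL << 32 encodes exactly 1.0, i.e. a blit with no scaling, and any other derivative forces must_accelerate. A small illustrative check of that encoding (the register format is an assumption, not something this diff states):

```cpp
#include <cstdint>

using s64 = std::int64_t;

// Assumed 32.32 fixed-point encoding of the blit derivatives.
constexpr s64 null_derivative = 1ULL << 32; // 1.0 -> 1:1 blit, no scaling
constexpr s64 two_to_one = 2ULL << 32;      // 2.0 -> source advances two pixels per output pixel

static_assert(null_derivative == 0x1'0000'0000);
static_assert(two_to_one != null_derivative); // would set must_accelerate

int main() {
    return 0;
}
```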
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 32d767d85..592c28ba3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -268,7 +268,7 @@ size_t Maxwell3D::EstimateIndexBufferSize() { | |||
| 268 | std::numeric_limits<u32>::max()}; | 268 | std::numeric_limits<u32>::max()}; |
| 269 | const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); | 269 | const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); |
| 270 | const size_t log2_byte_size = Common::Log2Ceil64(byte_size); | 270 | const size_t log2_byte_size = Common::Log2Ceil64(byte_size); |
| 271 | const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; | 271 | const size_t cap{GetMaxCurrentVertices() * 4 * byte_size}; |
| 272 | const size_t lower_cap = | 272 | const size_t lower_cap = |
| 273 | std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); | 273 | std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); |
| 274 | return std::min<size_t>( | 274 | return std::min<size_t>( |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c0e6471fe..805a89900 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -86,10 +86,7 @@ public: | |||
| 86 | uncommitted_operations.emplace_back(std::move(func)); | 86 | uncommitted_operations.emplace_back(std::move(func)); |
| 87 | } | 87 | } |
| 88 | pending_operations.emplace_back(std::move(uncommitted_operations)); | 88 | pending_operations.emplace_back(std::move(uncommitted_operations)); |
| 89 | { | 89 | QueueFence(new_fence); |
| 90 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 91 | QueueFence(new_fence); | ||
| 92 | } | ||
| 93 | if (!delay_fence) { | 90 | if (!delay_fence) { |
| 94 | func(); | 91 | func(); |
| 95 | } | 92 | } |
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index dbcf508e5..1030db681 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp | |||
| @@ -1,11 +1,7 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | ||
| 5 | #include <fstream> | ||
| 6 | #include <vector> | ||
| 7 | #include "common/assert.h" | 4 | #include "common/assert.h" |
| 8 | #include "common/scope_exit.h" | ||
| 9 | #include "common/settings.h" | 5 | #include "common/settings.h" |
| 10 | #include "video_core/host1x/codecs/codec.h" | 6 | #include "video_core/host1x/codecs/codec.h" |
| 11 | #include "video_core/host1x/codecs/h264.h" | 7 | #include "video_core/host1x/codecs/h264.h" |
| @@ -14,242 +10,17 @@ | |||
| 14 | #include "video_core/host1x/host1x.h" | 10 | #include "video_core/host1x/host1x.h" |
| 15 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 16 | 12 | ||
| 17 | extern "C" { | ||
| 18 | #include <libavfilter/buffersink.h> | ||
| 19 | #include <libavfilter/buffersrc.h> | ||
| 20 | #include <libavutil/opt.h> | ||
| 21 | #ifdef LIBVA_FOUND | ||
| 22 | // for querying VAAPI driver information | ||
| 23 | #include <libavutil/hwcontext_vaapi.h> | ||
| 24 | #endif | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra { | 13 | namespace Tegra { |
| 28 | namespace { | ||
| 29 | constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; | ||
| 30 | constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; | ||
| 31 | constexpr std::array PREFERRED_GPU_DECODERS = { | ||
| 32 | AV_HWDEVICE_TYPE_CUDA, | ||
| 33 | #ifdef _WIN32 | ||
| 34 | AV_HWDEVICE_TYPE_D3D11VA, | ||
| 35 | AV_HWDEVICE_TYPE_DXVA2, | ||
| 36 | #elif defined(__unix__) | ||
| 37 | AV_HWDEVICE_TYPE_VAAPI, | ||
| 38 | AV_HWDEVICE_TYPE_VDPAU, | ||
| 39 | #endif | ||
| 40 | // last resort for Linux Flatpak (w/ NVIDIA) | ||
| 41 | AV_HWDEVICE_TYPE_VULKAN, | ||
| 42 | }; | ||
| 43 | |||
| 44 | void AVPacketDeleter(AVPacket* ptr) { | ||
| 45 | av_packet_free(&ptr); | ||
| 46 | } | ||
| 47 | |||
| 48 | using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; | ||
| 49 | |||
| 50 | AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { | ||
| 51 | for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { | ||
| 52 | if (*p == av_codec_ctx->pix_fmt) { | ||
| 53 | return av_codec_ctx->pix_fmt; | ||
| 54 | } | ||
| 55 | } | ||
| 56 | LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); | ||
| 57 | av_buffer_unref(&av_codec_ctx->hw_device_ctx); | ||
| 58 | av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; | ||
| 59 | return PREFERRED_CPU_FMT; | ||
| 60 | } | ||
| 61 | |||
| 62 | // List all the currently available hwcontext in ffmpeg | ||
| 63 | std::vector<AVHWDeviceType> ListSupportedContexts() { | ||
| 64 | std::vector<AVHWDeviceType> contexts{}; | ||
| 65 | AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; | ||
| 66 | do { | ||
| 67 | current_device_type = av_hwdevice_iterate_types(current_device_type); | ||
| 68 | contexts.push_back(current_device_type); | ||
| 69 | } while (current_device_type != AV_HWDEVICE_TYPE_NONE); | ||
| 70 | return contexts; | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace | ||
| 74 | |||
| 75 | void AVFrameDeleter(AVFrame* ptr) { | ||
| 76 | av_frame_free(&ptr); | ||
| 77 | } | ||
| 78 | 14 | ||
| 79 | Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) | 15 | Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs) |
| 80 | : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)), | 16 | : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)), |
| 81 | vp8_decoder(std::make_unique<Decoder::VP8>(host1x)), | 17 | vp8_decoder(std::make_unique<Decoder::VP8>(host1x)), |
| 82 | vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {} | 18 | vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {} |
| 83 | 19 | ||
| 84 | Codec::~Codec() { | 20 | Codec::~Codec() = default; |
| 85 | if (!initialized) { | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | // Free libav memory | ||
| 89 | avcodec_free_context(&av_codec_ctx); | ||
| 90 | av_buffer_unref(&av_gpu_decoder); | ||
| 91 | |||
| 92 | if (filters_initialized) { | ||
| 93 | avfilter_graph_free(&av_filter_graph); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | bool Codec::CreateGpuAvDevice() { | ||
| 98 | static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; | ||
| 99 | static const auto supported_contexts = ListSupportedContexts(); | ||
| 100 | for (const auto& type : PREFERRED_GPU_DECODERS) { | ||
| 101 | if (std::none_of(supported_contexts.begin(), supported_contexts.end(), | ||
| 102 | [&type](const auto& context) { return context == type; })) { | ||
| 103 | LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); | ||
| 104 | continue; | ||
| 105 | } | ||
| 106 | // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create | ||
| 107 | av_buffer_unref(&av_gpu_decoder); | ||
| 108 | const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); | ||
| 109 | if (hwdevice_res < 0) { | ||
| 110 | LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", | ||
| 111 | av_hwdevice_get_type_name(type), hwdevice_res); | ||
| 112 | continue; | ||
| 113 | } | ||
| 114 | #ifdef LIBVA_FOUND | ||
| 115 | if (type == AV_HWDEVICE_TYPE_VAAPI) { | ||
| 116 | // we need to determine if this is an impersonated VAAPI driver | ||
| 117 | AVHWDeviceContext* hwctx = | ||
| 118 | static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data)); | ||
| 119 | AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); | ||
| 120 | const char* vendor_name = vaQueryVendorString(vactx->display); | ||
| 121 | if (strstr(vendor_name, "VDPAU backend")) { | ||
| 122 | // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them | ||
| 123 | LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver"); | ||
| 124 | continue; | ||
| 125 | } else { | ||
| 126 | // according to some user testing, certain vaapi driver (Intel?) could be buggy | ||
| 127 | // so let's log the driver name which may help the developers/supporters | ||
| 128 | LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | #endif | ||
| 132 | for (int i = 0;; i++) { | ||
| 133 | const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); | ||
| 134 | if (!config) { | ||
| 135 | LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", | ||
| 136 | av_codec->name, av_hwdevice_get_type_name(type)); | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) { | ||
| 140 | LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); | ||
| 141 | av_codec_ctx->pix_fmt = config->pix_fmt; | ||
| 142 | return true; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | } | ||
| 146 | return false; | ||
| 147 | } | ||
| 148 | |||
| 149 | void Codec::InitializeAvCodecContext() { | ||
| 150 | av_codec_ctx = avcodec_alloc_context3(av_codec); | ||
| 151 | av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); | ||
| 152 | av_codec_ctx->thread_count = 0; | ||
| 153 | av_codec_ctx->thread_type &= ~FF_THREAD_FRAME; | ||
| 154 | } | ||
| 155 | |||
| 156 | void Codec::InitializeGpuDecoder() { | ||
| 157 | if (!CreateGpuAvDevice()) { | ||
| 158 | av_buffer_unref(&av_gpu_decoder); | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); | ||
| 162 | ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); | ||
| 163 | av_codec_ctx->hw_device_ctx = hw_device_ctx; | ||
| 164 | av_codec_ctx->get_format = GetGpuFormat; | ||
| 165 | } | ||
| 166 | |||
| 167 | void Codec::InitializeAvFilters(AVFrame* frame) { | ||
| 168 | const AVFilter* buffer_src = avfilter_get_by_name("buffer"); | ||
| 169 | const AVFilter* buffer_sink = avfilter_get_by_name("buffersink"); | ||
| 170 | AVFilterInOut* inputs = avfilter_inout_alloc(); | ||
| 171 | AVFilterInOut* outputs = avfilter_inout_alloc(); | ||
| 172 | SCOPE_EXIT({ | ||
| 173 | avfilter_inout_free(&inputs); | ||
| 174 | avfilter_inout_free(&outputs); | ||
| 175 | }); | ||
| 176 | |||
| 177 | // Don't know how to get the accurate time_base but it doesn't matter for yadif filter | ||
| 178 | // so just use 1/1 to make buffer filter happy | ||
| 179 | std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width, | ||
| 180 | frame->height, frame->format); | ||
| 181 | |||
| 182 | av_filter_graph = avfilter_graph_alloc(); | ||
| 183 | int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(), | ||
| 184 | nullptr, av_filter_graph); | ||
| 185 | if (ret < 0) { | ||
| 186 | LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret); | ||
| 187 | return; | ||
| 188 | } | ||
| 189 | |||
| 190 | ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr, | ||
| 191 | av_filter_graph); | ||
| 192 | if (ret < 0) { | ||
| 193 | LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret); | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | |||
| 197 | inputs->name = av_strdup("out"); | ||
| 198 | inputs->filter_ctx = av_filter_sink_ctx; | ||
| 199 | inputs->pad_idx = 0; | ||
| 200 | inputs->next = nullptr; | ||
| 201 | |||
| 202 | outputs->name = av_strdup("in"); | ||
| 203 | outputs->filter_ctx = av_filter_src_ctx; | ||
| 204 | outputs->pad_idx = 0; | ||
| 205 | outputs->next = nullptr; | ||
| 206 | |||
| 207 | const char* description = "yadif=1:-1:0"; | ||
| 208 | ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr); | ||
| 209 | if (ret < 0) { | ||
| 210 | LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret); | ||
| 211 | return; | ||
| 212 | } | ||
| 213 | |||
| 214 | ret = avfilter_graph_config(av_filter_graph, nullptr); | ||
| 215 | if (ret < 0) { | ||
| 216 | LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret); | ||
| 217 | return; | ||
| 218 | } | ||
| 219 | |||
| 220 | filters_initialized = true; | ||
| 221 | } | ||
| 222 | 21 | ||
| 223 | void Codec::Initialize() { | 22 | void Codec::Initialize() { |
| 224 | const AVCodecID codec = [&] { | 23 | initialized = decode_api.Initialize(current_codec); |
| 225 | switch (current_codec) { | ||
| 226 | case Host1x::NvdecCommon::VideoCodec::H264: | ||
| 227 | return AV_CODEC_ID_H264; | ||
| 228 | case Host1x::NvdecCommon::VideoCodec::VP8: | ||
| 229 | return AV_CODEC_ID_VP8; | ||
| 230 | case Host1x::NvdecCommon::VideoCodec::VP9: | ||
| 231 | return AV_CODEC_ID_VP9; | ||
| 232 | default: | ||
| 233 | UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); | ||
| 234 | return AV_CODEC_ID_NONE; | ||
| 235 | } | ||
| 236 | }(); | ||
| 237 | av_codec = avcodec_find_decoder(codec); | ||
| 238 | |||
| 239 | InitializeAvCodecContext(); | ||
| 240 | if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { | ||
| 241 | InitializeGpuDecoder(); | ||
| 242 | } | ||
| 243 | if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { | ||
| 244 | LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); | ||
| 245 | avcodec_free_context(&av_codec_ctx); | ||
| 246 | av_buffer_unref(&av_gpu_decoder); | ||
| 247 | return; | ||
| 248 | } | ||
| 249 | if (!av_codec_ctx->hw_device_ctx) { | ||
| 250 | LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); | ||
| 251 | } | ||
| 252 | initialized = true; | ||
| 253 | } | 24 | } |
| 254 | 25 | ||
| 255 | void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { | 26 | void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { |
| @@ -264,14 +35,18 @@ void Codec::Decode() { | |||
| 264 | if (is_first_frame) { | 35 | if (is_first_frame) { |
| 265 | Initialize(); | 36 | Initialize(); |
| 266 | } | 37 | } |
| 38 | |||
| 267 | if (!initialized) { | 39 | if (!initialized) { |
| 268 | return; | 40 | return; |
| 269 | } | 41 | } |
| 42 | |||
| 43 | // Assemble bitstream. | ||
| 270 | bool vp9_hidden_frame = false; | 44 | bool vp9_hidden_frame = false; |
| 271 | const auto& frame_data = [&]() { | 45 | size_t configuration_size = 0; |
| 46 | const auto packet_data = [&]() { | ||
| 272 | switch (current_codec) { | 47 | switch (current_codec) { |
| 273 | case Tegra::Host1x::NvdecCommon::VideoCodec::H264: | 48 | case Tegra::Host1x::NvdecCommon::VideoCodec::H264: |
| 274 | return h264_decoder->ComposeFrame(state, is_first_frame); | 49 | return h264_decoder->ComposeFrame(state, &configuration_size, is_first_frame); |
| 275 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: | 50 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: |
| 276 | return vp8_decoder->ComposeFrame(state); | 51 | return vp8_decoder->ComposeFrame(state); |
| 277 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: | 52 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: |
| @@ -283,89 +58,35 @@ void Codec::Decode() { | |||
| 283 | return std::span<const u8>{}; | 58 | return std::span<const u8>{}; |
| 284 | } | 59 | } |
| 285 | }(); | 60 | }(); |
| 286 | AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; | 61 | |
| 287 | if (!packet) { | 62 | // Send assembled bitstream to decoder. |
| 288 | LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); | 63 | if (!decode_api.SendPacket(packet_data, configuration_size)) { |
| 289 | return; | ||
| 290 | } | ||
| 291 | packet->data = const_cast<u8*>(frame_data.data()); | ||
| 292 | packet->size = static_cast<s32>(frame_data.size()); | ||
| 293 | if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { | ||
| 294 | LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); | ||
| 295 | return; | 64 | return; |
| 296 | } | 65 | } |
| 297 | // Only receive/store visible frames | 66 | |
| 67 | // Only receive/store visible frames. | ||
| 298 | if (vp9_hidden_frame) { | 68 | if (vp9_hidden_frame) { |
| 299 | return; | 69 | return; |
| 300 | } | 70 | } |
| 301 | AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; | ||
| 302 | AVFramePtr final_frame{nullptr, AVFrameDeleter}; | ||
| 303 | ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); | ||
| 304 | if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { | ||
| 305 | LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); | ||
| 306 | return; | ||
| 307 | } | ||
| 308 | if (initial_frame->width == 0 || initial_frame->height == 0) { | ||
| 309 | LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); | ||
| 310 | return; | ||
| 311 | } | ||
| 312 | bool is_interlaced = initial_frame->interlaced_frame != 0; | ||
| 313 | if (av_codec_ctx->hw_device_ctx) { | ||
| 314 | final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; | ||
| 315 | ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); | ||
| 316 | // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp | ||
| 317 | // because Intel drivers crash unless using AV_PIX_FMT_NV12 | ||
| 318 | final_frame->format = PREFERRED_GPU_FMT; | ||
| 319 | const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); | ||
| 320 | ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); | ||
| 321 | } else { | ||
| 322 | final_frame = std::move(initial_frame); | ||
| 323 | } | ||
| 324 | if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { | ||
| 325 | UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); | ||
| 326 | return; | ||
| 327 | } | ||
| 328 | if (!is_interlaced) { | ||
| 329 | av_frames.push(std::move(final_frame)); | ||
| 330 | } else { | ||
| 331 | if (!filters_initialized) { | ||
| 332 | InitializeAvFilters(final_frame.get()); | ||
| 333 | } | ||
| 334 | if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(), | ||
| 335 | AV_BUFFERSRC_FLAG_KEEP_REF); | ||
| 336 | ret) { | ||
| 337 | LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret); | ||
| 338 | return; | ||
| 339 | } | ||
| 340 | while (true) { | ||
| 341 | auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; | ||
| 342 | 71 | ||
| 343 | int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); | 72 | // Receive output frames from decoder. |
| 73 | decode_api.ReceiveFrames(frames); | ||
| 344 | 74 | ||
| 345 | if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) | 75 | while (frames.size() > 10) { |
| 346 | break; | 76 | LOG_DEBUG(HW_GPU, "ReceiveFrames overflow, dropped frame"); |
| 347 | if (ret < 0) { | 77 | frames.pop(); |
| 348 | LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret); | ||
| 349 | return; | ||
| 350 | } | ||
| 351 | |||
| 352 | av_frames.push(std::move(filter_frame)); | ||
| 353 | } | ||
| 354 | } | ||
| 355 | while (av_frames.size() > 10) { | ||
| 356 | LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); | ||
| 357 | av_frames.pop(); | ||
| 358 | } | 78 | } |
| 359 | } | 79 | } |
| 360 | 80 | ||
| 361 | AVFramePtr Codec::GetCurrentFrame() { | 81 | std::unique_ptr<FFmpeg::Frame> Codec::GetCurrentFrame() { |
| 362 | // Sometimes VIC will request more frames than have been decoded. | 82 | // Sometimes VIC will request more frames than have been decoded. |
| 363 | // in this case, return a nullptr and don't overwrite previous frame data | 83 | // in this case, return a blank frame and don't overwrite previous data. |
| 364 | if (av_frames.empty()) { | 84 | if (frames.empty()) { |
| 365 | return AVFramePtr{nullptr, AVFrameDeleter}; | 85 | return {}; |
| 366 | } | 86 | } |
| 367 | AVFramePtr frame = std::move(av_frames.front()); | 87 | |
| 368 | av_frames.pop(); | 88 | auto frame = std::move(frames.front()); |
| 89 | frames.pop(); | ||
| 369 | return frame; | 90 | return frame; |
| 370 | } | 91 | } |
| 371 | 92 | ||
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 06fe00a4b..f700ae129 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h | |||
| @@ -4,28 +4,15 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <optional> | ||
| 7 | #include <string_view> | 8 | #include <string_view> |
| 8 | #include <queue> | 9 | #include <queue> |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/host1x/ffmpeg/ffmpeg.h" | ||
| 10 | #include "video_core/host1x/nvdec_common.h" | 12 | #include "video_core/host1x/nvdec_common.h" |
| 11 | 13 | ||
| 12 | extern "C" { | ||
| 13 | #if defined(__GNUC__) || defined(__clang__) | ||
| 14 | #pragma GCC diagnostic push | ||
| 15 | #pragma GCC diagnostic ignored "-Wconversion" | ||
| 16 | #endif | ||
| 17 | #include <libavcodec/avcodec.h> | ||
| 18 | #include <libavfilter/avfilter.h> | ||
| 19 | #if defined(__GNUC__) || defined(__clang__) | ||
| 20 | #pragma GCC diagnostic pop | ||
| 21 | #endif | ||
| 22 | } | ||
| 23 | |||
| 24 | namespace Tegra { | 14 | namespace Tegra { |
| 25 | 15 | ||
| 26 | void AVFrameDeleter(AVFrame* ptr); | ||
| 27 | using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>; | ||
| 28 | |||
| 29 | namespace Decoder { | 16 | namespace Decoder { |
| 30 | class H264; | 17 | class H264; |
| 31 | class VP8; | 18 | class VP8; |
| @@ -51,7 +38,7 @@ public: | |||
| 51 | void Decode(); | 38 | void Decode(); |
| 52 | 39 | ||
| 53 | /// Returns next decoded frame | 40 | /// Returns next decoded frame |
| 54 | [[nodiscard]] AVFramePtr GetCurrentFrame(); | 41 | [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetCurrentFrame(); |
| 55 | 42 | ||
| 56 | /// Returns the value of current_codec | 43 | /// Returns the value of current_codec |
| 57 | [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; | 44 | [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; |
| @@ -60,25 +47,9 @@ public: | |||
| 60 | [[nodiscard]] std::string_view GetCurrentCodecName() const; | 47 | [[nodiscard]] std::string_view GetCurrentCodecName() const; |
| 61 | 48 | ||
| 62 | private: | 49 | private: |
| 63 | void InitializeAvCodecContext(); | ||
| 64 | |||
| 65 | void InitializeAvFilters(AVFrame* frame); | ||
| 66 | |||
| 67 | void InitializeGpuDecoder(); | ||
| 68 | |||
| 69 | bool CreateGpuAvDevice(); | ||
| 70 | |||
| 71 | bool initialized{}; | 50 | bool initialized{}; |
| 72 | bool filters_initialized{}; | ||
| 73 | Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; | 51 | Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; |
| 74 | 52 | FFmpeg::DecodeApi decode_api; | |
| 75 | const AVCodec* av_codec{nullptr}; | ||
| 76 | AVCodecContext* av_codec_ctx{nullptr}; | ||
| 77 | AVBufferRef* av_gpu_decoder{nullptr}; | ||
| 78 | |||
| 79 | AVFilterContext* av_filter_src_ctx{nullptr}; | ||
| 80 | AVFilterContext* av_filter_sink_ctx{nullptr}; | ||
| 81 | AVFilterGraph* av_filter_graph{nullptr}; | ||
| 82 | 53 | ||
| 83 | Host1x::Host1x& host1x; | 54 | Host1x::Host1x& host1x; |
| 84 | const Host1x::NvdecCommon::NvdecRegisters& state; | 55 | const Host1x::NvdecCommon::NvdecRegisters& state; |
| @@ -86,7 +57,7 @@ private: | |||
| 86 | std::unique_ptr<Decoder::VP8> vp8_decoder; | 57 | std::unique_ptr<Decoder::VP8> vp8_decoder; |
| 87 | std::unique_ptr<Decoder::VP9> vp9_decoder; | 58 | std::unique_ptr<Decoder::VP9> vp9_decoder; |
| 88 | 59 | ||
| 89 | std::queue<AVFramePtr> av_frames{}; | 60 | std::queue<std::unique_ptr<FFmpeg::Frame>> frames{}; |
| 90 | }; | 61 | }; |
| 91 | 62 | ||
| 92 | } // namespace Tegra | 63 | } // namespace Tegra |
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index ece79b1e2..309a7f1d5 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -30,7 +30,7 @@ H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {} | |||
| 30 | H264::~H264() = default; | 30 | H264::~H264() = default; |
| 31 | 31 | ||
| 32 | std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, | 32 | std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, |
| 33 | bool is_first_frame) { | 33 | size_t* out_configuration_size, bool is_first_frame) { |
| 34 | H264DecoderContext context; | 34 | H264DecoderContext context; |
| 35 | host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, | 35 | host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, |
| 36 | sizeof(H264DecoderContext)); | 36 | sizeof(H264DecoderContext)); |
| @@ -39,6 +39,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters | |||
| 39 | if (!is_first_frame && frame_number != 0) { | 39 | if (!is_first_frame && frame_number != 0) { |
| 40 | frame.resize_destructive(context.stream_len); | 40 | frame.resize_destructive(context.stream_len); |
| 41 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | 41 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); |
| 42 | *out_configuration_size = 0; | ||
| 42 | return frame; | 43 | return frame; |
| 43 | } | 44 | } |
| 44 | 45 | ||
| @@ -157,6 +158,7 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters | |||
| 157 | frame.resize(encoded_header.size() + context.stream_len); | 158 | frame.resize(encoded_header.size() + context.stream_len); |
| 158 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | 159 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); |
| 159 | 160 | ||
| 161 | *out_configuration_size = encoded_header.size(); | ||
| 160 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, | 162 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, |
| 161 | frame.data() + encoded_header.size(), context.stream_len); | 163 | frame.data() + encoded_header.size(), context.stream_len); |
| 162 | 164 | ||
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index d6b556322..1deaf4632 100644 --- a/src/video_core/host1x/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | 67 | ||
| 68 | /// Compose the H264 frame for FFmpeg decoding | 68 | /// Compose the H264 frame for FFmpeg decoding |
| 69 | [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, | 69 | [[nodiscard]] std::span<const u8> ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, |
| 70 | size_t* out_configuration_size, | ||
| 70 | bool is_first_frame = false); | 71 | bool is_first_frame = false); |
| 71 | 72 | ||
| 72 | private: | 73 | private: |
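For context, a minimal sketch (not part of this patch) of how the new `out_configuration_size` parameter could be threaded from `ComposeFrame` into `FFmpeg::DecodeApi::SendPacket`; the wrapper function and exact namespaces are assumptions for illustration.

```cpp
// Hypothetical caller sketch: compose the H264 bitstream, then pass the composed
// data together with the size of the prepended configuration header (if any) to
// the FFmpeg decode API introduced below. Only ComposeFrame and SendPacket are
// taken from this patch; everything else is illustrative.
void DecodeH264Frame(Tegra::Decoder::H264& h264, FFmpeg::DecodeApi& decode_api,
                     const Tegra::Host1x::NvdecCommon::NvdecRegisters& state,
                     bool is_first_frame) {
    size_t configuration_size = 0;
    const std::span<const u8> bitstream =
        h264.ComposeFrame(state, &configuration_size, is_first_frame);
    // configuration_size is 0 when no header was prepended to the frame data.
    decode_api.SendPacket(bitstream, configuration_size);
}
```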
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp new file mode 100644 index 000000000..dcd07e6d2 --- /dev/null +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp | |||
| @@ -0,0 +1,419 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/assert.h" | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "common/scope_exit.h" | ||
| 7 | #include "common/settings.h" | ||
| 8 | #include "video_core/host1x/ffmpeg/ffmpeg.h" | ||
| 9 | |||
| 10 | extern "C" { | ||
| 11 | #ifdef LIBVA_FOUND | ||
| 12 | // for querying VAAPI driver information | ||
| 13 | #include <libavutil/hwcontext_vaapi.h> | ||
| 14 | #endif | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace FFmpeg { | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12; | ||
| 22 | constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P; | ||
| 23 | constexpr std::array PreferredGpuDecoders = { | ||
| 24 | AV_HWDEVICE_TYPE_CUDA, | ||
| 25 | #ifdef _WIN32 | ||
| 26 | AV_HWDEVICE_TYPE_D3D11VA, | ||
| 27 | AV_HWDEVICE_TYPE_DXVA2, | ||
| 28 | #elif defined(__unix__) | ||
| 29 | AV_HWDEVICE_TYPE_VAAPI, | ||
| 30 | AV_HWDEVICE_TYPE_VDPAU, | ||
| 31 | #endif | ||
| 32 | // last resort for Linux Flatpak (w/ NVIDIA) | ||
| 33 | AV_HWDEVICE_TYPE_VULKAN, | ||
| 34 | }; | ||
| 35 | |||
| 36 | AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) { | ||
| 37 | for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { | ||
| 38 | if (*p == codec_context->pix_fmt) { | ||
| 39 | return codec_context->pix_fmt; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU"); | ||
| 44 | av_buffer_unref(&codec_context->hw_device_ctx); | ||
| 45 | |||
| 46 | codec_context->pix_fmt = PreferredCpuFormat; | ||
| 47 | return codec_context->pix_fmt; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::string AVError(int errnum) { | ||
| 51 | char errbuf[AV_ERROR_MAX_STRING_SIZE] = {}; | ||
| 52 | av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum); | ||
| 53 | return errbuf; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace | ||
| 57 | |||
| 58 | Packet::Packet(std::span<const u8> data) { | ||
| 59 | m_packet = av_packet_alloc(); | ||
| 60 | m_packet->data = const_cast<u8*>(data.data()); | ||
| 61 | m_packet->size = static_cast<s32>(data.size()); | ||
| 62 | } | ||
| 63 | |||
| 64 | Packet::~Packet() { | ||
| 65 | av_packet_free(&m_packet); | ||
| 66 | } | ||
| 67 | |||
| 68 | Frame::Frame() { | ||
| 69 | m_frame = av_frame_alloc(); | ||
| 70 | } | ||
| 71 | |||
| 72 | Frame::~Frame() { | ||
| 73 | av_frame_free(&m_frame); | ||
| 74 | } | ||
| 75 | |||
| 76 | Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) { | ||
| 77 | const AVCodecID av_codec = [&] { | ||
| 78 | switch (codec) { | ||
| 79 | case Tegra::Host1x::NvdecCommon::VideoCodec::H264: | ||
| 80 | return AV_CODEC_ID_H264; | ||
| 81 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: | ||
| 82 | return AV_CODEC_ID_VP8; | ||
| 83 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: | ||
| 84 | return AV_CODEC_ID_VP9; | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unknown codec {}", codec); | ||
| 87 | return AV_CODEC_ID_NONE; | ||
| 88 | } | ||
| 89 | }(); | ||
| 90 | |||
| 91 | m_codec = avcodec_find_decoder(av_codec); | ||
| 92 | } | ||
| 93 | |||
| 94 | bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const { | ||
| 95 | for (int i = 0;; i++) { | ||
| 96 | const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i); | ||
| 97 | if (!config) { | ||
| 98 | LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name, | ||
| 99 | av_hwdevice_get_type_name(type)); | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 && | ||
| 103 | config->device_type == type) { | ||
| 104 | LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); | ||
| 105 | *out_pix_fmt = config->pix_fmt; | ||
| 106 | return true; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | return false; | ||
| 111 | } | ||
| 112 | |||
| 113 | std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() { | ||
| 114 | std::vector<AVHWDeviceType> types; | ||
| 115 | AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; | ||
| 116 | |||
| 117 | while (true) { | ||
| 118 | current_device_type = av_hwdevice_iterate_types(current_device_type); | ||
| 119 | if (current_device_type == AV_HWDEVICE_TYPE_NONE) { | ||
| 120 | return types; | ||
| 121 | } | ||
| 122 | |||
| 123 | types.push_back(current_device_type); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | HardwareContext::~HardwareContext() { | ||
| 128 | av_buffer_unref(&m_gpu_decoder); | ||
| 129 | } | ||
| 130 | |||
| 131 | bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, | ||
| 132 | const Decoder& decoder) { | ||
| 133 | const auto supported_types = GetSupportedDeviceTypes(); | ||
| 134 | for (const auto type : PreferredGpuDecoders) { | ||
| 135 | AVPixelFormat hw_pix_fmt; | ||
| 136 | |||
| 137 | if (std::ranges::find(supported_types, type) == supported_types.end()) { | ||
| 138 | LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type)); | ||
| 139 | continue; | ||
| 140 | } | ||
| 141 | |||
| 142 | if (!this->InitializeWithType(type)) { | ||
| 143 | continue; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) { | ||
| 147 | decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt); | ||
| 148 | return true; | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | return false; | ||
| 153 | } | ||
| 154 | |||
| 155 | bool HardwareContext::InitializeWithType(AVHWDeviceType type) { | ||
| 156 | av_buffer_unref(&m_gpu_decoder); | ||
| 157 | |||
| 158 | if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); | ||
| 159 | ret < 0) { | ||
| 160 | LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type), | ||
| 161 | AVError(ret)); | ||
| 162 | return false; | ||
| 163 | } | ||
| 164 | |||
| 165 | #ifdef LIBVA_FOUND | ||
| 166 | if (type == AV_HWDEVICE_TYPE_VAAPI) { | ||
| 167 | // We need to determine if this is an impersonated VAAPI driver. | ||
| 168 | auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data); | ||
| 169 | auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx); | ||
| 170 | const char* vendor_name = vaQueryVendorString(vactx->display); | ||
| 171 | if (strstr(vendor_name, "VDPAU backend")) { | ||
| 172 | // VDPAU-impersonated VAAPI implementations are very buggy, so we need to skip them. | ||
| 173 | LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver"); | ||
| 174 | return false; | ||
| 175 | } else { | ||
| 176 | // According to some user testing, certain VAAPI drivers (Intel?) could be buggy. | ||
| 177 | // Log the driver name just in case. | ||
| 178 | LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | #endif | ||
| 182 | |||
| 183 | return true; | ||
| 184 | } | ||
| 185 | |||
| 186 | DecoderContext::DecoderContext(const Decoder& decoder) { | ||
| 187 | m_codec_context = avcodec_alloc_context3(decoder.GetCodec()); | ||
| 188 | av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0); | ||
| 189 | m_codec_context->thread_count = 0; | ||
| 190 | m_codec_context->thread_type &= ~FF_THREAD_FRAME; | ||
| 191 | } | ||
| 192 | |||
| 193 | DecoderContext::~DecoderContext() { | ||
| 194 | av_buffer_unref(&m_codec_context->hw_device_ctx); | ||
| 195 | avcodec_free_context(&m_codec_context); | ||
| 196 | } | ||
| 197 | |||
| 198 | void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, | ||
| 199 | AVPixelFormat hw_pix_fmt) { | ||
| 200 | m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef()); | ||
| 201 | m_codec_context->get_format = GetGpuFormat; | ||
| 202 | m_codec_context->pix_fmt = hw_pix_fmt; | ||
| 203 | } | ||
| 204 | |||
| 205 | bool DecoderContext::OpenContext(const Decoder& decoder) { | ||
| 206 | if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) { | ||
| 207 | LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret)); | ||
| 208 | return false; | ||
| 209 | } | ||
| 210 | |||
| 211 | if (!m_codec_context->hw_device_ctx) { | ||
| 212 | LOG_INFO(HW_GPU, "Using FFmpeg software decoding"); | ||
| 213 | } | ||
| 214 | |||
| 215 | return true; | ||
| 216 | } | ||
| 217 | |||
| 218 | bool DecoderContext::SendPacket(const Packet& packet) { | ||
| 219 | if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) { | ||
| 220 | LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); | ||
| 221 | return false; | ||
| 222 | } | ||
| 223 | |||
| 224 | return true; | ||
| 225 | } | ||
| 226 | |||
| 227 | std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) { | ||
| 228 | auto dst_frame = std::make_unique<Frame>(); | ||
| 229 | |||
| 230 | const auto ReceiveImpl = [&](AVFrame* frame) { | ||
| 231 | if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) { | ||
| 232 | LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); | ||
| 233 | return false; | ||
| 234 | } | ||
| 235 | |||
| 236 | *out_is_interlaced = frame->interlaced_frame != 0; | ||
| 237 | return true; | ||
| 238 | }; | ||
| 239 | |||
| 240 | if (m_codec_context->hw_device_ctx) { | ||
| 241 | // If we have a hardware context, make a separate frame here to receive the | ||
| 242 | // hardware result before sending it to the output. | ||
| 243 | Frame intermediate_frame; | ||
| 244 | |||
| 245 | if (!ReceiveImpl(intermediate_frame.GetFrame())) { | ||
| 246 | return {}; | ||
| 247 | } | ||
| 248 | |||
| 249 | dst_frame->SetFormat(PreferredGpuFormat); | ||
| 250 | if (const int ret = | ||
| 251 | av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0); | ||
| 252 | ret < 0) { | ||
| 253 | LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); | ||
| 254 | return {}; | ||
| 255 | } | ||
| 256 | } else { | ||
| 257 | // Otherwise, decode the frame as normal. | ||
| 258 | if (!ReceiveImpl(dst_frame->GetFrame())) { | ||
| 259 | return {}; | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | return dst_frame; | ||
| 264 | } | ||
| 265 | |||
| 266 | DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) { | ||
| 267 | const AVFilter* buffer_src = avfilter_get_by_name("buffer"); | ||
| 268 | const AVFilter* buffer_sink = avfilter_get_by_name("buffersink"); | ||
| 269 | AVFilterInOut* inputs = avfilter_inout_alloc(); | ||
| 270 | AVFilterInOut* outputs = avfilter_inout_alloc(); | ||
| 271 | SCOPE_EXIT({ | ||
| 272 | avfilter_inout_free(&inputs); | ||
| 273 | avfilter_inout_free(&outputs); | ||
| 274 | }); | ||
| 275 | |||
| 276 | // We don't know how to get an accurate time_base here, but it doesn't matter for the yadif | ||
| 277 | // filter, so just use 1/1 to keep the buffer filter happy. | ||
| 278 | std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(), | ||
| 279 | frame.GetHeight(), static_cast<int>(frame.GetPixelFormat())); | ||
| 280 | |||
| 281 | m_filter_graph = avfilter_graph_alloc(); | ||
| 282 | int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(), | ||
| 283 | nullptr, m_filter_graph); | ||
| 284 | if (ret < 0) { | ||
| 285 | LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret)); | ||
| 286 | return; | ||
| 287 | } | ||
| 288 | |||
| 289 | ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr, | ||
| 290 | m_filter_graph); | ||
| 291 | if (ret < 0) { | ||
| 292 | LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret)); | ||
| 293 | return; | ||
| 294 | } | ||
| 295 | |||
| 296 | inputs->name = av_strdup("out"); | ||
| 297 | inputs->filter_ctx = m_sink_context; | ||
| 298 | inputs->pad_idx = 0; | ||
| 299 | inputs->next = nullptr; | ||
| 300 | |||
| 301 | outputs->name = av_strdup("in"); | ||
| 302 | outputs->filter_ctx = m_source_context; | ||
| 303 | outputs->pad_idx = 0; | ||
| 304 | outputs->next = nullptr; | ||
| 305 | |||
| 306 | const char* description = "yadif=1:-1:0"; | ||
| 307 | ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr); | ||
| 308 | if (ret < 0) { | ||
| 309 | LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret)); | ||
| 310 | return; | ||
| 311 | } | ||
| 312 | |||
| 313 | ret = avfilter_graph_config(m_filter_graph, nullptr); | ||
| 314 | if (ret < 0) { | ||
| 315 | LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret)); | ||
| 316 | return; | ||
| 317 | } | ||
| 318 | |||
| 319 | m_initialized = true; | ||
| 320 | } | ||
| 321 | |||
| 322 | bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) { | ||
| 323 | if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(), | ||
| 324 | AV_BUFFERSRC_FLAG_KEEP_REF); | ||
| 325 | ret < 0) { | ||
| 326 | LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret)); | ||
| 327 | return false; | ||
| 328 | } | ||
| 329 | |||
| 330 | return true; | ||
| 331 | } | ||
| 332 | |||
| 333 | std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() { | ||
| 334 | auto dst_frame = std::make_unique<Frame>(); | ||
| 335 | const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame()); | ||
| 336 | |||
| 337 | if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { | ||
| 338 | return {}; | ||
| 339 | } | ||
| 340 | |||
| 341 | if (ret < 0) { | ||
| 342 | LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret)); | ||
| 343 | return {}; | ||
| 344 | } | ||
| 345 | |||
| 346 | return dst_frame; | ||
| 347 | } | ||
| 348 | |||
| 349 | DeinterlaceFilter::~DeinterlaceFilter() { | ||
| 350 | avfilter_graph_free(&m_filter_graph); | ||
| 351 | } | ||
| 352 | |||
| 353 | void DecodeApi::Reset() { | ||
| 354 | m_deinterlace_filter.reset(); | ||
| 355 | m_hardware_context.reset(); | ||
| 356 | m_decoder_context.reset(); | ||
| 357 | m_decoder.reset(); | ||
| 358 | } | ||
| 359 | |||
| 360 | bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) { | ||
| 361 | this->Reset(); | ||
| 362 | m_decoder.emplace(codec); | ||
| 363 | m_decoder_context.emplace(*m_decoder); | ||
| 364 | |||
| 365 | // Enable GPU decoding if requested. | ||
| 366 | if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) { | ||
| 367 | m_hardware_context.emplace(); | ||
| 368 | m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder); | ||
| 369 | } | ||
| 370 | |||
| 371 | // Open the decoder context. | ||
| 372 | if (!m_decoder_context->OpenContext(*m_decoder)) { | ||
| 373 | this->Reset(); | ||
| 374 | return false; | ||
| 375 | } | ||
| 376 | |||
| 377 | return true; | ||
| 378 | } | ||
| 379 | |||
| 380 | bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) { | ||
| 381 | FFmpeg::Packet packet(packet_data); | ||
| 382 | return m_decoder_context->SendPacket(packet); | ||
| 383 | } | ||
| 384 | |||
| 385 | void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) { | ||
| 386 | // Receive raw frame from decoder. | ||
| 387 | bool is_interlaced; | ||
| 388 | auto frame = m_decoder_context->ReceiveFrame(&is_interlaced); | ||
| 389 | if (!frame) { | ||
| 390 | return; | ||
| 391 | } | ||
| 392 | |||
| 393 | if (!is_interlaced) { | ||
| 394 | // If the frame is not interlaced, we can queue it now. | ||
| 395 | frame_queue.push(std::move(frame)); | ||
| 396 | } else { | ||
| 397 | // Create the deinterlacer if needed. | ||
| 398 | if (!m_deinterlace_filter) { | ||
| 399 | m_deinterlace_filter.emplace(*frame); | ||
| 400 | } | ||
| 401 | |||
| 402 | // Add the frame we just received. | ||
| 403 | if (!m_deinterlace_filter->AddSourceFrame(*frame)) { | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | |||
| 407 | // Queue the output fields. | ||
| 408 | while (true) { | ||
| 409 | auto filter_frame = m_deinterlace_filter->DrainSinkFrame(); | ||
| 410 | if (!filter_frame) { | ||
| 411 | break; | ||
| 412 | } | ||
| 413 | |||
| 414 | frame_queue.push(std::move(filter_frame)); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | } // namespace FFmpeg | ||
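As a reference for the classes defined above, here is a condensed, hedged sketch of the low-level decode flow they implement (roughly what `DecodeApi::Initialize`, `SendPacket`, and `ReceiveFrames` compose); error handling and the deinterlacing path are omitted, and the surrounding function is illustrative only.

```cpp
// Sketch only: one packet in, at most one frame out. Real callers keep the
// Decoder/DecoderContext/HardwareContext alive across packets.
std::unique_ptr<FFmpeg::Frame> DecodeOnePacket(std::span<const u8> bitstream) {
    FFmpeg::Decoder decoder(Tegra::Host1x::NvdecCommon::VideoCodec::H264);
    FFmpeg::DecoderContext context(decoder);

    // Optional: try GPU decoding; on failure the context stays software-only.
    FFmpeg::HardwareContext hardware;
    hardware.InitializeForDecoder(context, decoder);

    if (!context.OpenContext(decoder)) {
        return {};
    }

    FFmpeg::Packet packet(bitstream);
    if (!context.SendPacket(packet)) {
        return {};
    }

    // May return null while the decoder is still buffering input.
    bool is_interlaced{};
    return context.ReceiveFrame(&is_interlaced);
}
```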
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h new file mode 100644 index 000000000..1de0bbd83 --- /dev/null +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <optional> | ||
| 8 | #include <span> | ||
| 9 | #include <vector> | ||
| 10 | #include <queue> | ||
| 11 | |||
| 12 | #include "common/common_funcs.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/host1x/nvdec_common.h" | ||
| 15 | |||
| 16 | extern "C" { | ||
| 17 | #if defined(__GNUC__) || defined(__clang__) | ||
| 18 | #pragma GCC diagnostic push | ||
| 19 | #pragma GCC diagnostic ignored "-Wconversion" | ||
| 20 | #endif | ||
| 21 | |||
| 22 | #include <libavcodec/avcodec.h> | ||
| 23 | #include <libavfilter/avfilter.h> | ||
| 24 | #include <libavfilter/buffersink.h> | ||
| 25 | #include <libavfilter/buffersrc.h> | ||
| 26 | #include <libavutil/avutil.h> | ||
| 27 | #include <libavutil/opt.h> | ||
| 28 | |||
| 29 | #if defined(__GNUC__) || defined(__clang__) | ||
| 30 | #pragma GCC diagnostic pop | ||
| 31 | #endif | ||
| 32 | } | ||
| 33 | |||
| 34 | namespace FFmpeg { | ||
| 35 | |||
| 36 | class Packet; | ||
| 37 | class Frame; | ||
| 38 | class Decoder; | ||
| 39 | class HardwareContext; | ||
| 40 | class DecoderContext; | ||
| 41 | class DeinterlaceFilter; | ||
| 42 | |||
| 43 | // Wraps an AVPacket, a container for compressed bitstream data. | ||
| 44 | class Packet { | ||
| 45 | public: | ||
| 46 | YUZU_NON_COPYABLE(Packet); | ||
| 47 | YUZU_NON_MOVEABLE(Packet); | ||
| 48 | |||
| 49 | explicit Packet(std::span<const u8> data); | ||
| 50 | ~Packet(); | ||
| 51 | |||
| 52 | AVPacket* GetPacket() const { | ||
| 53 | return m_packet; | ||
| 54 | } | ||
| 55 | |||
| 56 | private: | ||
| 57 | AVPacket* m_packet{}; | ||
| 58 | }; | ||
| 59 | |||
| 60 | // Wraps an AVFrame, a container for audio and video stream data. | ||
| 61 | class Frame { | ||
| 62 | public: | ||
| 63 | YUZU_NON_COPYABLE(Frame); | ||
| 64 | YUZU_NON_MOVEABLE(Frame); | ||
| 65 | |||
| 66 | explicit Frame(); | ||
| 67 | ~Frame(); | ||
| 68 | |||
| 69 | int GetWidth() const { | ||
| 70 | return m_frame->width; | ||
| 71 | } | ||
| 72 | |||
| 73 | int GetHeight() const { | ||
| 74 | return m_frame->height; | ||
| 75 | } | ||
| 76 | |||
| 77 | AVPixelFormat GetPixelFormat() const { | ||
| 78 | return static_cast<AVPixelFormat>(m_frame->format); | ||
| 79 | } | ||
| 80 | |||
| 81 | int GetStride(int plane) const { | ||
| 82 | return m_frame->linesize[plane]; | ||
| 83 | } | ||
| 84 | |||
| 85 | int* GetStrides() const { | ||
| 86 | return m_frame->linesize; | ||
| 87 | } | ||
| 88 | |||
| 89 | u8* GetData(int plane) const { | ||
| 90 | return m_frame->data[plane]; | ||
| 91 | } | ||
| 92 | |||
| 93 | u8** GetPlanes() const { | ||
| 94 | return m_frame->data; | ||
| 95 | } | ||
| 96 | |||
| 97 | void SetFormat(int format) { | ||
| 98 | m_frame->format = format; | ||
| 99 | } | ||
| 100 | |||
| 101 | AVFrame* GetFrame() const { | ||
| 102 | return m_frame; | ||
| 103 | } | ||
| 104 | |||
| 105 | private: | ||
| 106 | AVFrame* m_frame{}; | ||
| 107 | }; | ||
| 108 | |||
| 109 | // Wraps an AVCodec, a type containing information about a codec. | ||
| 110 | class Decoder { | ||
| 111 | public: | ||
| 112 | YUZU_NON_COPYABLE(Decoder); | ||
| 113 | YUZU_NON_MOVEABLE(Decoder); | ||
| 114 | |||
| 115 | explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec); | ||
| 116 | ~Decoder() = default; | ||
| 117 | |||
| 118 | bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const; | ||
| 119 | |||
| 120 | const AVCodec* GetCodec() const { | ||
| 121 | return m_codec; | ||
| 122 | } | ||
| 123 | |||
| 124 | private: | ||
| 125 | const AVCodec* m_codec{}; | ||
| 126 | }; | ||
| 127 | |||
| 128 | // Wraps AVBufferRef for an accelerated decoder. | ||
| 129 | class HardwareContext { | ||
| 130 | public: | ||
| 131 | YUZU_NON_COPYABLE(HardwareContext); | ||
| 132 | YUZU_NON_MOVEABLE(HardwareContext); | ||
| 133 | |||
| 134 | static std::vector<AVHWDeviceType> GetSupportedDeviceTypes(); | ||
| 135 | |||
| 136 | explicit HardwareContext() = default; | ||
| 137 | ~HardwareContext(); | ||
| 138 | |||
| 139 | bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder); | ||
| 140 | |||
| 141 | AVBufferRef* GetBufferRef() const { | ||
| 142 | return m_gpu_decoder; | ||
| 143 | } | ||
| 144 | |||
| 145 | private: | ||
| 146 | bool InitializeWithType(AVHWDeviceType type); | ||
| 147 | |||
| 148 | AVBufferRef* m_gpu_decoder{}; | ||
| 149 | }; | ||
| 150 | |||
| 151 | // Wraps an AVCodecContext. | ||
| 152 | class DecoderContext { | ||
| 153 | public: | ||
| 154 | YUZU_NON_COPYABLE(DecoderContext); | ||
| 155 | YUZU_NON_MOVEABLE(DecoderContext); | ||
| 156 | |||
| 157 | explicit DecoderContext(const Decoder& decoder); | ||
| 158 | ~DecoderContext(); | ||
| 159 | |||
| 160 | void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt); | ||
| 161 | bool OpenContext(const Decoder& decoder); | ||
| 162 | bool SendPacket(const Packet& packet); | ||
| 163 | std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced); | ||
| 164 | |||
| 165 | AVCodecContext* GetCodecContext() const { | ||
| 166 | return m_codec_context; | ||
| 167 | } | ||
| 168 | |||
| 169 | private: | ||
| 170 | AVCodecContext* m_codec_context{}; | ||
| 171 | }; | ||
| 172 | |||
| 173 | // Wraps an AVFilterGraph. | ||
| 174 | class DeinterlaceFilter { | ||
| 175 | public: | ||
| 176 | YUZU_NON_COPYABLE(DeinterlaceFilter); | ||
| 177 | YUZU_NON_MOVEABLE(DeinterlaceFilter); | ||
| 178 | |||
| 179 | explicit DeinterlaceFilter(const Frame& frame); | ||
| 180 | ~DeinterlaceFilter(); | ||
| 181 | |||
| 182 | bool AddSourceFrame(const Frame& frame); | ||
| 183 | std::unique_ptr<Frame> DrainSinkFrame(); | ||
| 184 | |||
| 185 | private: | ||
| 186 | AVFilterGraph* m_filter_graph{}; | ||
| 187 | AVFilterContext* m_source_context{}; | ||
| 188 | AVFilterContext* m_sink_context{}; | ||
| 189 | bool m_initialized{}; | ||
| 190 | }; | ||
| 191 | |||
| 192 | class DecodeApi { | ||
| 193 | public: | ||
| 194 | YUZU_NON_COPYABLE(DecodeApi); | ||
| 195 | YUZU_NON_MOVEABLE(DecodeApi); | ||
| 196 | |||
| 197 | DecodeApi() = default; | ||
| 198 | ~DecodeApi() = default; | ||
| 199 | |||
| 200 | bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec); | ||
| 201 | void Reset(); | ||
| 202 | |||
| 203 | bool SendPacket(std::span<const u8> packet_data, size_t configuration_size); | ||
| 204 | void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue); | ||
| 205 | |||
| 206 | private: | ||
| 207 | std::optional<FFmpeg::Decoder> m_decoder; | ||
| 208 | std::optional<FFmpeg::DecoderContext> m_decoder_context; | ||
| 209 | std::optional<FFmpeg::HardwareContext> m_hardware_context; | ||
| 210 | std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter; | ||
| 211 | }; | ||
| 212 | |||
| 213 | } // namespace FFmpeg | ||
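A minimal usage sketch of the public `DecodeApi` surface declared above, assuming a caller that owns a frame queue like the `Codec` class earlier in this diff; the codec choice and the surrounding functions are placeholders.

```cpp
// Assumed caller state: one DecodeApi and one output frame queue.
FFmpeg::DecodeApi decode_api;
std::queue<std::unique_ptr<FFmpeg::Frame>> frame_queue;

bool InitializeSketch() {
    // Selects GPU decoding automatically when the nvdec_emulation setting asks for it.
    return decode_api.Initialize(Tegra::Host1x::NvdecCommon::VideoCodec::H264);
}

void DecodePacketSketch(std::span<const u8> bitstream, size_t configuration_size) {
    if (!decode_api.SendPacket(bitstream, configuration_size)) {
        return;
    }
    // Pushes one frame for progressive content, or one frame per output field
    // (via the yadif deinterlacer) for interlaced content.
    decode_api.ReceiveFrames(frame_queue);
}
```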
diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp index a4bd5b79f..b8f5866d3 100644 --- a/src/video_core/host1x/nvdec.cpp +++ b/src/video_core/host1x/nvdec.cpp | |||
| @@ -28,7 +28,7 @@ void Nvdec::ProcessMethod(u32 method, u32 argument) { | |||
| 28 | } | 28 | } |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | AVFramePtr Nvdec::GetFrame() { | 31 | std::unique_ptr<FFmpeg::Frame> Nvdec::GetFrame() { |
| 32 | return codec->GetCurrentFrame(); | 32 | return codec->GetCurrentFrame(); |
| 33 | } | 33 | } |
| 34 | 34 | ||
diff --git a/src/video_core/host1x/nvdec.h b/src/video_core/host1x/nvdec.h index 3949d5181..ddddb8d28 100644 --- a/src/video_core/host1x/nvdec.h +++ b/src/video_core/host1x/nvdec.h | |||
| @@ -23,7 +23,7 @@ public: | |||
| 23 | void ProcessMethod(u32 method, u32 argument); | 23 | void ProcessMethod(u32 method, u32 argument); |
| 24 | 24 | ||
| 25 | /// Return most recently decoded frame | 25 | /// Return most recently decoded frame |
| 26 | [[nodiscard]] AVFramePtr GetFrame(); | 26 | [[nodiscard]] std::unique_ptr<FFmpeg::Frame> GetFrame(); |
| 27 | 27 | ||
| 28 | private: | 28 | private: |
| 29 | /// Invoke codec to decode a frame | 29 | /// Invoke codec to decode a frame |
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 10d7ef884..2a5eba415 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -82,27 +82,26 @@ void Vic::Execute() { | |||
| 82 | return; | 82 | return; |
| 83 | } | 83 | } |
| 84 | const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; | 84 | const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; |
| 85 | const AVFramePtr frame_ptr = nvdec_processor->GetFrame(); | 85 | auto frame = nvdec_processor->GetFrame(); |
| 86 | const auto* frame = frame_ptr.get(); | ||
| 87 | if (!frame) { | 86 | if (!frame) { |
| 88 | return; | 87 | return; |
| 89 | } | 88 | } |
| 90 | const u64 surface_width = config.surface_width_minus1 + 1; | 89 | const u64 surface_width = config.surface_width_minus1 + 1; |
| 91 | const u64 surface_height = config.surface_height_minus1 + 1; | 90 | const u64 surface_height = config.surface_height_minus1 + 1; |
| 92 | if (static_cast<u64>(frame->width) != surface_width || | 91 | if (static_cast<u64>(frame->GetWidth()) != surface_width || |
| 93 | static_cast<u64>(frame->height) != surface_height) { | 92 | static_cast<u64>(frame->GetHeight()) != surface_height) { |
| 94 | // TODO: Properly support multiple video streams with differing frame dimensions | 93 | // TODO: Properly support multiple video streams with differing frame dimensions |
| 95 | LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", | 94 | LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}", |
| 96 | frame->width, frame->height, surface_width, surface_height); | 95 | frame->GetWidth(), frame->GetHeight(), surface_width, surface_height); |
| 97 | } | 96 | } |
| 98 | switch (config.pixel_format) { | 97 | switch (config.pixel_format) { |
| 99 | case VideoPixelFormat::RGBA8: | 98 | case VideoPixelFormat::RGBA8: |
| 100 | case VideoPixelFormat::BGRA8: | 99 | case VideoPixelFormat::BGRA8: |
| 101 | case VideoPixelFormat::RGBX8: | 100 | case VideoPixelFormat::RGBX8: |
| 102 | WriteRGBFrame(frame, config); | 101 | WriteRGBFrame(std::move(frame), config); |
| 103 | break; | 102 | break; |
| 104 | case VideoPixelFormat::YUV420: | 103 | case VideoPixelFormat::YUV420: |
| 105 | WriteYUVFrame(frame, config); | 104 | WriteYUVFrame(std::move(frame), config); |
| 106 | break; | 105 | break; |
| 107 | default: | 106 | default: |
| 108 | UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); | 107 | UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value()); |
| @@ -110,10 +109,14 @@ void Vic::Execute() { | |||
| 110 | } | 109 | } |
| 111 | } | 110 | } |
| 112 | 111 | ||
| 113 | void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | 112 | void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) { |
| 114 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); | 113 | LOG_TRACE(Service_NVDRV, "Writing RGB Frame"); |
| 115 | 114 | ||
| 116 | if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) { | 115 | const auto frame_width = frame->GetWidth(); |
| 116 | const auto frame_height = frame->GetHeight(); | ||
| 117 | const auto frame_format = frame->GetPixelFormat(); | ||
| 118 | |||
| 119 | if (!scaler_ctx || frame_width != scaler_width || frame_height != scaler_height) { | ||
| 117 | const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { | 120 | const AVPixelFormat target_format = [pixel_format = config.pixel_format]() { |
| 118 | switch (pixel_format) { | 121 | switch (pixel_format) { |
| 119 | case VideoPixelFormat::RGBA8: | 122 | case VideoPixelFormat::RGBA8: |
| @@ -129,27 +132,26 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 129 | 132 | ||
| 130 | sws_freeContext(scaler_ctx); | 133 | sws_freeContext(scaler_ctx); |
| 131 | // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format | 134 | // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format |
| 132 | scaler_ctx = sws_getContext(frame->width, frame->height, | 135 | scaler_ctx = sws_getContext(frame_width, frame_height, frame_format, frame_width, |
| 133 | static_cast<AVPixelFormat>(frame->format), frame->width, | 136 | frame_height, target_format, 0, nullptr, nullptr, nullptr); |
| 134 | frame->height, target_format, 0, nullptr, nullptr, nullptr); | 137 | scaler_width = frame_width; |
| 135 | scaler_width = frame->width; | 138 | scaler_height = frame_height; |
| 136 | scaler_height = frame->height; | ||
| 137 | converted_frame_buffer.reset(); | 139 | converted_frame_buffer.reset(); |
| 138 | } | 140 | } |
| 139 | if (!converted_frame_buffer) { | 141 | if (!converted_frame_buffer) { |
| 140 | const size_t frame_size = frame->width * frame->height * 4; | 142 | const size_t frame_size = frame_width * frame_height * 4; |
| 141 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; | 143 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free}; |
| 142 | } | 144 | } |
| 143 | const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0}; | 145 | const std::array<int, 4> converted_stride{frame_width * 4, frame_height * 4, 0, 0}; |
| 144 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | 146 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; |
| 145 | sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr, | 147 | sws_scale(scaler_ctx, frame->GetPlanes(), frame->GetStrides(), 0, frame_height, |
| 146 | converted_stride.data()); | 148 | &converted_frame_buf_addr, converted_stride.data()); |
| 147 | 149 | ||
| 148 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. | 150 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. |
| 149 | const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; | 151 | const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1; |
| 150 | const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; | 152 | const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1; |
| 151 | const u32 width = std::min(surface_width, static_cast<u32>(frame->width)); | 153 | const u32 width = std::min(surface_width, static_cast<u32>(frame_width)); |
| 152 | const u32 height = std::min(surface_height, static_cast<u32>(frame->height)); | 154 | const u32 height = std::min(surface_height, static_cast<u32>(frame_height)); |
| 153 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); | 155 | const u32 blk_kind = static_cast<u32>(config.block_linear_kind); |
| 154 | if (blk_kind != 0) { | 156 | if (blk_kind != 0) { |
| 155 | // swizzle pitch linear to block linear | 157 | // swizzle pitch linear to block linear |
| @@ -169,23 +171,23 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 169 | } | 171 | } |
| 170 | } | 172 | } |
| 171 | 173 | ||
| 172 | void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | 174 | void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config) { |
| 173 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); | 175 | LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame"); |
| 174 | 176 | ||
| 175 | const std::size_t surface_width = config.surface_width_minus1 + 1; | 177 | const std::size_t surface_width = config.surface_width_minus1 + 1; |
| 176 | const std::size_t surface_height = config.surface_height_minus1 + 1; | 178 | const std::size_t surface_height = config.surface_height_minus1 + 1; |
| 177 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; | 179 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL; |
| 178 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. | 180 | // Use the minimum of surface/frame dimensions to avoid buffer overflow. |
| 179 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); | 181 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->GetWidth())); |
| 180 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); | 182 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->GetHeight())); |
| 181 | 183 | ||
| 182 | const auto stride = static_cast<size_t>(frame->linesize[0]); | 184 | const auto stride = static_cast<size_t>(frame->GetStride(0)); |
| 183 | 185 | ||
| 184 | luma_buffer.resize_destructive(aligned_width * surface_height); | 186 | luma_buffer.resize_destructive(aligned_width * surface_height); |
| 185 | chroma_buffer.resize_destructive(aligned_width * surface_height / 2); | 187 | chroma_buffer.resize_destructive(aligned_width * surface_height / 2); |
| 186 | 188 | ||
| 187 | // Populate luma buffer | 189 | // Populate luma buffer |
| 188 | const u8* luma_src = frame->data[0]; | 190 | const u8* luma_src = frame->GetData(0); |
| 189 | for (std::size_t y = 0; y < frame_height; ++y) { | 191 | for (std::size_t y = 0; y < frame_height; ++y) { |
| 190 | const std::size_t src = y * stride; | 192 | const std::size_t src = y * stride; |
| 191 | const std::size_t dst = y * aligned_width; | 193 | const std::size_t dst = y * aligned_width; |
| @@ -196,16 +198,16 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 196 | 198 | ||
| 197 | // Chroma | 199 | // Chroma |
| 198 | const std::size_t half_height = frame_height / 2; | 200 | const std::size_t half_height = frame_height / 2; |
| 199 | const auto half_stride = static_cast<size_t>(frame->linesize[1]); | 201 | const auto half_stride = static_cast<size_t>(frame->GetStride(1)); |
| 200 | 202 | ||
| 201 | switch (frame->format) { | 203 | switch (frame->GetPixelFormat()) { |
| 202 | case AV_PIX_FMT_YUV420P: { | 204 | case AV_PIX_FMT_YUV420P: { |
| 203 | // Frame from FFmpeg software | 205 | // Frame from FFmpeg software |
| 204 | // Populate chroma buffer from both channels with interleaving. | 206 | // Populate chroma buffer from both channels with interleaving. |
| 205 | const std::size_t half_width = frame_width / 2; | 207 | const std::size_t half_width = frame_width / 2; |
| 206 | u8* chroma_buffer_data = chroma_buffer.data(); | 208 | u8* chroma_buffer_data = chroma_buffer.data(); |
| 207 | const u8* chroma_b_src = frame->data[1]; | 209 | const u8* chroma_b_src = frame->GetData(1); |
| 208 | const u8* chroma_r_src = frame->data[2]; | 210 | const u8* chroma_r_src = frame->GetData(2); |
| 209 | for (std::size_t y = 0; y < half_height; ++y) { | 211 | for (std::size_t y = 0; y < half_height; ++y) { |
| 210 | const std::size_t src = y * half_stride; | 212 | const std::size_t src = y * half_stride; |
| 211 | const std::size_t dst = y * aligned_width; | 213 | const std::size_t dst = y * aligned_width; |
| @@ -219,7 +221,7 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 219 | case AV_PIX_FMT_NV12: { | 221 | case AV_PIX_FMT_NV12: { |
| 220 | // Frame from VA-API hardware | 222 | // Frame from VA-API hardware |
| 221 | // This is already interleaved so just copy | 223 | // This is already interleaved so just copy |
| 222 | const u8* chroma_src = frame->data[1]; | 224 | const u8* chroma_src = frame->GetData(1); |
| 223 | for (std::size_t y = 0; y < half_height; ++y) { | 225 | for (std::size_t y = 0; y < half_height; ++y) { |
| 224 | const std::size_t src = y * stride; | 226 | const std::size_t src = y * stride; |
| 225 | const std::size_t dst = y * aligned_width; | 227 | const std::size_t dst = y * aligned_width; |
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index 3d9753047..6c868f062 100644 --- a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h | |||
| @@ -39,9 +39,9 @@ public: | |||
| 39 | private: | 39 | private: |
| 40 | void Execute(); | 40 | void Execute(); |
| 41 | 41 | ||
| 42 | void WriteRGBFrame(const AVFrame* frame, const VicConfig& config); | 42 | void WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config); |
| 43 | 43 | ||
| 44 | void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); | 44 | void WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& config); |
| 45 | 45 | ||
| 46 | Host1x& host1x; | 46 | Host1x& host1x; |
| 47 | std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; | 47 | std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; |
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 78b42b518..efa9adf7a 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -266,7 +266,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 266 | return; | 266 | return; |
| 267 | } | 267 | } |
| 268 | if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { | 268 | if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { |
| 269 | UNREACHABLE(); | 269 | ASSERT(false); |
| 270 | return; | 270 | return; |
| 271 | } | 271 | } |
| 272 | query_base->value += streamer->GetAmmendValue(); | 272 | query_base->value += streamer->GetAmmendValue(); |
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 65cd5aa06..4f1d5b548 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include "common/alignment.h" | 4 | #include "common/alignment.h" |
| 5 | #include "core/memory.h" | 5 | #include "core/memory.h" |
| 6 | #include "video_core/control/channel_state.h" | ||
| 6 | #include "video_core/host1x/host1x.h" | 7 | #include "video_core/host1x/host1x.h" |
| 7 | #include "video_core/memory_manager.h" | 8 | #include "video_core/memory_manager.h" |
| 8 | #include "video_core/renderer_null/null_rasterizer.h" | 9 | #include "video_core/renderer_null/null_rasterizer.h" |
| @@ -99,8 +100,14 @@ bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 99 | } | 100 | } |
| 100 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 101 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 101 | const VideoCore::DiskResourceLoadCallback& callback) {} | 102 | const VideoCore::DiskResourceLoadCallback& callback) {} |
| 102 | void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {} | 103 | void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) { |
| 103 | void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {} | 104 | CreateChannel(channel); |
| 104 | void RasterizerNull::ReleaseChannel(s32 channel_id) {} | 105 | } |
| 106 | void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) { | ||
| 107 | BindToChannel(channel.bind_id); | ||
| 108 | } | ||
| 109 | void RasterizerNull::ReleaseChannel(s32 channel_id) { | ||
| 110 | EraseChannel(channel_id); | ||
| 111 | } | ||
| 105 | 112 | ||
| 106 | } // namespace Null | 113 | } // namespace Null |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 38d553d3c..dfd696de6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -178,13 +178,14 @@ void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, | |||
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, | 180 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, |
| 181 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { | 181 | std::span<const VideoCommon::BufferCopy> copies, bool barrier, |
| 182 | bool) { | ||
| 182 | CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); | 183 | CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); |
| 183 | } | 184 | } |
| 184 | 185 | ||
| 185 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 186 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 186 | std::span<const VideoCommon::BufferCopy> copies) { | 187 | std::span<const VideoCommon::BufferCopy> copies, bool) { |
| 187 | CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); | 188 | CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies, true); |
| 188 | } | 189 | } |
| 189 | 190 | ||
| 190 | void BufferCacheRuntime::PreCopyBarrier() { | 191 | void BufferCacheRuntime::PreCopyBarrier() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index e8dbbd3a2..000f29a82 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -30,6 +30,8 @@ public: | |||
| 30 | 30 | ||
| 31 | void MakeResident(GLenum access) noexcept; | 31 | void MakeResident(GLenum access) noexcept; |
| 32 | 32 | ||
| 33 | void MarkUsage(u64 offset, u64 size) {} | ||
| 34 | |||
| 33 | [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | 35 | [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); |
| 34 | 36 | ||
| 35 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | 37 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |
| @@ -66,22 +68,29 @@ public: | |||
| 66 | 68 | ||
| 67 | [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); | 69 | [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); |
| 68 | 70 | ||
| 71 | bool CanReorderUpload(const Buffer&, std::span<const VideoCommon::BufferCopy>) { | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 69 | void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, | 75 | void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, |
| 70 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | 76 | std::span<const VideoCommon::BufferCopy> copies, bool barrier); |
| 71 | 77 | ||
| 72 | void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, | 78 | void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, |
| 73 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | 79 | std::span<const VideoCommon::BufferCopy> copies, bool barrier); |
| 74 | 80 | ||
| 75 | void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, | 81 | void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, |
| 76 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | 82 | std::span<const VideoCommon::BufferCopy> copies, bool barrier, |
| 83 | bool can_reorder_upload = false); | ||
| 77 | 84 | ||
| 78 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 85 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 79 | std::span<const VideoCommon::BufferCopy> copies); | 86 | std::span<const VideoCommon::BufferCopy> copies, bool); |
| 80 | 87 | ||
| 81 | void PreCopyBarrier(); | 88 | void PreCopyBarrier(); |
| 82 | void PostCopyBarrier(); | 89 | void PostCopyBarrier(); |
| 83 | void Finish(); | 90 | void Finish(); |
| 84 | 91 | ||
| 92 | void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {} | ||
| 93 | |||
| 85 | void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); | 94 | void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); |
| 86 | 95 | ||
| 87 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); | 96 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 44a771d65..af0a453ee 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -559,7 +559,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 559 | } | 559 | } |
| 560 | 560 | ||
| 561 | void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { | 561 | void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { |
| 562 | glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS); | 562 | const GLenum buffer_mode = |
| 563 | num_xfb_buffers_active == 1 ? GL_INTERLEAVED_ATTRIBS : GL_SEPARATE_ATTRIBS; | ||
| 564 | glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), buffer_mode); | ||
| 563 | } | 565 | } |
| 564 | 566 | ||
| 565 | void GraphicsPipeline::GenerateTransformFeedbackState() { | 567 | void GraphicsPipeline::GenerateTransformFeedbackState() { |
| @@ -567,12 +569,14 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { | |||
| 567 | // when this is required. | 569 | // when this is required. |
| 568 | GLint* cursor{xfb_attribs.data()}; | 570 | GLint* cursor{xfb_attribs.data()}; |
| 569 | 571 | ||
| 572 | num_xfb_buffers_active = 0; | ||
| 570 | for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { | 573 | for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { |
| 571 | const auto& layout = key.xfb_state.layouts[feedback]; | 574 | const auto& layout = key.xfb_state.layouts[feedback]; |
| 572 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); | 575 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); |
| 573 | if (layout.varying_count == 0) { | 576 | if (layout.varying_count == 0) { |
| 574 | continue; | 577 | continue; |
| 575 | } | 578 | } |
| 579 | num_xfb_buffers_active++; | ||
| 576 | 580 | ||
| 577 | const auto& locations = key.xfb_state.varyings[feedback]; | 581 | const auto& locations = key.xfb_state.varyings[feedback]; |
| 578 | std::optional<u32> current_index; | 582 | std::optional<u32> current_index; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 74fc9cc3d..2f70c1ae9 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -154,6 +154,7 @@ private: | |||
| 154 | 154 | ||
| 155 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; | 155 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; |
| 156 | GLsizei num_xfb_attribs{}; | 156 | GLsizei num_xfb_attribs{}; |
| 157 | u32 num_xfb_buffers_active{}; | ||
| 157 | std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; | 158 | std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; |
| 158 | 159 | ||
| 159 | std::mutex built_mutex; | 160 | std::mutex built_mutex; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 27e2de1bf..9995b6dd4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -555,7 +555,7 @@ void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | |||
| 555 | } | 555 | } |
| 556 | { | 556 | { |
| 557 | std::scoped_lock lock{buffer_cache.mutex}; | 557 | std::scoped_lock lock{buffer_cache.mutex}; |
| 558 | buffer_cache.CachedWriteMemory(addr, size); | 558 | buffer_cache.WriteMemory(addr, size); |
| 559 | } | 559 | } |
| 560 | shader_cache.InvalidateRegion(addr, size); | 560 | shader_cache.InvalidateRegion(addr, size); |
| 561 | } | 561 | } |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e7a80740..c4c30d807 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 132 | const bool use_accelerated = | 132 | const bool use_accelerated = |
| 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 134 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 134 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 135 | RenderScreenshot(*framebuffer, use_accelerated); | ||
| 135 | 136 | ||
| 136 | { | 137 | Frame* frame = present_manager.GetRenderFrame(); |
| 137 | std::scoped_lock lock{rasterizer.LockCaches()}; | 138 | blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); |
| 138 | RenderScreenshot(*framebuffer, use_accelerated); | 139 | scheduler.Flush(*frame->render_ready); |
| 139 | 140 | present_manager.Present(frame); | |
| 140 | Frame* frame = present_manager.GetRenderFrame(); | ||
| 141 | blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); | ||
| 142 | scheduler.Flush(*frame->render_ready); | ||
| 143 | present_manager.Present(frame); | ||
| 144 | } | ||
| 145 | 141 | ||
| 146 | gpu.RendererFrameEndNotify(); | 142 | gpu.RendererFrameEndNotify(); |
| 147 | rasterizer.TickFrame(); | 143 | rasterizer.TickFrame(); |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 52fc142d1..66483a900 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin | |||
| 137 | 137 | ||
| 138 | BlitScreen::~BlitScreen() = default; | 138 | BlitScreen::~BlitScreen() = default; |
| 139 | 139 | ||
| 140 | static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer, | ||
| 141 | const ScreenInfo& screen_info) { | ||
| 142 | f32 left, top, right, bottom; | ||
| 143 | |||
| 144 | if (!framebuffer.crop_rect.IsEmpty()) { | ||
| 145 | // If crop rectangle is not empty, apply properties from rectangle. | ||
| 146 | left = static_cast<f32>(framebuffer.crop_rect.left); | ||
| 147 | top = static_cast<f32>(framebuffer.crop_rect.top); | ||
| 148 | right = static_cast<f32>(framebuffer.crop_rect.right); | ||
| 149 | bottom = static_cast<f32>(framebuffer.crop_rect.bottom); | ||
| 150 | } else { | ||
| 151 | // Otherwise, fall back to framebuffer dimensions. | ||
| 152 | left = 0; | ||
| 153 | top = 0; | ||
| 154 | right = static_cast<f32>(framebuffer.width); | ||
| 155 | bottom = static_cast<f32>(framebuffer.height); | ||
| 156 | } | ||
| 157 | |||
| 158 | // Apply transformation flags. | ||
| 159 | auto framebuffer_transform_flags = framebuffer.transform_flags; | ||
| 160 | |||
| 161 | if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { | ||
| 162 | // Switch left and right. | ||
| 163 | std::swap(left, right); | ||
| 164 | } | ||
| 165 | if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { | ||
| 166 | // Switch top and bottom. | ||
| 167 | std::swap(top, bottom); | ||
| 168 | } | ||
| 169 | |||
| 170 | framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; | ||
| 171 | framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; | ||
| 172 | if (True(framebuffer_transform_flags)) { | ||
| 173 | UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", | ||
| 174 | static_cast<u32>(framebuffer_transform_flags)); | ||
| 175 | } | ||
| 176 | |||
| 177 | // Get the screen properties. | ||
| 178 | const f32 screen_width = static_cast<f32>(screen_info.width); | ||
| 179 | const f32 screen_height = static_cast<f32>(screen_info.height); | ||
| 180 | |||
| 181 | // Normalize coordinate space. | ||
| 182 | left /= screen_width; | ||
| 183 | top /= screen_height; | ||
| 184 | right /= screen_width; | ||
| 185 | bottom /= screen_height; | ||
| 186 | |||
| 187 | return Common::Rectangle<f32>(left, top, right, bottom); | ||
| 188 | } | ||
| 189 | |||
| 140 | void BlitScreen::Recreate() { | 190 | void BlitScreen::Recreate() { |
| 141 | present_manager.WaitPresent(); | 191 | present_manager.WaitPresent(); |
| 142 | scheduler.Finish(); | 192 | scheduler.Finish(); |
| @@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 354 | source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); | 404 | source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); |
| 355 | } | 405 | } |
| 356 | if (fsr) { | 406 | if (fsr) { |
| 357 | auto crop_rect = framebuffer.crop_rect; | 407 | const auto crop_rect = NormalizeCrop(framebuffer, screen_info); |
| 358 | if (crop_rect.GetWidth() == 0) { | 408 | const VkExtent2D fsr_input_size{ |
| 359 | crop_rect.right = framebuffer.width; | 409 | .width = Settings::values.resolution_info.ScaleUp(screen_info.width), |
| 360 | } | 410 | .height = Settings::values.resolution_info.ScaleUp(screen_info.height), |
| 361 | if (crop_rect.GetHeight() == 0) { | ||
| 362 | crop_rect.bottom = framebuffer.height; | ||
| 363 | } | ||
| 364 | crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); | ||
| 365 | VkExtent2D fsr_input_size{ | ||
| 366 | .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), | ||
| 367 | .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), | ||
| 368 | }; | 411 | }; |
| 369 | VkImageView fsr_image_view = | 412 | VkImageView fsr_image_view = |
| 370 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); | 413 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); |
| @@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou | |||
| 1397 | 1440 | ||
| 1398 | void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, | 1441 | void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, |
| 1399 | const Layout::FramebufferLayout layout) const { | 1442 | const Layout::FramebufferLayout layout) const { |
| 1400 | const auto& framebuffer_transform_flags = framebuffer.transform_flags; | 1443 | f32 left, top, right, bottom; |
| 1401 | const auto& framebuffer_crop_rect = framebuffer.crop_rect; | ||
| 1402 | |||
| 1403 | static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f}; | ||
| 1404 | auto left = texcoords.left; | ||
| 1405 | auto right = texcoords.right; | ||
| 1406 | |||
| 1407 | switch (framebuffer_transform_flags) { | ||
| 1408 | case Service::android::BufferTransformFlags::Unset: | ||
| 1409 | break; | ||
| 1410 | case Service::android::BufferTransformFlags::FlipV: | ||
| 1411 | // Flip the framebuffer vertically | ||
| 1412 | left = texcoords.right; | ||
| 1413 | right = texcoords.left; | ||
| 1414 | break; | ||
| 1415 | default: | ||
| 1416 | UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", | ||
| 1417 | static_cast<u32>(framebuffer_transform_flags)); | ||
| 1418 | break; | ||
| 1419 | } | ||
| 1420 | 1444 | ||
| 1421 | UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); | 1445 | if (fsr) { |
| 1422 | 1446 | // FSR has already applied the crop, so we just want to render the image | |
| 1423 | f32 left_start{}; | 1447 | // it has produced. |
| 1424 | if (framebuffer_crop_rect.Top() > 0) { | 1448 | left = 0; |
| 1425 | left_start = static_cast<f32>(framebuffer_crop_rect.Top()) / | 1449 | top = 0; |
| 1426 | static_cast<f32>(framebuffer_crop_rect.Bottom()); | 1450 | right = 1; |
| 1427 | } | 1451 | bottom = 1; |
| 1428 | f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width); | 1452 | } else { |
| 1429 | f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height); | 1453 | // Get the normalized crop rectangle. |
| 1430 | // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering | 1454 | const auto crop = NormalizeCrop(framebuffer, screen_info); |
| 1431 | // (e.g. handheld mode) on a 1920x1080 framebuffer. | 1455 | |
| 1432 | if (!fsr) { | 1456 | // Apply the crop. |
| 1433 | if (framebuffer_crop_rect.GetWidth() > 0) { | 1457 | left = crop.left; |
| 1434 | scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / | 1458 | top = crop.top; |
| 1435 | static_cast<f32>(screen_info.width); | 1459 | right = crop.right; |
| 1436 | } | 1460 | bottom = crop.bottom; |
| 1437 | if (framebuffer_crop_rect.GetHeight() > 0) { | ||
| 1438 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / | ||
| 1439 | static_cast<f32>(screen_info.height); | ||
| 1440 | } | ||
| 1441 | } | 1461 | } |
| 1442 | 1462 | ||
| 1463 | // Map the coordinates to the screen. | ||
| 1443 | const auto& screen = layout.screen; | 1464 | const auto& screen = layout.screen; |
| 1444 | const auto x = static_cast<f32>(screen.left); | 1465 | const auto x = static_cast<f32>(screen.left); |
| 1445 | const auto y = static_cast<f32>(screen.top); | 1466 | const auto y = static_cast<f32>(screen.top); |
| 1446 | const auto w = static_cast<f32>(screen.GetWidth()); | 1467 | const auto w = static_cast<f32>(screen.GetWidth()); |
| 1447 | const auto h = static_cast<f32>(screen.GetHeight()); | 1468 | const auto h = static_cast<f32>(screen.GetHeight()); |
| 1448 | data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v); | 1469 | |
| 1449 | data.vertices[1] = | 1470 | data.vertices[0] = ScreenRectVertex(x, y, left, top); |
| 1450 | ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v); | 1471 | data.vertices[1] = ScreenRectVertex(x + w, y, right, top); |
| 1451 | data.vertices[2] = | 1472 | data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom); |
| 1452 | ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v); | 1473 | data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom); |
| 1453 | data.vertices[3] = | ||
| 1454 | ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v); | ||
| 1455 | } | 1474 | } |
| 1456 | 1475 | ||
| 1457 | void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { | 1476 | void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { |
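Note on the crop handling above: the blit path now derives one normalized crop rectangle (NormalizeCrop) and feeds it both to FSR and to the vertex texture coordinates, replacing the per-path scale_u/scale_v math. The sketch below is a minimal standalone model of that normalization, assuming an empty crop falls back to the full framebuffer and that the result is divided by the screen-texture dimensions as shown above; names and the fallback are illustrative, not the exact yuzu helper.

    #include <cstdio>

    struct RectF {
        float left, top, right, bottom;
    };

    // Illustrative only: an empty crop falls back to the full framebuffer,
    // then the pixel rectangle is divided by the screen-texture size so the
    // same values can serve as FSR viewport factors and as texcoords.
    RectF NormalizeCrop(RectF crop, float fb_width, float fb_height,
                        float screen_width, float screen_height) {
        if (crop.right - crop.left <= 0.0f) {
            crop.left = 0.0f;
            crop.right = fb_width;
        }
        if (crop.bottom - crop.top <= 0.0f) {
            crop.top = 0.0f;
            crop.bottom = fb_height;
        }
        return {crop.left / screen_width, crop.top / screen_height,
                crop.right / screen_width, crop.bottom / screen_height};
    }

    int main() {
        // 1280x720 handheld frame presented through a 1920x1080 screen texture:
        const RectF c = NormalizeCrop({0, 0, 1280, 720}, 1280, 720, 1920, 1080);
        std::printf("%.3f %.3f %.3f %.3f\n", c.left, c.top, c.right, c.bottom);
        // Prints 0.000 0.000 0.667 0.667 -- the texcoords used when FSR is off.
    }

When FSR is enabled, SetVertexData instead samples the full [0,1] range, since the crop has already been applied by the FSR pass.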
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 976c3f6a6..5958f52f7 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -79,13 +79,13 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| 79 | } // Anonymous namespace | 79 | } // Anonymous namespace |
| 80 | 80 | ||
| 81 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | 81 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 82 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | 82 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {} |
| 83 | 83 | ||
| 84 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 84 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 85 | VAddr cpu_addr_, u64 size_bytes_) | 85 | VAddr cpu_addr_, u64 size_bytes_) |
| 86 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | 86 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), |
| 87 | device{&runtime.device}, buffer{ | 87 | device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, |
| 88 | CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { | 88 | tracker{SizeBytes()} { |
| 89 | if (runtime.device.HasDebuggingToolAttached()) { | 89 | if (runtime.device.HasDebuggingToolAttached()) { |
| 90 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 90 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 91 | } | 91 | } |
| @@ -359,12 +359,31 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const { | |||
| 359 | return static_cast<u32>(device.GetStorageBufferAlignment()); | 359 | return static_cast<u32>(device.GetStorageBufferAlignment()); |
| 360 | } | 360 | } |
| 361 | 361 | ||
| 362 | void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept { | ||
| 363 | for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) { | ||
| 364 | it->ResetUsageTracking(); | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 362 | void BufferCacheRuntime::Finish() { | 368 | void BufferCacheRuntime::Finish() { |
| 363 | scheduler.Finish(); | 369 | scheduler.Finish(); |
| 364 | } | 370 | } |
| 365 | 371 | ||
| 372 | bool BufferCacheRuntime::CanReorderUpload(const Buffer& buffer, | ||
| 373 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 374 | if (Settings::values.disable_buffer_reorder) { | ||
| 375 | return false; | ||
| 376 | } | ||
| 377 | const bool can_use_upload_cmdbuf = | ||
| 378 | std::ranges::all_of(copies, [&](const VideoCommon::BufferCopy& copy) { | ||
| 379 | return !buffer.IsRegionUsed(copy.dst_offset, copy.size); | ||
| 380 | }); | ||
| 381 | return can_use_upload_cmdbuf; | ||
| 382 | } | ||
| 383 | |||
| 366 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | 384 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 367 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { | 385 | std::span<const VideoCommon::BufferCopy> copies, bool barrier, |
| 386 | bool can_reorder_upload) { | ||
| 368 | if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { | 387 | if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { |
| 369 | return; | 388 | return; |
| 370 | } | 389 | } |
| @@ -380,9 +399,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | |||
| 380 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 399 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 381 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | 400 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 382 | }; | 401 | }; |
| 402 | |||
| 383 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up | 403 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 384 | boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); | 404 | boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); |
| 385 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); | 405 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 406 | if (src_buffer == staging_pool.StreamBuf() && can_reorder_upload) { | ||
| 407 | scheduler.RecordWithUploadBuffer([src_buffer, dst_buffer, vk_copies]( | ||
| 408 | vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { | ||
| 409 | upload_cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 410 | }); | ||
| 411 | return; | ||
| 412 | } | ||
| 413 | |||
| 386 | scheduler.RequestOutsideRenderPassOperationContext(); | 414 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 387 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { | 415 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { |
| 388 | if (barrier) { | 416 | if (barrier) { |
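The reordering rule above only fires when the source is the staging pool's stream buffer and none of the destination regions have been touched this frame; such copies are recorded into the upload command buffer instead of interrupting the main one. Below is a self-contained toy model of that decision, with hypothetical FrameUsage/Copy types standing in for the yuzu buffer tracker.

    #include <cstdint>
    #include <span>
    #include <utility>
    #include <vector>

    struct Copy {
        uint64_t dst_offset;
        uint64_t size;
    };

    // Stand-in for the per-buffer usage tracker: remembers which byte ranges
    // were already read or written during the current frame.
    class FrameUsage {
    public:
        bool IsUsed(uint64_t offset, uint64_t size) const {
            for (const auto& [o, s] : used) {
                if (offset < o + s && o < offset + size) {
                    return true; // ranges overlap
                }
            }
            return false;
        }
        void Track(uint64_t offset, uint64_t size) {
            used.emplace_back(offset, size);
        }
        void Reset() { // analogous to TickFrame() resetting every buffer
            used.clear();
        }

    private:
        std::vector<std::pair<uint64_t, uint64_t>> used;
    };

    // Mirrors the shape of CanReorderUpload: the copy may move to the upload
    // command buffer only if no destination byte was used earlier this frame.
    bool CanReorder(const FrameUsage& usage, std::span<const Copy> copies) {
        for (const Copy& c : copies) {
            if (usage.IsUsed(c.dst_offset, c.size)) {
                return false;
            }
        }
        return true;
    }

In the real code the check is additionally gated on Settings::values.disable_buffer_reorder and on the copy source being staging_pool.StreamBuf().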
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 833dfac45..0b3fbd6d0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include "video_core/buffer_cache/buffer_cache_base.h" | 6 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 7 | #include "video_core/buffer_cache/memory_tracker_base.h" | 7 | #include "video_core/buffer_cache/memory_tracker_base.h" |
| 8 | #include "video_core/buffer_cache/usage_tracker.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 10 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -34,6 +35,18 @@ public: | |||
| 34 | return *buffer; | 35 | return *buffer; |
| 35 | } | 36 | } |
| 36 | 37 | ||
| 38 | [[nodiscard]] bool IsRegionUsed(u64 offset, u64 size) const noexcept { | ||
| 39 | return tracker.IsUsed(offset, size); | ||
| 40 | } | ||
| 41 | |||
| 42 | void MarkUsage(u64 offset, u64 size) noexcept { | ||
| 43 | tracker.Track(offset, size); | ||
| 44 | } | ||
| 45 | |||
| 46 | void ResetUsageTracking() noexcept { | ||
| 47 | tracker.Reset(); | ||
| 48 | } | ||
| 49 | |||
| 37 | operator VkBuffer() const noexcept { | 50 | operator VkBuffer() const noexcept { |
| 38 | return *buffer; | 51 | return *buffer; |
| 39 | } | 52 | } |
| @@ -49,6 +62,7 @@ private: | |||
| 49 | const Device* device{}; | 62 | const Device* device{}; |
| 50 | vk::Buffer buffer; | 63 | vk::Buffer buffer; |
| 51 | std::vector<BufferView> views; | 64 | std::vector<BufferView> views; |
| 65 | VideoCommon::UsageTracker tracker; | ||
| 52 | }; | 66 | }; |
| 53 | 67 | ||
| 54 | class QuadArrayIndexBuffer; | 68 | class QuadArrayIndexBuffer; |
| @@ -67,6 +81,8 @@ public: | |||
| 67 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | 81 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
| 68 | DescriptorPool& descriptor_pool); | 82 | DescriptorPool& descriptor_pool); |
| 69 | 83 | ||
| 84 | void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept; | ||
| 85 | |||
| 70 | void Finish(); | 86 | void Finish(); |
| 71 | 87 | ||
| 72 | u64 GetDeviceLocalMemory() const; | 88 | u64 GetDeviceLocalMemory() const; |
| @@ -81,12 +97,15 @@ public: | |||
| 81 | 97 | ||
| 82 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); | 98 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); |
| 83 | 99 | ||
| 100 | bool CanReorderUpload(const Buffer& buffer, std::span<const VideoCommon::BufferCopy> copies); | ||
| 101 | |||
| 84 | void FreeDeferredStagingBuffer(StagingBufferRef& ref); | 102 | void FreeDeferredStagingBuffer(StagingBufferRef& ref); |
| 85 | 103 | ||
| 86 | void PreCopyBarrier(); | 104 | void PreCopyBarrier(); |
| 87 | 105 | ||
| 88 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, | 106 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, |
| 89 | std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); | 107 | std::span<const VideoCommon::BufferCopy> copies, bool barrier, |
| 108 | bool can_reorder_upload = false); | ||
| 90 | 109 | ||
| 91 | void PostCopyBarrier(); | 110 | void PostCopyBarrier(); |
| 92 | 111 | ||
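usage_tracker.h is added to the build but its contents are not part of this diff. Judging from how IsRegionUsed/MarkUsage/ResetUsageTracking are used, a plausible minimal shape is a page-granular bitmap over the buffer; the sketch below is an assumption about that header, not its actual implementation (page size and member names are made up).

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    namespace VideoCommon {

    // Hypothetical page-granular tracker; the real usage_tracker.h may differ.
    class UsageTracker {
        static constexpr size_t PAGE_SHIFT = 12; // 4 KiB pages, assumed

    public:
        explicit UsageTracker(size_t size_bytes)
            : pages((size_bytes >> PAGE_SHIFT) + 1, false) {}

        void Track(uint64_t offset, uint64_t size) noexcept {
            if (size == 0) {
                return;
            }
            for (size_t page = offset >> PAGE_SHIFT;
                 page <= (offset + size - 1) >> PAGE_SHIFT && page < pages.size(); ++page) {
                pages[page] = true;
            }
        }

        [[nodiscard]] bool IsUsed(uint64_t offset, uint64_t size) const noexcept {
            if (size == 0) {
                return false;
            }
            for (size_t page = offset >> PAGE_SHIFT;
                 page <= (offset + size - 1) >> PAGE_SHIFT && page < pages.size(); ++page) {
                if (pages[page]) {
                    return true;
                }
            }
            return false;
        }

        void Reset() noexcept {
            pages.assign(pages.size(), false);
        }

    private:
        std::vector<bool> pages;
    };

    } // namespace VideoCommon

A size-in-bytes constructor like this is at least consistent with the null-buffer case above, which constructs tracker{4096}.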
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index ce8f3f3c2..f7a05fbc0 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp | |||
| @@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image | |||
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, | 36 | VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, |
| 37 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { | 37 | VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) { |
| 38 | 38 | ||
| 39 | UpdateDescriptorSet(image_index, image_view); | 39 | UpdateDescriptorSet(image_index, image_view); |
| 40 | 40 | ||
| @@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag | |||
| 61 | 61 | ||
| 62 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); | 62 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); |
| 63 | 63 | ||
| 64 | const f32 input_image_width = static_cast<f32>(input_image_extent.width); | ||
| 65 | const f32 input_image_height = static_cast<f32>(input_image_extent.height); | ||
| 66 | const f32 output_image_width = static_cast<f32>(output_size.width); | ||
| 67 | const f32 output_image_height = static_cast<f32>(output_size.height); | ||
| 68 | const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; | ||
| 69 | const f32 viewport_x = crop_rect.left * input_image_width; | ||
| 70 | const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; | ||
| 71 | const f32 viewport_y = crop_rect.top * input_image_height; | ||
| 72 | |||
| 64 | std::array<u32, 4 * 4> push_constants; | 73 | std::array<u32, 4 * 4> push_constants; |
| 65 | FsrEasuConOffset( | 74 | FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4, |
| 66 | push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, | 75 | push_constants.data() + 8, push_constants.data() + 12, |
| 67 | push_constants.data() + 12, | 76 | |
| 68 | 77 | viewport_width, viewport_height, input_image_width, input_image_height, | |
| 69 | static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), | 78 | output_image_width, output_image_height, viewport_x, viewport_y); |
| 70 | static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height), | ||
| 71 | static_cast<f32>(output_size.width), static_cast<f32>(output_size.height), | ||
| 72 | static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); | ||
| 73 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); | 79 | cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); |
| 74 | 80 | ||
| 75 | { | 81 | { |
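With the crop now expressed in normalized [0,1] coordinates, the EASU viewport is simply the crop scaled back up by the input-image extent. A small worked example of that arithmetic follows; the resolutions are purely illustrative.

    #include <cstdio>

    int main() {
        // Assume a 1280x720 guest frame rendered at 2x, so the image FSR
        // samples from is 2560x1440 (analogous to fsr_input_size above).
        const float input_w = 2560.0f;
        const float input_h = 1440.0f;

        // Normalized crop from NormalizeCrop(): full height, left two thirds.
        const float left = 0.0f, top = 0.0f, right = 2.0f / 3.0f, bottom = 1.0f;

        const float viewport_x = left * input_w;              // 0.0
        const float viewport_y = top * input_h;               // 0.0
        const float viewport_w = (right - left) * input_w;    // ~1706.7
        const float viewport_h = (bottom - top) * input_h;    // 1440.0

        std::printf("EASU viewport: %.1f x %.1f at (%.1f, %.1f)\n",
                    viewport_w, viewport_h, viewport_x, viewport_y);
    }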
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 8bb9fc23a..3505c1416 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h | |||
| @@ -17,7 +17,7 @@ public: | |||
| 17 | explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, | 17 | explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, |
| 18 | VkExtent2D output_size); | 18 | VkExtent2D output_size); |
| 19 | VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, | 19 | VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, |
| 20 | VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); | 20 | VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect); |
| 21 | 21 | ||
| 22 | private: | 22 | private: |
| 23 | void CreateDescriptorPool(); | 23 | void CreateDescriptorPool(); |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 6b288b994..ac8b6e838 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp | |||
| @@ -100,12 +100,14 @@ void MasterSemaphore::Wait(u64 tick) { | |||
| 100 | Refresh(); | 100 | Refresh(); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 103 | VkResult MasterSemaphore::SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, |
| 104 | VkSemaphore wait_semaphore, u64 host_tick) { | 104 | VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, |
| 105 | u64 host_tick) { | ||
| 105 | if (semaphore) { | 106 | if (semaphore) { |
| 106 | return SubmitQueueTimeline(cmdbuf, signal_semaphore, wait_semaphore, host_tick); | 107 | return SubmitQueueTimeline(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, |
| 108 | host_tick); | ||
| 107 | } else { | 109 | } else { |
| 108 | return SubmitQueueFence(cmdbuf, signal_semaphore, wait_semaphore, host_tick); | 110 | return SubmitQueueFence(cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, host_tick); |
| 109 | } | 111 | } |
| 110 | } | 112 | } |
| 111 | 113 | ||
| @@ -115,6 +117,7 @@ static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ | |||
| 115 | }; | 117 | }; |
| 116 | 118 | ||
| 117 | VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, | 119 | VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, |
| 120 | vk::CommandBuffer& upload_cmdbuf, | ||
| 118 | VkSemaphore signal_semaphore, | 121 | VkSemaphore signal_semaphore, |
| 119 | VkSemaphore wait_semaphore, u64 host_tick) { | 122 | VkSemaphore wait_semaphore, u64 host_tick) { |
| 120 | const VkSemaphore timeline_semaphore = *semaphore; | 123 | const VkSemaphore timeline_semaphore = *semaphore; |
| @@ -123,6 +126,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, | |||
| 123 | const std::array signal_values{host_tick, u64(0)}; | 126 | const std::array signal_values{host_tick, u64(0)}; |
| 124 | const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; | 127 | const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; |
| 125 | 128 | ||
| 129 | const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; | ||
| 130 | |||
| 126 | const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; | 131 | const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; |
| 127 | const VkTimelineSemaphoreSubmitInfo timeline_si{ | 132 | const VkTimelineSemaphoreSubmitInfo timeline_si{ |
| 128 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, | 133 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, |
| @@ -138,8 +143,8 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, | |||
| 138 | .waitSemaphoreCount = num_wait_semaphores, | 143 | .waitSemaphoreCount = num_wait_semaphores, |
| 139 | .pWaitSemaphores = &wait_semaphore, | 144 | .pWaitSemaphores = &wait_semaphore, |
| 140 | .pWaitDstStageMask = wait_stage_masks.data(), | 145 | .pWaitDstStageMask = wait_stage_masks.data(), |
| 141 | .commandBufferCount = 1, | 146 | .commandBufferCount = static_cast<u32>(cmdbuffers.size()), |
| 142 | .pCommandBuffers = cmdbuf.address(), | 147 | .pCommandBuffers = cmdbuffers.data(), |
| 143 | .signalSemaphoreCount = num_signal_semaphores, | 148 | .signalSemaphoreCount = num_signal_semaphores, |
| 144 | .pSignalSemaphores = signal_semaphores.data(), | 149 | .pSignalSemaphores = signal_semaphores.data(), |
| 145 | }; | 150 | }; |
| @@ -147,19 +152,23 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, | |||
| 147 | return device.GetGraphicsQueue().Submit(submit_info); | 152 | return device.GetGraphicsQueue().Submit(submit_info); |
| 148 | } | 153 | } |
| 149 | 154 | ||
| 150 | VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 155 | VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, |
| 151 | VkSemaphore wait_semaphore, u64 host_tick) { | 156 | vk::CommandBuffer& upload_cmdbuf, |
| 157 | VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, | ||
| 158 | u64 host_tick) { | ||
| 152 | const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; | 159 | const u32 num_signal_semaphores = signal_semaphore ? 1 : 0; |
| 153 | const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; | 160 | const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; |
| 154 | 161 | ||
| 162 | const std::array cmdbuffers{*upload_cmdbuf, *cmdbuf}; | ||
| 163 | |||
| 155 | const VkSubmitInfo submit_info{ | 164 | const VkSubmitInfo submit_info{ |
| 156 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | 165 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, |
| 157 | .pNext = nullptr, | 166 | .pNext = nullptr, |
| 158 | .waitSemaphoreCount = num_wait_semaphores, | 167 | .waitSemaphoreCount = num_wait_semaphores, |
| 159 | .pWaitSemaphores = &wait_semaphore, | 168 | .pWaitSemaphores = &wait_semaphore, |
| 160 | .pWaitDstStageMask = wait_stage_masks.data(), | 169 | .pWaitDstStageMask = wait_stage_masks.data(), |
| 161 | .commandBufferCount = 1, | 170 | .commandBufferCount = static_cast<u32>(cmdbuffers.size()), |
| 162 | .pCommandBuffers = cmdbuf.address(), | 171 | .pCommandBuffers = cmdbuffers.data(), |
| 163 | .signalSemaphoreCount = num_signal_semaphores, | 172 | .signalSemaphoreCount = num_signal_semaphores, |
| 164 | .pSignalSemaphores = &signal_semaphore, | 173 | .pSignalSemaphores = &signal_semaphore, |
| 165 | }; | 174 | }; |
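Both submit paths now hand the graphics queue two command buffers in a single VkSubmitInfo, with the upload buffer first. A bare-Vulkan illustration of that layout (fence-only, no semaphores, names invented):

    #include <array>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Illustrative only: one submission carrying the upload and main command
    // buffers, in that order.
    VkResult SubmitPair(VkQueue queue, VkCommandBuffer upload_cmdbuf,
                        VkCommandBuffer cmdbuf, VkFence fence) {
        const std::array cmdbuffers{upload_cmdbuf, cmdbuf};
        const VkSubmitInfo submit_info{
            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = nullptr,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = nullptr,
            .pWaitDstStageMask = nullptr,
            .commandBufferCount = static_cast<uint32_t>(cmdbuffers.size()),
            .pCommandBuffers = cmdbuffers.data(),
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = nullptr,
        };
        return vkQueueSubmit(queue, 1, &submit_info, fence);
    }

Submission order alone only guarantees that the upload buffer begins execution first; the transfer-to-all-commands memory barrier recorded at the end of the upload buffer in vk_scheduler.cpp below is what makes those writes visible to the main buffer's work.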
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 3f599d7bd..7dfb93ffb 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -52,14 +52,16 @@ public: | |||
| 52 | void Wait(u64 tick); | 52 | void Wait(u64 tick); |
| 53 | 53 | ||
| 54 | /// Submits the device graphics queue, updating the tick as necessary | 54 | /// Submits the device graphics queue, updating the tick as necessary |
| 55 | VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 55 | VkResult SubmitQueue(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, |
| 56 | VkSemaphore wait_semaphore, u64 host_tick); | 56 | VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, u64 host_tick); |
| 57 | 57 | ||
| 58 | private: | 58 | private: |
| 59 | VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 59 | VkResult SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, |
| 60 | VkSemaphore wait_semaphore, u64 host_tick); | 60 | VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, |
| 61 | VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 61 | u64 host_tick); |
| 62 | VkSemaphore wait_semaphore, u64 host_tick); | 62 | VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, vk::CommandBuffer& upload_cmdbuf, |
| 63 | VkSemaphore signal_semaphore, VkSemaphore wait_semaphore, | ||
| 64 | u64 host_tick); | ||
| 63 | 65 | ||
| 64 | void WaitThread(std::stop_token token); | 66 | void WaitThread(std::stop_token token); |
| 65 | 67 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0d604eee3..2a13b2a72 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program | |||
| 263 | info.y_negate = key.state.y_negate != 0; | 263 | info.y_negate = key.state.y_negate != 0; |
| 264 | return info; | 264 | return info; |
| 265 | } | 265 | } |
| 266 | |||
| 267 | size_t GetTotalPipelineWorkers() { | ||
| 268 | const size_t max_core_threads = | ||
| 269 | std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL; | ||
| 270 | #ifdef ANDROID | ||
| 271 | // Leave at least a few cores free in android | ||
| 272 | constexpr size_t free_cores = 3ULL; | ||
| 273 | if (max_core_threads <= free_cores) { | ||
| 274 | return 1ULL; | ||
| 275 | } | ||
| 276 | return max_core_threads - free_cores; | ||
| 277 | #else | ||
| 278 | return max_core_threads; | ||
| 279 | #endif | ||
| 280 | } | ||
| 281 | |||
| 266 | } // Anonymous namespace | 282 | } // Anonymous namespace |
| 267 | 283 | ||
| 268 | size_t ComputePipelineCacheKey::Hash() const noexcept { | 284 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| @@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 294 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, | 310 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, |
| 295 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, | 311 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, |
| 296 | use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, | 312 | use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, |
| 297 | #ifdef ANDROID | 313 | workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(), |
| 298 | workers(1, "VkPipelineBuilder"), | 314 | "VkPipelineBuilder"), |
| 299 | #else | ||
| 300 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), | ||
| 301 | #endif | ||
| 302 | serialization_thread(1, "VkPipelineSerialization") { | 315 | serialization_thread(1, "VkPipelineSerialization") { |
| 303 | const auto& float_control{device.FloatControlProperties()}; | 316 | const auto& float_control{device.FloatControlProperties()}; |
| 304 | const VkDriverId driver_id{device.GetDriverID()}; | 317 | const VkDriverId driver_id{device.GetDriverID()}; |
| @@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 338 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | 351 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), |
| 339 | .support_native_ndc = device.IsExtDepthClipControlSupported(), | 352 | .support_native_ndc = device.IsExtDepthClipControlSupported(), |
| 340 | .support_scaled_attributes = !device.MustEmulateScaledFormats(), | 353 | .support_scaled_attributes = !device.MustEmulateScaledFormats(), |
| 354 | .support_multi_viewport = device.SupportsMultiViewport(), | ||
| 341 | 355 | ||
| 342 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | 356 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), |
| 343 | 357 | ||
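For reference, a standalone mirror of GetTotalPipelineWorkers() and the counts it resolves to, assuming hardware_concurrency reports the host thread count:

    #include <algorithm>
    #include <cstdio>

    // Mirrors GetTotalPipelineWorkers() above, with the Android path as a flag.
    size_t Workers(size_t hardware_concurrency, bool android) {
        const size_t max_core_threads = std::max<size_t>(hardware_concurrency, 2) - 1;
        if (!android) {
            return max_core_threads;
        }
        constexpr size_t free_cores = 3; // leave a few cores free on Android
        return max_core_threads <= free_cores ? 1 : max_core_threads - free_cores;
    }

    int main() {
        std::printf("desktop, 8 threads -> %zu workers\n", Workers(8, false)); // 7
        std::printf("android, 8 threads -> %zu workers\n", Workers(8, true));  // 4
        std::printf("android, 4 threads -> %zu workers\n", Workers(4, true));  // 1
    }

Drivers flagged by HasBrokenParallelShaderCompiling() bypass this and fall back to a single worker, as the constructor change above shows.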
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 66c03bf17..078777cdd 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -211,6 +211,13 @@ public: | |||
| 211 | return; | 211 | return; |
| 212 | } | 212 | } |
| 213 | PauseCounter(); | 213 | PauseCounter(); |
| 214 | const auto driver_id = device.GetDriverID(); | ||
| 215 | if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || | ||
| 216 | driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { | ||
| 217 | pending_sync.clear(); | ||
| 218 | sync_values_stash.clear(); | ||
| 219 | return; | ||
| 220 | } | ||
| 214 | sync_values_stash.clear(); | 221 | sync_values_stash.clear(); |
| 215 | sync_values_stash.emplace_back(); | 222 | sync_values_stash.emplace_back(); |
| 216 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); | 223 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); |
| @@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1378 | return true; | 1385 | return true; |
| 1379 | } | 1386 | } |
| 1380 | 1387 | ||
| 1388 | auto driver_id = impl->device.GetDriverID(); | ||
| 1389 | if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || | ||
| 1390 | driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { | ||
| 1391 | return true; | ||
| 1392 | } | ||
| 1393 | |||
| 1381 | for (size_t i = 0; i < 2; i++) { | 1394 | for (size_t i = 0; i < 2; i++) { |
| 1382 | is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); | 1395 | is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); |
| 1383 | } | 1396 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 059b7cb40..e0ab1eaac 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | if (y_negate) { | 84 | if (y_negate) { |
| 85 | y += height; | 85 | y += conv(static_cast<f32>(regs.surface_clip.height)); |
| 86 | height = -height; | 86 | height = -height; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| @@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 199 | if (!pipeline) { | 199 | if (!pipeline) { |
| 200 | return; | 200 | return; |
| 201 | } | 201 | } |
| 202 | std::scoped_lock lock{LockCaches()}; | 202 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 203 | // update engine as channel may be different. | 203 | // update engine as channel may be different. |
| 204 | pipeline->SetEngine(maxwell3d, gpu_memory); | 204 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 205 | pipeline->Configure(is_indexed); | 205 | pipeline->Configure(is_indexed); |
| @@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | |||
| 621 | } | 621 | } |
| 622 | { | 622 | { |
| 623 | std::scoped_lock lock{buffer_cache.mutex}; | 623 | std::scoped_lock lock{buffer_cache.mutex}; |
| 624 | buffer_cache.CachedWriteMemory(addr, size); | 624 | buffer_cache.WriteMemory(addr, size); |
| 625 | } | 625 | } |
| 626 | pipeline_cache.InvalidateRegion(addr, size); | 626 | pipeline_cache.InvalidateRegion(addr, size); |
| 627 | } | 627 | } |
| @@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() { | |||
| 710 | } | 710 | } |
| 711 | 711 | ||
| 712 | void RasterizerVulkan::FlushCommands() { | 712 | void RasterizerVulkan::FlushCommands() { |
| 713 | std::scoped_lock lock{LockCaches()}; | ||
| 714 | if (draw_counter == 0) { | 713 | if (draw_counter == 0) { |
| 715 | return; | 714 | return; |
| 716 | } | 715 | } |
| @@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() { | |||
| 808 | if ((++draw_counter & 7) != 7) { | 807 | if ((++draw_counter & 7) != 7) { |
| 809 | return; | 808 | return; |
| 810 | } | 809 | } |
| 811 | std::scoped_lock lock{LockCaches()}; | ||
| 812 | if (draw_counter < DRAWS_TO_DISPATCH) { | 810 | if (draw_counter < DRAWS_TO_DISPATCH) { |
| 813 | // Send recorded tasks to the worker thread | 811 | // Send recorded tasks to the worker thread |
| 814 | scheduler.DispatchWork(); | 812 | scheduler.DispatchWork(); |
| @@ -923,9 +921,13 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 923 | } | 921 | } |
| 924 | 922 | ||
| 925 | void RasterizerVulkan::HandleTransformFeedback() { | 923 | void RasterizerVulkan::HandleTransformFeedback() { |
| 924 | static std::once_flag warn_unsupported; | ||
| 925 | |||
| 926 | const auto& regs = maxwell3d->regs; | 926 | const auto& regs = maxwell3d->regs; |
| 927 | if (!device.IsExtTransformFeedbackSupported()) { | 927 | if (!device.IsExtTransformFeedbackSupported()) { |
| 928 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 928 | std::call_once(warn_unsupported, [&] { |
| 929 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | ||
| 930 | }); | ||
| 929 | return; | 931 | return; |
| 930 | } | 932 | } |
| 931 | query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, | 933 | query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, |
| @@ -1503,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 1503 | void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { | 1505 | void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { |
| 1504 | CreateChannel(channel); | 1506 | CreateChannel(channel); |
| 1505 | { | 1507 | { |
| 1506 | std::scoped_lock lock{LockCaches()}; | 1508 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 1507 | texture_cache.CreateChannel(channel); | 1509 | texture_cache.CreateChannel(channel); |
| 1508 | buffer_cache.CreateChannel(channel); | 1510 | buffer_cache.CreateChannel(channel); |
| 1509 | } | 1511 | } |
| @@ -1516,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { | |||
| 1516 | const s32 channel_id = channel.bind_id; | 1518 | const s32 channel_id = channel.bind_id; |
| 1517 | BindToChannel(channel_id); | 1519 | BindToChannel(channel_id); |
| 1518 | { | 1520 | { |
| 1519 | std::scoped_lock lock{LockCaches()}; | 1521 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 1520 | texture_cache.BindToChannel(channel_id); | 1522 | texture_cache.BindToChannel(channel_id); |
| 1521 | buffer_cache.BindToChannel(channel_id); | 1523 | buffer_cache.BindToChannel(channel_id); |
| 1522 | } | 1524 | } |
| @@ -1529,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { | |||
| 1529 | void RasterizerVulkan::ReleaseChannel(s32 channel_id) { | 1531 | void RasterizerVulkan::ReleaseChannel(s32 channel_id) { |
| 1530 | EraseChannel(channel_id); | 1532 | EraseChannel(channel_id); |
| 1531 | { | 1533 | { |
| 1532 | std::scoped_lock lock{LockCaches()}; | 1534 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 1533 | texture_cache.EraseChannel(channel_id); | 1535 | texture_cache.EraseChannel(channel_id); |
| 1534 | buffer_cache.EraseChannel(channel_id); | 1536 | buffer_cache.EraseChannel(channel_id); |
| 1535 | } | 1537 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ce3dfbaab..ad069556c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -133,10 +133,6 @@ public: | |||
| 133 | 133 | ||
| 134 | void ReleaseChannel(s32 channel_id) override; | 134 | void ReleaseChannel(s32 channel_id) override; |
| 135 | 135 | ||
| 136 | std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() { | ||
| 137 | return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 138 | } | ||
| 139 | |||
| 140 | private: | 136 | private: |
| 141 | static constexpr size_t MAX_TEXTURES = 192; | 137 | static constexpr size_t MAX_TEXTURES = 192; |
| 142 | static constexpr size_t MAX_IMAGES = 48; | 138 | static constexpr size_t MAX_IMAGES = 48; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3be7837f4..146923db4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -22,11 +22,12 @@ namespace Vulkan { | |||
| 22 | 22 | ||
| 23 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); | 23 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); |
| 24 | 24 | ||
| 25 | void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { | 25 | void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, |
| 26 | vk::CommandBuffer upload_cmdbuf) { | ||
| 26 | auto command = first; | 27 | auto command = first; |
| 27 | while (command != nullptr) { | 28 | while (command != nullptr) { |
| 28 | auto next = command->GetNext(); | 29 | auto next = command->GetNext(); |
| 29 | command->Execute(cmdbuf); | 30 | command->Execute(cmdbuf, upload_cmdbuf); |
| 30 | command->~Command(); | 31 | command->~Command(); |
| 31 | command = next; | 32 | command = next; |
| 32 | } | 33 | } |
| @@ -180,7 +181,7 @@ void Scheduler::WorkerThread(std::stop_token stop_token) { | |||
| 180 | // Perform the work, tracking whether the chunk was a submission | 181 | // Perform the work, tracking whether the chunk was a submission |
| 181 | // before executing. | 182 | // before executing. |
| 182 | const bool has_submit = work->HasSubmit(); | 183 | const bool has_submit = work->HasSubmit(); |
| 183 | work->ExecuteAll(current_cmdbuf); | 184 | work->ExecuteAll(current_cmdbuf, current_upload_cmdbuf); |
| 184 | 185 | ||
| 185 | // If the chunk was a submission, reallocate the command buffer. | 186 | // If the chunk was a submission, reallocate the command buffer. |
| 186 | if (has_submit) { | 187 | if (has_submit) { |
| @@ -205,6 +206,13 @@ void Scheduler::AllocateWorkerCommandBuffer() { | |||
| 205 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | 206 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, |
| 206 | .pInheritanceInfo = nullptr, | 207 | .pInheritanceInfo = nullptr, |
| 207 | }); | 208 | }); |
| 209 | current_upload_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 210 | current_upload_cmdbuf.Begin({ | ||
| 211 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 212 | .pNext = nullptr, | ||
| 213 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 214 | .pInheritanceInfo = nullptr, | ||
| 215 | }); | ||
| 208 | } | 216 | } |
| 209 | 217 | ||
| 210 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 218 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| @@ -212,7 +220,17 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se | |||
| 212 | InvalidateState(); | 220 | InvalidateState(); |
| 213 | 221 | ||
| 214 | const u64 signal_value = master_semaphore->NextTick(); | 222 | const u64 signal_value = master_semaphore->NextTick(); |
| 215 | Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { | 223 | RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value, |
| 224 | this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) { | ||
| 225 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 226 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 227 | .pNext = nullptr, | ||
| 228 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 229 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 230 | }; | ||
| 231 | upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 232 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 233 | upload_cmdbuf.End(); | ||
| 216 | cmdbuf.End(); | 234 | cmdbuf.End(); |
| 217 | 235 | ||
| 218 | if (on_submit) { | 236 | if (on_submit) { |
| @@ -221,7 +239,7 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se | |||
| 221 | 239 | ||
| 222 | std::scoped_lock lock{submit_mutex}; | 240 | std::scoped_lock lock{submit_mutex}; |
| 223 | switch (const VkResult result = master_semaphore->SubmitQueue( | 241 | switch (const VkResult result = master_semaphore->SubmitQueue( |
| 224 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { | 242 | cmdbuf, upload_cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { |
| 225 | case VK_SUCCESS: | 243 | case VK_SUCCESS: |
| 226 | break; | 244 | break; |
| 227 | case VK_ERROR_DEVICE_LOST: | 245 | case VK_ERROR_DEVICE_LOST: |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index da03803aa..f8d8ca80a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -80,7 +80,8 @@ public: | |||
| 80 | 80 | ||
| 81 | /// Send work to a separate thread. | 81 | /// Send work to a separate thread. |
| 82 | template <typename T> | 82 | template <typename T> |
| 83 | void Record(T&& command) { | 83 | requires std::is_invocable_v<T, vk::CommandBuffer, vk::CommandBuffer> |
| 84 | void RecordWithUploadBuffer(T&& command) { | ||
| 84 | if (chunk->Record(command)) { | 85 | if (chunk->Record(command)) { |
| 85 | return; | 86 | return; |
| 86 | } | 87 | } |
| @@ -88,6 +89,15 @@ public: | |||
| 88 | (void)chunk->Record(command); | 89 | (void)chunk->Record(command); |
| 89 | } | 90 | } |
| 90 | 91 | ||
| 92 | template <typename T> | ||
| 93 | requires std::is_invocable_v<T, vk::CommandBuffer> | ||
| 94 | void Record(T&& c) { | ||
| 95 | this->RecordWithUploadBuffer( | ||
| 96 | [command = std::move(c)](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { | ||
| 97 | command(cmdbuf); | ||
| 98 | }); | ||
| 99 | } | ||
| 100 | |||
| 91 | /// Returns the current command buffer tick. | 101 | /// Returns the current command buffer tick. |
| 92 | [[nodiscard]] u64 CurrentTick() const noexcept { | 102 | [[nodiscard]] u64 CurrentTick() const noexcept { |
| 93 | return master_semaphore->CurrentTick(); | 103 | return master_semaphore->CurrentTick(); |
| @@ -119,7 +129,7 @@ private: | |||
| 119 | public: | 129 | public: |
| 120 | virtual ~Command() = default; | 130 | virtual ~Command() = default; |
| 121 | 131 | ||
| 122 | virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; | 132 | virtual void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0; |
| 123 | 133 | ||
| 124 | Command* GetNext() const { | 134 | Command* GetNext() const { |
| 125 | return next; | 135 | return next; |
| @@ -142,8 +152,8 @@ private: | |||
| 142 | TypedCommand(TypedCommand&&) = delete; | 152 | TypedCommand(TypedCommand&&) = delete; |
| 143 | TypedCommand& operator=(TypedCommand&&) = delete; | 153 | TypedCommand& operator=(TypedCommand&&) = delete; |
| 144 | 154 | ||
| 145 | void Execute(vk::CommandBuffer cmdbuf) const override { | 155 | void Execute(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) const override { |
| 146 | command(cmdbuf); | 156 | command(cmdbuf, upload_cmdbuf); |
| 147 | } | 157 | } |
| 148 | 158 | ||
| 149 | private: | 159 | private: |
| @@ -152,7 +162,7 @@ private: | |||
| 152 | 162 | ||
| 153 | class CommandChunk final { | 163 | class CommandChunk final { |
| 154 | public: | 164 | public: |
| 155 | void ExecuteAll(vk::CommandBuffer cmdbuf); | 165 | void ExecuteAll(vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf); |
| 156 | 166 | ||
| 157 | template <typename T> | 167 | template <typename T> |
| 158 | bool Record(T& command) { | 168 | bool Record(T& command) { |
| @@ -228,6 +238,7 @@ private: | |||
| 228 | VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; | 238 | VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; |
| 229 | 239 | ||
| 230 | vk::CommandBuffer current_cmdbuf; | 240 | vk::CommandBuffer current_cmdbuf; |
| 241 | vk::CommandBuffer current_upload_cmdbuf; | ||
| 231 | 242 | ||
| 232 | std::unique_ptr<CommandChunk> chunk; | 243 | std::unique_ptr<CommandChunk> chunk; |
| 233 | std::function<void()> on_submit; | 244 | std::function<void()> on_submit; |
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index 5efd7d66e..70644ea82 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp | |||
| @@ -672,7 +672,7 @@ void SMAA::UploadImages(Scheduler& scheduler) { | |||
| 672 | UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, | 672 | UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, |
| 673 | VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); | 673 | VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); |
| 674 | 674 | ||
| 675 | scheduler.Record([&](vk::CommandBuffer& cmdbuf) { | 675 | scheduler.Record([&](vk::CommandBuffer cmdbuf) { |
| 676 | for (auto& images : m_dynamic_images) { | 676 | for (auto& images : m_dynamic_images) { |
| 677 | for (size_t i = 0; i < MaxDynamicImage; i++) { | 677 | for (size_t i = 0; i < MaxDynamicImage; i++) { |
| 678 | ClearColorImage(cmdbuf, *images.images[i]); | 678 | ClearColorImage(cmdbuf, *images.images[i]); |
| @@ -707,7 +707,7 @@ VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_ | |||
| 707 | UpdateDescriptorSets(source_image_view, image_index); | 707 | UpdateDescriptorSets(source_image_view, image_index); |
| 708 | 708 | ||
| 709 | scheduler.RequestOutsideRenderPassOperationContext(); | 709 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 710 | scheduler.Record([=, this](vk::CommandBuffer& cmdbuf) { | 710 | scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { |
| 711 | TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); | 711 | TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); |
| 712 | TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); | 712 | TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); |
| 713 | BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, | 713 | BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d3deb9072..f63a20327 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -36,6 +36,10 @@ public: | |||
| 36 | StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); | 36 | StagingBufferRef Request(size_t size, MemoryUsage usage, bool deferred = false); |
| 37 | void FreeDeferred(StagingBufferRef& ref); | 37 | void FreeDeferred(StagingBufferRef& ref); |
| 38 | 38 | ||
| 39 | [[nodiscard]] VkBuffer StreamBuf() const noexcept { | ||
| 40 | return *stream_buffer; | ||
| 41 | } | ||
| 42 | |||
| 39 | void TickFrame(); | 43 | void TickFrame(); |
| 40 | 44 | ||
| 41 | private: | 45 | private: |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index de34f6d49..5dbec2e62 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1785,8 +1785,22 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | |||
| 1785 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, | 1785 | : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, |
| 1786 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | 1786 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} |
| 1787 | 1787 | ||
| 1788 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams& params) | 1788 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageViewParams& params) |
| 1789 | : VideoCommon::ImageViewBase{params} {} | 1789 | : VideoCommon::ImageViewBase{params}, device{&runtime.device} { |
| 1790 | if (device->HasNullDescriptor()) { | ||
| 1791 | return; | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | // Handle fallback for devices without nullDescriptor | ||
| 1795 | ImageInfo info{}; | ||
| 1796 | info.format = PixelFormat::A8B8G8R8_UNORM; | ||
| 1797 | |||
| 1798 | null_image = MakeImage(*device, runtime.memory_allocator, info, {}); | ||
| 1799 | image_handle = *null_image; | ||
| 1800 | for (u32 i = 0; i < Shader::NUM_TEXTURE_TYPES; i++) { | ||
| 1801 | image_views[i] = MakeView(VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_IMAGE_ASPECT_COLOR_BIT); | ||
| 1802 | } | ||
| 1803 | } | ||
| 1790 | 1804 | ||
| 1791 | ImageView::~ImageView() = default; | 1805 | ImageView::~ImageView() = default; |
| 1792 | 1806 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 7a0807709..edf5d7635 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -267,6 +267,7 @@ private: | |||
| 267 | vk::ImageView depth_view; | 267 | vk::ImageView depth_view; |
| 268 | vk::ImageView stencil_view; | 268 | vk::ImageView stencil_view; |
| 269 | vk::ImageView color_view; | 269 | vk::ImageView color_view; |
| 270 | vk::Image null_image; | ||
| 270 | VkImage image_handle = VK_NULL_HANDLE; | 271 | VkImage image_handle = VK_NULL_HANDLE; |
| 271 | VkImageView render_target = VK_NULL_HANDLE; | 272 | VkImageView render_target = VK_NULL_HANDLE; |
| 272 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 273 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 9df6a2903..3ffa2a661 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -138,6 +138,10 @@ public: | |||
| 138 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); | 138 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); |
| 139 | } | 139 | } |
| 140 | 140 | ||
| 141 | [[nodiscard]] size_t size() const noexcept { | ||
| 142 | return values_capacity - free_list.size(); | ||
| 143 | } | ||
| 144 | |||
| 141 | private: | 145 | private: |
| 142 | struct NonTrivialDummy { | 146 | struct NonTrivialDummy { |
| 143 | NonTrivialDummy() noexcept {} | 147 | NonTrivialDummy() noexcept {} |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e518756d2..fde36a49c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -635,6 +635,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 635 | has_broken_cube_compatibility = true; | 635 | has_broken_cube_compatibility = true; |
| 636 | } | 636 | } |
| 637 | } | 637 | } |
| 638 | if (is_qualcomm) { | ||
| 639 | const u32 version = (properties.properties.driverVersion << 3) >> 3; | ||
| 640 | if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) { | ||
| 641 | has_broken_parallel_compiling = true; | ||
| 642 | } | ||
| 643 | } | ||
| 638 | if (extensions.sampler_filter_minmax && is_amd) { | 644 | if (extensions.sampler_filter_minmax && is_amd) { |
| 639 | // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. | 645 | // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. |
| 640 | if (!features.shader_float16_int8.shaderFloat16) { | 646 | if (!features.shader_float16_int8.shaderFloat16) { |
| @@ -863,7 +869,8 @@ bool Device::ShouldBoostClocks() const { | |||
| 863 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || | 869 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || |
| 864 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; | 870 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; |
| 865 | 871 | ||
| 866 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; | 872 | const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) || |
| 873 | (vendor_id == 0x1002 && device_id == 0x1435); | ||
| 867 | 874 | ||
| 868 | const bool is_debugging = this->HasDebuggingToolAttached(); | 875 | const bool is_debugging = this->HasDebuggingToolAttached(); |
| 869 | 876 | ||
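The Qualcomm check masks off the top three bits of driverVersion before comparing against a packed version, presumably because the proprietary driver reports data there that would defeat a plain comparison; that encoding is an assumption on my part. A small illustration of the masking, with hypothetical packed values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // VK_MAKE_API_VERSION packs variant:3 | major:7 | minor:10 | patch:12.
        // (version << 3) >> 3 clears the top three (variant) bits.
        const uint32_t reported  = (0b101u << 29) | (2u << 22) | (15u << 12) | 100u; // hypothetical
        const uint32_t stripped  = (reported << 3) >> 3;
        const uint32_t threshold = (2u << 22) | (15u << 12) | 100u;                  // variant == 0
        std::printf("reported  = 0x%08x\n", reported);
        std::printf("stripped  = 0x%08x\n", stripped);
        std::printf("comparable after masking: %s\n", stripped == threshold ? "yes" : "no");
    }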
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b213ed7dd..4f3846345 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -102,6 +102,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||
| 102 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ | 102 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ |
| 103 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ | 103 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ |
| 104 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ | 104 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ |
| 105 | EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \ | ||
| 105 | EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ | 106 | EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ |
| 106 | EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ | 107 | EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ |
| 107 | EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ | 108 | EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ |
| @@ -599,6 +600,11 @@ public: | |||
| 599 | return has_broken_cube_compatibility; | 600 | return has_broken_cube_compatibility; |
| 600 | } | 601 | } |
| 601 | 602 | ||
| 603 | /// Returns true if parallel shader compiling has issues with the current driver. | ||
| 604 | bool HasBrokenParallelShaderCompiling() const { | ||
| 605 | return has_broken_parallel_compiling; | ||
| 606 | } | ||
| 607 | |||
| 602 | /// Returns the vendor name reported from Vulkan. | 608 | /// Returns the vendor name reported from Vulkan. |
| 603 | std::string_view GetVendorName() const { | 609 | std::string_view GetVendorName() const { |
| 604 | return properties.driver.driverName; | 610 | return properties.driver.driverName; |
| @@ -663,6 +669,10 @@ public: | |||
| 663 | return supports_conditional_barriers; | 669 | return supports_conditional_barriers; |
| 664 | } | 670 | } |
| 665 | 671 | ||
| 672 | bool SupportsMultiViewport() const { | ||
| 673 | return features2.features.multiViewport; | ||
| 674 | } | ||
| 675 | |||
| 666 | [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, | 676 | [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, |
| 667 | u32 driver_version) { | 677 | u32 driver_version) { |
| 668 | if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | 678 | if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { |
| @@ -794,6 +804,7 @@ private: | |||
| 794 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. | 804 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. |
| 795 | bool has_broken_compute{}; ///< Compute shaders can cause crashes | 805 | bool has_broken_compute{}; ///< Compute shaders can cause crashes |
| 796 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit | 806 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit |
| 807 | bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling. | ||
| 797 | bool has_renderdoc{}; ///< Has RenderDoc attached | 808 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| 798 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 809 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |
| 799 | bool supports_d24_depth{}; ///< Supports D24 depth buffers. | 810 | bool supports_d24_depth{}; ///< Supports D24 depth buffers. |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 0487cd3b6..a0c70797f 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -1101,6 +1101,10 @@ public: | |||
| 1101 | return &handle; | 1101 | return &handle; |
| 1102 | } | 1102 | } |
| 1103 | 1103 | ||
| 1104 | VkCommandBuffer operator*() const noexcept { | ||
| 1105 | return handle; | ||
| 1106 | } | ||
| 1107 | |||
| 1104 | void Begin(const VkCommandBufferBeginInfo& begin_info) const { | 1108 | void Begin(const VkCommandBufferBeginInfo& begin_info) const { |
| 1105 | Check(dld->vkBeginCommandBuffer(handle, &begin_info)); | 1109 | Check(dld->vkBeginCommandBuffer(handle, &begin_info)); |
| 1106 | } | 1110 | } |