diff options
| author | 2020-11-23 13:25:01 -0500 | |
|---|---|---|
| committer | 2021-02-13 13:07:31 -0500 | |
| commit | ac265a72ce4176ceb3cd10a5548ab71519771640 (patch) | |
| tree | 0acde029388d465a5801db9106dd8f4e026e57e8 /src/video_core/command_classes | |
| parent | Merge pull request #5919 from ReinUsesLisp/stream-buffer-tragic (diff) | |
| download | yuzu-ac265a72ce4176ceb3cd10a5548ab71519771640.tar.gz yuzu-ac265a72ce4176ceb3cd10a5548ab71519771640.tar.xz yuzu-ac265a72ce4176ceb3cd10a5548ab71519771640.zip | |
nvdec cleanup
Diffstat (limited to 'src/video_core/command_classes')
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.h | 7 |
3 files changed, 24 insertions, 11 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 39bc923a5..d02dc6260 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp | |||
| @@ -44,8 +44,10 @@ Codec::~Codec() { | |||
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | 46 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { |
| 47 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); | 47 | if (current_codec != codec) { |
| 48 | current_codec = codec; | 48 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); |
| 49 | current_codec = codec; | ||
| 50 | } | ||
| 49 | } | 51 | } |
| 50 | 52 | ||
| 51 | void Codec::StateWrite(u32 offset, u64 arguments) { | 53 | void Codec::StateWrite(u32 offset, u64 arguments) { |
| @@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) { | |||
| 55 | 57 | ||
| 56 | void Codec::Decode() { | 58 | void Codec::Decode() { |
| 57 | bool is_first_frame = false; | 59 | bool is_first_frame = false; |
| 58 | |||
| 59 | if (!initialized) { | 60 | if (!initialized) { |
| 60 | if (current_codec == NvdecCommon::VideoCodec::H264) { | 61 | if (current_codec == NvdecCommon::VideoCodec::H264) { |
| 61 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); | 62 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); |
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 2b7569335..73680d057 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -18,7 +18,10 @@ extern "C" { | |||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | 19 | ||
| 20 | Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) | 20 | Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) |
| 21 | : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} | 21 | : gpu(gpu_), |
| 22 | nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} | ||
| 23 | |||
| 24 | {} | ||
| 22 | Vic::~Vic() = default; | 25 | Vic::~Vic() = default; |
| 23 | 26 | ||
| 24 | void Vic::VicStateWrite(u32 offset, u32 arguments) { | 27 | void Vic::VicStateWrite(u32 offset, u32 arguments) { |
| @@ -89,8 +92,10 @@ void Vic::Execute() { | |||
| 89 | // Get Converted frame | 92 | // Get Converted frame |
| 90 | const std::size_t linear_size = frame->width * frame->height * 4; | 93 | const std::size_t linear_size = frame->width * frame->height * 4; |
| 91 | 94 | ||
| 92 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | 95 | // Only allocate frame_buffer once per stream, as the size is not expected to change |
| 93 | AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; | 96 | if (!converted_frame_buffer) { |
| 97 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free}; | ||
| 98 | } | ||
| 94 | 99 | ||
| 95 | const int converted_stride{frame->width * 4}; | 100 | const int converted_stride{frame->width * 4}; |
| 96 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | 101 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; |
| @@ -104,12 +109,12 @@ void Vic::Execute() { | |||
| 104 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | 109 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); |
| 105 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, | 110 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, |
| 106 | block_height, 0); | 111 | block_height, 0); |
| 107 | std::vector<u8> swizzled_data(size); | 112 | luma_buffer.resize(size); |
| 108 | Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, | 113 | Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, |
| 109 | frame->width, 4, swizzled_data.data(), | 114 | frame->width, 4, luma_buffer.data(), |
| 110 | converted_frame_buffer.get(), block_height, 0, 0); | 115 | converted_frame_buffer.get(), block_height, 0, 0); |
| 111 | 116 | ||
| 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | 117 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); |
| 113 | } else { | 118 | } else { |
| 114 | // send pitch linear frame | 119 | // send pitch linear frame |
| 115 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | 120 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, |
| @@ -132,8 +137,8 @@ void Vic::Execute() { | |||
| 132 | const auto stride = frame->linesize[0]; | 137 | const auto stride = frame->linesize[0]; |
| 133 | const auto half_stride = frame->linesize[1]; | 138 | const auto half_stride = frame->linesize[1]; |
| 134 | 139 | ||
| 135 | std::vector<u8> luma_buffer(aligned_width * surface_height); | 140 | luma_buffer.resize(aligned_width * surface_height); |
| 136 | std::vector<u8> chroma_buffer(aligned_width * half_height); | 141 | chroma_buffer.resize(aligned_width * half_height); |
| 137 | 142 | ||
| 138 | // Populate luma buffer | 143 | // Populate luma buffer |
| 139 | for (std::size_t y = 0; y < surface_height - 1; ++y) { | 144 | for (std::size_t y = 0; y < surface_height - 1; ++y) { |
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h index 8c4e284a1..6eaf72f21 100644 --- a/src/video_core/command_classes/vic.h +++ b/src/video_core/command_classes/vic.h | |||
| @@ -97,6 +97,13 @@ private: | |||
| 97 | GPU& gpu; | 97 | GPU& gpu; |
| 98 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | 98 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; |
| 99 | 99 | ||
| 100 | /// Avoid reallocation of the following buffers every frame, as their | ||
| 101 | /// size does not change during a stream | ||
| 102 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | ||
| 103 | AVMallocPtr converted_frame_buffer; | ||
| 104 | std::vector<u8> luma_buffer; | ||
| 105 | std::vector<u8> chroma_buffer; | ||
| 106 | |||
| 100 | GPUVAddr config_struct_address{}; | 107 | GPUVAddr config_struct_address{}; |
| 101 | GPUVAddr output_surface_luma_address{}; | 108 | GPUVAddr output_surface_luma_address{}; |
| 102 | GPUVAddr output_surface_chroma_u_address{}; | 109 | GPUVAddr output_surface_chroma_u_address{}; |