summary | refs | log | tree | commit | diff
path: root/src/video_core/command_classes
diff options
context:
space:
mode:
author: bunnei, 2021-02-18 15:12:07 -0800
committer: GitHub, 2021-02-18 15:12:07 -0800
commit: 9cae3e6e90f840903a0072b916e49f24d0f6cb10 (patch)
tree: 79511308066a4fbc11aa2e9058b0aa65772cc30a /src/video_core/command_classes
parent: Merge pull request #5955 from yuzu-emu/revert-3603-port-5123 (diff)
parent: rebase, fix name shadowing, more const (diff)
download: yuzu-9cae3e6e90f840903a0072b916e49f24d0f6cb10.tar.gz
yuzu-9cae3e6e90f840903a0072b916e49f24d0f6cb10.tar.xz
yuzu-9cae3e6e90f840903a0072b916e49f24d0f6cb10.zip
Merge pull request #4973 from ameerj/nvdec-opt
nvdec: Reuse allocated buffers and general cleanup
Diffstat (limited to 'src/video_core/command_classes')
-rw-r--r-- src/video_core/command_classes/codecs/codec.cpp 7
-rw-r--r-- src/video_core/command_classes/nvdec.cpp 8
-rw-r--r-- src/video_core/command_classes/nvdec.h 2
-rw-r--r-- src/video_core/command_classes/vic.cpp 45
-rw-r--r-- src/video_core/command_classes/vic.h 51
5 files changed, 38 insertions, 75 deletions
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 39bc923a5..d02dc6260 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -44,8 +44,10 @@ Codec::~Codec() {
44} 44}
45 45
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); 47 if (current_codec != codec) {
48 current_codec = codec; 48 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
49 current_codec = codec;
50 }
49} 51}
50 52
51void Codec::StateWrite(u32 offset, u64 arguments) { 53void Codec::StateWrite(u32 offset, u64 arguments) {
@@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) {
55 57
56void Codec::Decode() { 58void Codec::Decode() {
57 bool is_first_frame = false; 59 bool is_first_frame = false;
58
59 if (!initialized) { 60 if (!initialized) {
60 if (current_codec == NvdecCommon::VideoCodec::H264) { 61 if (current_codec == NvdecCommon::VideoCodec::H264) {
61 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); 62 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp
index 79e1f4e13..e4f919afd 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/command_classes/nvdec.cpp
@@ -12,16 +12,16 @@ Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {}
12 12
13Nvdec::~Nvdec() = default; 13Nvdec::~Nvdec() = default;
14 14
15void Nvdec::ProcessMethod(Method method, const std::vector<u32>& arguments) { 15void Nvdec::ProcessMethod(Method method, u32 argument) {
16 if (method == Method::SetVideoCodec) { 16 if (method == Method::SetVideoCodec) {
17 codec->StateWrite(static_cast<u32>(method), arguments[0]); 17 codec->StateWrite(static_cast<u32>(method), argument);
18 } else { 18 } else {
19 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(arguments[0]) << 8); 19 codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8);
20 } 20 }
21 21
22 switch (method) { 22 switch (method) {
23 case Method::SetVideoCodec: 23 case Method::SetVideoCodec:
24 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(arguments[0])); 24 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
25 break; 25 break;
26 case Method::Execute: 26 case Method::Execute:
27 Execute(); 27 Execute();
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index e4877c533..e66be80b8 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -23,7 +23,7 @@ public:
23 ~Nvdec(); 23 ~Nvdec();
24 24
25 /// Writes the method into the state, Invoke Execute() if encountered 25 /// Writes the method into the state, Invoke Execute() if encountered
26 void ProcessMethod(Method method, const std::vector<u32>& arguments); 26 void ProcessMethod(Method method, u32 argument);
27 27
28 /// Return most recently decoded frame 28 /// Return most recently decoded frame
29 [[nodiscard]] AVFramePtr GetFrame(); 29 [[nodiscard]] AVFramePtr GetFrame();
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 2b7569335..0a8b82f2b 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -18,18 +18,14 @@ extern "C" {
18namespace Tegra { 18namespace Tegra {
19 19
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) 20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
21 : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} 21 : gpu(gpu_),
22Vic::~Vic() = default; 22 nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}
23 23
24void Vic::VicStateWrite(u32 offset, u32 arguments) { 24Vic::~Vic() = default;
25 u8* const state_offset = reinterpret_cast<u8*>(&vic_state) + offset * sizeof(u32);
26 std::memcpy(state_offset, &arguments, sizeof(u32));
27}
28 25
29void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) { 26void Vic::ProcessMethod(Method method, u32 argument) {
30 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", method); 27 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
31 VicStateWrite(static_cast<u32>(method), arguments[0]); 28 const u64 arg = static_cast<u64>(argument) << 8;
32 const u64 arg = static_cast<u64>(arguments[0]) << 8;
33 switch (method) { 29 switch (method) {
34 case Method::Execute: 30 case Method::Execute:
35 Execute(); 31 Execute();
@@ -53,8 +49,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
53 49
54void Vic::Execute() { 50void Vic::Execute() {
55 if (output_surface_luma_address == 0) { 51 if (output_surface_luma_address == 0) {
56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}", 52 LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
57 vic_state.output_surface.luma_offset);
58 return; 53 return;
59 } 54 }
60 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)}; 55 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
@@ -89,8 +84,10 @@ void Vic::Execute() {
89 // Get Converted frame 84 // Get Converted frame
90 const std::size_t linear_size = frame->width * frame->height * 4; 85 const std::size_t linear_size = frame->width * frame->height * 4;
91 86
92 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; 87 // Only allocate frame_buffer once per stream, as the size is not expected to change
93 AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; 88 if (!converted_frame_buffer) {
89 converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
90 }
94 91
95 const int converted_stride{frame->width * 4}; 92 const int converted_stride{frame->width * 4};
96 u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; 93 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
@@ -104,12 +101,12 @@ void Vic::Execute() {
104 const u32 block_height = static_cast<u32>(config.block_linear_height_log2); 101 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, 102 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
106 block_height, 0); 103 block_height, 0);
107 std::vector<u8> swizzled_data(size); 104 luma_buffer.resize(size);
108 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, 105 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
109 frame->width, 4, swizzled_data.data(), 106 frame->width, 4, luma_buffer.data(),
110 converted_frame_buffer.get(), block_height, 0, 0); 107 converted_frame_buffer.get(), block_height, 0, 0);
111 108
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 109 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
113 } else { 110 } else {
114 // send pitch linear frame 111 // send pitch linear frame
115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, 112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
@@ -132,15 +129,15 @@ void Vic::Execute() {
132 const auto stride = frame->linesize[0]; 129 const auto stride = frame->linesize[0];
133 const auto half_stride = frame->linesize[1]; 130 const auto half_stride = frame->linesize[1];
134 131
135 std::vector<u8> luma_buffer(aligned_width * surface_height); 132 luma_buffer.resize(aligned_width * surface_height);
136 std::vector<u8> chroma_buffer(aligned_width * half_height); 133 chroma_buffer.resize(aligned_width * half_height);
137 134
138 // Populate luma buffer 135 // Populate luma buffer
139 for (std::size_t y = 0; y < surface_height - 1; ++y) { 136 for (std::size_t y = 0; y < surface_height - 1; ++y) {
140 std::size_t src = y * stride; 137 const std::size_t src = y * stride;
141 std::size_t dst = y * aligned_width; 138 const std::size_t dst = y * aligned_width;
142 139
143 std::size_t size = surface_width; 140 const std::size_t size = surface_width;
144 141
145 for (std::size_t offset = 0; offset < size; ++offset) { 142 for (std::size_t offset = 0; offset < size; ++offset) {
146 luma_buffer[dst + offset] = luma_ptr[src + offset]; 143 luma_buffer[dst + offset] = luma_ptr[src + offset];
@@ -151,8 +148,8 @@ void Vic::Execute() {
151 148
152 // Populate chroma buffer from both channels with interleaving. 149 // Populate chroma buffer from both channels with interleaving.
153 for (std::size_t y = 0; y < half_height; ++y) { 150 for (std::size_t y = 0; y < half_height; ++y) {
154 std::size_t src = y * half_stride; 151 const std::size_t src = y * half_stride;
155 std::size_t dst = y * aligned_width; 152 const std::size_t dst = y * aligned_width;
156 153
157 for (std::size_t x = 0; x < half_width; ++x) { 154 for (std::size_t x = 0; x < half_width; ++x) {
158 chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x]; 155 chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 8c4e284a1..f5a2ed100 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -15,43 +15,6 @@ namespace Tegra {
15class GPU; 15class GPU;
16class Nvdec; 16class Nvdec;
17 17
18struct PlaneOffsets {
19 u32 luma_offset{};
20 u32 chroma_u_offset{};
21 u32 chroma_v_offset{};
22};
23
24struct VicRegisters {
25 INSERT_PADDING_WORDS(64);
26 u32 nop{};
27 INSERT_PADDING_WORDS(15);
28 u32 pm_trigger{};
29 INSERT_PADDING_WORDS(47);
30 u32 set_application_id{};
31 u32 set_watchdog_timer{};
32 INSERT_PADDING_WORDS(17);
33 u32 context_save_area{};
34 u32 context_switch{};
35 INSERT_PADDING_WORDS(43);
36 u32 execute{};
37 INSERT_PADDING_WORDS(63);
38 std::array<std::array<PlaneOffsets, 8>, 8> surfacex_slots{};
39 u32 picture_index{};
40 u32 control_params{};
41 u32 config_struct_offset{};
42 u32 filter_struct_offset{};
43 u32 palette_offset{};
44 u32 hist_offset{};
45 u32 context_id{};
46 u32 fce_ucode_size{};
47 PlaneOffsets output_surface{};
48 u32 fce_ucode_offset{};
49 INSERT_PADDING_WORDS(4);
50 std::array<u32, 8> slot_context_id{};
51 INSERT_PADDING_WORDS(16);
52};
53static_assert(sizeof(VicRegisters) == 0x7A0, "VicRegisters is an invalid size");
54
55class Vic { 18class Vic {
56public: 19public:
57 enum class Method : u32 { 20 enum class Method : u32 {
@@ -67,14 +30,11 @@ public:
67 ~Vic(); 30 ~Vic();
68 31
69 /// Write to the device state. 32 /// Write to the device state.
70 void ProcessMethod(Method method, const std::vector<u32>& arguments); 33 void ProcessMethod(Method method, u32 argument);
71 34
72private: 35private:
73 void Execute(); 36 void Execute();
74 37
75 void VicStateWrite(u32 offset, u32 arguments);
76 VicRegisters vic_state{};
77
78 enum class VideoPixelFormat : u64_le { 38 enum class VideoPixelFormat : u64_le {
79 RGBA8 = 0x1f, 39 RGBA8 = 0x1f,
80 BGRA8 = 0x20, 40 BGRA8 = 0x20,
@@ -88,8 +48,6 @@ private:
88 BitField<9, 2, u64_le> chroma_loc_vert; 48 BitField<9, 2, u64_le> chroma_loc_vert;
89 BitField<11, 4, u64_le> block_linear_kind; 49 BitField<11, 4, u64_le> block_linear_kind;
90 BitField<15, 4, u64_le> block_linear_height_log2; 50 BitField<15, 4, u64_le> block_linear_height_log2;
91 BitField<19, 3, u64_le> reserved0;
92 BitField<22, 10, u64_le> reserved1;
93 BitField<32, 14, u64_le> surface_width_minus1; 51 BitField<32, 14, u64_le> surface_width_minus1;
94 BitField<46, 14, u64_le> surface_height_minus1; 52 BitField<46, 14, u64_le> surface_height_minus1;
95 }; 53 };
@@ -97,6 +55,13 @@ private:
97 GPU& gpu; 55 GPU& gpu;
98 std::shared_ptr<Tegra::Nvdec> nvdec_processor; 56 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
99 57
58 /// Avoid reallocation of the following buffers every frame, as their
59 /// size does not change during a stream
60 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
61 AVMallocPtr converted_frame_buffer;
62 std::vector<u8> luma_buffer;
63 std::vector<u8> chroma_buffer;
64
100 GPUVAddr config_struct_address{}; 65 GPUVAddr config_struct_address{};
101 GPUVAddr output_surface_luma_address{}; 66 GPUVAddr output_surface_luma_address{};
102 GPUVAddr output_surface_chroma_u_address{}; 67 GPUVAddr output_surface_chroma_u_address{};