summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp 8
-rw-r--r-- src/video_core/cdma_pusher.cpp 15
-rw-r--r-- src/video_core/cdma_pusher.h 15
-rw-r--r-- src/video_core/command_classes/codecs/codec.cpp 7
-rw-r--r-- src/video_core/command_classes/vic.cpp 21
-rw-r--r-- src/video_core/command_classes/vic.h 7
-rw-r--r-- src/video_core/gpu.cpp 3
-rw-r--r-- src/video_core/gpu_thread.cpp 5
8 files changed, 38 insertions, 43 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 72499654c..a29abd15b 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -28,8 +28,14 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
28 return GetWaitbase(input, output); 28 return GetWaitbase(input, output);
29 case 0x9: 29 case 0x9:
30 return MapBuffer(input, output); 30 return MapBuffer(input, output);
31 case 0xa: 31 case 0xa: {
32 if (command.length == 0x1c) {
33 Tegra::ChCommandHeaderList cmdlist(1);
34 cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F};
35 system.GPU().PushCommandBuffer(cmdlist);
36 }
32 return UnmapBuffer(input, output); 37 return UnmapBuffer(input, output);
38 }
33 default: 39 default:
34 break; 40 break;
35 } 41 }
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 33b3c060b..c725baa98 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -37,20 +37,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
37 37
38CDmaPusher::~CDmaPusher() = default; 38CDmaPusher::~CDmaPusher() = default;
39 39
40void CDmaPusher::Push(ChCommandHeaderList&& entries) { 40void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) {
41 cdma_queue.push(std::move(entries));
42}
43
44void CDmaPusher::DispatchCalls() {
45 while (!cdma_queue.empty()) {
46 Step();
47 }
48}
49
50void CDmaPusher::Step() {
51 const auto entries{cdma_queue.front()};
52 cdma_queue.pop();
53
54 std::vector<u32> values(entries.size()); 41 std::vector<u32> values(entries.size());
55 std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); 42 std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
56 43
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index e5f212c1a..de7a3a35b 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -99,19 +99,13 @@ public:
99 explicit CDmaPusher(GPU& gpu_); 99 explicit CDmaPusher(GPU& gpu_);
100 ~CDmaPusher(); 100 ~CDmaPusher();
101 101
102 /// Push NVDEC command buffer entries into queue 102 /// Process the command entry
103 void Push(ChCommandHeaderList&& entries); 103 void ProcessEntries(ChCommandHeaderList&& entries);
104
105 /// Process queued command buffer entries
106 void DispatchCalls();
107
108 /// Process one queue element
109 void Step();
110 104
105private:
111 /// Invoke command class devices to execute the command based on the current state 106 /// Invoke command class devices to execute the command based on the current state
112 void ExecuteCommand(u32 state_offset, u32 data); 107 void ExecuteCommand(u32 state_offset, u32 data);
113 108
114private:
115 /// Write arguments value to the ThiRegisters member at the specified offset 109 /// Write arguments value to the ThiRegisters member at the specified offset
116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); 110 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
117 111
@@ -128,9 +122,6 @@ private:
128 s32 offset{}; 122 s32 offset{};
129 u32 mask{}; 123 u32 mask{};
130 bool incrementing{}; 124 bool incrementing{};
131
132 // Queue of command lists to be processed
133 std::queue<ChCommandHeaderList> cdma_queue;
134}; 125};
135 126
136} // namespace Tegra 127} // namespace Tegra
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 39bc923a5..d02dc6260 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -44,8 +44,10 @@ Codec::~Codec() {
44} 44}
45 45
46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 46void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
47 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); 47 if (current_codec != codec) {
48 current_codec = codec; 48 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec));
49 current_codec = codec;
50 }
49} 51}
50 52
51void Codec::StateWrite(u32 offset, u64 arguments) { 53void Codec::StateWrite(u32 offset, u64 arguments) {
@@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) {
55 57
56void Codec::Decode() { 58void Codec::Decode() {
57 bool is_first_frame = false; 59 bool is_first_frame = false;
58
59 if (!initialized) { 60 if (!initialized) {
60 if (current_codec == NvdecCommon::VideoCodec::H264) { 61 if (current_codec == NvdecCommon::VideoCodec::H264) {
61 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); 62 av_codec = avcodec_find_decoder(AV_CODEC_ID_H264);
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 2b7569335..73680d057 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -18,7 +18,10 @@ extern "C" {
18namespace Tegra { 18namespace Tegra {
19 19
20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) 20Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
21 : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} 21 : gpu(gpu_),
22 nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free}
23
24{}
22Vic::~Vic() = default; 25Vic::~Vic() = default;
23 26
24void Vic::VicStateWrite(u32 offset, u32 arguments) { 27void Vic::VicStateWrite(u32 offset, u32 arguments) {
@@ -89,8 +92,10 @@ void Vic::Execute() {
89 // Get Converted frame 92 // Get Converted frame
90 const std::size_t linear_size = frame->width * frame->height * 4; 93 const std::size_t linear_size = frame->width * frame->height * 4;
91 94
92 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; 95 // Only allocate frame_buffer once per stream, as the size is not expected to change
93 AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; 96 if (!converted_frame_buffer) {
97 converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free};
98 }
94 99
95 const int converted_stride{frame->width * 4}; 100 const int converted_stride{frame->width * 4};
96 u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; 101 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
@@ -104,12 +109,12 @@ void Vic::Execute() {
104 const u32 block_height = static_cast<u32>(config.block_linear_height_log2); 109 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, 110 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
106 block_height, 0); 111 block_height, 0);
107 std::vector<u8> swizzled_data(size); 112 luma_buffer.resize(size);
108 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, 113 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
109 frame->width, 4, swizzled_data.data(), 114 frame->width, 4, luma_buffer.data(),
110 converted_frame_buffer.get(), block_height, 0, 0); 115 converted_frame_buffer.get(), block_height, 0, 0);
111 116
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 117 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
113 } else { 118 } else {
114 // send pitch linear frame 119 // send pitch linear frame
115 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, 120 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
@@ -132,8 +137,8 @@ void Vic::Execute() {
132 const auto stride = frame->linesize[0]; 137 const auto stride = frame->linesize[0];
133 const auto half_stride = frame->linesize[1]; 138 const auto half_stride = frame->linesize[1];
134 139
135 std::vector<u8> luma_buffer(aligned_width * surface_height); 140 luma_buffer.resize(aligned_width * surface_height);
136 std::vector<u8> chroma_buffer(aligned_width * half_height); 141 chroma_buffer.resize(aligned_width * half_height);
137 142
138 // Populate luma buffer 143 // Populate luma buffer
139 for (std::size_t y = 0; y < surface_height - 1; ++y) { 144 for (std::size_t y = 0; y < surface_height - 1; ++y) {
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index 8c4e284a1..6eaf72f21 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -97,6 +97,13 @@ private:
97 GPU& gpu; 97 GPU& gpu;
98 std::shared_ptr<Tegra::Nvdec> nvdec_processor; 98 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
99 99
100 /// Avoid reallocation of the following buffers every frame, as their
101 /// size does not change during a stream
102 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
103 AVMallocPtr converted_frame_buffer;
104 std::vector<u8> luma_buffer;
105 std::vector<u8> chroma_buffer;
106
100 GPUVAddr config_struct_address{}; 107 GPUVAddr config_struct_address{};
101 GPUVAddr output_surface_luma_address{}; 108 GPUVAddr output_surface_luma_address{};
102 GPUVAddr output_surface_chroma_u_address{}; 109 GPUVAddr output_surface_chroma_u_address{};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2a9bd4121..3db33faf3 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -30,8 +30,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
30 30
31GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) 31GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
32 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 32 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
33 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, 33 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_},
34 cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 34 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
36 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 35 fermi_2d{std::make_unique<Engines::Fermi2D>()},
37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 36 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 50319f1d5..7644588e3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -48,9 +48,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
48 dma_pusher.DispatchCalls(); 48 dma_pusher.DispatchCalls();
49 } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { 49 } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
50 // NVDEC 50 // NVDEC
51 cdma_pusher.Push(std::move(command_list->entries)); 51 cdma_pusher.ProcessEntries(std::move(command_list->entries));
52 cdma_pusher.DispatchCalls(); 52 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
53 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
54 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 53 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
55 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 54 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
56 rasterizer->ReleaseFences(); 55 rasterizer->ReleaseFences();