diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.h | 15 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/codec.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.h | 7 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 5 |
8 files changed, 38 insertions, 43 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 72499654c..a29abd15b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -28,8 +28,14 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve | |||
| 28 | return GetWaitbase(input, output); | 28 | return GetWaitbase(input, output); |
| 29 | case 0x9: | 29 | case 0x9: |
| 30 | return MapBuffer(input, output); | 30 | return MapBuffer(input, output); |
| 31 | case 0xa: | 31 | case 0xa: { |
| 32 | if (command.length == 0x1c) { | ||
| 33 | Tegra::ChCommandHeaderList cmdlist(1); | ||
| 34 | cmdlist[0] = Tegra::ChCommandHeader{0xDEADB33F}; | ||
| 35 | system.GPU().PushCommandBuffer(cmdlist); | ||
| 36 | } | ||
| 32 | return UnmapBuffer(input, output); | 37 | return UnmapBuffer(input, output); |
| 38 | } | ||
| 33 | default: | 39 | default: |
| 34 | break; | 40 | break; |
| 35 | } | 41 | } |
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 33b3c060b..c725baa98 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -37,20 +37,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) | |||
| 37 | 37 | ||
| 38 | CDmaPusher::~CDmaPusher() = default; | 38 | CDmaPusher::~CDmaPusher() = default; |
| 39 | 39 | ||
| 40 | void CDmaPusher::Push(ChCommandHeaderList&& entries) { | 40 | void CDmaPusher::ProcessEntries(ChCommandHeaderList&& entries) { |
| 41 | cdma_queue.push(std::move(entries)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void CDmaPusher::DispatchCalls() { | ||
| 45 | while (!cdma_queue.empty()) { | ||
| 46 | Step(); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void CDmaPusher::Step() { | ||
| 51 | const auto entries{cdma_queue.front()}; | ||
| 52 | cdma_queue.pop(); | ||
| 53 | |||
| 54 | std::vector<u32> values(entries.size()); | 41 | std::vector<u32> values(entries.size()); |
| 55 | std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); | 42 | std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); |
| 56 | 43 | ||
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index e5f212c1a..de7a3a35b 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -99,19 +99,13 @@ public: | |||
| 99 | explicit CDmaPusher(GPU& gpu_); | 99 | explicit CDmaPusher(GPU& gpu_); |
| 100 | ~CDmaPusher(); | 100 | ~CDmaPusher(); |
| 101 | 101 | ||
| 102 | /// Push NVDEC command buffer entries into queue | 102 | /// Process the command entry |
| 103 | void Push(ChCommandHeaderList&& entries); | 103 | void ProcessEntries(ChCommandHeaderList&& entries); |
| 104 | |||
| 105 | /// Process queued command buffer entries | ||
| 106 | void DispatchCalls(); | ||
| 107 | |||
| 108 | /// Process one queue element | ||
| 109 | void Step(); | ||
| 110 | 104 | ||
| 105 | private: | ||
| 111 | /// Invoke command class devices to execute the command based on the current state | 106 | /// Invoke command class devices to execute the command based on the current state |
| 112 | void ExecuteCommand(u32 state_offset, u32 data); | 107 | void ExecuteCommand(u32 state_offset, u32 data); |
| 113 | 108 | ||
| 114 | private: | ||
| 115 | /// Write arguments value to the ThiRegisters member at the specified offset | 109 | /// Write arguments value to the ThiRegisters member at the specified offset |
| 116 | void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); | 110 | void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); |
| 117 | 111 | ||
| @@ -128,9 +122,6 @@ private: | |||
| 128 | s32 offset{}; | 122 | s32 offset{}; |
| 129 | u32 mask{}; | 123 | u32 mask{}; |
| 130 | bool incrementing{}; | 124 | bool incrementing{}; |
| 131 | |||
| 132 | // Queue of command lists to be processed | ||
| 133 | std::queue<ChCommandHeaderList> cdma_queue; | ||
| 134 | }; | 125 | }; |
| 135 | 126 | ||
| 136 | } // namespace Tegra | 127 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 39bc923a5..d02dc6260 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp | |||
| @@ -44,8 +44,10 @@ Codec::~Codec() { | |||
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | 46 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { |
| 47 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", codec); | 47 | if (current_codec != codec) { |
| 48 | current_codec = codec; | 48 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); |
| 49 | current_codec = codec; | ||
| 50 | } | ||
| 49 | } | 51 | } |
| 50 | 52 | ||
| 51 | void Codec::StateWrite(u32 offset, u64 arguments) { | 53 | void Codec::StateWrite(u32 offset, u64 arguments) { |
| @@ -55,7 +57,6 @@ void Codec::StateWrite(u32 offset, u64 arguments) { | |||
| 55 | 57 | ||
| 56 | void Codec::Decode() { | 58 | void Codec::Decode() { |
| 57 | bool is_first_frame = false; | 59 | bool is_first_frame = false; |
| 58 | |||
| 59 | if (!initialized) { | 60 | if (!initialized) { |
| 60 | if (current_codec == NvdecCommon::VideoCodec::H264) { | 61 | if (current_codec == NvdecCommon::VideoCodec::H264) { |
| 61 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); | 62 | av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); |
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 2b7569335..73680d057 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -18,7 +18,10 @@ extern "C" { | |||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | 19 | ||
| 20 | Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) | 20 | Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) |
| 21 | : gpu(gpu_), nvdec_processor(std::move(nvdec_processor_)) {} | 21 | : gpu(gpu_), |
| 22 | nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} | ||
| 23 | |||
| 24 | {} | ||
| 22 | Vic::~Vic() = default; | 25 | Vic::~Vic() = default; |
| 23 | 26 | ||
| 24 | void Vic::VicStateWrite(u32 offset, u32 arguments) { | 27 | void Vic::VicStateWrite(u32 offset, u32 arguments) { |
| @@ -89,8 +92,10 @@ void Vic::Execute() { | |||
| 89 | // Get Converted frame | 92 | // Get Converted frame |
| 90 | const std::size_t linear_size = frame->width * frame->height * 4; | 93 | const std::size_t linear_size = frame->width * frame->height * 4; |
| 91 | 94 | ||
| 92 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | 95 | // Only allocate frame_buffer once per stream, as the size is not expected to change |
| 93 | AVMallocPtr converted_frame_buffer{static_cast<u8*>(av_malloc(linear_size)), av_free}; | 96 | if (!converted_frame_buffer) { |
| 97 | converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(linear_size)), av_free}; | ||
| 98 | } | ||
| 94 | 99 | ||
| 95 | const int converted_stride{frame->width * 4}; | 100 | const int converted_stride{frame->width * 4}; |
| 96 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; | 101 | u8* const converted_frame_buf_addr{converted_frame_buffer.get()}; |
| @@ -104,12 +109,12 @@ void Vic::Execute() { | |||
| 104 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | 109 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); |
| 105 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, | 110 | const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, |
| 106 | block_height, 0); | 111 | block_height, 0); |
| 107 | std::vector<u8> swizzled_data(size); | 112 | luma_buffer.resize(size); |
| 108 | Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, | 113 | Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4, |
| 109 | frame->width, 4, swizzled_data.data(), | 114 | frame->width, 4, luma_buffer.data(), |
| 110 | converted_frame_buffer.get(), block_height, 0, 0); | 115 | converted_frame_buffer.get(), block_height, 0, 0); |
| 111 | 116 | ||
| 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | 117 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); |
| 113 | } else { | 118 | } else { |
| 114 | // send pitch linear frame | 119 | // send pitch linear frame |
| 115 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | 120 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, |
| @@ -132,8 +137,8 @@ void Vic::Execute() { | |||
| 132 | const auto stride = frame->linesize[0]; | 137 | const auto stride = frame->linesize[0]; |
| 133 | const auto half_stride = frame->linesize[1]; | 138 | const auto half_stride = frame->linesize[1]; |
| 134 | 139 | ||
| 135 | std::vector<u8> luma_buffer(aligned_width * surface_height); | 140 | luma_buffer.resize(aligned_width * surface_height); |
| 136 | std::vector<u8> chroma_buffer(aligned_width * half_height); | 141 | chroma_buffer.resize(aligned_width * half_height); |
| 137 | 142 | ||
| 138 | // Populate luma buffer | 143 | // Populate luma buffer |
| 139 | for (std::size_t y = 0; y < surface_height - 1; ++y) { | 144 | for (std::size_t y = 0; y < surface_height - 1; ++y) { |
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h index 8c4e284a1..6eaf72f21 100644 --- a/src/video_core/command_classes/vic.h +++ b/src/video_core/command_classes/vic.h | |||
| @@ -97,6 +97,13 @@ private: | |||
| 97 | GPU& gpu; | 97 | GPU& gpu; |
| 98 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | 98 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; |
| 99 | 99 | ||
| 100 | /// Avoid reallocation of the following buffers every frame, as their | ||
| 101 | /// size does not change during a stream | ||
| 102 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | ||
| 103 | AVMallocPtr converted_frame_buffer; | ||
| 104 | std::vector<u8> luma_buffer; | ||
| 105 | std::vector<u8> chroma_buffer; | ||
| 106 | |||
| 100 | GPUVAddr config_struct_address{}; | 107 | GPUVAddr config_struct_address{}; |
| 101 | GPUVAddr output_surface_luma_address{}; | 108 | GPUVAddr output_surface_luma_address{}; |
| 102 | GPUVAddr output_surface_chroma_u_address{}; | 109 | GPUVAddr output_surface_chroma_u_address{}; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 2a9bd4121..3db33faf3 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -30,8 +30,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | |||
| 30 | 30 | ||
| 31 | GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) | 31 | GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) |
| 32 | : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, | 32 | : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, |
| 33 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, | 33 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, |
| 34 | cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_}, | ||
| 35 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, | 34 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, |
| 36 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, | 35 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, |
| 37 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, | 36 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 50319f1d5..7644588e3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -48,9 +48,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 48 | dma_pusher.DispatchCalls(); | 48 | dma_pusher.DispatchCalls(); |
| 49 | } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { | 49 | } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { |
| 50 | // NVDEC | 50 | // NVDEC |
| 51 | cdma_pusher.Push(std::move(command_list->entries)); | 51 | cdma_pusher.ProcessEntries(std::move(command_list->entries)); |
| 52 | cdma_pusher.DispatchCalls(); | 52 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 53 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||
| 54 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 53 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 55 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { | 54 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { |
| 56 | rasterizer->ReleaseFences(); | 55 | rasterizer->ReleaseFences(); |