diff options
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 85 | ||||
| -rw-r--r-- | src/video_core/command_classes/vic.cpp | 21 |
2 files changed, 50 insertions, 56 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 98e6296f1..1403a39d0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -19,26 +19,29 @@ | |||
| 19 | namespace Service::Nvidia::Devices { | 19 | namespace Service::Nvidia::Devices { |
| 20 | 20 | ||
| 21 | namespace { | 21 | namespace { |
| 22 | // Splice vectors will copy count amount of type T from the input vector into the dst vector. | 22 | // Copies count amount of type T from the input vector into the dst vector. |
| 23 | // Returns the number of bytes written into dst. | ||
| 23 | template <typename T> | 24 | template <typename T> |
| 24 | std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count, | 25 | std::size_t SliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count, |
| 25 | std::size_t offset) { | 26 | std::size_t offset) { |
| 26 | if (!dst.empty()) { | 27 | if (dst.empty()) { |
| 27 | std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); | 28 | return 0; |
| 28 | } | 29 | } |
| 29 | return 0; | 30 | const size_t bytes_copied = count * sizeof(T); |
| 31 | std::memcpy(dst.data(), input.data() + offset, bytes_copied); | ||
| 32 | return bytes_copied; | ||
| 30 | } | 33 | } |
| 31 | 34 | ||
| 32 | // Write vectors will write data to the output buffer | 35 | // Writes the data in src to an offset into the dst vector. The offset is specified in bytes |
| 36 | // Returns the number of bytes written into dst. | ||
| 33 | template <typename T> | 37 | template <typename T> |
| 34 | std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { | 38 | std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { |
| 35 | if (src.empty()) { | 39 | if (src.empty()) { |
| 36 | return 0; | 40 | return 0; |
| 37 | } else { | ||
| 38 | std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T)); | ||
| 39 | offset += src.size() * sizeof(T); | ||
| 40 | return offset; | ||
| 41 | } | 41 | } |
| 42 | const size_t bytes_copied = src.size() * sizeof(T); | ||
| 43 | std::memcpy(dst.data() + offset, src.data(), bytes_copied); | ||
| 44 | return bytes_copied; | ||
| 42 | } | 45 | } |
| 43 | } // Anonymous namespace | 46 | } // Anonymous namespace |
| 44 | 47 | ||
| @@ -62,7 +65,6 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 62 | LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); | 65 | LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); |
| 63 | 66 | ||
| 64 | // Instantiate param buffers | 67 | // Instantiate param buffers |
| 65 | std::size_t offset = sizeof(IoctlSubmit); | ||
| 66 | std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count); | 68 | std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count); |
| 67 | std::vector<Reloc> relocs(params.relocation_count); | 69 | std::vector<Reloc> relocs(params.relocation_count); |
| 68 | std::vector<u32> reloc_shifts(params.relocation_count); | 70 | std::vector<u32> reloc_shifts(params.relocation_count); |
| @@ -70,13 +72,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 70 | std::vector<SyncptIncr> wait_checks(params.syncpoint_count); | 72 | std::vector<SyncptIncr> wait_checks(params.syncpoint_count); |
| 71 | std::vector<Fence> fences(params.fence_count); | 73 | std::vector<Fence> fences(params.fence_count); |
| 72 | 74 | ||
| 73 | // Splice input into their respective buffers | 75 | // Slice input into their respective buffers |
| 74 | offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); | 76 | std::size_t offset = sizeof(IoctlSubmit); |
| 75 | offset = SpliceVectors(input, relocs, params.relocation_count, offset); | 77 | offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset); |
| 76 | offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); | 78 | offset += SliceVectors(input, relocs, params.relocation_count, offset); |
| 77 | offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); | 79 | offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset); |
| 78 | offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); | 80 | offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset); |
| 79 | offset = SpliceVectors(input, fences, params.fence_count, offset); | 81 | offset += SliceVectors(input, wait_checks, params.syncpoint_count, offset); |
| 82 | offset += SliceVectors(input, fences, params.fence_count, offset); | ||
| 80 | 83 | ||
| 81 | auto& gpu = system.GPU(); | 84 | auto& gpu = system.GPU(); |
| 82 | if (gpu.UseNvdec()) { | 85 | if (gpu.UseNvdec()) { |
| @@ -88,35 +91,27 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u | |||
| 88 | } | 91 | } |
| 89 | } | 92 | } |
| 90 | for (const auto& cmd_buffer : command_buffers) { | 93 | for (const auto& cmd_buffer : command_buffers) { |
| 91 | auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); | 94 | const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); |
| 92 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | 95 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |
| 93 | const auto map = FindBufferMap(object->dma_map_addr); | ||
| 94 | if (!map) { | ||
| 95 | LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}", | ||
| 96 | object->addr, object->dma_map_addr); | ||
| 97 | return NvResult::Success; | ||
| 98 | } | ||
| 99 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | 96 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); |
| 100 | gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(), | 97 | system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(), |
| 101 | cmdlist.size() * sizeof(u32)); | 98 | cmdlist.size() * sizeof(u32)); |
| 102 | gpu.PushCommandBuffer(cmdlist); | 99 | gpu.PushCommandBuffer(cmdlist); |
| 103 | } | 100 | } |
| 104 | if (gpu.UseNvdec()) { | 101 | if (gpu.UseNvdec()) { |
| 105 | |||
| 106 | fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); | 102 | fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); |
| 107 | |||
| 108 | Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; | 103 | Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; |
| 109 | gpu.PushCommandBuffer(cmdlist); | 104 | gpu.PushCommandBuffer(cmdlist); |
| 110 | } | 105 | } |
| 111 | std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); | 106 | std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); |
| 112 | // Some games expect command_buffers to be written back | 107 | // Some games expect command_buffers to be written back |
| 113 | offset = sizeof(IoctlSubmit); | 108 | offset = sizeof(IoctlSubmit); |
| 114 | offset = WriteVectors(output, command_buffers, offset); | 109 | offset += WriteVectors(output, command_buffers, offset); |
| 115 | offset = WriteVectors(output, relocs, offset); | 110 | offset += WriteVectors(output, relocs, offset); |
| 116 | offset = WriteVectors(output, reloc_shifts, offset); | 111 | offset += WriteVectors(output, reloc_shifts, offset); |
| 117 | offset = WriteVectors(output, syncpt_increments, offset); | 112 | offset += WriteVectors(output, syncpt_increments, offset); |
| 118 | offset = WriteVectors(output, wait_checks, offset); | 113 | offset += WriteVectors(output, wait_checks, offset); |
| 119 | offset = WriteVectors(output, fences, offset); | 114 | offset += WriteVectors(output, fences, offset); |
| 120 | 115 | ||
| 121 | return NvResult::Success; | 116 | return NvResult::Success; |
| 122 | } | 117 | } |
| @@ -148,14 +143,14 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto | |||
| 148 | std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); | 143 | std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); |
| 149 | std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); | 144 | std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); |
| 150 | 145 | ||
| 151 | SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); | 146 | SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); |
| 152 | 147 | ||
| 153 | auto& gpu = system.GPU(); | 148 | auto& gpu = system.GPU(); |
| 154 | 149 | ||
| 155 | for (auto& cmf_buff : cmd_buffer_handles) { | 150 | for (auto& cmd_buffer : cmd_buffer_handles) { |
| 156 | auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; | 151 | auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)}; |
| 157 | if (!object) { | 152 | if (!object) { |
| 158 | LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); | 153 | LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle); |
| 159 | std::memcpy(output.data(), &params, output.size()); | 154 | std::memcpy(output.data(), &params, output.size()); |
| 160 | return NvResult::InvalidState; | 155 | return NvResult::InvalidState; |
| 161 | } | 156 | } |
| @@ -170,7 +165,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto | |||
| 170 | if (!object->dma_map_addr) { | 165 | if (!object->dma_map_addr) { |
| 171 | LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); | 166 | LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); |
| 172 | } else { | 167 | } else { |
| 173 | cmf_buff.map_address = object->dma_map_addr; | 168 | cmd_buffer.map_address = object->dma_map_addr; |
| 174 | AddBufferMap(object->dma_map_addr, object->size, object->addr, | 169 | AddBufferMap(object->dma_map_addr, object->size, object->addr, |
| 175 | object->status == nvmap::Object::Status::Allocated); | 170 | object->status == nvmap::Object::Status::Allocated); |
| 176 | } | 171 | } |
| @@ -186,14 +181,14 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec | |||
| 186 | IoctlMapBuffer params{}; | 181 | IoctlMapBuffer params{}; |
| 187 | std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); | 182 | std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); |
| 188 | std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); | 183 | std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); |
| 189 | SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); | 184 | SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); |
| 190 | 185 | ||
| 191 | auto& gpu = system.GPU(); | 186 | auto& gpu = system.GPU(); |
| 192 | 187 | ||
| 193 | for (auto& cmf_buff : cmd_buffer_handles) { | 188 | for (auto& cmd_buffer : cmd_buffer_handles) { |
| 194 | const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; | 189 | const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)}; |
| 195 | if (!object) { | 190 | if (!object) { |
| 196 | LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); | 191 | LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle); |
| 197 | std::memcpy(output.data(), &params, output.size()); | 192 | std::memcpy(output.data(), &params, output.size()); |
| 198 | return NvResult::InvalidState; | 193 | return NvResult::InvalidState; |
| 199 | } | 194 | } |
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index ff3db0aee..ffb7c82a1 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -129,28 +129,27 @@ void Vic::Execute() { | |||
| 129 | 129 | ||
| 130 | const std::size_t surface_width = config.surface_width_minus1 + 1; | 130 | const std::size_t surface_width = config.surface_width_minus1 + 1; |
| 131 | const std::size_t surface_height = config.surface_height_minus1 + 1; | 131 | const std::size_t surface_height = config.surface_height_minus1 + 1; |
| 132 | const std::size_t half_width = surface_width / 2; | 132 | const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width)); |
| 133 | const std::size_t half_height = config.surface_height_minus1 / 2; | 133 | const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height)); |
| 134 | const std::size_t half_width = frame_width / 2; | ||
| 135 | const std::size_t half_height = frame_height / 2; | ||
| 134 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; | 136 | const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; |
| 135 | 137 | ||
| 136 | const auto* luma_ptr = frame->data[0]; | 138 | const auto* luma_ptr = frame->data[0]; |
| 137 | const auto* chroma_b_ptr = frame->data[1]; | 139 | const auto* chroma_b_ptr = frame->data[1]; |
| 138 | const auto* chroma_r_ptr = frame->data[2]; | 140 | const auto* chroma_r_ptr = frame->data[2]; |
| 139 | const auto stride = frame->linesize[0]; | 141 | const auto stride = static_cast<size_t>(frame->linesize[0]); |
| 140 | const auto half_stride = frame->linesize[1]; | 142 | const auto half_stride = static_cast<size_t>(frame->linesize[1]); |
| 141 | 143 | ||
| 142 | luma_buffer.resize(aligned_width * surface_height); | 144 | luma_buffer.resize(aligned_width * surface_height); |
| 143 | chroma_buffer.resize(aligned_width * half_height); | 145 | chroma_buffer.resize(aligned_width * surface_height / 2); |
| 144 | 146 | ||
| 145 | // Populate luma buffer | 147 | // Populate luma buffer |
| 146 | for (std::size_t y = 0; y < surface_height - 1; ++y) { | 148 | for (std::size_t y = 0; y < frame_height; ++y) { |
| 147 | const std::size_t src = y * stride; | 149 | const std::size_t src = y * stride; |
| 148 | const std::size_t dst = y * aligned_width; | 150 | const std::size_t dst = y * aligned_width; |
| 149 | 151 | for (std::size_t x = 0; x < frame_width; ++x) { | |
| 150 | const std::size_t size = surface_width; | 152 | luma_buffer[dst + x] = luma_ptr[src + x]; |
| 151 | |||
| 152 | for (std::size_t offset = 0; offset < size; ++offset) { | ||
| 153 | luma_buffer[dst + offset] = luma_ptr[src + offset]; | ||
| 154 | } | 153 | } |
| 155 | } | 154 | } |
| 156 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), | 155 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), |