summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp85
-rw-r--r--src/video_core/command_classes/vic.cpp21
2 files changed, 50 insertions, 56 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 98e6296f1..1403a39d0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -19,26 +19,29 @@
19namespace Service::Nvidia::Devices { 19namespace Service::Nvidia::Devices {
20 20
21namespace { 21namespace {
22// Splice vectors will copy count amount of type T from the input vector into the dst vector. 22// Copies count amount of type T from the input vector into the dst vector.
23// Returns the number of bytes written into dst.
23template <typename T> 24template <typename T>
24std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count, 25std::size_t SliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
25 std::size_t offset) { 26 std::size_t offset) {
26 if (!dst.empty()) { 27 if (dst.empty()) {
27 std::memcpy(dst.data(), input.data() + offset, count * sizeof(T)); 28 return 0;
28 } 29 }
29 return 0; 30 const size_t bytes_copied = count * sizeof(T);
31 std::memcpy(dst.data(), input.data() + offset, bytes_copied);
32 return bytes_copied;
30} 33}
31 34
32// Write vectors will write data to the output buffer 35// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
36// Returns the number of bytes written into dst.
33template <typename T> 37template <typename T>
34std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { 38std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
35 if (src.empty()) { 39 if (src.empty()) {
36 return 0; 40 return 0;
37 } else {
38 std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T));
39 offset += src.size() * sizeof(T);
40 return offset;
41 } 41 }
42 const size_t bytes_copied = src.size() * sizeof(T);
43 std::memcpy(dst.data() + offset, src.data(), bytes_copied);
44 return bytes_copied;
42} 45}
43} // Anonymous namespace 46} // Anonymous namespace
44 47
@@ -62,7 +65,6 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
62 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); 65 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
63 66
64 // Instantiate param buffers 67 // Instantiate param buffers
65 std::size_t offset = sizeof(IoctlSubmit);
66 std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count); 68 std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
67 std::vector<Reloc> relocs(params.relocation_count); 69 std::vector<Reloc> relocs(params.relocation_count);
68 std::vector<u32> reloc_shifts(params.relocation_count); 70 std::vector<u32> reloc_shifts(params.relocation_count);
@@ -70,13 +72,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
70 std::vector<SyncptIncr> wait_checks(params.syncpoint_count); 72 std::vector<SyncptIncr> wait_checks(params.syncpoint_count);
71 std::vector<Fence> fences(params.fence_count); 73 std::vector<Fence> fences(params.fence_count);
72 74
73 // Splice input into their respective buffers 75 // Slice input into their respective buffers
74 offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset); 76 std::size_t offset = sizeof(IoctlSubmit);
75 offset = SpliceVectors(input, relocs, params.relocation_count, offset); 77 offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
76 offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset); 78 offset += SliceVectors(input, relocs, params.relocation_count, offset);
77 offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset); 79 offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset);
78 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); 80 offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
79 offset = SpliceVectors(input, fences, params.fence_count, offset); 81 offset += SliceVectors(input, wait_checks, params.syncpoint_count, offset);
82 offset += SliceVectors(input, fences, params.fence_count, offset);
80 83
81 auto& gpu = system.GPU(); 84 auto& gpu = system.GPU();
82 if (gpu.UseNvdec()) { 85 if (gpu.UseNvdec()) {
@@ -88,35 +91,27 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
88 } 91 }
89 } 92 }
90 for (const auto& cmd_buffer : command_buffers) { 93 for (const auto& cmd_buffer : command_buffers) {
91 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); 94 const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
92 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 95 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
93 const auto map = FindBufferMap(object->dma_map_addr);
94 if (!map) {
95 LOG_ERROR(Service_NVDRV, "Tried to submit an invalid offset 0x{:X} dma 0x{:X}",
96 object->addr, object->dma_map_addr);
97 return NvResult::Success;
98 }
99 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); 96 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
100 gpu.MemoryManager().ReadBlock(map->StartAddr() + cmd_buffer.offset, cmdlist.data(), 97 system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(),
101 cmdlist.size() * sizeof(u32)); 98 cmdlist.size() * sizeof(u32));
102 gpu.PushCommandBuffer(cmdlist); 99 gpu.PushCommandBuffer(cmdlist);
103 } 100 }
104 if (gpu.UseNvdec()) { 101 if (gpu.UseNvdec()) {
105
106 fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1); 102 fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
107
108 Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}}; 103 Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
109 gpu.PushCommandBuffer(cmdlist); 104 gpu.PushCommandBuffer(cmdlist);
110 } 105 }
111 std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); 106 std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
112 // Some games expect command_buffers to be written back 107 // Some games expect command_buffers to be written back
113 offset = sizeof(IoctlSubmit); 108 offset = sizeof(IoctlSubmit);
114 offset = WriteVectors(output, command_buffers, offset); 109 offset += WriteVectors(output, command_buffers, offset);
115 offset = WriteVectors(output, relocs, offset); 110 offset += WriteVectors(output, relocs, offset);
116 offset = WriteVectors(output, reloc_shifts, offset); 111 offset += WriteVectors(output, reloc_shifts, offset);
117 offset = WriteVectors(output, syncpt_increments, offset); 112 offset += WriteVectors(output, syncpt_increments, offset);
118 offset = WriteVectors(output, wait_checks, offset); 113 offset += WriteVectors(output, wait_checks, offset);
119 offset = WriteVectors(output, fences, offset); 114 offset += WriteVectors(output, fences, offset);
120 115
121 return NvResult::Success; 116 return NvResult::Success;
122} 117}
@@ -148,14 +143,14 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
148 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 143 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
149 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 144 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
150 145
151 SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); 146 SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
152 147
153 auto& gpu = system.GPU(); 148 auto& gpu = system.GPU();
154 149
155 for (auto& cmf_buff : cmd_buffer_handles) { 150 for (auto& cmd_buffer : cmd_buffer_handles) {
156 auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; 151 auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
157 if (!object) { 152 if (!object) {
158 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); 153 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
159 std::memcpy(output.data(), &params, output.size()); 154 std::memcpy(output.data(), &params, output.size());
160 return NvResult::InvalidState; 155 return NvResult::InvalidState;
161 } 156 }
@@ -170,7 +165,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
170 if (!object->dma_map_addr) { 165 if (!object->dma_map_addr) {
171 LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size); 166 LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
172 } else { 167 } else {
173 cmf_buff.map_address = object->dma_map_addr; 168 cmd_buffer.map_address = object->dma_map_addr;
174 AddBufferMap(object->dma_map_addr, object->size, object->addr, 169 AddBufferMap(object->dma_map_addr, object->size, object->addr,
175 object->status == nvmap::Object::Status::Allocated); 170 object->status == nvmap::Object::Status::Allocated);
176 } 171 }
@@ -186,14 +181,14 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec
186 IoctlMapBuffer params{}; 181 IoctlMapBuffer params{};
187 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 182 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
188 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 183 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
189 SpliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); 184 SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer));
190 185
191 auto& gpu = system.GPU(); 186 auto& gpu = system.GPU();
192 187
193 for (auto& cmf_buff : cmd_buffer_handles) { 188 for (auto& cmd_buffer : cmd_buffer_handles) {
194 const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)}; 189 const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
195 if (!object) { 190 if (!object) {
196 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle); 191 LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
197 std::memcpy(output.data(), &params, output.size()); 192 std::memcpy(output.data(), &params, output.size());
198 return NvResult::InvalidState; 193 return NvResult::InvalidState;
199 } 194 }
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index ff3db0aee..ffb7c82a1 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -129,28 +129,27 @@ void Vic::Execute() {
129 129
130 const std::size_t surface_width = config.surface_width_minus1 + 1; 130 const std::size_t surface_width = config.surface_width_minus1 + 1;
131 const std::size_t surface_height = config.surface_height_minus1 + 1; 131 const std::size_t surface_height = config.surface_height_minus1 + 1;
132 const std::size_t half_width = surface_width / 2; 132 const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
133 const std::size_t half_height = config.surface_height_minus1 / 2; 133 const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
134 const std::size_t half_width = frame_width / 2;
135 const std::size_t half_height = frame_height / 2;
134 const std::size_t aligned_width = (surface_width + 0xff) & ~0xff; 136 const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
135 137
136 const auto* luma_ptr = frame->data[0]; 138 const auto* luma_ptr = frame->data[0];
137 const auto* chroma_b_ptr = frame->data[1]; 139 const auto* chroma_b_ptr = frame->data[1];
138 const auto* chroma_r_ptr = frame->data[2]; 140 const auto* chroma_r_ptr = frame->data[2];
139 const auto stride = frame->linesize[0]; 141 const auto stride = static_cast<size_t>(frame->linesize[0]);
140 const auto half_stride = frame->linesize[1]; 142 const auto half_stride = static_cast<size_t>(frame->linesize[1]);
141 143
142 luma_buffer.resize(aligned_width * surface_height); 144 luma_buffer.resize(aligned_width * surface_height);
143 chroma_buffer.resize(aligned_width * half_height); 145 chroma_buffer.resize(aligned_width * surface_height / 2);
144 146
145 // Populate luma buffer 147 // Populate luma buffer
146 for (std::size_t y = 0; y < surface_height - 1; ++y) { 148 for (std::size_t y = 0; y < frame_height; ++y) {
147 const std::size_t src = y * stride; 149 const std::size_t src = y * stride;
148 const std::size_t dst = y * aligned_width; 150 const std::size_t dst = y * aligned_width;
149 151 for (std::size_t x = 0; x < frame_width; ++x) {
150 const std::size_t size = surface_width; 152 luma_buffer[dst + x] = luma_ptr[src + x];
151
152 for (std::size_t offset = 0; offset < size; ++offset) {
153 luma_buffer[dst + offset] = luma_ptr[src + offset];
154 } 153 }
155 } 154 }
156 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), 155 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),