summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--  src/core/CMakeLists.txt                                |   2
-rw-r--r--  src/core/arm/arm_interface.cpp                         |  12
-rw-r--r--  src/core/core.cpp                                      |  10
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp     |  33
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.h       |   4
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp      | 135
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.h        |  20
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp                   |  17
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.h                     |  14
-rw-r--r--  src/core/hle/service/nvdrv/syncpoint_manager.cpp       |  39
-rw-r--r--  src/core/hle/service/nvdrv/syncpoint_manager.h         |  85
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.cpp        |   4
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp           |   4
-rw-r--r--  src/video_core/command_classes/codecs/codec.h          |   6
-rw-r--r--  src/video_core/command_classes/codecs/h264.cpp         |   3
-rw-r--r--  src/video_core/command_classes/codecs/h264.h           |  10
-rw-r--r--  src/video_core/command_classes/codecs/vp9.cpp          |   2
-rw-r--r--  src/video_core/command_classes/codecs/vp9.h            |   2
-rw-r--r--  src/video_core/command_classes/codecs/vp9_types.h      |   5
-rw-r--r--  src/video_core/command_classes/nvdec.h                 |   4
-rw-r--r--  src/video_core/dma_pusher.cpp                          |  80
-rw-r--r--  src/video_core/dma_pusher.h                            |  49
-rw-r--r--  src/video_core/gpu.cpp                                 |  48
-rw-r--r--  src/video_core/gpu.h                                   |  25
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp           |   9
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp |   2
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp        |   1
27 files changed, 488 insertions, 137 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e0f207f3e..9a983e81d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -454,6 +454,8 @@ add_library(core STATIC
454 hle/service/nvdrv/nvdrv.h 454 hle/service/nvdrv/nvdrv.h
455 hle/service/nvdrv/nvmemp.cpp 455 hle/service/nvdrv/nvmemp.cpp
456 hle/service/nvdrv/nvmemp.h 456 hle/service/nvdrv/nvmemp.h
457 hle/service/nvdrv/syncpoint_manager.cpp
458 hle/service/nvdrv/syncpoint_manager.h
457 hle/service/nvflinger/buffer_queue.cpp 459 hle/service/nvflinger/buffer_queue.cpp
458 hle/service/nvflinger/buffer_queue.h 460 hle/service/nvflinger/buffer_queue.h
459 hle/service/nvflinger/nvflinger.cpp 461 hle/service/nvflinger/nvflinger.cpp
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index d2295ed90..0951e1976 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -147,10 +147,18 @@ std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContex
147 auto fp = ctx.cpu_registers[29]; 147 auto fp = ctx.cpu_registers[29];
148 auto lr = ctx.cpu_registers[30]; 148 auto lr = ctx.cpu_registers[30];
149 while (true) { 149 while (true) {
150 out.push_back({"", 0, lr, 0}); 150 out.push_back({
151 if (!fp) { 151 .module = "",
152 .address = 0,
153 .original_address = lr,
154 .offset = 0,
155 .name = {},
156 });
157
158 if (fp == 0) {
152 break; 159 break;
153 } 160 }
161
154 lr = memory.Read64(fp + 8) - 4; 162 lr = memory.Read64(fp + 8) - 4;
155 fp = memory.Read64(fp); 163 fp = memory.Read64(fp);
156 } 164 }
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fde2ccc09..242796008 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -179,16 +179,18 @@ struct System::Impl {
179 arp_manager.ResetAll(); 179 arp_manager.ResetAll();
180 180
181 telemetry_session = std::make_unique<Core::TelemetrySession>(); 181 telemetry_session = std::make_unique<Core::TelemetrySession>();
182
183 gpu_core = VideoCore::CreateGPU(emu_window, system);
184 if (!gpu_core) {
185 return ResultStatus::ErrorVideoCore;
186 }
187
182 service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); 188 service_manager = std::make_shared<Service::SM::ServiceManager>(kernel);
183 189
184 Service::Init(service_manager, system); 190 Service::Init(service_manager, system);
185 GDBStub::DeferStart(); 191 GDBStub::DeferStart();
186 192
187 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); 193 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
188 gpu_core = VideoCore::CreateGPU(emu_window, system);
189 if (!gpu_core) {
190 return ResultStatus::ErrorVideoCore;
191 }
192 194
193 // Initialize time manager, which must happen after kernel is created 195 // Initialize time manager, which must happen after kernel is created
194 time_manager.Initialize(); 196 time_manager.Initialize();
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 75d9191ff..8356a8139 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -15,8 +15,9 @@
15 15
16namespace Service::Nvidia::Devices { 16namespace Service::Nvidia::Devices {
17 17
18nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) 18nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
19 : nvdevice(system), events_interface{events_interface} {} 19 SyncpointManager& syncpoint_manager)
20 : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
20nvhost_ctrl::~nvhost_ctrl() = default; 21nvhost_ctrl::~nvhost_ctrl() = default;
21 22
22u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, 23u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
70 return NvResult::BadParameter; 71 return NvResult::BadParameter;
71 } 72 }
72 73
74 if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
75 params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id);
76 std::memcpy(output.data(), &params, sizeof(params));
77 return NvResult::Success;
78 }
79
80 if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id);
81 syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) {
82 params.value = new_value;
83 std::memcpy(output.data(), &params, sizeof(params));
84 return NvResult::Success;
85 }
86
73 auto event = events_interface.events[event_id]; 87 auto event = events_interface.events[event_id];
74 auto& gpu = system.GPU(); 88 auto& gpu = system.GPU();
89
75 // This is mostly to take into account unimplemented features. As synced 90 // This is mostly to take into account unimplemented features. As synced
76 // gpu is always synced. 91 // gpu is always synced.
77 if (!gpu.IsAsync()) { 92 if (!gpu.IsAsync()) {
78 event.writable->Signal(); 93 event.event.writable->Signal();
79 return NvResult::Success; 94 return NvResult::Success;
80 } 95 }
81 auto lock = gpu.LockSync(); 96 auto lock = gpu.LockSync();
82 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); 97 const u32 current_syncpoint_value = event.fence.value;
83 const s32 diff = current_syncpoint_value - params.threshold; 98 const s32 diff = current_syncpoint_value - params.threshold;
84 if (diff >= 0) { 99 if (diff >= 0) {
85 event.writable->Signal(); 100 event.event.writable->Signal();
86 params.value = current_syncpoint_value; 101 params.value = current_syncpoint_value;
87 std::memcpy(output.data(), &params, sizeof(params)); 102 std::memcpy(output.data(), &params, sizeof(params));
88 return NvResult::Success; 103 return NvResult::Success;
@@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
109 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; 124 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
110 } 125 }
111 params.value |= event_id; 126 params.value |= event_id;
112 event.writable->Clear(); 127 event.event.writable->Clear();
113 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); 128 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
114 if (!is_async && ctrl.fresh_call) { 129 if (!is_async && ctrl.fresh_call) {
115 ctrl.must_delay = true; 130 ctrl.must_delay = true;
@@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto
157u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { 172u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
158 IocCtrlEventSignalParams params{}; 173 IocCtrlEventSignalParams params{};
159 std::memcpy(&params, input.data(), sizeof(params)); 174 std::memcpy(&params, input.data(), sizeof(params));
175
160 u32 event_id = params.event_id & 0x00FF; 176 u32 event_id = params.event_id & 0x00FF;
161 LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); 177 LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id);
178
162 if (event_id >= MaxNvEvents) { 179 if (event_id >= MaxNvEvents) {
163 return NvResult::BadParameter; 180 return NvResult::BadParameter;
164 } 181 }
165 if (events_interface.status[event_id] == EventState::Waiting) { 182 if (events_interface.status[event_id] == EventState::Waiting) {
166 events_interface.LiberateEvent(event_id); 183 events_interface.LiberateEvent(event_id);
167 events_interface.events[event_id].writable->Signal();
168 } 184 }
185
186 syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id);
187
169 return NvResult::Success; 188 return NvResult::Success;
170} 189}
171 190
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index f7b04d9f1..24ad96cb9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices {
14 14
15class nvhost_ctrl final : public nvdevice { 15class nvhost_ctrl final : public nvdevice {
16public: 16public:
17 explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); 17 explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface,
18 SyncpointManager& syncpoint_manager);
18 ~nvhost_ctrl() override; 19 ~nvhost_ctrl() override;
19 20
20 u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, 21 u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -145,6 +146,7 @@ private:
145 u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); 146 u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
146 147
147 EventInterface& events_interface; 148 EventInterface& events_interface;
149 SyncpointManager& syncpoint_manager;
148}; 150};
149 151
150} // namespace Service::Nvidia::Devices 152} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index f1966ac0e..152019548 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -7,14 +7,20 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/hle/service/nvdrv/devices/nvhost_gpu.h" 9#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
10#include "core/hle/service/nvdrv/syncpoint_manager.h"
10#include "core/memory.h" 11#include "core/memory.h"
11#include "video_core/gpu.h" 12#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13 14
14namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
15 16
16nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 17nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 18 SyncpointManager& syncpoint_manager)
19 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {
20 channel_fence.id = syncpoint_manager.AllocateSyncpoint();
21 channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
22}
23
18nvhost_gpu::~nvhost_gpu() = default; 24nvhost_gpu::~nvhost_gpu() = default;
19 25
20u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, 26u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
126 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, 132 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
127 params.unk3); 133 params.unk3);
128 134
129 auto& gpu = system.GPU(); 135 channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
130 params.fence_out.id = assigned_syncpoints; 136
131 params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); 137 params.fence_out = channel_fence;
132 assigned_syncpoints++; 138
133 std::memcpy(output.data(), &params, output.size()); 139 std::memcpy(output.data(), &params, output.size());
134 return 0; 140 return 0;
135} 141}
@@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
145 return 0; 151 return 0;
146} 152}
147 153
148u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 154static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
149 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 155 return {
150 UNIMPLEMENTED(); 156 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
157 Tegra::SubmissionMode::Increasing),
158 {fence.value},
159 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
160 Tegra::SubmissionMode::Increasing),
161 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
162 };
163}
164
165static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) {
166 std::vector<Tegra::CommandHeader> result{
167 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
168 Tegra::SubmissionMode::Increasing),
169 {}};
170
171 for (u32 count = 0; count < add_increment; ++count) {
172 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
173 Tegra::SubmissionMode::Increasing));
174 result.emplace_back(
175 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
151 } 176 }
152 IoctlSubmitGpfifo params{}; 177
153 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 178 return result;
179}
180
181static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence,
182 u32 add_increment) {
183 std::vector<Tegra::CommandHeader> result{
184 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1,
185 Tegra::SubmissionMode::Increasing),
186 {}};
187 const std::vector<Tegra::CommandHeader> increment{
188 BuildIncrementCommandList(fence, add_increment)};
189
190 result.insert(result.end(), increment.begin(), increment.end());
191
192 return result;
193}
194
195u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
196 Tegra::CommandList&& entries) {
154 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, 197 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
155 params.num_entries, params.flags.raw); 198 params.num_entries, params.flags.raw);
156 199
157 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + 200 auto& gpu = system.GPU();
158 params.num_entries * sizeof(Tegra::CommandListHeader),
159 "Incorrect input size");
160 201
161 Tegra::CommandList entries(params.num_entries); 202 params.fence_out.id = channel_fence.id;
162 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
163 params.num_entries * sizeof(Tegra::CommandListHeader));
164 203
165 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); 204 if (params.flags.add_wait.Value() &&
166 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); 205 !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
206 gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
207 }
167 208
168 auto& gpu = system.GPU(); 209 if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
169 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); 210 const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0;
170 if (params.flags.increment.Value()) { 211 params.fence_out.value = syncpoint_manager.IncreaseSyncpoint(
171 params.fence_out.value += current_syncpoint_value; 212 params.fence_out.id, params.AddIncrementValue() + increment_value);
172 } else { 213 } else {
173 params.fence_out.value = current_syncpoint_value; 214 params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
174 } 215 }
216
217 entries.RefreshIntegrityChecks(gpu);
175 gpu.PushGPUEntries(std::move(entries)); 218 gpu.PushGPUEntries(std::move(entries));
176 219
220 if (params.flags.add_increment.Value()) {
221 if (params.flags.suppress_wfi) {
222 gpu.PushGPUEntries(Tegra::CommandList{
223 BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
224 } else {
225 gpu.PushGPUEntries(Tegra::CommandList{
226 BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
227 }
228 }
229
177 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); 230 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
178 return 0; 231 return 0;
179} 232}
180 233
234u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
235 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
236 UNIMPLEMENTED();
237 }
238 IoctlSubmitGpfifo params{};
239 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
240
241 Tegra::CommandList entries(params.num_entries);
242 std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)],
243 params.num_entries * sizeof(Tegra::CommandListHeader));
244
245 return SubmitGPFIFOImpl(params, output, std::move(entries));
246}
247
181u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, 248u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
182 const std::vector<u8>& input2, IoctlVersion version) { 249 const std::vector<u8>& input2, IoctlVersion version) {
183 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 250 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
@@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
185 } 252 }
186 IoctlSubmitGpfifo params{}; 253 IoctlSubmitGpfifo params{};
187 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 254 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
188 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
189 params.num_entries, params.flags.raw);
190 255
191 Tegra::CommandList entries(params.num_entries); 256 Tegra::CommandList entries(params.num_entries);
192 if (version == IoctlVersion::Version2) { 257 if (version == IoctlVersion::Version2) {
193 std::memcpy(entries.data(), input2.data(), 258 std::memcpy(entries.command_lists.data(), input2.data(),
194 params.num_entries * sizeof(Tegra::CommandListHeader)); 259 params.num_entries * sizeof(Tegra::CommandListHeader));
195 } else { 260 } else {
196 system.Memory().ReadBlock(params.address, entries.data(), 261 system.Memory().ReadBlock(params.address, entries.command_lists.data(),
197 params.num_entries * sizeof(Tegra::CommandListHeader)); 262 params.num_entries * sizeof(Tegra::CommandListHeader));
198 } 263 }
199 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
200 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
201
202 auto& gpu = system.GPU();
203 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
204 if (params.flags.increment.Value()) {
205 params.fence_out.value += current_syncpoint_value;
206 } else {
207 params.fence_out.value = current_syncpoint_value;
208 }
209 gpu.PushGPUEntries(std::move(entries));
210 264
211 std::memcpy(output.data(), &params, output.size()); 265 return SubmitGPFIFOImpl(params, output, std::move(entries));
212 return 0;
213} 266}
214 267
215u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { 268u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 2ac74743f..a252fc06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -11,6 +11,11 @@
11#include "common/swap.h" 11#include "common/swap.h"
12#include "core/hle/service/nvdrv/devices/nvdevice.h" 12#include "core/hle/service/nvdrv/devices/nvdevice.h"
13#include "core/hle/service/nvdrv/nvdata.h" 13#include "core/hle/service/nvdrv/nvdata.h"
14#include "video_core/dma_pusher.h"
15
16namespace Service::Nvidia {
17class SyncpointManager;
18}
14 19
15namespace Service::Nvidia::Devices { 20namespace Service::Nvidia::Devices {
16 21
@@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
21 26
22class nvhost_gpu final : public nvdevice { 27class nvhost_gpu final : public nvdevice {
23public: 28public:
24 explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 29 explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
30 SyncpointManager& syncpoint_manager);
25 ~nvhost_gpu() override; 31 ~nvhost_gpu() override;
26 32
27 u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, 33 u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2,
@@ -162,10 +168,15 @@ private:
162 u32_le raw; 168 u32_le raw;
163 BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list 169 BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
164 BitField<1, 1, u32_le> add_increment; // append an increment to the list 170 BitField<1, 1, u32_le> add_increment; // append an increment to the list
165 BitField<2, 1, u32_le> new_hw_format; // Mostly ignored 171 BitField<2, 1, u32_le> new_hw_format; // mostly ignored
172 BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt
166 BitField<8, 1, u32_le> increment; // increment the returned fence 173 BitField<8, 1, u32_le> increment; // increment the returned fence
167 } flags; 174 } flags;
168 Fence fence_out; // returned new fence object for others to wait on 175 Fence fence_out; // returned new fence object for others to wait on
176
177 u32 AddIncrementValue() const {
178 return flags.add_increment.Value() << 1;
179 }
169 }; 180 };
170 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), 181 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
171 "IoctlSubmitGpfifo is incorrect size"); 182 "IoctlSubmitGpfifo is incorrect size");
@@ -190,6 +201,8 @@ private:
190 u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); 201 u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output);
191 u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); 202 u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
192 u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); 203 u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
204 u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output,
205 Tegra::CommandList&& entries);
193 u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); 206 u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
194 u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, 207 u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output,
195 const std::vector<u8>& input2, IoctlVersion version); 208 const std::vector<u8>& input2, IoctlVersion version);
@@ -198,7 +211,8 @@ private:
198 u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); 211 u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output);
199 212
200 std::shared_ptr<nvmap> nvmap_dev; 213 std::shared_ptr<nvmap> nvmap_dev;
201 u32 assigned_syncpoints{}; 214 SyncpointManager& syncpoint_manager;
215 Fence channel_fence;
202}; 216};
203 217
204} // namespace Service::Nvidia::Devices 218} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 803c1a984..a46755cdc 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -21,6 +21,7 @@
21#include "core/hle/service/nvdrv/interface.h" 21#include "core/hle/service/nvdrv/interface.h"
22#include "core/hle/service/nvdrv/nvdrv.h" 22#include "core/hle/service/nvdrv/nvdrv.h"
23#include "core/hle/service/nvdrv/nvmemp.h" 23#include "core/hle/service/nvdrv/nvmemp.h"
24#include "core/hle/service/nvdrv/syncpoint_manager.h"
24#include "core/hle/service/nvflinger/nvflinger.h" 25#include "core/hle/service/nvflinger/nvflinger.h"
25 26
26namespace Service::Nvidia { 27namespace Service::Nvidia {
@@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
36 nvflinger.SetNVDrvInstance(module_); 37 nvflinger.SetNVDrvInstance(module_);
37} 38}
38 39
39Module::Module(Core::System& system) { 40Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
40 auto& kernel = system.Kernel(); 41 auto& kernel = system.Kernel();
41 for (u32 i = 0; i < MaxNvEvents; i++) { 42 for (u32 i = 0; i < MaxNvEvents; i++) {
42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); 43 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
43 events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label); 44 events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)};
44 events_interface.status[i] = EventState::Free; 45 events_interface.status[i] = EventState::Free;
45 events_interface.registered[i] = false; 46 events_interface.registered[i] = false;
46 } 47 }
47 auto nvmap_dev = std::make_shared<Devices::nvmap>(system); 48 auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
48 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); 49 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
49 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); 50 devices["/dev/nvhost-gpu"] =
51 std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager);
50 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); 52 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
51 devices["/dev/nvmap"] = nvmap_dev; 53 devices["/dev/nvmap"] = nvmap_dev;
52 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); 54 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
53 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); 55 devices["/dev/nvhost-ctrl"] =
56 std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
54 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); 57 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
55 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); 58 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
56 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); 59 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
@@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
95 if (events_interface.assigned_syncpt[i] == syncpoint_id && 98 if (events_interface.assigned_syncpt[i] == syncpoint_id &&
96 events_interface.assigned_value[i] == value) { 99 events_interface.assigned_value[i] == value) {
97 events_interface.LiberateEvent(i); 100 events_interface.LiberateEvent(i);
98 events_interface.events[i].writable->Signal(); 101 events_interface.events[i].event.writable->Signal();
99 } 102 }
100 } 103 }
101} 104}
102 105
103std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { 106std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
104 return events_interface.events[event_id].readable; 107 return events_interface.events[event_id].event.readable;
105} 108}
106 109
107std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { 110std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
108 return events_interface.events[event_id].writable; 111 return events_interface.events[event_id].event.writable;
109} 112}
110 113
111} // namespace Service::Nvidia 114} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 7706a5590..f3d863dac 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -10,6 +10,7 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h" 11#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/nvdata.h" 12#include "core/hle/service/nvdrv/nvdata.h"
13#include "core/hle/service/nvdrv/syncpoint_manager.h"
13#include "core/hle/service/service.h" 14#include "core/hle/service/service.h"
14 15
15namespace Core { 16namespace Core {
@@ -22,15 +23,23 @@ class NVFlinger;
22 23
23namespace Service::Nvidia { 24namespace Service::Nvidia {
24 25
26class SyncpointManager;
27
25namespace Devices { 28namespace Devices {
26class nvdevice; 29class nvdevice;
27} 30}
28 31
32/// Represents an Nvidia event
33struct NvEvent {
34 Kernel::EventPair event;
35 Fence fence{};
36};
37
29struct EventInterface { 38struct EventInterface {
30 // Mask representing currently busy events 39 // Mask representing currently busy events
31 u64 events_mask{}; 40 u64 events_mask{};
32 // Each kernel event associated to an NV event 41 // Each kernel event associated to an NV event
33 std::array<Kernel::EventPair, MaxNvEvents> events; 42 std::array<NvEvent, MaxNvEvents> events;
34 // The status of the current NVEvent 43 // The status of the current NVEvent
35 std::array<EventState, MaxNvEvents> status{}; 44 std::array<EventState, MaxNvEvents> status{};
36 // Tells if an NVEvent is registered or not 45 // Tells if an NVEvent is registered or not
@@ -119,6 +128,9 @@ public:
119 std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; 128 std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
120 129
121private: 130private:
131 /// Manages syncpoints on the host
132 SyncpointManager syncpoint_manager;
133
122 /// Id to use for the next open file descriptor. 134 /// Id to use for the next open file descriptor.
123 u32 next_fd = 1; 135 u32 next_fd = 1;
124 136
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
new file mode 100644
index 000000000..0151a03b7
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp
@@ -0,0 +1,39 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "core/hle/service/nvdrv/syncpoint_manager.h"
7#include "video_core/gpu.h"
8
9namespace Service::Nvidia {
10
11SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {}
12
13SyncpointManager::~SyncpointManager() = default;
14
15u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) {
16 syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id);
17 return GetSyncpointMin(syncpoint_id);
18}
19
20u32 SyncpointManager::AllocateSyncpoint() {
21 for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) {
22 if (!syncpoints[syncpoint_id].is_allocated) {
23 syncpoints[syncpoint_id].is_allocated = true;
24 return syncpoint_id;
25 }
26 }
27 UNREACHABLE_MSG("No more available syncpoints!");
28 return {};
29}
30
31u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) {
32 for (u32 index = 0; index < value; ++index) {
33 syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed);
34 }
35
36 return GetSyncpointMax(syncpoint_id);
37}
38
39} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h
new file mode 100644
index 000000000..4168b6c7e
--- /dev/null
+++ b/src/core/hle/service/nvdrv/syncpoint_manager.h
@@ -0,0 +1,85 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9
10#include "common/common_types.h"
11#include "core/hle/service/nvdrv/nvdata.h"
12
13namespace Tegra {
14class GPU;
15}
16
17namespace Service::Nvidia {
18
19class SyncpointManager final {
20public:
21 explicit SyncpointManager(Tegra::GPU& gpu);
22 ~SyncpointManager();
23
24 /**
25 * Returns true if the specified syncpoint is expired for the given value.
26 * @param syncpoint_id Syncpoint ID to check.
27 * @param value Value to check against the specified syncpoint.
28 * @returns True if the specified syncpoint is expired for the given value, otherwise False.
29 */
30 bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const {
31 return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value);
32 }
33
34 /**
35 * Gets the lower bound for the specified syncpoint.
36 * @param syncpoint_id Syncpoint ID to get the lower bound for.
37 * @returns The lower bound for the specified syncpoint.
38 */
39 u32 GetSyncpointMin(u32 syncpoint_id) const {
40 return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed);
41 }
42
43 /**
44 * Gets the uper bound for the specified syncpoint.
45 * @param syncpoint_id Syncpoint ID to get the upper bound for.
46 * @returns The upper bound for the specified syncpoint.
47 */
48 u32 GetSyncpointMax(u32 syncpoint_id) const {
49 return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed);
50 }
51
52 /**
53 * Refreshes the minimum value for the specified syncpoint.
54 * @param syncpoint_id Syncpoint ID to be refreshed.
55 * @returns The new syncpoint minimum value.
56 */
57 u32 RefreshSyncpoint(u32 syncpoint_id);
58
59 /**
60 * Allocates a new syncoint.
61 * @returns The syncpoint ID for the newly allocated syncpoint.
62 */
63 u32 AllocateSyncpoint();
64
65 /**
66 * Increases the maximum value for the specified syncpoint.
67 * @param syncpoint_id Syncpoint ID to be increased.
68 * @param value Value to increase the specified syncpoint by.
69 * @returns The new syncpoint maximum value.
70 */
71 u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value);
72
73private:
74 struct Syncpoint {
75 std::atomic<u32> min;
76 std::atomic<u32> max;
77 std::atomic<bool> is_allocated;
78 };
79
80 std::array<Syncpoint, MaxSyncPoints> syncpoints{};
81
82 Tegra::GPU& gpu;
83};
84
85} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 4f1e210b1..b89a2d41b 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -29,6 +29,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
29 .slot = slot, 29 .slot = slot,
30 .status = Buffer::Status::Free, 30 .status = Buffer::Status::Free,
31 .igbp_buffer = igbp_buffer, 31 .igbp_buffer = igbp_buffer,
32 .transform = {},
33 .crop_rect = {},
34 .swap_interval = 0,
35 .multi_fence = {},
32 }); 36 });
33 37
34 buffer_wait_event.writable->Signal(); 38 buffer_wait_event.writable->Signal();
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index c64673dba..44aa2bdae 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -242,6 +242,10 @@ void NVFlinger::Compose() {
242 242
243 const auto& igbp_buffer = buffer->get().igbp_buffer; 243 const auto& igbp_buffer = buffer->get().igbp_buffer;
244 244
245 if (!system.IsPoweredOn()) {
246 return; // We are likely shutting down
247 }
248
245 auto& gpu = system.GPU(); 249 auto& gpu = system.GPU();
246 const auto& multi_fence = buffer->get().multi_fence; 250 const auto& multi_fence = buffer->get().multi_fence;
247 guard->unlock(); 251 guard->unlock();
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index cb67094f6..5bbe6a332 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -42,11 +42,11 @@ public:
42 void Decode(); 42 void Decode();
43 43
44 /// Returns most recently decoded frame 44 /// Returns most recently decoded frame
45 AVFrame* GetCurrentFrame(); 45 [[nodiscard]] AVFrame* GetCurrentFrame();
46 const AVFrame* GetCurrentFrame() const; 46 [[nodiscard]] const AVFrame* GetCurrentFrame() const;
47 47
48 /// Returns the value of current_codec 48 /// Returns the value of current_codec
49 NvdecCommon::VideoCodec GetCurrentCodec() const; 49 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
50 50
51private: 51private:
52 bool initialized{}; 52 bool initialized{};
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp
index 549a40f52..33e063e20 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/command_classes/codecs/h264.cpp
@@ -43,7 +43,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
43 43
44H264::~H264() = default; 44H264::~H264() = default;
45 45
46std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { 46const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
47 bool is_first_frame) {
47 H264DecoderContext context{}; 48 H264DecoderContext context{};
48 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); 49 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
49 50
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h
index f2292fd2f..273449495 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/command_classes/codecs/h264.h
@@ -51,14 +51,14 @@ public:
51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); 51 void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count);
52 52
53 /// Return the bitstream as a vector. 53 /// Return the bitstream as a vector.
54 std::vector<u8>& GetByteArray(); 54 [[nodiscard]] std::vector<u8>& GetByteArray();
55 const std::vector<u8>& GetByteArray() const; 55 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
56 56
57private: 57private:
58 void WriteBits(s32 value, s32 bit_count); 58 void WriteBits(s32 value, s32 bit_count);
59 void WriteExpGolombCodedInt(s32 value); 59 void WriteExpGolombCodedInt(s32 value);
60 void WriteExpGolombCodedUInt(u32 value); 60 void WriteExpGolombCodedUInt(u32 value);
61 s32 GetFreeBufferBits(); 61 [[nodiscard]] s32 GetFreeBufferBits();
62 void Flush(); 62 void Flush();
63 63
64 s32 buffer_size{8}; 64 s32 buffer_size{8};
@@ -74,8 +74,8 @@ public:
74 ~H264(); 74 ~H264();
75 75
76 /// Compose the H264 header of the frame for FFmpeg decoding 76 /// Compose the H264 header of the frame for FFmpeg decoding
77 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, 77 [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state,
78 bool is_first_frame = false); 78 bool is_first_frame = false);
79 79
80private: 80private:
81 struct H264ParameterSet { 81 struct H264ParameterSet {
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp
index 42520f856..ab44fdc9e 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/command_classes/codecs/vp9.cpp
@@ -854,7 +854,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
854 return uncomp_writer; 854 return uncomp_writer;
855} 855}
856 856
857std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { 857const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) {
858 std::vector<u8> bitstream; 858 std::vector<u8> bitstream;
859 { 859 {
860 Vp9FrameContainer curr_frame = GetCurrentFrame(state); 860 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h
index 05c9682fa..e2504512c 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/command_classes/codecs/vp9.h
@@ -119,7 +119,7 @@ public:
119 119
120 /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec 120 /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec
121 /// documentation 121 /// documentation
122 std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); 122 [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state);
123 123
124 /// Returns true if the most recent frame was a hidden frame. 124 /// Returns true if the most recent frame was a hidden frame.
125 [[nodiscard]] bool WasFrameHidden() const { 125 [[nodiscard]] bool WasFrameHidden() const {
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h
index a50acf6e8..4f0b05d22 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/command_classes/codecs/vp9_types.h
@@ -231,9 +231,8 @@ struct PictureInfo {
231 u32 surface_params{}; 231 u32 surface_params{};
232 INSERT_PADDING_WORDS(3); 232 INSERT_PADDING_WORDS(3);
233 233
234 Vp9PictureInfo Convert() const { 234 [[nodiscard]] Vp9PictureInfo Convert() const {
235 235 return {
236 return Vp9PictureInfo{
237 .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, 236 .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0,
238 .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, 237 .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0,
239 .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, 238 .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0,
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h
index af14f9857..eec4443f9 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/command_classes/nvdec.h
@@ -26,8 +26,8 @@ public:
26 void ProcessMethod(Method method, const std::vector<u32>& arguments); 26 void ProcessMethod(Method method, const std::vector<u32>& arguments);
27 27
28 /// Return most recently decoded frame 28 /// Return most recently decoded frame
29 AVFrame* GetFrame(); 29 [[nodiscard]] AVFrame* GetFrame();
30 const AVFrame* GetFrame() const; 30 [[nodiscard]] const AVFrame* GetFrame() const;
31 31
32private: 32private:
33 /// Invoke codec to decode a frame 33 /// Invoke codec to decode a frame
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index f2f96ac33..105b85a92 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/cityhash.h"
5#include "common/microprofile.h" 6#include "common/microprofile.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/memory.h" 8#include "core/memory.h"
@@ -12,6 +13,20 @@
12 13
13namespace Tegra { 14namespace Tegra {
14 15
16void CommandList::RefreshIntegrityChecks(GPU& gpu) {
17 command_list_hashes.resize(command_lists.size());
18
19 for (std::size_t index = 0; index < command_lists.size(); ++index) {
20 const CommandListHeader command_list_header = command_lists[index];
21 std::vector<CommandHeader> command_headers(command_list_header.size);
22 gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(),
23 command_list_header.size * sizeof(u32));
24 command_list_hashes[index] =
25 Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
26 command_list_header.size * sizeof(u32));
27 }
28}
29
15DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} 30DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
16 31
17DmaPusher::~DmaPusher() = default; 32DmaPusher::~DmaPusher() = default;
@@ -45,32 +60,51 @@ bool DmaPusher::Step() {
45 return false; 60 return false;
46 } 61 }
47 62
48 const CommandList& command_list{dma_pushbuffer.front()}; 63 CommandList& command_list{dma_pushbuffer.front()};
49 ASSERT_OR_EXECUTE(!command_list.empty(), {
50 // Somehow the command_list is empty, in order to avoid a crash
51 // We ignore it and assume its size is 0.
52 dma_pushbuffer.pop();
53 dma_pushbuffer_subindex = 0;
54 return true;
55 });
56 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
57 const GPUVAddr dma_get = command_list_header.addr;
58
59 if (dma_pushbuffer_subindex >= command_list.size()) {
60 // We've gone through the current list, remove it from the queue
61 dma_pushbuffer.pop();
62 dma_pushbuffer_subindex = 0;
63 }
64 64
65 if (command_list_header.size == 0) { 65 ASSERT_OR_EXECUTE(
66 return true; 66 command_list.command_lists.size() || command_list.prefetch_command_list.size(), {
67 } 67 // Somehow the command_list is empty, in order to avoid a crash
68 // We ignore it and assume its size is 0.
69 dma_pushbuffer.pop();
70 dma_pushbuffer_subindex = 0;
71 return true;
72 });
68 73
69 // Push buffer non-empty, read a word 74 if (command_list.prefetch_command_list.size()) {
70 command_headers.resize(command_list_header.size); 75 // Prefetched command list from nvdrv, used for things like synchronization
71 gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), 76 command_headers = std::move(command_list.prefetch_command_list);
72 command_list_header.size * sizeof(u32)); 77 dma_pushbuffer.pop();
78 } else {
79 const CommandListHeader command_list_header{
80 command_list.command_lists[dma_pushbuffer_subindex]};
81 const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++];
82 const GPUVAddr dma_get = command_list_header.addr;
83
84 if (dma_pushbuffer_subindex >= command_list.command_lists.size()) {
85 // We've gone through the current list, remove it from the queue
86 dma_pushbuffer.pop();
87 dma_pushbuffer_subindex = 0;
88 }
73 89
90 if (command_list_header.size == 0) {
91 return true;
92 }
93
94 // Push buffer non-empty, read a word
95 command_headers.resize(command_list_header.size);
96 gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
97 command_list_header.size * sizeof(u32));
98
99 // Integrity check
100 const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()),
101 command_list_header.size * sizeof(u32));
102 if (new_hash != next_hash) {
103 LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get);
104 dma_pushbuffer.pop();
105 return true;
106 }
107 }
74 for (std::size_t index = 0; index < command_headers.size();) { 108 for (std::size_t index = 0; index < command_headers.size();) {
75 const CommandHeader& command_header = command_headers[index]; 109 const CommandHeader& command_header = command_headers[index];
76 110
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index efa90d170..8496ba2da 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -27,6 +27,31 @@ enum class SubmissionMode : u32 {
27 IncreaseOnce = 5 27 IncreaseOnce = 5
28}; 28};
29 29
30// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
31// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
32// So the values you see in docs might be multiplied by 4.
33enum class BufferMethods : u32 {
34 BindObject = 0x0,
35 Nop = 0x2,
36 SemaphoreAddressHigh = 0x4,
37 SemaphoreAddressLow = 0x5,
38 SemaphoreSequence = 0x6,
39 SemaphoreTrigger = 0x7,
40 NotifyIntr = 0x8,
41 WrcacheFlush = 0x9,
42 Unk28 = 0xA,
43 UnkCacheFlush = 0xB,
44 RefCnt = 0x14,
45 SemaphoreAcquire = 0x1A,
46 SemaphoreRelease = 0x1B,
47 FenceValue = 0x1C,
48 FenceAction = 0x1D,
49 WaitForInterrupt = 0x1E,
50 Unk7c = 0x1F,
51 Yield = 0x20,
52 NonPullerMethods = 0x40,
53};
54
30struct CommandListHeader { 55struct CommandListHeader {
31 union { 56 union {
32 u64 raw; 57 u64 raw;
@@ -49,9 +74,29 @@ union CommandHeader {
49static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); 74static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
50static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); 75static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
51 76
77static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count,
78 SubmissionMode mode) {
79 CommandHeader result{};
80 result.method.Assign(static_cast<u32>(method));
81 result.arg_count.Assign(arg_count);
82 result.mode.Assign(mode);
83 return result;
84}
85
52class GPU; 86class GPU;
53 87
54using CommandList = std::vector<Tegra::CommandListHeader>; 88struct CommandList final {
89 CommandList() = default;
90 explicit CommandList(std::size_t size) : command_lists(size) {}
91 explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list)
92 : prefetch_command_list{std::move(prefetch_command_list)} {}
93
94 void RefreshIntegrityChecks(GPU& gpu);
95
96 std::vector<Tegra::CommandListHeader> command_lists;
97 std::vector<u64> command_list_hashes;
98 std::vector<Tegra::CommandHeader> prefetch_command_list;
99};
55 100
56/** 101/**
57 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the 102 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
@@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
60 * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for 105 * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
61 * details on this implementation. 106 * details on this implementation.
62 */ 107 */
63class DmaPusher { 108class DmaPusher final {
64public: 109public:
65 explicit DmaPusher(Core::System& system, GPU& gpu); 110 explicit DmaPusher(Core::System& system, GPU& gpu);
66 ~DmaPusher(); 111 ~DmaPusher();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 171f78183..ebd149c3a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -194,30 +194,6 @@ void GPU::SyncGuestHost() {
194void GPU::OnCommandListEnd() { 194void GPU::OnCommandListEnd() {
195 renderer->Rasterizer().ReleaseFences(); 195 renderer->Rasterizer().ReleaseFences();
196} 196}
197// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
198// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
199// So the values you see in docs might be multiplied by 4.
200enum class BufferMethods {
201 BindObject = 0x0,
202 Nop = 0x2,
203 SemaphoreAddressHigh = 0x4,
204 SemaphoreAddressLow = 0x5,
205 SemaphoreSequence = 0x6,
206 SemaphoreTrigger = 0x7,
207 NotifyIntr = 0x8,
208 WrcacheFlush = 0x9,
209 Unk28 = 0xA,
210 UnkCacheFlush = 0xB,
211 RefCnt = 0x14,
212 SemaphoreAcquire = 0x1A,
213 SemaphoreRelease = 0x1B,
214 FenceValue = 0x1C,
215 FenceAction = 0x1D,
216 Unk78 = 0x1E,
217 Unk7c = 0x1F,
218 Yield = 0x20,
219 NonPullerMethods = 0x40,
220};
221 197
222enum class GpuSemaphoreOperation { 198enum class GpuSemaphoreOperation {
223 AcquireEqual = 0x1, 199 AcquireEqual = 0x1,
@@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
277 case BufferMethods::UnkCacheFlush: 253 case BufferMethods::UnkCacheFlush:
278 case BufferMethods::WrcacheFlush: 254 case BufferMethods::WrcacheFlush:
279 case BufferMethods::FenceValue: 255 case BufferMethods::FenceValue:
256 break;
280 case BufferMethods::FenceAction: 257 case BufferMethods::FenceAction:
258 ProcessFenceActionMethod();
259 break;
260 case BufferMethods::WaitForInterrupt:
261 ProcessWaitForInterruptMethod();
281 break; 262 break;
282 case BufferMethods::SemaphoreTrigger: { 263 case BufferMethods::SemaphoreTrigger: {
283 ProcessSemaphoreTriggerMethod(); 264 ProcessSemaphoreTriggerMethod();
@@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
391 } 372 }
392} 373}
393 374
375void GPU::ProcessFenceActionMethod() {
376 switch (regs.fence_action.op) {
377 case FenceOperation::Acquire:
378 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
379 break;
380 case FenceOperation::Increment:
381 IncrementSyncPoint(regs.fence_action.syncpoint_id);
382 break;
383 default:
384 UNIMPLEMENTED_MSG("Unimplemented operation {}",
385 static_cast<u32>(regs.fence_action.op.Value()));
386 }
387}
388
389void GPU::ProcessWaitForInterruptMethod() {
390 // TODO(bunnei) ImplementMe
391 LOG_WARNING(HW_GPU, "(STUBBED) called");
392}
393
394void GPU::ProcessSemaphoreTriggerMethod() { 394void GPU::ProcessSemaphoreTriggerMethod() {
395 const auto semaphoreOperationMask = 0xF; 395 const auto semaphoreOperationMask = 0xF;
396 const auto op = 396 const auto op =
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b8c613b11..5444b49f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -263,6 +263,24 @@ public:
263 return use_nvdec; 263 return use_nvdec;
264 } 264 }
265 265
266 enum class FenceOperation : u32 {
267 Acquire = 0,
268 Increment = 1,
269 };
270
271 union FenceAction {
272 u32 raw;
273 BitField<0, 1, FenceOperation> op;
274 BitField<8, 24, u32> syncpoint_id;
275
276 static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
277 FenceAction result{};
278 result.op.Assign(op);
279 result.syncpoint_id.Assign(syncpoint_id);
280 return {result.raw};
281 }
282 };
283
266 struct Regs { 284 struct Regs {
267 static constexpr size_t NUM_REGS = 0x40; 285 static constexpr size_t NUM_REGS = 0x40;
268 286
@@ -291,10 +309,7 @@ public:
291 u32 semaphore_acquire; 309 u32 semaphore_acquire;
292 u32 semaphore_release; 310 u32 semaphore_release;
293 u32 fence_value; 311 u32 fence_value;
294 union { 312 FenceAction fence_action;
295 BitField<4, 4, u32> operation;
296 BitField<8, 8, u32> id;
297 } fence_action;
298 INSERT_UNION_PADDING_WORDS(0xE2); 313 INSERT_UNION_PADDING_WORDS(0xE2);
299 314
300 // Puller state 315 // Puller state
@@ -342,6 +357,8 @@ protected:
342 357
343private: 358private:
344 void ProcessBindMethod(const MethodCall& method_call); 359 void ProcessBindMethod(const MethodCall& method_call);
360 void ProcessFenceActionMethod();
361 void ProcessWaitForInterruptMethod();
345 void ProcessSemaphoreTriggerMethod(); 362 void ProcessSemaphoreTriggerMethod();
346 void ProcessSemaphoreRelease(); 363 void ProcessSemaphoreRelease();
347 void ProcessSemaphoreAcquire(); 364 void ProcessSemaphoreAcquire();
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index e1217ca83..f34ed6735 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() {
771 VkPhysicalDeviceDriverPropertiesKHR driver{ 771 VkPhysicalDeviceDriverPropertiesKHR driver{
772 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, 772 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
773 .pNext = nullptr, 773 .pNext = nullptr,
774 .driverID = {},
775 .driverName = {},
776 .driverInfo = {},
777 .conformanceVersion = {},
774 }; 778 };
775 779
776 VkPhysicalDeviceProperties2KHR properties{ 780 VkPhysicalDeviceProperties2KHR device_properties{
777 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, 781 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
778 .pNext = &driver, 782 .pNext = &driver,
783 .properties = {},
779 }; 784 };
780 physical.GetProperties2KHR(properties); 785 physical.GetProperties2KHR(device_properties);
781 786
782 driver_id = driver.driverID; 787 driver_id = driver.driverID;
783 vendor_name = driver.driverName; 788 vendor_name = driver.driverName;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 696eaeb5f..0e8f9c352 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -159,6 +159,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
159 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 159 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
160 .pNext = nullptr, 160 .pNext = nullptr,
161 .flags = 0, 161 .flags = 0,
162 .codeSize = 0,
162 }; 163 };
163 164
164 std::vector<vk::ShaderModule> modules; 165 std::vector<vk::ShaderModule> modules;
@@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
388 .logicOp = VK_LOGIC_OP_COPY, 389 .logicOp = VK_LOGIC_OP_COPY,
389 .attachmentCount = static_cast<u32>(num_attachments), 390 .attachmentCount = static_cast<u32>(num_attachments),
390 .pAttachments = cb_attachments.data(), 391 .pAttachments = cb_attachments.data(),
392 .blendConstants = {},
391 }; 393 };
392 394
393 std::vector dynamic_states{ 395 std::vector dynamic_states{
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index e8515321b..13dd16356 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
240 .is_tiled = is_tiled, 240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || 241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, 242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .is_layered = false,
243 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, 244 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
244 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, 245 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
245 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, 246 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,