Diffstat (limited to 'src')
27 files changed, 488 insertions, 137 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e0f207f3e..9a983e81d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -454,6 +454,8 @@ add_library(core STATIC | |||
| 454 | hle/service/nvdrv/nvdrv.h | 454 | hle/service/nvdrv/nvdrv.h |
| 455 | hle/service/nvdrv/nvmemp.cpp | 455 | hle/service/nvdrv/nvmemp.cpp |
| 456 | hle/service/nvdrv/nvmemp.h | 456 | hle/service/nvdrv/nvmemp.h |
| 457 | hle/service/nvdrv/syncpoint_manager.cpp | ||
| 458 | hle/service/nvdrv/syncpoint_manager.h | ||
| 457 | hle/service/nvflinger/buffer_queue.cpp | 459 | hle/service/nvflinger/buffer_queue.cpp |
| 458 | hle/service/nvflinger/buffer_queue.h | 460 | hle/service/nvflinger/buffer_queue.h |
| 459 | hle/service/nvflinger/nvflinger.cpp | 461 | hle/service/nvflinger/nvflinger.cpp |
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index d2295ed90..0951e1976 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp | |||
| @@ -147,10 +147,18 @@ std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContex | |||
| 147 | auto fp = ctx.cpu_registers[29]; | 147 | auto fp = ctx.cpu_registers[29]; |
| 148 | auto lr = ctx.cpu_registers[30]; | 148 | auto lr = ctx.cpu_registers[30]; |
| 149 | while (true) { | 149 | while (true) { |
| 150 | out.push_back({"", 0, lr, 0}); | 150 | out.push_back({ |
| 151 | if (!fp) { | 151 | .module = "", |
| 152 | .address = 0, | ||
| 153 | .original_address = lr, | ||
| 154 | .offset = 0, | ||
| 155 | .name = {}, | ||
| 156 | }); | ||
| 157 | |||
| 158 | if (fp == 0) { | ||
| 152 | break; | 159 | break; |
| 153 | } | 160 | } |
| 161 | |||
| 154 | lr = memory.Read64(fp + 8) - 4; | 162 | lr = memory.Read64(fp + 8) - 4; |
| 155 | fp = memory.Read64(fp); | 163 | fp = memory.Read64(fp); |
| 156 | } | 164 | } |
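Annotation: the reworked backtrace above walks the AArch64 frame-record chain — x29 (fp) points at a two-word record whose first word is the caller's fp and whose second word is the saved lr, and the walk stops when fp reaches zero. Below is a standalone toy of that walk; the fake memory layout and the addresses in it are invented purely for illustration, not taken from the emulator.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy 64-bit memory indexed by byte address; Read64 mimics memory.Read64().
    static std::uint64_t Read64(const std::vector<std::uint64_t>& mem, std::uint64_t addr) {
        return mem[addr / 8];
    }

    int main() {
        std::vector<std::uint64_t> mem(32, 0);
        // Two linked AArch64 frame records: [fp] = caller's fp, [fp + 8] = saved lr.
        mem[0x40 / 8] = 0x60;   // frame record at 0x40 links to the record at 0x60
        mem[0x48 / 8] = 0x2004; // return address saved alongside it
        mem[0x60 / 8] = 0;      // fp == 0 terminates the chain
        mem[0x68 / 8] = 0x3004;

        std::uint64_t fp = 0x40;   // x29 from the saved context
        std::uint64_t lr = 0x1004; // x30 from the saved context
        while (true) {
            std::printf("frame: lr=0x%llx\n", static_cast<unsigned long long>(lr));
            if (fp == 0) {
                break;
            }
            lr = Read64(mem, fp + 8) - 4; // back up from the return address to the call site
            fp = Read64(mem, fp);         // follow the link to the caller's frame record
        }
        return 0;
    }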
diff --git a/src/core/core.cpp b/src/core/core.cpp index fde2ccc09..242796008 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -179,16 +179,18 @@ struct System::Impl { | |||
| 179 | arp_manager.ResetAll(); | 179 | arp_manager.ResetAll(); |
| 180 | 180 | ||
| 181 | telemetry_session = std::make_unique<Core::TelemetrySession>(); | 181 | telemetry_session = std::make_unique<Core::TelemetrySession>(); |
| 182 | |||
| 183 | gpu_core = VideoCore::CreateGPU(emu_window, system); | ||
| 184 | if (!gpu_core) { | ||
| 185 | return ResultStatus::ErrorVideoCore; | ||
| 186 | } | ||
| 187 | |||
| 182 | service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); | 188 | service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); |
| 183 | 189 | ||
| 184 | Service::Init(service_manager, system); | 190 | Service::Init(service_manager, system); |
| 185 | GDBStub::DeferStart(); | 191 | GDBStub::DeferStart(); |
| 186 | 192 | ||
| 187 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | 193 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); |
| 188 | gpu_core = VideoCore::CreateGPU(emu_window, system); | ||
| 189 | if (!gpu_core) { | ||
| 190 | return ResultStatus::ErrorVideoCore; | ||
| 191 | } | ||
| 192 | 194 | ||
| 193 | // Initialize time manager, which must happen after kernel is created | 195 | // Initialize time manager, which must happen after kernel is created |
| 194 | time_manager.Initialize(); | 196 | time_manager.Initialize(); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 75d9191ff..8356a8139 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -15,8 +15,9 @@ | |||
| 15 | 15 | ||
| 16 | namespace Service::Nvidia::Devices { | 16 | namespace Service::Nvidia::Devices { |
| 17 | 17 | ||
| 18 | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) | 18 | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, |
| 19 | : nvdevice(system), events_interface{events_interface} {} | 19 | SyncpointManager& syncpoint_manager) |
| 20 | : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} | ||
| 20 | nvhost_ctrl::~nvhost_ctrl() = default; | 21 | nvhost_ctrl::~nvhost_ctrl() = default; |
| 21 | 22 | ||
| 22 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | 23 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |
| @@ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 70 | return NvResult::BadParameter; | 71 | return NvResult::BadParameter; |
| 71 | } | 72 | } |
| 72 | 73 | ||
| 74 | if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { | ||
| 75 | params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id); | ||
| 76 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 77 | return NvResult::Success; | ||
| 78 | } | ||
| 79 | |||
| 80 | if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id); | ||
| 81 | syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { | ||
| 82 | params.value = new_value; | ||
| 83 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 84 | return NvResult::Success; | ||
| 85 | } | ||
| 86 | |||
| 73 | auto event = events_interface.events[event_id]; | 87 | auto event = events_interface.events[event_id]; |
| 74 | auto& gpu = system.GPU(); | 88 | auto& gpu = system.GPU(); |
| 89 | |||
| 75 | // This is mostly to take into account unimplemented features. As synced | 90 | // This is mostly to take into account unimplemented features. As synced |
| 76 | // gpu is always synced. | 91 | // gpu is always synced. |
| 77 | if (!gpu.IsAsync()) { | 92 | if (!gpu.IsAsync()) { |
| 78 | event.writable->Signal(); | 93 | event.event.writable->Signal(); |
| 79 | return NvResult::Success; | 94 | return NvResult::Success; |
| 80 | } | 95 | } |
| 81 | auto lock = gpu.LockSync(); | 96 | auto lock = gpu.LockSync(); |
| 82 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | 97 | const u32 current_syncpoint_value = event.fence.value; |
| 83 | const s32 diff = current_syncpoint_value - params.threshold; | 98 | const s32 diff = current_syncpoint_value - params.threshold; |
| 84 | if (diff >= 0) { | 99 | if (diff >= 0) { |
| 85 | event.writable->Signal(); | 100 | event.event.writable->Signal(); |
| 86 | params.value = current_syncpoint_value; | 101 | params.value = current_syncpoint_value; |
| 87 | std::memcpy(output.data(), ¶ms, sizeof(params)); | 102 | std::memcpy(output.data(), ¶ms, sizeof(params)); |
| 88 | return NvResult::Success; | 103 | return NvResult::Success; |
| @@ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 109 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | 124 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; |
| 110 | } | 125 | } |
| 111 | params.value |= event_id; | 126 | params.value |= event_id; |
| 112 | event.writable->Clear(); | 127 | event.event.writable->Clear(); |
| 113 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | 128 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); |
| 114 | if (!is_async && ctrl.fresh_call) { | 129 | if (!is_async && ctrl.fresh_call) { |
| 115 | ctrl.must_delay = true; | 130 | ctrl.must_delay = true; |
| @@ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto | |||
| 157 | u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { | 172 | u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { |
| 158 | IocCtrlEventSignalParams params{}; | 173 | IocCtrlEventSignalParams params{}; |
| 159 | std::memcpy(¶ms, input.data(), sizeof(params)); | 174 | std::memcpy(¶ms, input.data(), sizeof(params)); |
| 175 | |||
| 160 | u32 event_id = params.event_id & 0x00FF; | 176 | u32 event_id = params.event_id & 0x00FF; |
| 161 | LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); | 177 | LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); |
| 178 | |||
| 162 | if (event_id >= MaxNvEvents) { | 179 | if (event_id >= MaxNvEvents) { |
| 163 | return NvResult::BadParameter; | 180 | return NvResult::BadParameter; |
| 164 | } | 181 | } |
| 165 | if (events_interface.status[event_id] == EventState::Waiting) { | 182 | if (events_interface.status[event_id] == EventState::Waiting) { |
| 166 | events_interface.LiberateEvent(event_id); | 183 | events_interface.LiberateEvent(event_id); |
| 167 | events_interface.events[event_id].writable->Signal(); | ||
| 168 | } | 184 | } |
| 185 | |||
| 186 | syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id); | ||
| 187 | |||
| 169 | return NvResult::Success; | 188 | return NvResult::Success; |
| 170 | } | 189 | } |
| 171 | 190 | ||
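Annotation: the IocCtrlEventWait changes above add two cheap host-side checks before any event machinery is touched — first against the cached syncpoint minimum, then once more after refreshing that cache from the GPU — and only then does the code fall through to registering a syncpoint interrupt. A condensed sketch of that ordering follows; the struct and names are simplified stand-ins (the real expiry test is wraparound-aware, see the syncpoint_manager.h notes further down).

    #include <cstdint>
    #include <optional>

    // Simplified stand-in for the per-syncpoint state.
    struct FakeSyncpointManager {
        std::uint32_t cached_min = 5;     // last value read back from the GPU
        std::uint32_t hardware_value = 9; // what the GPU would report right now

        bool IsExpired(std::uint32_t threshold) const { return cached_min >= threshold; }
        std::uint32_t Refresh() { cached_min = hardware_value; return cached_min; }
    };

    // Returns the value to report to the guest, or std::nullopt if the caller must
    // fall back to the slow path (register a syncpoint interrupt and wait).
    std::optional<std::uint32_t> TryCompleteWait(FakeSyncpointManager& sm, std::uint32_t threshold) {
        if (sm.IsExpired(threshold)) {
            return sm.cached_min;  // already signalled; no GPU round-trip needed
        }
        const std::uint32_t refreshed = sm.Refresh();
        if (sm.IsExpired(threshold)) {
            return refreshed;      // expired between the cached value and the fresh GPU read
        }
        return std::nullopt;       // still pending
    }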
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index f7b04d9f1..24ad96cb9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -14,7 +14,8 @@ namespace Service::Nvidia::Devices { | |||
| 14 | 14 | ||
| 15 | class nvhost_ctrl final : public nvdevice { | 15 | class nvhost_ctrl final : public nvdevice { |
| 16 | public: | 16 | public: |
| 17 | explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); | 17 | explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface, |
| 18 | SyncpointManager& syncpoint_manager); | ||
| 18 | ~nvhost_ctrl() override; | 19 | ~nvhost_ctrl() override; |
| 19 | 20 | ||
| 20 | u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | 21 | u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |
| @@ -145,6 +146,7 @@ private: | |||
| 145 | u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); | 146 | u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); |
| 146 | 147 | ||
| 147 | EventInterface& events_interface; | 148 | EventInterface& events_interface; |
| 149 | SyncpointManager& syncpoint_manager; | ||
| 148 | }; | 150 | }; |
| 149 | 151 | ||
| 150 | } // namespace Service::Nvidia::Devices | 152 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index f1966ac0e..152019548 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -7,14 +7,20 @@ | |||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | 9 | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" |
| 10 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||
| 10 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
| 16 | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | 17 | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | 18 | SyncpointManager& syncpoint_manager) |
| 19 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} { | ||
| 20 | channel_fence.id = syncpoint_manager.AllocateSyncpoint(); | ||
| 21 | channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); | ||
| 22 | } | ||
| 23 | |||
| 18 | nvhost_gpu::~nvhost_gpu() = default; | 24 | nvhost_gpu::~nvhost_gpu() = default; |
| 19 | 25 | ||
| 20 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | 26 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |
| @@ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 126 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | 132 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |
| 127 | params.unk3); | 133 | params.unk3); |
| 128 | 134 | ||
| 129 | auto& gpu = system.GPU(); | 135 | channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); |
| 130 | params.fence_out.id = assigned_syncpoints; | 136 | |
| 131 | params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); | 137 | params.fence_out = channel_fence; |
| 132 | assigned_syncpoints++; | 138 | |
| 133 | std::memcpy(output.data(), ¶ms, output.size()); | 139 | std::memcpy(output.data(), ¶ms, output.size()); |
| 134 | return 0; | 140 | return 0; |
| 135 | } | 141 | } |
| @@ -145,39 +151,100 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< | |||
| 145 | return 0; | 151 | return 0; |
| 146 | } | 152 | } |
| 147 | 153 | ||
| 148 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | 154 | static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { |
| 149 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { | 155 | return { |
| 150 | UNIMPLEMENTED(); | 156 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, |
| 157 | Tegra::SubmissionMode::Increasing), | ||
| 158 | {fence.value}, | ||
| 159 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||
| 160 | Tegra::SubmissionMode::Increasing), | ||
| 161 | Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), | ||
| 162 | }; | ||
| 163 | } | ||
| 164 | |||
| 165 | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) { | ||
| 166 | std::vector<Tegra::CommandHeader> result{ | ||
| 167 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | ||
| 168 | Tegra::SubmissionMode::Increasing), | ||
| 169 | {}}; | ||
| 170 | |||
| 171 | for (u32 count = 0; count < add_increment; ++count) { | ||
| 172 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||
| 173 | Tegra::SubmissionMode::Increasing)); | ||
| 174 | result.emplace_back( | ||
| 175 | Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); | ||
| 151 | } | 176 | } |
| 152 | IoctlSubmitGpfifo params{}; | 177 | |
| 153 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); | 178 | return result; |
| 179 | } | ||
| 180 | |||
| 181 | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence, | ||
| 182 | u32 add_increment) { | ||
| 183 | std::vector<Tegra::CommandHeader> result{ | ||
| 184 | Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, | ||
| 185 | Tegra::SubmissionMode::Increasing), | ||
| 186 | {}}; | ||
| 187 | const std::vector<Tegra::CommandHeader> increment{ | ||
| 188 | BuildIncrementCommandList(fence, add_increment)}; | ||
| 189 | |||
| 190 | result.insert(result.end(), increment.begin(), increment.end()); | ||
| 191 | |||
| 192 | return result; | ||
| 193 | } | ||
| 194 | |||
| 195 | u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||
| 196 | Tegra::CommandList&& entries) { | ||
| 154 | LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, | 197 | LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, |
| 155 | params.num_entries, params.flags.raw); | 198 | params.num_entries, params.flags.raw); |
| 156 | 199 | ||
| 157 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + | 200 | auto& gpu = system.GPU(); |
| 158 | params.num_entries * sizeof(Tegra::CommandListHeader), | ||
| 159 | "Incorrect input size"); | ||
| 160 | 201 | ||
| 161 | Tegra::CommandList entries(params.num_entries); | 202 | params.fence_out.id = channel_fence.id; |
| 162 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | ||
| 163 | params.num_entries * sizeof(Tegra::CommandListHeader)); | ||
| 164 | 203 | ||
| 165 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); | 204 | if (params.flags.add_wait.Value() && |
| 166 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | 205 | !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { |
| 206 | gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); | ||
| 207 | } | ||
| 167 | 208 | ||
| 168 | auto& gpu = system.GPU(); | 209 | if (params.flags.add_increment.Value() || params.flags.increment.Value()) { |
| 169 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | 210 | const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; |
| 170 | if (params.flags.increment.Value()) { | 211 | params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( |
| 171 | params.fence_out.value += current_syncpoint_value; | 212 | params.fence_out.id, params.AddIncrementValue() + increment_value); |
| 172 | } else { | 213 | } else { |
| 173 | params.fence_out.value = current_syncpoint_value; | 214 | params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); |
| 174 | } | 215 | } |
| 216 | |||
| 217 | entries.RefreshIntegrityChecks(gpu); | ||
| 175 | gpu.PushGPUEntries(std::move(entries)); | 218 | gpu.PushGPUEntries(std::move(entries)); |
| 176 | 219 | ||
| 220 | if (params.flags.add_increment.Value()) { | ||
| 221 | if (params.flags.suppress_wfi) { | ||
| 222 | gpu.PushGPUEntries(Tegra::CommandList{ | ||
| 223 | BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())}); | ||
| 224 | } else { | ||
| 225 | gpu.PushGPUEntries(Tegra::CommandList{ | ||
| 226 | BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())}); | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 177 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo)); | 230 | std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmitGpfifo)); |
| 178 | return 0; | 231 | return 0; |
| 179 | } | 232 | } |
| 180 | 233 | ||
| 234 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 235 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { | ||
| 236 | UNIMPLEMENTED(); | ||
| 237 | } | ||
| 238 | IoctlSubmitGpfifo params{}; | ||
| 239 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); | ||
| 240 | |||
| 241 | Tegra::CommandList entries(params.num_entries); | ||
| 242 | std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], | ||
| 243 | params.num_entries * sizeof(Tegra::CommandListHeader)); | ||
| 244 | |||
| 245 | return SubmitGPFIFOImpl(params, output, std::move(entries)); | ||
| 246 | } | ||
| 247 | |||
| 181 | u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | 248 | u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, |
| 182 | const std::vector<u8>& input2, IoctlVersion version) { | 249 | const std::vector<u8>& input2, IoctlVersion version) { |
| 183 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { | 250 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { |
| @@ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | |||
| 185 | } | 252 | } |
| 186 | IoctlSubmitGpfifo params{}; | 253 | IoctlSubmitGpfifo params{}; |
| 187 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); | 254 | std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 188 | LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, | ||
| 189 | params.num_entries, params.flags.raw); | ||
| 190 | 255 | ||
| 191 | Tegra::CommandList entries(params.num_entries); | 256 | Tegra::CommandList entries(params.num_entries); |
| 192 | if (version == IoctlVersion::Version2) { | 257 | if (version == IoctlVersion::Version2) { |
| 193 | std::memcpy(entries.data(), input2.data(), | 258 | std::memcpy(entries.command_lists.data(), input2.data(), |
| 194 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 259 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 195 | } else { | 260 | } else { |
| 196 | system.Memory().ReadBlock(params.address, entries.data(), | 261 | system.Memory().ReadBlock(params.address, entries.command_lists.data(), |
| 197 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 262 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 198 | } | 263 | } |
| 199 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); | ||
| 200 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 201 | |||
| 202 | auto& gpu = system.GPU(); | ||
| 203 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 204 | if (params.flags.increment.Value()) { | ||
| 205 | params.fence_out.value += current_syncpoint_value; | ||
| 206 | } else { | ||
| 207 | params.fence_out.value = current_syncpoint_value; | ||
| 208 | } | ||
| 209 | gpu.PushGPUEntries(std::move(entries)); | ||
| 210 | 264 | ||
| 211 | std::memcpy(output.data(), ¶ms, output.size()); | 265 | return SubmitGPFIFOImpl(params, output, std::move(entries)); |
| 212 | return 0; | ||
| 213 | } | 266 | } |
| 214 | 267 | ||
| 215 | u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { | 268 | u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { |
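Annotation: with SubmitGPFIFOImpl factored out, a single submission ioctl can now expand into up to three pushes to the GPU FIFO — an optional fence-acquire prelude (add_wait), the guest's own command lists, and an optional fence-increment epilogue (add_increment), with the wait-for-interrupt dropped when suppress_wfi is set. The sketch below models only that ordering; the structs and the descriptive strings are placeholders, not the real Tegra types.

    #include <cstdint>
    #include <string>
    #include <vector>

    struct Fence { std::uint32_t id; std::uint32_t value; };

    struct SubmitFlags {
        bool add_wait;      // prepend a syncpoint acquire
        bool add_increment; // append syncpoint increments
        bool suppress_wfi;  // skip the wait-for-interrupt before incrementing
    };

    // Stand-in for the sequence of gpu.PushGPUEntries() calls; records what would be queued.
    std::vector<std::string> Submit(const SubmitFlags& flags, const Fence& fence, bool fence_expired) {
        std::vector<std::string> pushes;
        if (flags.add_wait && !fence_expired) {
            pushes.push_back("acquire fence " + std::to_string(fence.id));
        }
        pushes.push_back("guest command lists");
        if (flags.add_increment) {
            if (!flags.suppress_wfi) {
                pushes.push_back("wait for interrupt");
            }
            pushes.push_back("increment fence " + std::to_string(fence.id));
        }
        return pushes;
    }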
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 2ac74743f..a252fc06d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -11,6 +11,11 @@ | |||
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | 13 | #include "core/hle/service/nvdrv/nvdata.h" |
| 14 | #include "video_core/dma_pusher.h" | ||
| 15 | |||
| 16 | namespace Service::Nvidia { | ||
| 17 | class SyncpointManager; | ||
| 18 | } | ||
| 14 | 19 | ||
| 15 | namespace Service::Nvidia::Devices { | 20 | namespace Service::Nvidia::Devices { |
| 16 | 21 | ||
| @@ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | |||
| 21 | 26 | ||
| 22 | class nvhost_gpu final : public nvdevice { | 27 | class nvhost_gpu final : public nvdevice { |
| 23 | public: | 28 | public: |
| 24 | explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); | 29 | explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, |
| 30 | SyncpointManager& syncpoint_manager); | ||
| 25 | ~nvhost_gpu() override; | 31 | ~nvhost_gpu() override; |
| 26 | 32 | ||
| 27 | u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | 33 | u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |
| @@ -162,10 +168,15 @@ private: | |||
| 162 | u32_le raw; | 168 | u32_le raw; |
| 163 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list | 169 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list |
| 164 | BitField<1, 1, u32_le> add_increment; // append an increment to the list | 170 | BitField<1, 1, u32_le> add_increment; // append an increment to the list |
| 165 | BitField<2, 1, u32_le> new_hw_format; // Mostly ignored | 171 | BitField<2, 1, u32_le> new_hw_format; // mostly ignored |
| 172 | BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt | ||
| 166 | BitField<8, 1, u32_le> increment; // increment the returned fence | 173 | BitField<8, 1, u32_le> increment; // increment the returned fence |
| 167 | } flags; | 174 | } flags; |
| 168 | Fence fence_out; // returned new fence object for others to wait on | 175 | Fence fence_out; // returned new fence object for others to wait on |
| 176 | |||
| 177 | u32 AddIncrementValue() const { | ||
| 178 | return flags.add_increment.Value() << 1; | ||
| 179 | } | ||
| 169 | }; | 180 | }; |
| 170 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | 181 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), |
| 171 | "IoctlSubmitGpfifo is incorrect size"); | 182 | "IoctlSubmitGpfifo is incorrect size"); |
| @@ -190,6 +201,8 @@ private: | |||
| 190 | u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); | 201 | u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); |
| 191 | u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); | 202 | u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); |
| 192 | u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); | 203 | u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); |
| 204 | u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||
| 205 | Tegra::CommandList&& entries); | ||
| 193 | u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); | 206 | u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); |
| 194 | u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | 207 | u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, |
| 195 | const std::vector<u8>& input2, IoctlVersion version); | 208 | const std::vector<u8>& input2, IoctlVersion version); |
| @@ -198,7 +211,8 @@ private: | |||
| 198 | u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); | 211 | u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); |
| 199 | 212 | ||
| 200 | std::shared_ptr<nvmap> nvmap_dev; | 213 | std::shared_ptr<nvmap> nvmap_dev; |
| 201 | u32 assigned_syncpoints{}; | 214 | SyncpointManager& syncpoint_manager; |
| 215 | Fence channel_fence; | ||
| 202 | }; | 216 | }; |
| 203 | 217 | ||
| 204 | } // namespace Service::Nvidia::Devices | 218 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 803c1a984..a46755cdc 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/hle/service/nvdrv/interface.h" | 21 | #include "core/hle/service/nvdrv/interface.h" |
| 22 | #include "core/hle/service/nvdrv/nvdrv.h" | 22 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 23 | #include "core/hle/service/nvdrv/nvmemp.h" | 23 | #include "core/hle/service/nvdrv/nvmemp.h" |
| 24 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||
| 24 | #include "core/hle/service/nvflinger/nvflinger.h" | 25 | #include "core/hle/service/nvflinger/nvflinger.h" |
| 25 | 26 | ||
| 26 | namespace Service::Nvidia { | 27 | namespace Service::Nvidia { |
| @@ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | |||
| 36 | nvflinger.SetNVDrvInstance(module_); | 37 | nvflinger.SetNVDrvInstance(module_); |
| 37 | } | 38 | } |
| 38 | 39 | ||
| 39 | Module::Module(Core::System& system) { | 40 | Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { |
| 40 | auto& kernel = system.Kernel(); | 41 | auto& kernel = system.Kernel(); |
| 41 | for (u32 i = 0; i < MaxNvEvents; i++) { | 42 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); | 43 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label); | 44 | events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)}; |
| 44 | events_interface.status[i] = EventState::Free; | 45 | events_interface.status[i] = EventState::Free; |
| 45 | events_interface.registered[i] = false; | 46 | events_interface.registered[i] = false; |
| 46 | } | 47 | } |
| 47 | auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | 48 | auto nvmap_dev = std::make_shared<Devices::nvmap>(system); |
| 48 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | 49 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); |
| 49 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); | 50 | devices["/dev/nvhost-gpu"] = |
| 51 | std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager); | ||
| 50 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | 52 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); |
| 51 | devices["/dev/nvmap"] = nvmap_dev; | 53 | devices["/dev/nvmap"] = nvmap_dev; |
| 52 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); | 54 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |
| 53 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); | 55 | devices["/dev/nvhost-ctrl"] = |
| 56 | std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); | ||
| 54 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); | 57 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); |
| 55 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); | 58 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |
| 56 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); | 59 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); |
| @@ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { | |||
| 95 | if (events_interface.assigned_syncpt[i] == syncpoint_id && | 98 | if (events_interface.assigned_syncpt[i] == syncpoint_id && |
| 96 | events_interface.assigned_value[i] == value) { | 99 | events_interface.assigned_value[i] == value) { |
| 97 | events_interface.LiberateEvent(i); | 100 | events_interface.LiberateEvent(i); |
| 98 | events_interface.events[i].writable->Signal(); | 101 | events_interface.events[i].event.writable->Signal(); |
| 99 | } | 102 | } |
| 100 | } | 103 | } |
| 101 | } | 104 | } |
| 102 | 105 | ||
| 103 | std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | 106 | std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { |
| 104 | return events_interface.events[event_id].readable; | 107 | return events_interface.events[event_id].event.readable; |
| 105 | } | 108 | } |
| 106 | 109 | ||
| 107 | std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | 110 | std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { |
| 108 | return events_interface.events[event_id].writable; | 111 | return events_interface.events[event_id].event.writable; |
| 109 | } | 112 | } |
| 110 | 113 | ||
| 111 | } // namespace Service::Nvidia | 114 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 7706a5590..f3d863dac 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/writable_event.h" | 11 | #include "core/hle/kernel/writable_event.h" |
| 12 | #include "core/hle/service/nvdrv/nvdata.h" | 12 | #include "core/hle/service/nvdrv/nvdata.h" |
| 13 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||
| 13 | #include "core/hle/service/service.h" | 14 | #include "core/hle/service/service.h" |
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| @@ -22,15 +23,23 @@ class NVFlinger; | |||
| 22 | 23 | ||
| 23 | namespace Service::Nvidia { | 24 | namespace Service::Nvidia { |
| 24 | 25 | ||
| 26 | class SyncpointManager; | ||
| 27 | |||
| 25 | namespace Devices { | 28 | namespace Devices { |
| 26 | class nvdevice; | 29 | class nvdevice; |
| 27 | } | 30 | } |
| 28 | 31 | ||
| 32 | /// Represents an Nvidia event | ||
| 33 | struct NvEvent { | ||
| 34 | Kernel::EventPair event; | ||
| 35 | Fence fence{}; | ||
| 36 | }; | ||
| 37 | |||
| 29 | struct EventInterface { | 38 | struct EventInterface { |
| 30 | // Mask representing currently busy events | 39 | // Mask representing currently busy events |
| 31 | u64 events_mask{}; | 40 | u64 events_mask{}; |
| 32 | // Each kernel event associated to an NV event | 41 | // Each kernel event associated to an NV event |
| 33 | std::array<Kernel::EventPair, MaxNvEvents> events; | 42 | std::array<NvEvent, MaxNvEvents> events; |
| 34 | // The status of the current NVEvent | 43 | // The status of the current NVEvent |
| 35 | std::array<EventState, MaxNvEvents> status{}; | 44 | std::array<EventState, MaxNvEvents> status{}; |
| 36 | // Tells if an NVEvent is registered or not | 45 | // Tells if an NVEvent is registered or not |
| @@ -119,6 +128,9 @@ public: | |||
| 119 | std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; | 128 | std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; |
| 120 | 129 | ||
| 121 | private: | 130 | private: |
| 131 | /// Manages syncpoints on the host | ||
| 132 | SyncpointManager syncpoint_manager; | ||
| 133 | |||
| 122 | /// Id to use for the next open file descriptor. | 134 | /// Id to use for the next open file descriptor. |
| 123 | u32 next_fd = 1; | 135 | u32 next_fd = 1; |
| 124 | 136 | ||
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/syncpoint_manager.cpp new file mode 100644 index 000000000..0151a03b7 --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||
| 7 | #include "video_core/gpu.h" | ||
| 8 | |||
| 9 | namespace Service::Nvidia { | ||
| 10 | |||
| 11 | SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {} | ||
| 12 | |||
| 13 | SyncpointManager::~SyncpointManager() = default; | ||
| 14 | |||
| 15 | u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { | ||
| 16 | syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id); | ||
| 17 | return GetSyncpointMin(syncpoint_id); | ||
| 18 | } | ||
| 19 | |||
| 20 | u32 SyncpointManager::AllocateSyncpoint() { | ||
| 21 | for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { | ||
| 22 | if (!syncpoints[syncpoint_id].is_allocated) { | ||
| 23 | syncpoints[syncpoint_id].is_allocated = true; | ||
| 24 | return syncpoint_id; | ||
| 25 | } | ||
| 26 | } | ||
| 27 | UNREACHABLE_MSG("No more available syncpoints!"); | ||
| 28 | return {}; | ||
| 29 | } | ||
| 30 | |||
| 31 | u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { | ||
| 32 | for (u32 index = 0; index < value; ++index) { | ||
| 33 | syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); | ||
| 34 | } | ||
| 35 | |||
| 36 | return GetSyncpointMax(syncpoint_id); | ||
| 37 | } | ||
| 38 | |||
| 39 | } // namespace Service::Nvidia | ||
diff --git a/src/core/hle/service/nvdrv/syncpoint_manager.h b/src/core/hle/service/nvdrv/syncpoint_manager.h new file mode 100644 index 000000000..4168b6c7e --- /dev/null +++ b/src/core/hle/service/nvdrv/syncpoint_manager.h | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | class GPU; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Service::Nvidia { | ||
| 18 | |||
| 19 | class SyncpointManager final { | ||
| 20 | public: | ||
| 21 | explicit SyncpointManager(Tegra::GPU& gpu); | ||
| 22 | ~SyncpointManager(); | ||
| 23 | |||
| 24 | /** | ||
| 25 | * Returns true if the specified syncpoint is expired for the given value. | ||
| 26 | * @param syncpoint_id Syncpoint ID to check. | ||
| 27 | * @param value Value to check against the specified syncpoint. | ||
| 28 | * @returns True if the specified syncpoint is expired for the given value, otherwise False. | ||
| 29 | */ | ||
| 30 | bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { | ||
| 31 | return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); | ||
| 32 | } | ||
| 33 | |||
| 34 | /** | ||
| 35 | * Gets the lower bound for the specified syncpoint. | ||
| 36 | * @param syncpoint_id Syncpoint ID to get the lower bound for. | ||
| 37 | * @returns The lower bound for the specified syncpoint. | ||
| 38 | */ | ||
| 39 | u32 GetSyncpointMin(u32 syncpoint_id) const { | ||
| 40 | return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed); | ||
| 41 | } | ||
| 42 | |||
| 43 | /** | ||
| 44 | * Gets the upper bound for the specified syncpoint. | ||
| 45 | * @param syncpoint_id Syncpoint ID to get the upper bound for. | ||
| 46 | * @returns The upper bound for the specified syncpoint. | ||
| 47 | */ | ||
| 48 | u32 GetSyncpointMax(u32 syncpoint_id) const { | ||
| 49 | return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed); | ||
| 50 | } | ||
| 51 | |||
| 52 | /** | ||
| 53 | * Refreshes the minimum value for the specified syncpoint. | ||
| 54 | * @param syncpoint_id Syncpoint ID to be refreshed. | ||
| 55 | * @returns The new syncpoint minimum value. | ||
| 56 | */ | ||
| 57 | u32 RefreshSyncpoint(u32 syncpoint_id); | ||
| 58 | |||
| 59 | /** | ||
| 60 | * Allocates a new syncpoint. | ||
| 61 | * @returns The syncpoint ID for the newly allocated syncpoint. | ||
| 62 | */ | ||
| 63 | u32 AllocateSyncpoint(); | ||
| 64 | |||
| 65 | /** | ||
| 66 | * Increases the maximum value for the specified syncpoint. | ||
| 67 | * @param syncpoint_id Syncpoint ID to be increased. | ||
| 68 | * @param value Value to increase the specified syncpoint by. | ||
| 69 | * @returns The new syncpoint maximum value. | ||
| 70 | */ | ||
| 71 | u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); | ||
| 72 | |||
| 73 | private: | ||
| 74 | struct Syncpoint { | ||
| 75 | std::atomic<u32> min; | ||
| 76 | std::atomic<u32> max; | ||
| 77 | std::atomic<bool> is_allocated; | ||
| 78 | }; | ||
| 79 | |||
| 80 | std::array<Syncpoint, MaxSyncPoints> syncpoints{}; | ||
| 81 | |||
| 82 | Tegra::GPU& gpu; | ||
| 83 | }; | ||
| 84 | |||
| 85 | } // namespace Service::Nvidia | ||
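Annotation: the new manager tracks two counters per syncpoint — max, the highest value promised to the guest (bumped by IncreaseSyncpoint at submission time), and min, the last value actually read back from the GPU (updated by RefreshSyncpoint). IsSyncpointExpired is written with unsigned subtraction so the comparison survives 32-bit wraparound of the counters; the small standalone check below exercises that property using the same expression on plain integers (the test values are made up).

    #include <cassert>
    #include <cstdint>

    // Same comparison as SyncpointManager::IsSyncpointExpired, on plain integers.
    bool IsExpired(std::uint32_t min, std::uint32_t max, std::uint32_t value) {
        return (max - value) >= (min - value);
    }

    int main() {
        // Plain case: fence value 10 is reached only once min catches up to it.
        assert(!IsExpired(/*min=*/5, /*max=*/20, /*value=*/10));
        assert(IsExpired(/*min=*/10, /*max=*/20, /*value=*/10));

        // Wraparound case: the counters crossed 2^32; a naive "min >= value" would fail here.
        assert(IsExpired(/*min=*/3, /*max=*/7, /*value=*/0xFFFFFFF0u));
        return 0;
    }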
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 4f1e210b1..b89a2d41b 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -29,6 +29,10 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 29 | .slot = slot, | 29 | .slot = slot, |
| 30 | .status = Buffer::Status::Free, | 30 | .status = Buffer::Status::Free, |
| 31 | .igbp_buffer = igbp_buffer, | 31 | .igbp_buffer = igbp_buffer, |
| 32 | .transform = {}, | ||
| 33 | .crop_rect = {}, | ||
| 34 | .swap_interval = 0, | ||
| 35 | .multi_fence = {}, | ||
| 32 | }); | 36 | }); |
| 33 | 37 | ||
| 34 | buffer_wait_event.writable->Signal(); | 38 | buffer_wait_event.writable->Signal(); |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index c64673dba..44aa2bdae 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -242,6 +242,10 @@ void NVFlinger::Compose() { | |||
| 242 | 242 | ||
| 243 | const auto& igbp_buffer = buffer->get().igbp_buffer; | 243 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 244 | 244 | ||
| 245 | if (!system.IsPoweredOn()) { | ||
| 246 | return; // We are likely shutting down | ||
| 247 | } | ||
| 248 | |||
| 245 | auto& gpu = system.GPU(); | 249 | auto& gpu = system.GPU(); |
| 246 | const auto& multi_fence = buffer->get().multi_fence; | 250 | const auto& multi_fence = buffer->get().multi_fence; |
| 247 | guard->unlock(); | 251 | guard->unlock(); |
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index cb67094f6..5bbe6a332 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h | |||
| @@ -42,11 +42,11 @@ public: | |||
| 42 | void Decode(); | 42 | void Decode(); |
| 43 | 43 | ||
| 44 | /// Returns most recently decoded frame | 44 | /// Returns most recently decoded frame |
| 45 | AVFrame* GetCurrentFrame(); | 45 | [[nodiscard]] AVFrame* GetCurrentFrame(); |
| 46 | const AVFrame* GetCurrentFrame() const; | 46 | [[nodiscard]] const AVFrame* GetCurrentFrame() const; |
| 47 | 47 | ||
| 48 | /// Returns the value of current_codec | 48 | /// Returns the value of current_codec |
| 49 | NvdecCommon::VideoCodec GetCurrentCodec() const; | 49 | [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; |
| 50 | 50 | ||
| 51 | private: | 51 | private: |
| 52 | bool initialized{}; | 52 | bool initialized{}; |
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 549a40f52..33e063e20 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp | |||
| @@ -43,7 +43,8 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} | |||
| 43 | 43 | ||
| 44 | H264::~H264() = default; | 44 | H264::~H264() = default; |
| 45 | 45 | ||
| 46 | std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, bool is_first_frame) { | 46 | const std::vector<u8>& H264::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, |
| 47 | bool is_first_frame) { | ||
| 47 | H264DecoderContext context{}; | 48 | H264DecoderContext context{}; |
| 48 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | 49 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); |
| 49 | 50 | ||
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index f2292fd2f..273449495 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h | |||
| @@ -51,14 +51,14 @@ public: | |||
| 51 | void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); | 51 | void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); |
| 52 | 52 | ||
| 53 | /// Return the bitstream as a vector. | 53 | /// Return the bitstream as a vector. |
| 54 | std::vector<u8>& GetByteArray(); | 54 | [[nodiscard]] std::vector<u8>& GetByteArray(); |
| 55 | const std::vector<u8>& GetByteArray() const; | 55 | [[nodiscard]] const std::vector<u8>& GetByteArray() const; |
| 56 | 56 | ||
| 57 | private: | 57 | private: |
| 58 | void WriteBits(s32 value, s32 bit_count); | 58 | void WriteBits(s32 value, s32 bit_count); |
| 59 | void WriteExpGolombCodedInt(s32 value); | 59 | void WriteExpGolombCodedInt(s32 value); |
| 60 | void WriteExpGolombCodedUInt(u32 value); | 60 | void WriteExpGolombCodedUInt(u32 value); |
| 61 | s32 GetFreeBufferBits(); | 61 | [[nodiscard]] s32 GetFreeBufferBits(); |
| 62 | void Flush(); | 62 | void Flush(); |
| 63 | 63 | ||
| 64 | s32 buffer_size{8}; | 64 | s32 buffer_size{8}; |
| @@ -74,8 +74,8 @@ public: | |||
| 74 | ~H264(); | 74 | ~H264(); |
| 75 | 75 | ||
| 76 | /// Compose the H264 header of the frame for FFmpeg decoding | 76 | /// Compose the H264 header of the frame for FFmpeg decoding |
| 77 | std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, | 77 | [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state, |
| 78 | bool is_first_frame = false); | 78 | bool is_first_frame = false); |
| 79 | 79 | ||
| 80 | private: | 80 | private: |
| 81 | struct H264ParameterSet { | 81 | struct H264ParameterSet { |
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 42520f856..ab44fdc9e 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp | |||
| @@ -854,7 +854,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | |||
| 854 | return uncomp_writer; | 854 | return uncomp_writer; |
| 855 | } | 855 | } |
| 856 | 856 | ||
| 857 | std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { | 857 | const std::vector<u8>& VP9::ComposeFrameHeader(NvdecCommon::NvdecRegisters& state) { |
| 858 | std::vector<u8> bitstream; | 858 | std::vector<u8> bitstream; |
| 859 | { | 859 | { |
| 860 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); | 860 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); |
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/command_classes/codecs/vp9.h index 05c9682fa..e2504512c 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/command_classes/codecs/vp9.h | |||
| @@ -119,7 +119,7 @@ public: | |||
| 119 | 119 | ||
| 120 | /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec | 120 | /// Composes the VP9 frame from the GPU state information. Based on the official VP9 spec |
| 121 | /// documentation | 121 | /// documentation |
| 122 | std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); | 122 | [[nodiscard]] const std::vector<u8>& ComposeFrameHeader(NvdecCommon::NvdecRegisters& state); |
| 123 | 123 | ||
| 124 | /// Returns true if the most recent frame was a hidden frame. | 124 | /// Returns true if the most recent frame was a hidden frame. |
| 125 | [[nodiscard]] bool WasFrameHidden() const { | 125 | [[nodiscard]] bool WasFrameHidden() const { |
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index a50acf6e8..4f0b05d22 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h | |||
| @@ -231,9 +231,8 @@ struct PictureInfo { | |||
| 231 | u32 surface_params{}; | 231 | u32 surface_params{}; |
| 232 | INSERT_PADDING_WORDS(3); | 232 | INSERT_PADDING_WORDS(3); |
| 233 | 233 | ||
| 234 | Vp9PictureInfo Convert() const { | 234 | [[nodiscard]] Vp9PictureInfo Convert() const { |
| 235 | 235 | return { | |
| 236 | return Vp9PictureInfo{ | ||
| 237 | .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, | 236 | .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, |
| 238 | .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, | 237 | .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, |
| 239 | .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, | 238 | .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, |
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index af14f9857..eec4443f9 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h | |||
| @@ -26,8 +26,8 @@ public: | |||
| 26 | void ProcessMethod(Method method, const std::vector<u32>& arguments); | 26 | void ProcessMethod(Method method, const std::vector<u32>& arguments); |
| 27 | 27 | ||
| 28 | /// Return most recently decoded frame | 28 | /// Return most recently decoded frame |
| 29 | AVFrame* GetFrame(); | 29 | [[nodiscard]] AVFrame* GetFrame(); |
| 30 | const AVFrame* GetFrame() const; | 30 | [[nodiscard]] const AVFrame* GetFrame() const; |
| 31 | 31 | ||
| 32 | private: | 32 | private: |
| 33 | /// Invoke codec to decode a frame | 33 | /// Invoke codec to decode a frame |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index f2f96ac33..105b85a92 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/cityhash.h" | ||
| 5 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/memory.h" | 8 | #include "core/memory.h" |
| @@ -12,6 +13,20 @@ | |||
| 12 | 13 | ||
| 13 | namespace Tegra { | 14 | namespace Tegra { |
| 14 | 15 | ||
| 16 | void CommandList::RefreshIntegrityChecks(GPU& gpu) { | ||
| 17 | command_list_hashes.resize(command_lists.size()); | ||
| 18 | |||
| 19 | for (std::size_t index = 0; index < command_lists.size(); ++index) { | ||
| 20 | const CommandListHeader command_list_header = command_lists[index]; | ||
| 21 | std::vector<CommandHeader> command_headers(command_list_header.size); | ||
| 22 | gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), | ||
| 23 | command_list_header.size * sizeof(u32)); | ||
| 24 | command_list_hashes[index] = | ||
| 25 | Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||
| 26 | command_list_header.size * sizeof(u32)); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 15 | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} | 30 | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} |
| 16 | 31 | ||
| 17 | DmaPusher::~DmaPusher() = default; | 32 | DmaPusher::~DmaPusher() = default; |
| @@ -45,32 +60,51 @@ bool DmaPusher::Step() { | |||
| 45 | return false; | 60 | return false; |
| 46 | } | 61 | } |
| 47 | 62 | ||
| 48 | const CommandList& command_list{dma_pushbuffer.front()}; | 63 | CommandList& command_list{dma_pushbuffer.front()}; |
| 49 | ASSERT_OR_EXECUTE(!command_list.empty(), { | ||
| 50 | // Somehow the command_list is empty, in order to avoid a crash | ||
| 51 | // We ignore it and assume its size is 0. | ||
| 52 | dma_pushbuffer.pop(); | ||
| 53 | dma_pushbuffer_subindex = 0; | ||
| 54 | return true; | ||
| 55 | }); | ||
| 56 | const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; | ||
| 57 | const GPUVAddr dma_get = command_list_header.addr; | ||
| 58 | |||
| 59 | if (dma_pushbuffer_subindex >= command_list.size()) { | ||
| 60 | // We've gone through the current list, remove it from the queue | ||
| 61 | dma_pushbuffer.pop(); | ||
| 62 | dma_pushbuffer_subindex = 0; | ||
| 63 | } | ||
| 64 | 64 | ||
| 65 | if (command_list_header.size == 0) { | 65 | ASSERT_OR_EXECUTE( |
| 66 | return true; | 66 | command_list.command_lists.size() || command_list.prefetch_command_list.size(), { |
| 67 | } | 67 | // Somehow the command_list is empty, in order to avoid a crash |
| 68 | // We ignore it and assume its size is 0. | ||
| 69 | dma_pushbuffer.pop(); | ||
| 70 | dma_pushbuffer_subindex = 0; | ||
| 71 | return true; | ||
| 72 | }); | ||
| 68 | 73 | ||
| 69 | // Push buffer non-empty, read a word | 74 | if (command_list.prefetch_command_list.size()) { |
| 70 | command_headers.resize(command_list_header.size); | 75 | // Prefetched command list from nvdrv, used for things like synchronization |
| 71 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 76 | command_headers = std::move(command_list.prefetch_command_list); |
| 72 | command_list_header.size * sizeof(u32)); | 77 | dma_pushbuffer.pop(); |
| 78 | } else { | ||
| 79 | const CommandListHeader command_list_header{ | ||
| 80 | command_list.command_lists[dma_pushbuffer_subindex]}; | ||
| 81 | const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; | ||
| 82 | const GPUVAddr dma_get = command_list_header.addr; | ||
| 83 | |||
| 84 | if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { | ||
| 85 | // We've gone through the current list, remove it from the queue | ||
| 86 | dma_pushbuffer.pop(); | ||
| 87 | dma_pushbuffer_subindex = 0; | ||
| 88 | } | ||
| 73 | 89 | ||
| 90 | if (command_list_header.size == 0) { | ||
| 91 | return true; | ||
| 92 | } | ||
| 93 | |||
| 94 | // Push buffer non-empty, read a word | ||
| 95 | command_headers.resize(command_list_header.size); | ||
| 96 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | ||
| 97 | command_list_header.size * sizeof(u32)); | ||
| 98 | |||
| 99 | // Integrity check | ||
| 100 | const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||
| 101 | command_list_header.size * sizeof(u32)); | ||
| 102 | if (new_hash != next_hash) { | ||
| 103 | LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); | ||
| 104 | dma_pushbuffer.pop(); | ||
| 105 | return true; | ||
| 106 | } | ||
| 107 | } | ||
| 74 | for (std::size_t index = 0; index < command_headers.size();) { | 108 | for (std::size_t index = 0; index < command_headers.size();) { |
| 75 | const CommandHeader& command_header = command_headers[index]; | 109 | const CommandHeader& command_header = command_headers[index]; |
| 76 | 110 | ||
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index efa90d170..8496ba2da 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -27,6 +27,31 @@ enum class SubmissionMode : u32 { | |||
| 27 | IncreaseOnce = 5 | 27 | IncreaseOnce = 5 |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| 30 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 31 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | ||
| 32 | // So the values you see in docs might be multiplied by 4. | ||
| 33 | enum class BufferMethods : u32 { | ||
| 34 | BindObject = 0x0, | ||
| 35 | Nop = 0x2, | ||
| 36 | SemaphoreAddressHigh = 0x4, | ||
| 37 | SemaphoreAddressLow = 0x5, | ||
| 38 | SemaphoreSequence = 0x6, | ||
| 39 | SemaphoreTrigger = 0x7, | ||
| 40 | NotifyIntr = 0x8, | ||
| 41 | WrcacheFlush = 0x9, | ||
| 42 | Unk28 = 0xA, | ||
| 43 | UnkCacheFlush = 0xB, | ||
| 44 | RefCnt = 0x14, | ||
| 45 | SemaphoreAcquire = 0x1A, | ||
| 46 | SemaphoreRelease = 0x1B, | ||
| 47 | FenceValue = 0x1C, | ||
| 48 | FenceAction = 0x1D, | ||
| 49 | WaitForInterrupt = 0x1E, | ||
| 50 | Unk7c = 0x1F, | ||
| 51 | Yield = 0x20, | ||
| 52 | NonPullerMethods = 0x40, | ||
| 53 | }; | ||
| 54 | |||
| 30 | struct CommandListHeader { | 55 | struct CommandListHeader { |
| 31 | union { | 56 | union { |
| 32 | u64 raw; | 57 | u64 raw; |
| @@ -49,9 +74,29 @@ union CommandHeader { | |||
| 49 | static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); | 74 | static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); |
| 50 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | 75 | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); |
| 51 | 76 | ||
| 77 | static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, | ||
| 78 | SubmissionMode mode) { | ||
| 79 | CommandHeader result{}; | ||
| 80 | result.method.Assign(static_cast<u32>(method)); | ||
| 81 | result.arg_count.Assign(arg_count); | ||
| 82 | result.mode.Assign(mode); | ||
| 83 | return result; | ||
| 84 | } | ||
| 85 | |||
| 52 | class GPU; | 86 | class GPU; |
| 53 | 87 | ||
| 54 | using CommandList = std::vector<Tegra::CommandListHeader>; | 88 | struct CommandList final { |
| 89 | CommandList() = default; | ||
| 90 | explicit CommandList(std::size_t size) : command_lists(size) {} | ||
| 91 | explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) | ||
| 92 | : prefetch_command_list{std::move(prefetch_command_list)} {} | ||
| 93 | |||
| 94 | void RefreshIntegrityChecks(GPU& gpu); | ||
| 95 | |||
| 96 | std::vector<Tegra::CommandListHeader> command_lists; | ||
| 97 | std::vector<u64> command_list_hashes; | ||
| 98 | std::vector<Tegra::CommandHeader> prefetch_command_list; | ||
| 99 | }; | ||
| 55 | 100 | ||
| 56 | /** | 101 | /** |
| 57 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the | 102 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |
| @@ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; | |||
| 60 | * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for | 105 | * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for |
| 61 | * details on this implementation. | 106 | * details on this implementation. |
| 62 | */ | 107 | */ |
| 63 | class DmaPusher { | 108 | class DmaPusher final { |
| 64 | public: | 109 | public: |
| 65 | explicit DmaPusher(Core::System& system, GPU& gpu); | 110 | explicit DmaPusher(Core::System& system, GPU& gpu); |
| 66 | ~DmaPusher(); | 111 | ~DmaPusher(); |
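A short sketch tying the additions to this header together: the static_asserts restate the ×4 convention from the comment above, BuildCommandHeader emits one header word, and the new CommandList can be constructed directly from a prefetched header/argument stream. Only the names introduced by this diff (BufferMethods, BuildCommandHeader, CommandList, SubmissionMode) are taken as given; the rest is illustrative:

// Docs list methods as byte offsets, i.e. the method index multiplied by 4.
static_assert(static_cast<u32>(Tegra::BufferMethods::SemaphoreTrigger) * 4 == 0x1C);
static_assert(static_cast<u32>(Tegra::BufferMethods::SemaphoreAcquire) * 4 == 0x68);

Tegra::CommandList SketchBuildPrefetchList() {
    // One header word per method; its argument words would follow in the same vector.
    std::vector<Tegra::CommandHeader> prefetch;
    prefetch.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SemaphoreSequence, 1,
                                                 Tegra::SubmissionMode::Increasing));
    // The actual submission path for this prefetch stream is assumed, not shown by the diff.
    return Tegra::CommandList{std::move(prefetch)};
}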
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 171f78183..ebd149c3a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -194,30 +194,6 @@ void GPU::SyncGuestHost() { | |||
| 194 | void GPU::OnCommandListEnd() { | 194 | void GPU::OnCommandListEnd() { |
| 195 | renderer->Rasterizer().ReleaseFences(); | 195 | renderer->Rasterizer().ReleaseFences(); |
| 196 | } | 196 | } |
| 197 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 198 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | ||
| 199 | // So the values you see in docs might be multiplied by 4. | ||
| 200 | enum class BufferMethods { | ||
| 201 | BindObject = 0x0, | ||
| 202 | Nop = 0x2, | ||
| 203 | SemaphoreAddressHigh = 0x4, | ||
| 204 | SemaphoreAddressLow = 0x5, | ||
| 205 | SemaphoreSequence = 0x6, | ||
| 206 | SemaphoreTrigger = 0x7, | ||
| 207 | NotifyIntr = 0x8, | ||
| 208 | WrcacheFlush = 0x9, | ||
| 209 | Unk28 = 0xA, | ||
| 210 | UnkCacheFlush = 0xB, | ||
| 211 | RefCnt = 0x14, | ||
| 212 | SemaphoreAcquire = 0x1A, | ||
| 213 | SemaphoreRelease = 0x1B, | ||
| 214 | FenceValue = 0x1C, | ||
| 215 | FenceAction = 0x1D, | ||
| 216 | Unk78 = 0x1E, | ||
| 217 | Unk7c = 0x1F, | ||
| 218 | Yield = 0x20, | ||
| 219 | NonPullerMethods = 0x40, | ||
| 220 | }; | ||
| 221 | 197 | ||
| 222 | enum class GpuSemaphoreOperation { | 198 | enum class GpuSemaphoreOperation { |
| 223 | AcquireEqual = 0x1, | 199 | AcquireEqual = 0x1, |
| @@ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 277 | case BufferMethods::UnkCacheFlush: | 253 | case BufferMethods::UnkCacheFlush: |
| 278 | case BufferMethods::WrcacheFlush: | 254 | case BufferMethods::WrcacheFlush: |
| 279 | case BufferMethods::FenceValue: | 255 | case BufferMethods::FenceValue: |
| 256 | break; | ||
| 280 | case BufferMethods::FenceAction: | 257 | case BufferMethods::FenceAction: |
| 258 | ProcessFenceActionMethod(); | ||
| 259 | break; | ||
| 260 | case BufferMethods::WaitForInterrupt: | ||
| 261 | ProcessWaitForInterruptMethod(); | ||
| 281 | break; | 262 | break; |
| 282 | case BufferMethods::SemaphoreTrigger: { | 263 | case BufferMethods::SemaphoreTrigger: { |
| 283 | ProcessSemaphoreTriggerMethod(); | 264 | ProcessSemaphoreTriggerMethod(); |
| @@ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | |||
| 391 | } | 372 | } |
| 392 | } | 373 | } |
| 393 | 374 | ||
| 375 | void GPU::ProcessFenceActionMethod() { | ||
| 376 | switch (regs.fence_action.op) { | ||
| 377 | case FenceOperation::Acquire: | ||
| 378 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 379 | break; | ||
| 380 | case FenceOperation::Increment: | ||
| 381 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 382 | break; | ||
| 383 | default: | ||
| 384 | UNIMPLEMENTED_MSG("Unimplemented operation {}", | ||
| 385 | static_cast<u32>(regs.fence_action.op.Value())); | ||
| 386 | } | ||
| 387 | } | ||
| 388 | |||
| 389 | void GPU::ProcessWaitForInterruptMethod() { | ||
| 390 | // TODO(bunnei) ImplementMe | ||
| 391 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void GPU::ProcessSemaphoreTriggerMethod() { | 394 | void GPU::ProcessSemaphoreTriggerMethod() { |
| 395 | const auto semaphoreOperationMask = 0xF; | 395 | const auto semaphoreOperationMask = 0xF; |
| 396 | const auto op = | 396 | const auto op = |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b8c613b11..5444b49f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -263,6 +263,24 @@ public: | |||
| 263 | return use_nvdec; | 263 | return use_nvdec; |
| 264 | } | 264 | } |
| 265 | 265 | ||
| 266 | enum class FenceOperation : u32 { | ||
| 267 | Acquire = 0, | ||
| 268 | Increment = 1, | ||
| 269 | }; | ||
| 270 | |||
| 271 | union FenceAction { | ||
| 272 | u32 raw; | ||
| 273 | BitField<0, 1, FenceOperation> op; | ||
| 274 | BitField<8, 24, u32> syncpoint_id; | ||
| 275 | |||
| 276 | static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||
| 277 | FenceAction result{}; | ||
| 278 | result.op.Assign(op); | ||
| 279 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 280 | return {result.raw}; | ||
| 281 | } | ||
| 282 | }; | ||
| 283 | |||
| 266 | struct Regs { | 284 | struct Regs { |
| 267 | static constexpr size_t NUM_REGS = 0x40; | 285 | static constexpr size_t NUM_REGS = 0x40; |
| 268 | 286 | ||
| @@ -291,10 +309,7 @@ public: | |||
| 291 | u32 semaphore_acquire; | 309 | u32 semaphore_acquire; |
| 292 | u32 semaphore_release; | 310 | u32 semaphore_release; |
| 293 | u32 fence_value; | 311 | u32 fence_value; |
| 294 | union { | 312 | FenceAction fence_action; |
| 295 | BitField<4, 4, u32> operation; | ||
| 296 | BitField<8, 8, u32> id; | ||
| 297 | } fence_action; | ||
| 298 | INSERT_UNION_PADDING_WORDS(0xE2); | 313 | INSERT_UNION_PADDING_WORDS(0xE2); |
| 299 | 314 | ||
| 300 | // Puller state | 315 | // Puller state |
| @@ -342,6 +357,8 @@ protected: | |||
| 342 | 357 | ||
| 343 | private: | 358 | private: |
| 344 | void ProcessBindMethod(const MethodCall& method_call); | 359 | void ProcessBindMethod(const MethodCall& method_call); |
| 360 | void ProcessFenceActionMethod(); | ||
| 361 | void ProcessWaitForInterruptMethod(); | ||
| 345 | void ProcessSemaphoreTriggerMethod(); | 362 | void ProcessSemaphoreTriggerMethod(); |
| 346 | void ProcessSemaphoreRelease(); | 363 | void ProcessSemaphoreRelease(); |
| 347 | void ProcessSemaphoreAcquire(); | 364 | void ProcessSemaphoreAcquire(); |
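With the FenceAction union and FenceOperation enum in place, a fence wait is a two-method sequence: write the target value through BufferMethods::FenceValue, then write the action word produced by FenceAction::Build through BufferMethods::FenceAction. A hedged sketch of the words a client would enqueue; how they are actually pushed is assumed, not shown here:

// Header/argument pairs for a fence acquire on syncpoint 9 (the value 42 is arbitrary).
const auto value_header = Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1,
                                                    Tegra::SubmissionMode::Increasing);
// ...one argument word carrying 42 follows and lands in regs.fence_value...

const auto action_header = Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                     Tegra::SubmissionMode::Increasing);
const Tegra::CommandHeader action_word =
    Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, 9);
// ProcessFenceActionMethod() then calls WaitFence(9, regs.fence_value), per the gpu.cpp hunk above.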
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index e1217ca83..f34ed6735 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -771,13 +771,18 @@ void VKDevice::CollectTelemetryParameters() { | |||
| 771 | VkPhysicalDeviceDriverPropertiesKHR driver{ | 771 | VkPhysicalDeviceDriverPropertiesKHR driver{ |
| 772 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, | 772 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR, |
| 773 | .pNext = nullptr, | 773 | .pNext = nullptr, |
| 774 | .driverID = {}, | ||
| 775 | .driverName = {}, | ||
| 776 | .driverInfo = {}, | ||
| 777 | .conformanceVersion = {}, | ||
| 774 | }; | 778 | }; |
| 775 | 779 | ||
| 776 | VkPhysicalDeviceProperties2KHR properties{ | 780 | VkPhysicalDeviceProperties2KHR device_properties{ |
| 777 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, | 781 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, |
| 778 | .pNext = &driver, | 782 | .pNext = &driver, |
| 783 | .properties = {}, | ||
| 779 | }; | 784 | }; |
| 780 | physical.GetProperties2KHR(properties); | 785 | physical.GetProperties2KHR(device_properties); |
| 781 | 786 | ||
| 782 | driver_id = driver.driverID; | 787 | driver_id = driver.driverID; |
| 783 | vendor_name = driver.driverName; | 788 | vendor_name = driver.driverName; |
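The Vulkan-side hunks here and below all follow one pattern: spell out the members that designated initialization would otherwise leave to value-initialization (and rename `properties` to the more descriptive `device_properties`). A tiny standalone sketch of the pattern with a hypothetical struct; the warning-suppression motivation is an assumption, not something the diff states:

struct Desc {
    int sType;
    const void* pNext;
    unsigned flags;
    unsigned codeSize;
};

// Both objects end up identical: omitted designated members are value-initialized.
// Listing them explicitly keeps the initializer exhaustive and quiets
// missing-field-initializer style warnings on some compilers (assumption).
const Desc implicit_zero{.sType = 1, .pNext = nullptr};
const Desc explicit_zero{.sType = 1, .pNext = nullptr, .flags = 0, .codeSize = 0};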
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 696eaeb5f..0e8f9c352 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -159,6 +159,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||
| 159 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 159 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| 160 | .pNext = nullptr, | 160 | .pNext = nullptr, |
| 161 | .flags = 0, | 161 | .flags = 0, |
| 162 | .codeSize = 0, | ||
| 162 | }; | 163 | }; |
| 163 | 164 | ||
| 164 | std::vector<vk::ShaderModule> modules; | 165 | std::vector<vk::ShaderModule> modules; |
| @@ -388,6 +389,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa | |||
| 388 | .logicOp = VK_LOGIC_OP_COPY, | 389 | .logicOp = VK_LOGIC_OP_COPY, |
| 389 | .attachmentCount = static_cast<u32>(num_attachments), | 390 | .attachmentCount = static_cast<u32>(num_attachments), |
| 390 | .pAttachments = cb_attachments.data(), | 391 | .pAttachments = cb_attachments.data(), |
| 392 | .blendConstants = {}, | ||
| 391 | }; | 393 | }; |
| 392 | 394 | ||
| 393 | std::vector dynamic_states{ | 395 | std::vector dynamic_states{ |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e8515321b..13dd16356 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -240,6 +240,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
| 240 | .is_tiled = is_tiled, | 240 | .is_tiled = is_tiled, |
| 241 | .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | 241 | .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || |
| 242 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, | 242 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, |
| 243 | .is_layered = false, | ||
| 243 | .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, | 244 | .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, |
| 244 | .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, | 245 | .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, |
| 245 | .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, | 246 | .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, |