diff options
| author | 2019-07-26 14:26:44 -0400 | |
|---|---|---|
| committer | 2019-07-26 14:26:44 -0400 | |
| commit | 52f54c728d9691f113f0736fab8fbc60b408dceb (patch) | |
| tree | e02db0d667f818aacbd27e54927ef91e875eb2c2 | |
| parent | Merge pull request #2739 from lioncash/cflow (diff) | |
| parent | NVServices: Correct delayed responses. (diff) | |
| download | yuzu-52f54c728d9691f113f0736fab8fbc60b408dceb.tar.gz yuzu-52f54c728d9691f113f0736fab8fbc60b408dceb.tar.xz yuzu-52f54c728d9691f113f0736fab8fbc60b408dceb.zip | |
Merge pull request #2592 from FernandoS27/sync1
Implement GPU Synchronization Mechanisms & Correct NVFlinger
44 files changed, 732 insertions, 229 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f4325f0f8..5462decee 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -111,6 +111,8 @@ add_library(core STATIC | |||
| 111 | frontend/scope_acquire_window_context.h | 111 | frontend/scope_acquire_window_context.h |
| 112 | gdbstub/gdbstub.cpp | 112 | gdbstub/gdbstub.cpp |
| 113 | gdbstub/gdbstub.h | 113 | gdbstub/gdbstub.h |
| 114 | hardware_interrupt_manager.cpp | ||
| 115 | hardware_interrupt_manager.h | ||
| 114 | hle/ipc.h | 116 | hle/ipc.h |
| 115 | hle/ipc_helpers.h | 117 | hle/ipc_helpers.h |
| 116 | hle/kernel/address_arbiter.cpp | 118 | hle/kernel/address_arbiter.cpp |
| @@ -372,6 +374,7 @@ add_library(core STATIC | |||
| 372 | hle/service/nvdrv/devices/nvmap.h | 374 | hle/service/nvdrv/devices/nvmap.h |
| 373 | hle/service/nvdrv/interface.cpp | 375 | hle/service/nvdrv/interface.cpp |
| 374 | hle/service/nvdrv/interface.h | 376 | hle/service/nvdrv/interface.h |
| 377 | hle/service/nvdrv/nvdata.h | ||
| 375 | hle/service/nvdrv/nvdrv.cpp | 378 | hle/service/nvdrv/nvdrv.cpp |
| 376 | hle/service/nvdrv/nvdrv.h | 379 | hle/service/nvdrv/nvdrv.h |
| 377 | hle/service/nvdrv/nvmemp.cpp | 380 | hle/service/nvdrv/nvmemp.cpp |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 4aceee785..20d64f3b0 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "core/file_sys/vfs_concat.h" | 19 | #include "core/file_sys/vfs_concat.h" |
| 20 | #include "core/file_sys/vfs_real.h" | 20 | #include "core/file_sys/vfs_real.h" |
| 21 | #include "core/gdbstub/gdbstub.h" | 21 | #include "core/gdbstub/gdbstub.h" |
| 22 | #include "core/hardware_interrupt_manager.h" | ||
| 22 | #include "core/hle/kernel/client_port.h" | 23 | #include "core/hle/kernel/client_port.h" |
| 23 | #include "core/hle/kernel/kernel.h" | 24 | #include "core/hle/kernel/kernel.h" |
| 24 | #include "core/hle/kernel/process.h" | 25 | #include "core/hle/kernel/process.h" |
| @@ -151,7 +152,7 @@ struct System::Impl { | |||
| 151 | if (!renderer->Init()) { | 152 | if (!renderer->Init()) { |
| 152 | return ResultStatus::ErrorVideoCore; | 153 | return ResultStatus::ErrorVideoCore; |
| 153 | } | 154 | } |
| 154 | 155 | interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | |
| 155 | gpu_core = VideoCore::CreateGPU(system); | 156 | gpu_core = VideoCore::CreateGPU(system); |
| 156 | 157 | ||
| 157 | is_powered_on = true; | 158 | is_powered_on = true; |
| @@ -298,6 +299,7 @@ struct System::Impl { | |||
| 298 | std::unique_ptr<VideoCore::RendererBase> renderer; | 299 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 299 | std::unique_ptr<Tegra::GPU> gpu_core; | 300 | std::unique_ptr<Tegra::GPU> gpu_core; |
| 300 | std::shared_ptr<Tegra::DebugContext> debug_context; | 301 | std::shared_ptr<Tegra::DebugContext> debug_context; |
| 302 | std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager; | ||
| 301 | CpuCoreManager cpu_core_manager; | 303 | CpuCoreManager cpu_core_manager; |
| 302 | bool is_powered_on = false; | 304 | bool is_powered_on = false; |
| 303 | 305 | ||
| @@ -444,6 +446,14 @@ const Tegra::GPU& System::GPU() const { | |||
| 444 | return *impl->gpu_core; | 446 | return *impl->gpu_core; |
| 445 | } | 447 | } |
| 446 | 448 | ||
| 449 | Core::Hardware::InterruptManager& System::InterruptManager() { | ||
| 450 | return *impl->interrupt_manager; | ||
| 451 | } | ||
| 452 | |||
| 453 | const Core::Hardware::InterruptManager& System::InterruptManager() const { | ||
| 454 | return *impl->interrupt_manager; | ||
| 455 | } | ||
| 456 | |||
| 447 | VideoCore::RendererBase& System::Renderer() { | 457 | VideoCore::RendererBase& System::Renderer() { |
| 448 | return *impl->renderer; | 458 | return *impl->renderer; |
| 449 | } | 459 | } |
diff --git a/src/core/core.h b/src/core/core.h index 8ebb385ac..0138d93b0 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -70,6 +70,10 @@ namespace Core::Timing { | |||
| 70 | class CoreTiming; | 70 | class CoreTiming; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | namespace Core::Hardware { | ||
| 74 | class InterruptManager; | ||
| 75 | } | ||
| 76 | |||
| 73 | namespace Core { | 77 | namespace Core { |
| 74 | 78 | ||
| 75 | class ARM_Interface; | 79 | class ARM_Interface; |
| @@ -234,6 +238,12 @@ public: | |||
| 234 | /// Provides a constant reference to the core timing instance. | 238 | /// Provides a constant reference to the core timing instance. |
| 235 | const Timing::CoreTiming& CoreTiming() const; | 239 | const Timing::CoreTiming& CoreTiming() const; |
| 236 | 240 | ||
| 241 | /// Provides a reference to the interrupt manager instance. | ||
| 242 | Core::Hardware::InterruptManager& InterruptManager(); | ||
| 243 | |||
| 244 | /// Provides a constant reference to the interrupt manager instance. | ||
| 245 | const Core::Hardware::InterruptManager& InterruptManager() const; | ||
| 246 | |||
| 237 | /// Provides a reference to the kernel instance. | 247 | /// Provides a reference to the kernel instance. |
| 238 | Kernel::KernelCore& Kernel(); | 248 | Kernel::KernelCore& Kernel(); |
| 239 | 249 | ||
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp new file mode 100644 index 000000000..c2115db2d --- /dev/null +++ b/src/core/hardware_interrupt_manager.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/hardware_interrupt_manager.h" | ||
| 8 | #include "core/hle/service/nvdrv/interface.h" | ||
| 9 | #include "core/hle/service/sm/sm.h" | ||
| 10 | |||
| 11 | namespace Core::Hardware { | ||
| 12 | |||
| 13 | InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) { | ||
| 14 | gpu_interrupt_event = | ||
| 15 | system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) { | ||
| 16 | auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv"); | ||
| 17 | const u32 syncpt = static_cast<u32>(message >> 32); | ||
| 18 | const u32 value = static_cast<u32>(message); | ||
| 19 | nvdrv->SignalGPUInterruptSyncpt(syncpt, value); | ||
| 20 | }); | ||
| 21 | } | ||
| 22 | |||
| 23 | InterruptManager::~InterruptManager() = default; | ||
| 24 | |||
| 25 | void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 26 | const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value; | ||
| 27 | system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg); | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Core::Hardware | ||
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h new file mode 100644 index 000000000..494db883a --- /dev/null +++ b/src/core/hardware_interrupt_manager.h | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2019 Yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Core { | ||
| 10 | class System; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Core::Timing { | ||
| 14 | struct EventType; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Core::Hardware { | ||
| 18 | |||
| 19 | class InterruptManager { | ||
| 20 | public: | ||
| 21 | explicit InterruptManager(Core::System& system); | ||
| 22 | ~InterruptManager(); | ||
| 23 | |||
| 24 | void GPUInterruptSyncpt(u32 syncpoint_id, u32 value); | ||
| 25 | |||
| 26 | private: | ||
| 27 | Core::System& system; | ||
| 28 | Core::Timing::EventType* gpu_interrupt_event{}; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Core::Hardware | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index 4f6042b00..5b8248433 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h | |||
| @@ -8,6 +8,11 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 11 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 12 | |||
| 13 | namespace Core { | ||
| 14 | class System; | ||
| 15 | } | ||
| 11 | 16 | ||
| 12 | namespace Service::Nvidia::Devices { | 17 | namespace Service::Nvidia::Devices { |
| 13 | 18 | ||
| @@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices { | |||
| 15 | /// implement the ioctl interface. | 20 | /// implement the ioctl interface. |
| 16 | class nvdevice { | 21 | class nvdevice { |
| 17 | public: | 22 | public: |
| 18 | nvdevice() = default; | 23 | explicit nvdevice(Core::System& system) : system{system} {}; |
| 19 | virtual ~nvdevice() = default; | 24 | virtual ~nvdevice() = default; |
| 20 | union Ioctl { | 25 | union Ioctl { |
| 21 | u32_le raw; | 26 | u32_le raw; |
| @@ -33,7 +38,11 @@ public: | |||
| 33 | * @param output A buffer where the output data will be written to. | 38 | * @param output A buffer where the output data will be written to. |
| 34 | * @returns The result code of the ioctl. | 39 | * @returns The result code of the ioctl. |
| 35 | */ | 40 | */ |
| 36 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; | 41 | virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 42 | IoctlCtrl& ctrl) = 0; | ||
| 43 | |||
| 44 | protected: | ||
| 45 | Core::System& system; | ||
| 37 | }; | 46 | }; |
| 38 | 47 | ||
| 39 | } // namespace Service::Nvidia::Devices | 48 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 20c7c39aa..76494f0b7 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvdisp_disp0 ::~nvdisp_disp0() = default; | 18 | nvdisp_disp0 ::~nvdisp_disp0() = default; |
| 18 | 19 | ||
| 19 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 22 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 21 | return 0; | 23 | return 0; |
| 22 | } | 24 | } |
| @@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 34 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), | 36 | addr, offset, width, height, stride, static_cast<PixelFormat>(format), |
| 35 | transform, crop_rect}; | 37 | transform, crop_rect}; |
| 36 | 38 | ||
| 37 | auto& instance = Core::System::GetInstance(); | 39 | system.GetPerfStats().EndGameFrame(); |
| 38 | instance.GetPerfStats().EndGameFrame(); | 40 | system.GPU().SwapBuffers(framebuffer); |
| 39 | instance.GPU().SwapBuffers(framebuffer); | ||
| 40 | } | 41 | } |
| 41 | 42 | ||
| 42 | } // namespace Service::Nvidia::Devices | 43 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 12f3ef825..e79e490ff 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvdisp_disp0 final : public nvdevice { | 18 | class nvdisp_disp0 final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvdisp_disp0() override; | 21 | ~nvdisp_disp0() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 26 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
| 26 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, | 27 | void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index af62d33d2..24ab3f2e9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -22,10 +22,12 @@ enum { | |||
| 22 | }; | 22 | }; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 25 | nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 26 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 26 | nvhost_as_gpu::~nvhost_as_gpu() = default; | 27 | nvhost_as_gpu::~nvhost_as_gpu() = default; |
| 27 | 28 | ||
| 28 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 29 | u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 30 | IoctlCtrl& ctrl) { | ||
| 29 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 31 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 30 | command.raw, input.size(), output.size()); | 32 | command.raw, input.size(), output.size()); |
| 31 | 33 | ||
| @@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>& | |||
| 65 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, | 67 | LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, |
| 66 | params.page_size, params.flags); | 68 | params.page_size, params.flags); |
| 67 | 69 | ||
| 68 | auto& gpu = Core::System::GetInstance().GPU(); | 70 | auto& gpu = system.GPU(); |
| 69 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; | 71 | const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; |
| 70 | if (params.flags & 1) { | 72 | if (params.flags & 1) { |
| 71 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); | 73 | params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); |
| @@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 85 | std::vector<IoctlRemapEntry> entries(num_entries); | 87 | std::vector<IoctlRemapEntry> entries(num_entries); |
| 86 | std::memcpy(entries.data(), input.data(), input.size()); | 88 | std::memcpy(entries.data(), input.data(), input.size()); |
| 87 | 89 | ||
| 88 | auto& gpu = Core::System::GetInstance().GPU(); | 90 | auto& gpu = system.GPU(); |
| 89 | for (const auto& entry : entries) { | 91 | for (const auto& entry : entries) { |
| 90 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", | 92 | LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", |
| 91 | entry.offset, entry.nvmap_handle, entry.pages); | 93 | entry.offset, entry.nvmap_handle, entry.pages); |
| @@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 136 | // case to prevent unexpected behavior. | 138 | // case to prevent unexpected behavior. |
| 137 | ASSERT(object->id == params.nvmap_handle); | 139 | ASSERT(object->id == params.nvmap_handle); |
| 138 | 140 | ||
| 139 | auto& gpu = Core::System::GetInstance().GPU(); | 141 | auto& gpu = system.GPU(); |
| 140 | 142 | ||
| 141 | if (params.flags & 1) { | 143 | if (params.flags & 1) { |
| 142 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); | 144 | params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); |
| @@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 173 | return 0; | 175 | return 0; |
| 174 | } | 176 | } |
| 175 | 177 | ||
| 176 | params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, | 178 | params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 177 | itr->second.size); | ||
| 178 | buffer_mappings.erase(itr->second.offset); | 179 | buffer_mappings.erase(itr->second.offset); |
| 179 | 180 | ||
| 180 | std::memcpy(output.data(), &params, output.size()); | 181 | std::memcpy(output.data(), &params, output.size()); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index eb14b1da8..30ca5f4c3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -17,10 +17,11 @@ class nvmap; | |||
| 17 | 17 | ||
| 18 | class nvhost_as_gpu final : public nvdevice { | 18 | class nvhost_as_gpu final : public nvdevice { |
| 19 | public: | 19 | public: |
| 20 | explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); | 20 | explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 21 | ~nvhost_as_gpu() override; | 21 | ~nvhost_as_gpu() override; |
| 22 | 22 | ||
| 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 23 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 24 | IoctlCtrl& ctrl) override; | ||
| 24 | 25 | ||
| 25 | private: | 26 | private: |
| 26 | enum class IoctlCommand : u32_le { | 27 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b39fb9ef9..9a66a5f88 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -7,14 +7,20 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/core.h" | ||
| 11 | #include "core/hle/kernel/readable_event.h" | ||
| 12 | #include "core/hle/kernel/writable_event.h" | ||
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" |
| 14 | #include "video_core/gpu.h" | ||
| 11 | 15 | ||
| 12 | namespace Service::Nvidia::Devices { | 16 | namespace Service::Nvidia::Devices { |
| 13 | 17 | ||
| 14 | nvhost_ctrl::nvhost_ctrl() = default; | 18 | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) |
| 19 | : nvdevice(system), events_interface{events_interface} {} | ||
| 15 | nvhost_ctrl::~nvhost_ctrl() = default; | 20 | nvhost_ctrl::~nvhost_ctrl() = default; |
| 16 | 21 | ||
| 17 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 22 | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 23 | IoctlCtrl& ctrl) { | ||
| 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 24 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 19 | command.raw, input.size(), output.size()); | 25 | command.raw, input.size(), output.size()); |
| 20 | 26 | ||
| @@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector< | |||
| 22 | case IoctlCommand::IocGetConfigCommand: | 28 | case IoctlCommand::IocGetConfigCommand: |
| 23 | return NvOsGetConfigU32(input, output); | 29 | return NvOsGetConfigU32(input, output); |
| 24 | case IoctlCommand::IocCtrlEventWaitCommand: | 30 | case IoctlCommand::IocCtrlEventWaitCommand: |
| 25 | return IocCtrlEventWait(input, output, false); | 31 | return IocCtrlEventWait(input, output, false, ctrl); |
| 26 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: | 32 | case IoctlCommand::IocCtrlEventWaitAsyncCommand: |
| 27 | return IocCtrlEventWait(input, output, true); | 33 | return IocCtrlEventWait(input, output, true, ctrl); |
| 28 | case IoctlCommand::IocCtrlEventRegisterCommand: | 34 | case IoctlCommand::IocCtrlEventRegisterCommand: |
| 29 | return IocCtrlEventRegister(input, output); | 35 | return IocCtrlEventRegister(input, output); |
| 36 | case IoctlCommand::IocCtrlEventUnregisterCommand: | ||
| 37 | return IocCtrlEventUnregister(input, output); | ||
| 38 | case IoctlCommand::IocCtrlEventSignalCommand: | ||
| 39 | return IocCtrlEventSignal(input, output); | ||
| 30 | } | 40 | } |
| 31 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); | 41 | UNIMPLEMENTED_MSG("Unimplemented ioctl"); |
| 32 | return 0; | 42 | return 0; |
| @@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& | |||
| 41 | } | 51 | } |
| 42 | 52 | ||
| 43 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, | 53 | u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, |
| 44 | bool is_async) { | 54 | bool is_async, IoctlCtrl& ctrl) { |
| 45 | IocCtrlEventWaitParams params{}; | 55 | IocCtrlEventWaitParams params{}; |
| 46 | std::memcpy(&params, input.data(), sizeof(params)); | 56 | std::memcpy(&params, input.data(), sizeof(params)); |
| 47 | LOG_WARNING(Service_NVDRV, | 57 | LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", |
| 48 | "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", | 58 | params.syncpt_id, params.threshold, params.timeout, is_async); |
| 49 | params.syncpt_id, params.threshold, params.timeout, is_async); | ||
| 50 | 59 | ||
| 51 | // TODO(Subv): Implement actual syncpt waiting. | 60 | if (params.syncpt_id >= MaxSyncPoints) { |
| 52 | params.value = 0; | 61 | return NvResult::BadParameter; |
| 62 | } | ||
| 63 | |||
| 64 | auto& gpu = system.GPU(); | ||
| 65 | // This is mostly to take into account unimplemented features. As synced | ||
| 66 | // gpu is always synced. | ||
| 67 | if (!gpu.IsAsync()) { | ||
| 68 | return NvResult::Success; | ||
| 69 | } | ||
| 70 | auto lock = gpu.LockSync(); | ||
| 71 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | ||
| 72 | const s32 diff = current_syncpoint_value - params.threshold; | ||
| 73 | if (diff >= 0) { | ||
| 74 | params.value = current_syncpoint_value; | ||
| 75 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 76 | return NvResult::Success; | ||
| 77 | } | ||
| 78 | const u32 target_value = current_syncpoint_value - diff; | ||
| 79 | |||
| 80 | if (!is_async) { | ||
| 81 | params.value = 0; | ||
| 82 | } | ||
| 83 | |||
| 84 | if (params.timeout == 0) { | ||
| 85 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 86 | return NvResult::Timeout; | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 event_id; | ||
| 90 | if (is_async) { | ||
| 91 | event_id = params.value & 0x00FF; | ||
| 92 | if (event_id >= MaxNvEvents) { | ||
| 93 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 94 | return NvResult::BadParameter; | ||
| 95 | } | ||
| 96 | } else { | ||
| 97 | if (ctrl.fresh_call) { | ||
| 98 | const auto result = events_interface.GetFreeEvent(); | ||
| 99 | if (result) { | ||
| 100 | event_id = *result; | ||
| 101 | } else { | ||
| 102 | LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||
| 103 | event_id = params.value & 0x00FF; | ||
| 104 | } | ||
| 105 | } else { | ||
| 106 | event_id = ctrl.event_id; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | EventState status = events_interface.status[event_id]; | ||
| 111 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | ||
| 112 | events_interface.SetEventStatus(event_id, EventState::Waiting); | ||
| 113 | events_interface.assigned_syncpt[event_id] = params.syncpt_id; | ||
| 114 | events_interface.assigned_value[event_id] = target_value; | ||
| 115 | if (is_async) { | ||
| 116 | params.value = params.syncpt_id << 4; | ||
| 117 | } else { | ||
| 118 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | ||
| 119 | } | ||
| 120 | params.value |= event_id; | ||
| 121 | events_interface.events[event_id].writable->Clear(); | ||
| 122 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | ||
| 123 | if (!is_async && ctrl.fresh_call) { | ||
| 124 | ctrl.must_delay = true; | ||
| 125 | ctrl.timeout = params.timeout; | ||
| 126 | ctrl.event_id = event_id; | ||
| 127 | return NvResult::Timeout; | ||
| 128 | } | ||
| 129 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 130 | return NvResult::Timeout; | ||
| 131 | } | ||
| 53 | std::memcpy(output.data(), &params, sizeof(params)); | 132 | std::memcpy(output.data(), &params, sizeof(params)); |
| 54 | return 0; | 133 | return NvResult::BadParameter; |
| 55 | } | 134 | } |
| 56 | 135 | ||
| 57 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { | 136 | u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { |
| 58 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); | 137 | IocCtrlEventRegisterParams params{}; |
| 59 | // TODO(bunnei): Implement this. | 138 | std::memcpy(&params, input.data(), sizeof(params)); |
| 60 | return 0; | 139 | const u32 event_id = params.user_event_id & 0x00FF; |
| 140 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 141 | if (event_id >= MaxNvEvents) { | ||
| 142 | return NvResult::BadParameter; | ||
| 143 | } | ||
| 144 | if (events_interface.registered[event_id]) { | ||
| 145 | return NvResult::BadParameter; | ||
| 146 | } | ||
| 147 | events_interface.RegisterEvent(event_id); | ||
| 148 | return NvResult::Success; | ||
| 149 | } | ||
| 150 | |||
| 151 | u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 152 | IocCtrlEventUnregisterParams params{}; | ||
| 153 | std::memcpy(&params, input.data(), sizeof(params)); | ||
| 154 | const u32 event_id = params.user_event_id & 0x00FF; | ||
| 155 | LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id); | ||
| 156 | if (event_id >= MaxNvEvents) { | ||
| 157 | return NvResult::BadParameter; | ||
| 158 | } | ||
| 159 | if (!events_interface.registered[event_id]) { | ||
| 160 | return NvResult::BadParameter; | ||
| 161 | } | ||
| 162 | events_interface.UnregisterEvent(event_id); | ||
| 163 | return NvResult::Success; | ||
| 164 | } | ||
| 165 | |||
| 166 | u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) { | ||
| 167 | IocCtrlEventSignalParams params{}; | ||
| 168 | std::memcpy(&params, input.data(), sizeof(params)); | ||
| 169 | // TODO(Blinkhawk): This is normally called when an NvEvents timeout on WaitSynchronization | ||
| 170 | // It is believed from RE to cancel the GPU Event. However, better research is required | ||
| 171 | u32 event_id = params.user_event_id & 0x00FF; | ||
| 172 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id); | ||
| 173 | if (event_id >= MaxNvEvents) { | ||
| 174 | return NvResult::BadParameter; | ||
| 175 | } | ||
| 176 | if (events_interface.status[event_id] == EventState::Waiting) { | ||
| 177 | auto& gpu = system.GPU(); | ||
| 178 | if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id], | ||
| 179 | events_interface.assigned_value[event_id])) { | ||
| 180 | events_interface.LiberateEvent(event_id); | ||
| 181 | events_interface.events[event_id].writable->Signal(); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | return NvResult::Success; | ||
| 61 | } | 185 | } |
| 62 | 186 | ||
| 63 | } // namespace Service::Nvidia::Devices | 187 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 6d0de2212..14e6e7e57 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -8,15 +8,17 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 11 | #include "core/hle/service/nvdrv/nvdrv.h" | ||
| 11 | 12 | ||
| 12 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 13 | 14 | ||
| 14 | class nvhost_ctrl final : public nvdevice { | 15 | class nvhost_ctrl final : public nvdevice { |
| 15 | public: | 16 | public: |
| 16 | nvhost_ctrl(); | 17 | explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); |
| 17 | ~nvhost_ctrl() override; | 18 | ~nvhost_ctrl() override; |
| 18 | 19 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 20 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) override; | ||
| 20 | 22 | ||
| 21 | private: | 23 | private: |
| 22 | enum class IoctlCommand : u32_le { | 24 | enum class IoctlCommand : u32_le { |
| @@ -132,9 +134,16 @@ private: | |||
| 132 | 134 | ||
| 133 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); | 135 | u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); |
| 134 | 136 | ||
| 135 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); | 137 | u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, |
| 138 | IoctlCtrl& ctrl); | ||
| 136 | 139 | ||
| 137 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); | 140 | u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); |
| 141 | |||
| 142 | u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 143 | |||
| 144 | u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output); | ||
| 145 | |||
| 146 | EventInterface& events_interface; | ||
| 138 | }; | 147 | }; |
| 139 | 148 | ||
| 140 | } // namespace Service::Nvidia::Devices | 149 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 0e28755bd..988effd90 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | |||
| @@ -12,10 +12,11 @@ | |||
| 12 | 12 | ||
| 13 | namespace Service::Nvidia::Devices { | 13 | namespace Service::Nvidia::Devices { |
| 14 | 14 | ||
| 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; | 15 | nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {} |
| 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; | 16 | nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; |
| 17 | 17 | ||
| 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 18 | u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 19 | IoctlCtrl& ctrl) { | ||
| 19 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 20 | command.raw, input.size(), output.size()); | 21 | command.raw, input.size(), output.size()); |
| 21 | 22 | ||
| @@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o | |||
| 185 | 186 | ||
| 186 | IoctlGetGpuTime params{}; | 187 | IoctlGetGpuTime params{}; |
| 187 | std::memcpy(&params, input.data(), input.size()); | 188 | std::memcpy(&params, input.data(), input.size()); |
| 188 | const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); | 189 | const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); |
| 189 | params.gpu_time = static_cast<u64_le>(ns.count()); | 190 | params.gpu_time = static_cast<u64_le>(ns.count()); |
| 190 | std::memcpy(output.data(), &params, output.size()); | 191 | std::memcpy(output.data(), &params, output.size()); |
| 191 | return 0; | 192 | return 0; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index 240435eea..2b035ae3f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_ctrl_gpu final : public nvdevice { | 14 | class nvhost_ctrl_gpu final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_ctrl_gpu(); | 16 | explicit nvhost_ctrl_gpu(Core::System& system); |
| 17 | ~nvhost_ctrl_gpu() override; | 17 | ~nvhost_ctrl_gpu() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 8ce7bc7a5..241dac881 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -13,10 +13,12 @@ | |||
| 13 | 13 | ||
| 14 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 15 | 15 | ||
| 16 | nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} | 16 | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) |
| 17 | : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} | ||
| 17 | nvhost_gpu::~nvhost_gpu() = default; | 18 | nvhost_gpu::~nvhost_gpu() = default; |
| 18 | 19 | ||
| 19 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 20 | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 21 | IoctlCtrl& ctrl) { | ||
| 20 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 22 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 21 | command.raw, input.size(), output.size()); | 23 | command.raw, input.size(), output.size()); |
| 22 | 24 | ||
| @@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 119 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | 121 | params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |
| 120 | params.unk3); | 122 | params.unk3); |
| 121 | 123 | ||
| 122 | params.fence_out.id = 0; | 124 | auto& gpu = system.GPU(); |
| 123 | params.fence_out.value = 0; | 125 | params.fence_out.id = assigned_syncpoints; |
| 126 | params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); | ||
| 127 | assigned_syncpoints++; | ||
| 124 | std::memcpy(output.data(), &params, output.size()); | 128 | std::memcpy(output.data(), &params, output.size()); |
| 125 | return 0; | 129 | return 0; |
| 126 | } | 130 | } |
| @@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 143 | IoctlSubmitGpfifo params{}; | 147 | IoctlSubmitGpfifo params{}; |
| 144 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | 148 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 145 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 149 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 146 | params.address, params.num_entries, params.flags); | 150 | params.address, params.num_entries, params.flags.raw); |
| 147 | 151 | ||
| 148 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + | 152 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |
| 149 | params.num_entries * sizeof(Tegra::CommandListHeader), | 153 | params.num_entries * sizeof(Tegra::CommandListHeader), |
| @@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 153 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 157 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 154 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 158 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 155 | 159 | ||
| 156 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 160 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 161 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 162 | |||
| 163 | auto& gpu = system.GPU(); | ||
| 164 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 165 | if (params.flags.increment.Value()) { | ||
| 166 | params.fence_out.value += current_syncpoint_value; | ||
| 167 | } else { | ||
| 168 | params.fence_out.value = current_syncpoint_value; | ||
| 169 | } | ||
| 170 | gpu.PushGPUEntries(std::move(entries)); | ||
| 157 | 171 | ||
| 158 | params.fence_out.id = 0; | ||
| 159 | params.fence_out.value = 0; | ||
| 160 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | 172 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |
| 161 | return 0; | 173 | return 0; |
| 162 | } | 174 | } |
| @@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 168 | IoctlSubmitGpfifo params{}; | 180 | IoctlSubmitGpfifo params{}; |
| 169 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | 181 | std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |
| 170 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 182 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 171 | params.address, params.num_entries, params.flags); | 183 | params.address, params.num_entries, params.flags.raw); |
| 172 | 184 | ||
| 173 | Tegra::CommandList entries(params.num_entries); | 185 | Tegra::CommandList entries(params.num_entries); |
| 174 | Memory::ReadBlock(params.address, entries.data(), | 186 | Memory::ReadBlock(params.address, entries.data(), |
| 175 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 187 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 176 | 188 | ||
| 177 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); | 189 | UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |
| 190 | UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); | ||
| 191 | |||
| 192 | auto& gpu = system.GPU(); | ||
| 193 | u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); | ||
| 194 | if (params.flags.increment.Value()) { | ||
| 195 | params.fence_out.value += current_syncpoint_value; | ||
| 196 | } else { | ||
| 197 | params.fence_out.value = current_syncpoint_value; | ||
| 198 | } | ||
| 199 | gpu.PushGPUEntries(std::move(entries)); | ||
| 178 | 200 | ||
| 179 | params.fence_out.id = 0; | ||
| 180 | params.fence_out.value = 0; | ||
| 181 | std::memcpy(output.data(), &params, output.size()); | 201 | std::memcpy(output.data(), &params, output.size()); |
| 182 | return 0; | 202 | return 0; |
| 183 | } | 203 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 62beb5c0c..d2e8fbae9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
| @@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | |||
| 20 | 21 | ||
| 21 | class nvhost_gpu final : public nvdevice { | 22 | class nvhost_gpu final : public nvdevice { |
| 22 | public: | 23 | public: |
| 23 | explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); | 24 | explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |
| 24 | ~nvhost_gpu() override; | 25 | ~nvhost_gpu() override; |
| 25 | 26 | ||
| 26 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 27 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 28 | IoctlCtrl& ctrl) override; | ||
| 27 | 29 | ||
| 28 | private: | 30 | private: |
| 29 | enum class IoctlCommand : u32_le { | 31 | enum class IoctlCommand : u32_le { |
| @@ -113,11 +115,7 @@ private: | |||
| 113 | static_assert(sizeof(IoctlGetErrorNotification) == 16, | 115 | static_assert(sizeof(IoctlGetErrorNotification) == 16, |
| 114 | "IoctlGetErrorNotification is incorrect size"); | 116 | "IoctlGetErrorNotification is incorrect size"); |
| 115 | 117 | ||
| 116 | struct IoctlFence { | 118 | static_assert(sizeof(Fence) == 8, "Fence is incorrect size"); |
| 117 | u32_le id; | ||
| 118 | u32_le value; | ||
| 119 | }; | ||
| 120 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size"); | ||
| 121 | 119 | ||
| 122 | struct IoctlAllocGpfifoEx { | 120 | struct IoctlAllocGpfifoEx { |
| 123 | u32_le num_entries; | 121 | u32_le num_entries; |
| @@ -132,13 +130,13 @@ private: | |||
| 132 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); | 130 | static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); |
| 133 | 131 | ||
| 134 | struct IoctlAllocGpfifoEx2 { | 132 | struct IoctlAllocGpfifoEx2 { |
| 135 | u32_le num_entries; // in | 133 | u32_le num_entries; // in |
| 136 | u32_le flags; // in | 134 | u32_le flags; // in |
| 137 | u32_le unk0; // in (1 works) | 135 | u32_le unk0; // in (1 works) |
| 138 | IoctlFence fence_out; // out | 136 | Fence fence_out; // out |
| 139 | u32_le unk1; // in | 137 | u32_le unk1; // in |
| 140 | u32_le unk2; // in | 138 | u32_le unk2; // in |
| 141 | u32_le unk3; // in | 139 | u32_le unk3; // in |
| 142 | }; | 140 | }; |
| 143 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); | 141 | static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); |
| 144 | 142 | ||
| @@ -153,10 +151,16 @@ private: | |||
| 153 | struct IoctlSubmitGpfifo { | 151 | struct IoctlSubmitGpfifo { |
| 154 | u64_le address; // pointer to gpfifo entry structs | 152 | u64_le address; // pointer to gpfifo entry structs |
| 155 | u32_le num_entries; // number of fence objects being submitted | 153 | u32_le num_entries; // number of fence objects being submitted |
| 156 | u32_le flags; | 154 | union { |
| 157 | IoctlFence fence_out; // returned new fence object for others to wait on | 155 | u32_le raw; |
| 158 | }; | 156 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list |
| 159 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), | 157 | BitField<1, 1, u32_le> add_increment; // append an increment to the list |
| 158 | BitField<2, 1, u32_le> new_hw_format; // Mostly ignored | ||
| 159 | BitField<8, 1, u32_le> increment; // increment the returned fence | ||
| 160 | } flags; | ||
| 161 | Fence fence_out; // returned new fence object for others to wait on | ||
| 162 | }; | ||
| 163 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | ||
| 160 | "IoctlSubmitGpfifo is incorrect size"); | 164 | "IoctlSubmitGpfifo is incorrect size"); |
| 161 | 165 | ||
| 162 | struct IoctlGetWaitbase { | 166 | struct IoctlGetWaitbase { |
| @@ -184,6 +188,7 @@ private: | |||
| 184 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); | 188 | u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); |
| 185 | 189 | ||
| 186 | std::shared_ptr<nvmap> nvmap_dev; | 190 | std::shared_ptr<nvmap> nvmap_dev; |
| 191 | u32 assigned_syncpoints{}; | ||
| 187 | }; | 192 | }; |
| 188 | 193 | ||
| 189 | } // namespace Service::Nvidia::Devices | 194 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index f5e8ea7c3..f572ad30f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvdec::nvhost_nvdec() = default; | 13 | nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvdec::~nvhost_nvdec() = default; | 14 | nvhost_nvdec::~nvhost_nvdec() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 0e7b284f8..2710f0511 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvdec final : public nvdevice { | 14 | class nvhost_nvdec final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvdec(); | 16 | explicit nvhost_nvdec(Core::System& system); |
| 17 | ~nvhost_nvdec() override; | 17 | ~nvhost_nvdec() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 3e0951ab0..38282956f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_nvjpg::nvhost_nvjpg() = default; | 13 | nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; | 14 | nvhost_nvjpg::~nvhost_nvjpg() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 89fd5e95e..379766693 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_nvjpg final : public nvdevice { | 14 | class nvhost_nvjpg final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_nvjpg(); | 16 | explicit nvhost_nvjpg(Core::System& system); |
| 17 | ~nvhost_nvjpg() override; | 17 | ~nvhost_nvjpg() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index d544f0f31..70e8091db 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -10,10 +10,11 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::Nvidia::Devices { | 11 | namespace Service::Nvidia::Devices { |
| 12 | 12 | ||
| 13 | nvhost_vic::nvhost_vic() = default; | 13 | nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {} |
| 14 | nvhost_vic::~nvhost_vic() = default; | 14 | nvhost_vic::~nvhost_vic() = default; |
| 15 | 15 | ||
| 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 16 | u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 17 | IoctlCtrl& ctrl) { | ||
| 17 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", | 18 | LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", |
| 18 | command.raw, input.size(), output.size()); | 19 | command.raw, input.size(), output.size()); |
| 19 | 20 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index fc24c3f9c..7d111977e 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h | |||
| @@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices { | |||
| 13 | 13 | ||
| 14 | class nvhost_vic final : public nvdevice { | 14 | class nvhost_vic final : public nvdevice { |
| 15 | public: | 15 | public: |
| 16 | nvhost_vic(); | 16 | explicit nvhost_vic(Core::System& system); |
| 17 | ~nvhost_vic() override; | 17 | ~nvhost_vic() override; |
| 18 | 18 | ||
| 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 19 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 20 | IoctlCtrl& ctrl) override; | ||
| 20 | 21 | ||
| 21 | private: | 22 | private: |
| 22 | enum class IoctlCommand : u32_le { | 23 | enum class IoctlCommand : u32_le { |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 1ec796fc6..223b496b7 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -18,7 +18,7 @@ enum { | |||
| 18 | }; | 18 | }; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | nvmap::nvmap() = default; | 21 | nvmap::nvmap(Core::System& system) : nvdevice(system) {} |
| 22 | nvmap::~nvmap() = default; | 22 | nvmap::~nvmap() = default; |
| 23 | 23 | ||
| 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { | 24 | VAddr nvmap::GetObjectAddress(u32 handle) const { |
| @@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const { | |||
| 28 | return object->addr; | 28 | return object->addr; |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { | 31 | u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 32 | IoctlCtrl& ctrl) { | ||
| 32 | switch (static_cast<IoctlCommand>(command.raw)) { | 33 | switch (static_cast<IoctlCommand>(command.raw)) { |
| 33 | case IoctlCommand::Create: | 34 | case IoctlCommand::Create: |
| 34 | return IocCreate(input, output); | 35 | return IocCreate(input, output); |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 396230c19..bf4a101c2 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h | |||
| @@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices { | |||
| 16 | 16 | ||
| 17 | class nvmap final : public nvdevice { | 17 | class nvmap final : public nvdevice { |
| 18 | public: | 18 | public: |
| 19 | nvmap(); | 19 | explicit nvmap(Core::System& system); |
| 20 | ~nvmap() override; | 20 | ~nvmap() override; |
| 21 | 21 | ||
| 22 | /// Returns the allocated address of an nvmap object given its handle. | 22 | /// Returns the allocated address of an nvmap object given its handle. |
| 23 | VAddr GetObjectAddress(u32 handle) const; | 23 | VAddr GetObjectAddress(u32 handle) const; |
| 24 | 24 | ||
| 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; | 25 | u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, |
| 26 | IoctlCtrl& ctrl) override; | ||
| 26 | 27 | ||
| 27 | /// Represents an nvmap object. | 28 | /// Represents an nvmap object. |
| 28 | struct Object { | 29 | struct Object { |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index b60fc748b..d5be64ed2 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -8,12 +8,18 @@ | |||
| 8 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/kernel.h" | 9 | #include "core/hle/kernel/kernel.h" |
| 10 | #include "core/hle/kernel/readable_event.h" | 10 | #include "core/hle/kernel/readable_event.h" |
| 11 | #include "core/hle/kernel/thread.h" | ||
| 11 | #include "core/hle/kernel/writable_event.h" | 12 | #include "core/hle/kernel/writable_event.h" |
| 12 | #include "core/hle/service/nvdrv/interface.h" | 13 | #include "core/hle/service/nvdrv/interface.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 13 | #include "core/hle/service/nvdrv/nvdrv.h" | 15 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 14 | 16 | ||
| 15 | namespace Service::Nvidia { | 17 | namespace Service::Nvidia { |
| 16 | 18 | ||
| 19 | void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 20 | nvdrv->SignalSyncpt(syncpoint_id, value); | ||
| 21 | } | ||
| 22 | |||
| 17 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { | 23 | void NVDRV::Open(Kernel::HLERequestContext& ctx) { |
| 18 | LOG_DEBUG(Service_NVDRV, "called"); | 24 | LOG_DEBUG(Service_NVDRV, "called"); |
| 19 | 25 | ||
| @@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) { | |||
| 36 | 42 | ||
| 37 | std::vector<u8> output(ctx.GetWriteBufferSize()); | 43 | std::vector<u8> output(ctx.GetWriteBufferSize()); |
| 38 | 44 | ||
| 45 | IoctlCtrl ctrl{}; | ||
| 46 | |||
| 47 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl); | ||
| 48 | |||
| 49 | if (ctrl.must_delay) { | ||
| 50 | ctrl.fresh_call = false; | ||
| 51 | ctx.SleepClientThread( | ||
| 52 | "NVServices::DelayedResponse", ctrl.timeout, | ||
| 53 | [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, | ||
| 54 | Kernel::ThreadWakeupReason reason) { | ||
| 55 | IoctlCtrl ctrl2{ctrl}; | ||
| 56 | std::vector<u8> output2 = output; | ||
| 57 | u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2); | ||
| 58 | ctx.WriteBuffer(output2); | ||
| 59 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 60 | rb.Push(RESULT_SUCCESS); | ||
| 61 | rb.Push(result); | ||
| 62 | }, | ||
| 63 | nvdrv->GetEventWriteable(ctrl.event_id)); | ||
| 64 | } else { | ||
| 65 | ctx.WriteBuffer(output); | ||
| 66 | } | ||
| 39 | IPC::ResponseBuilder rb{ctx, 3}; | 67 | IPC::ResponseBuilder rb{ctx, 3}; |
| 40 | rb.Push(RESULT_SUCCESS); | 68 | rb.Push(RESULT_SUCCESS); |
| 41 | rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); | 69 | rb.Push(result); |
| 42 | |||
| 43 | ctx.WriteBuffer(output); | ||
| 44 | } | 70 | } |
| 45 | 71 | ||
| 46 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { | 72 | void NVDRV::Close(Kernel::HLERequestContext& ctx) { |
| @@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) { | |||
| 66 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | 92 | void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { |
| 67 | IPC::RequestParser rp{ctx}; | 93 | IPC::RequestParser rp{ctx}; |
| 68 | u32 fd = rp.Pop<u32>(); | 94 | u32 fd = rp.Pop<u32>(); |
| 69 | u32 event_id = rp.Pop<u32>(); | 95 | // TODO(Blinkhawk): Figure the meaning of the flag at bit 16 |
| 96 | u32 event_id = rp.Pop<u32>() & 0x000000FF; | ||
| 70 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); | 97 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); |
| 71 | 98 | ||
| 72 | IPC::ResponseBuilder rb{ctx, 3, 1}; | 99 | IPC::ResponseBuilder rb{ctx, 3, 1}; |
| 73 | rb.Push(RESULT_SUCCESS); | 100 | rb.Push(RESULT_SUCCESS); |
| 74 | rb.PushCopyObjects(query_event.readable); | 101 | if (event_id < MaxNvEvents) { |
| 75 | rb.Push<u32>(0); | 102 | rb.PushCopyObjects(nvdrv->GetEvent(event_id)); |
| 103 | rb.Push<u32>(NvResult::Success); | ||
| 104 | } else { | ||
| 105 | rb.Push<u32>(0); | ||
| 106 | rb.Push<u32>(NvResult::BadParameter); | ||
| 107 | } | ||
| 76 | } | 108 | } |
| 77 | 109 | ||
| 78 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { | 110 | void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { |
| @@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name) | |||
| 127 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, | 159 | {13, &NVDRV::FinishInitialize, "FinishInitialize"}, |
| 128 | }; | 160 | }; |
| 129 | RegisterHandlers(functions); | 161 | RegisterHandlers(functions); |
| 130 | |||
| 131 | auto& kernel = Core::System::GetInstance().Kernel(); | ||
| 132 | query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, | ||
| 133 | "NVDRV::query_event"); | ||
| 134 | } | 162 | } |
| 135 | 163 | ||
| 136 | NVDRV::~NVDRV() = default; | 164 | NVDRV::~NVDRV() = default; |
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h index 5b4889910..10a0ecd52 100644 --- a/src/core/hle/service/nvdrv/interface.h +++ b/src/core/hle/service/nvdrv/interface.h | |||
| @@ -19,6 +19,8 @@ public: | |||
| 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); | 19 | NVDRV(std::shared_ptr<Module> nvdrv, const char* name); |
| 20 | ~NVDRV() override; | 20 | ~NVDRV() override; |
| 21 | 21 | ||
| 22 | void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 23 | |||
| 22 | private: | 24 | private: |
| 23 | void Open(Kernel::HLERequestContext& ctx); | 25 | void Open(Kernel::HLERequestContext& ctx); |
| 24 | void Ioctl(Kernel::HLERequestContext& ctx); | 26 | void Ioctl(Kernel::HLERequestContext& ctx); |
| @@ -33,8 +35,6 @@ private: | |||
| 33 | std::shared_ptr<Module> nvdrv; | 35 | std::shared_ptr<Module> nvdrv; |
| 34 | 36 | ||
| 35 | u64 pid{}; | 37 | u64 pid{}; |
| 36 | |||
| 37 | Kernel::EventPair query_event; | ||
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | } // namespace Service::Nvidia | 40 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h new file mode 100644 index 000000000..ac03cbc23 --- /dev/null +++ b/src/core/hle/service/nvdrv/nvdata.h | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | #include "common/common_types.h" | ||
| 5 | |||
| 6 | namespace Service::Nvidia { | ||
| 7 | |||
| 8 | constexpr u32 MaxSyncPoints = 192; | ||
| 9 | constexpr u32 MaxNvEvents = 64; | ||
| 10 | |||
| 11 | struct Fence { | ||
| 12 | s32 id; | ||
| 13 | u32 value; | ||
| 14 | }; | ||
| 15 | |||
| 16 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 17 | |||
| 18 | struct MultiFence { | ||
| 19 | u32 num_fences; | ||
| 20 | std::array<Fence, 4> fences; | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum NvResult : u32 { | ||
| 24 | Success = 0, | ||
| 25 | BadParameter = 4, | ||
| 26 | Timeout = 5, | ||
| 27 | ResourceError = 15, | ||
| 28 | }; | ||
| 29 | |||
| 30 | enum class EventState { | ||
| 31 | Free = 0, | ||
| 32 | Registered = 1, | ||
| 33 | Waiting = 2, | ||
| 34 | Busy = 3, | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct IoctlCtrl { | ||
| 38 | // First call done to the servioce for services that call itself again after a call. | ||
| 39 | bool fresh_call{true}; | ||
| 40 | // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep | ||
| 41 | bool must_delay{}; | ||
| 42 | // Timeout for the delay | ||
| 43 | s64 timeout{}; | ||
| 44 | // NV Event Id | ||
| 45 | s32 event_id{-1}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | } // namespace Service::Nvidia | ||
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 6e4b8f2c6..2011a226a 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #include <utility> | 5 | #include <utility> |
| 6 | 6 | ||
| 7 | #include <fmt/format.h> | ||
| 7 | #include "core/hle/ipc_helpers.h" | 8 | #include "core/hle/ipc_helpers.h" |
| 9 | #include "core/hle/kernel/readable_event.h" | ||
| 10 | #include "core/hle/kernel/writable_event.h" | ||
| 8 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 11 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" |
| 10 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | 13 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" |
| @@ -22,8 +25,9 @@ | |||
| 22 | 25 | ||
| 23 | namespace Service::Nvidia { | 26 | namespace Service::Nvidia { |
| 24 | 27 | ||
| 25 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { | 28 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 26 | auto module_ = std::make_shared<Module>(); | 29 | Core::System& system) { |
| 30 | auto module_ = std::make_shared<Module>(system); | ||
| 27 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); | 31 | std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); |
| 28 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); | 32 | std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); |
| 29 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); | 33 | std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); |
| @@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | |||
| 32 | nvflinger.SetNVDrvInstance(module_); | 36 | nvflinger.SetNVDrvInstance(module_); |
| 33 | } | 37 | } |
| 34 | 38 | ||
| 35 | Module::Module() { | 39 | Module::Module(Core::System& system) { |
| 36 | auto nvmap_dev = std::make_shared<Devices::nvmap>(); | 40 | auto& kernel = system.Kernel(); |
| 37 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); | 41 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 38 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); | 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 39 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); | 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( |
| 44 | kernel, Kernel::ResetType::Automatic, event_label); | ||
| 45 | events_interface.status[i] = EventState::Free; | ||
| 46 | events_interface.registered[i] = false; | ||
| 47 | } | ||
| 48 | auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | ||
| 49 | devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | ||
| 50 | devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); | ||
| 51 | devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | ||
| 40 | devices["/dev/nvmap"] = nvmap_dev; | 52 | devices["/dev/nvmap"] = nvmap_dev; |
| 41 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); | 53 | devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |
| 42 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); | 54 | devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); |
| 43 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); | 55 | devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system); |
| 44 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); | 56 | devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |
| 45 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); | 57 | devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system); |
| 46 | } | 58 | } |
| 47 | 59 | ||
| 48 | Module::~Module() = default; | 60 | Module::~Module() = default; |
| @@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) { | |||
| 59 | return fd; | 71 | return fd; |
| 60 | } | 72 | } |
| 61 | 73 | ||
| 62 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { | 74 | u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 75 | IoctlCtrl& ctrl) { | ||
| 63 | auto itr = open_files.find(fd); | 76 | auto itr = open_files.find(fd); |
| 64 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); | 77 | ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); |
| 65 | 78 | ||
| 66 | auto& device = itr->second; | 79 | auto& device = itr->second; |
| 67 | return device->ioctl({command}, input, output); | 80 | return device->ioctl({command}, input, output, ctrl); |
| 68 | } | 81 | } |
| 69 | 82 | ||
| 70 | ResultCode Module::Close(u32 fd) { | 83 | ResultCode Module::Close(u32 fd) { |
| @@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) { | |||
| 77 | return RESULT_SUCCESS; | 90 | return RESULT_SUCCESS; |
| 78 | } | 91 | } |
| 79 | 92 | ||
| 93 | void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { | ||
| 94 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 95 | if (events_interface.assigned_syncpt[i] == syncpoint_id && | ||
| 96 | events_interface.assigned_value[i] == value) { | ||
| 97 | events_interface.LiberateEvent(i); | ||
| 98 | events_interface.events[i].writable->Signal(); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | ||
| 104 | return events_interface.events[event_id].readable; | ||
| 105 | } | ||
| 106 | |||
| 107 | Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | ||
| 108 | return events_interface.events[event_id].writable; | ||
| 109 | } | ||
| 110 | |||
| 80 | } // namespace Service::Nvidia | 111 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 53564f696..a339ab672 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -8,8 +8,14 @@ | |||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/hle/kernel/writable_event.h" | ||
| 12 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 11 | #include "core/hle/service/service.h" | 13 | #include "core/hle/service/service.h" |
| 12 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 13 | namespace Service::NVFlinger { | 19 | namespace Service::NVFlinger { |
| 14 | class NVFlinger; | 20 | class NVFlinger; |
| 15 | } | 21 | } |
| @@ -20,16 +26,72 @@ namespace Devices { | |||
| 20 | class nvdevice; | 26 | class nvdevice; |
| 21 | } | 27 | } |
| 22 | 28 | ||
| 23 | struct IoctlFence { | 29 | struct EventInterface { |
| 24 | u32 id; | 30 | // Mask representing currently busy events |
| 25 | u32 value; | 31 | u64 events_mask{}; |
| 32 | // Each kernel event associated to an NV event | ||
| 33 | std::array<Kernel::EventPair, MaxNvEvents> events; | ||
| 34 | // The status of the current NVEvent | ||
| 35 | std::array<EventState, MaxNvEvents> status{}; | ||
| 36 | // Tells if an NVEvent is registered or not | ||
| 37 | std::array<bool, MaxNvEvents> registered{}; | ||
| 38 | // When an NVEvent is waiting on GPU interrupt, this is the sync_point | ||
| 39 | // associated with it. | ||
| 40 | std::array<u32, MaxNvEvents> assigned_syncpt{}; | ||
| 41 | // This is the value of the GPU interrupt that the NVEvent is waiting | ||
| 42 | // for. | ||
| 43 | std::array<u32, MaxNvEvents> assigned_value{}; | ||
| 44 | // Constant to denote an unassigned syncpoint. | ||
| 45 | static constexpr u32 unassigned_syncpt = 0xFFFFFFFF; | ||
| 46 | std::optional<u32> GetFreeEvent() const { | ||
| 47 | u64 mask = events_mask; | ||
| 48 | for (u32 i = 0; i < MaxNvEvents; i++) { | ||
| 49 | const bool is_free = (mask & 0x1) == 0; | ||
| 50 | if (is_free) { | ||
| 51 | if (status[i] == EventState::Registered || status[i] == EventState::Free) { | ||
| 52 | return {i}; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | mask = mask >> 1; | ||
| 56 | } | ||
| 57 | return {}; | ||
| 58 | } | ||
| 59 | void SetEventStatus(const u32 event_id, EventState new_status) { | ||
| 60 | EventState old_status = status[event_id]; | ||
| 61 | if (old_status == new_status) { | ||
| 62 | return; | ||
| 63 | } | ||
| 64 | status[event_id] = new_status; | ||
| 65 | if (new_status == EventState::Registered) { | ||
| 66 | registered[event_id] = true; | ||
| 67 | } | ||
| 68 | if (new_status == EventState::Waiting || new_status == EventState::Busy) { | ||
| 69 | events_mask |= (1ULL << event_id); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | void RegisterEvent(const u32 event_id) { | ||
| 73 | registered[event_id] = true; | ||
| 74 | if (status[event_id] == EventState::Free) { | ||
| 75 | status[event_id] = EventState::Registered; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | void UnregisterEvent(const u32 event_id) { | ||
| 79 | registered[event_id] = false; | ||
| 80 | if (status[event_id] == EventState::Registered) { | ||
| 81 | status[event_id] = EventState::Free; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | void LiberateEvent(const u32 event_id) { | ||
| 85 | status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free; | ||
| 86 | events_mask &= ~(1ULL << event_id); | ||
| 87 | assigned_syncpt[event_id] = unassigned_syncpt; | ||
| 88 | assigned_value[event_id] = 0; | ||
| 89 | } | ||
| 26 | }; | 90 | }; |
| 27 | 91 | ||
| 28 | static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size"); | ||
| 29 | |||
| 30 | class Module final { | 92 | class Module final { |
| 31 | public: | 93 | public: |
| 32 | Module(); | 94 | Module(Core::System& system); |
| 33 | ~Module(); | 95 | ~Module(); |
| 34 | 96 | ||
| 35 | /// Returns a pointer to one of the available devices, identified by its name. | 97 | /// Returns a pointer to one of the available devices, identified by its name. |
| @@ -44,10 +106,17 @@ public: | |||
| 44 | /// Opens a device node and returns a file descriptor to it. | 106 | /// Opens a device node and returns a file descriptor to it. |
| 45 | u32 Open(const std::string& device_name); | 107 | u32 Open(const std::string& device_name); |
| 46 | /// Sends an ioctl command to the specified file descriptor. | 108 | /// Sends an ioctl command to the specified file descriptor. |
| 47 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); | 109 | u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output, |
| 110 | IoctlCtrl& ctrl); | ||
| 48 | /// Closes a device file descriptor and returns operation success. | 111 | /// Closes a device file descriptor and returns operation success. |
| 49 | ResultCode Close(u32 fd); | 112 | ResultCode Close(u32 fd); |
| 50 | 113 | ||
| 114 | void SignalSyncpt(const u32 syncpoint_id, const u32 value); | ||
| 115 | |||
| 116 | Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const; | ||
| 117 | |||
| 118 | Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; | ||
| 119 | |||
| 51 | private: | 120 | private: |
| 52 | /// Id to use for the next open file descriptor. | 121 | /// Id to use for the next open file descriptor. |
| 53 | u32 next_fd = 1; | 122 | u32 next_fd = 1; |
| @@ -57,9 +126,12 @@ private: | |||
| 57 | 126 | ||
| 58 | /// Mapping of device node names to their implementation. | 127 | /// Mapping of device node names to their implementation. |
| 59 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; | 128 | std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; |
| 129 | |||
| 130 | EventInterface events_interface; | ||
| 60 | }; | 131 | }; |
| 61 | 132 | ||
| 62 | /// Registers all NVDRV services with the specified service manager. | 133 | /// Registers all NVDRV services with the specified service manager. |
| 63 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); | 134 | void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger, |
| 135 | Core::System& system); | ||
| 64 | 136 | ||
| 65 | } // namespace Service::Nvidia | 137 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 5731e815f..e1a07d3ee 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) | |||
| 34 | buffer_wait_event.writable->Signal(); | 34 | buffer_wait_event.writable->Signal(); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | 37 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, |
| 38 | u32 height) { | ||
| 38 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { | 39 | auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { |
| 39 | // Only consider free buffers. Buffers become free once again after they've been Acquired | 40 | // Only consider free buffers. Buffers become free once again after they've been Acquired |
| 40 | // and Released by the compositor, see the NVFlinger::Compose method. | 41 | // and Released by the compositor, see the NVFlinger::Compose method. |
| @@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { | |||
| 51 | } | 52 | } |
| 52 | 53 | ||
| 53 | itr->status = Buffer::Status::Dequeued; | 54 | itr->status = Buffer::Status::Dequeued; |
| 54 | return itr->slot; | 55 | return {{itr->slot, &itr->multi_fence}}; |
| 55 | } | 56 | } |
| 56 | 57 | ||
| 57 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | 58 | const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { |
| @@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { | |||
| 63 | } | 64 | } |
| 64 | 65 | ||
| 65 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | 66 | void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 66 | const Common::Rectangle<int>& crop_rect) { | 67 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 68 | Service::Nvidia::MultiFence& multi_fence) { | ||
| 67 | auto itr = std::find_if(queue.begin(), queue.end(), | 69 | auto itr = std::find_if(queue.begin(), queue.end(), |
| 68 | [&](const Buffer& buffer) { return buffer.slot == slot; }); | 70 | [&](const Buffer& buffer) { return buffer.slot == slot; }); |
| 69 | ASSERT(itr != queue.end()); | 71 | ASSERT(itr != queue.end()); |
| @@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, | |||
| 71 | itr->status = Buffer::Status::Queued; | 73 | itr->status = Buffer::Status::Queued; |
| 72 | itr->transform = transform; | 74 | itr->transform = transform; |
| 73 | itr->crop_rect = crop_rect; | 75 | itr->crop_rect = crop_rect; |
| 76 | itr->swap_interval = swap_interval; | ||
| 77 | itr->multi_fence = multi_fence; | ||
| 78 | queue_sequence.push_back(slot); | ||
| 74 | } | 79 | } |
| 75 | 80 | ||
| 76 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { | 81 | std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { |
| 77 | auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { | 82 | auto itr = queue.end(); |
| 78 | return buffer.status == Buffer::Status::Queued; | 83 | // Iterate to find a queued buffer matching the requested slot. |
| 79 | }); | 84 | while (itr == queue.end() && !queue_sequence.empty()) { |
| 85 | u32 slot = queue_sequence.front(); | ||
| 86 | itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) { | ||
| 87 | return buffer.status == Buffer::Status::Queued && buffer.slot == slot; | ||
| 88 | }); | ||
| 89 | queue_sequence.pop_front(); | ||
| 90 | } | ||
| 80 | if (itr == queue.end()) | 91 | if (itr == queue.end()) |
| 81 | return {}; | 92 | return {}; |
| 82 | itr->status = Buffer::Status::Acquired; | 93 | itr->status = Buffer::Status::Acquired; |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index e1ccb6171..356bedb81 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <list> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | 10 | ||
| @@ -12,6 +13,7 @@ | |||
| 12 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 13 | #include "core/hle/kernel/object.h" | 14 | #include "core/hle/kernel/object.h" |
| 14 | #include "core/hle/kernel/writable_event.h" | 15 | #include "core/hle/kernel/writable_event.h" |
| 16 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | 17 | ||
| 16 | namespace Service::NVFlinger { | 18 | namespace Service::NVFlinger { |
| 17 | 19 | ||
| @@ -68,13 +70,17 @@ public: | |||
| 68 | IGBPBuffer igbp_buffer; | 70 | IGBPBuffer igbp_buffer; |
| 69 | BufferTransformFlags transform; | 71 | BufferTransformFlags transform; |
| 70 | Common::Rectangle<int> crop_rect; | 72 | Common::Rectangle<int> crop_rect; |
| 73 | u32 swap_interval; | ||
| 74 | Service::Nvidia::MultiFence multi_fence; | ||
| 71 | }; | 75 | }; |
| 72 | 76 | ||
| 73 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); | 77 | void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); |
| 74 | std::optional<u32> DequeueBuffer(u32 width, u32 height); | 78 | std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width, |
| 79 | u32 height); | ||
| 75 | const IGBPBuffer& RequestBuffer(u32 slot) const; | 80 | const IGBPBuffer& RequestBuffer(u32 slot) const; |
| 76 | void QueueBuffer(u32 slot, BufferTransformFlags transform, | 81 | void QueueBuffer(u32 slot, BufferTransformFlags transform, |
| 77 | const Common::Rectangle<int>& crop_rect); | 82 | const Common::Rectangle<int>& crop_rect, u32 swap_interval, |
| 83 | Service::Nvidia::MultiFence& multi_fence); | ||
| 78 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); | 84 | std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); |
| 79 | void ReleaseBuffer(u32 slot); | 85 | void ReleaseBuffer(u32 slot); |
| 80 | u32 Query(QueryType type); | 86 | u32 Query(QueryType type); |
| @@ -92,6 +98,7 @@ private: | |||
| 92 | u64 layer_id; | 98 | u64 layer_id; |
| 93 | 99 | ||
| 94 | std::vector<Buffer> queue; | 100 | std::vector<Buffer> queue; |
| 101 | std::list<u32> queue_sequence; | ||
| 95 | Kernel::EventPair buffer_wait_event; | 102 | Kernel::EventPair buffer_wait_event; |
| 96 | }; | 103 | }; |
| 97 | 104 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3c5c53e24..f9db79370 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t | |||
| 37 | displays.emplace_back(4, "Null"); | 37 | displays.emplace_back(4, "Null"); |
| 38 | 38 | ||
| 39 | // Schedule the screen composition events | 39 | // Schedule the screen composition events |
| 40 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; | 40 | composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, |
| 41 | 41 | s64 cycles_late) { | |
| 42 | composition_event = core_timing.RegisterEvent( | 42 | Compose(); |
| 43 | "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { | 43 | const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks(); |
| 44 | Compose(); | 44 | this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event); |
| 45 | this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); | 45 | }); |
| 46 | }); | 46 | |
| 47 | 47 | core_timing.ScheduleEvent(frame_ticks, composition_event); | |
| 48 | core_timing.ScheduleEvent(ticks, composition_event); | ||
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | NVFlinger::~NVFlinger() { | 50 | NVFlinger::~NVFlinger() { |
| @@ -206,8 +205,14 @@ void NVFlinger::Compose() { | |||
| 206 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, | 205 | igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, |
| 207 | buffer->get().transform, buffer->get().crop_rect); | 206 | buffer->get().transform, buffer->get().crop_rect); |
| 208 | 207 | ||
| 208 | swap_interval = buffer->get().swap_interval; | ||
| 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); | 209 | buffer_queue.ReleaseBuffer(buffer->get().slot); |
| 210 | } | 210 | } |
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | s64 NVFlinger::GetNextTicks() const { | ||
| 214 | constexpr s64 max_hertz = 120LL; | ||
| 215 | return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; | ||
| 216 | } | ||
| 217 | |||
| 213 | } // namespace Service::NVFlinger | 218 | } // namespace Service::NVFlinger |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index c0a83fffb..988be8726 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -74,6 +74,8 @@ public: | |||
| 74 | /// finished. | 74 | /// finished. |
| 75 | void Compose(); | 75 | void Compose(); |
| 76 | 76 | ||
| 77 | s64 GetNextTicks() const; | ||
| 78 | |||
| 77 | private: | 79 | private: |
| 78 | /// Finds the display identified by the specified ID. | 80 | /// Finds the display identified by the specified ID. |
| 79 | VI::Display* FindDisplay(u64 display_id); | 81 | VI::Display* FindDisplay(u64 display_id); |
| @@ -98,6 +100,8 @@ private: | |||
| 98 | /// layers. | 100 | /// layers. |
| 99 | u32 next_buffer_queue_id = 1; | 101 | u32 next_buffer_queue_id = 1; |
| 100 | 102 | ||
| 103 | u32 swap_interval = 1; | ||
| 104 | |||
| 101 | /// Event that handles screen composition. | 105 | /// Event that handles screen composition. |
| 102 | Core::Timing::EventType* composition_event; | 106 | Core::Timing::EventType* composition_event; |
| 103 | 107 | ||
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 7eefd733f..2daa1ae49 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -236,7 +236,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) { | |||
| 236 | NIM::InstallInterfaces(*sm); | 236 | NIM::InstallInterfaces(*sm); |
| 237 | NPNS::InstallInterfaces(*sm); | 237 | NPNS::InstallInterfaces(*sm); |
| 238 | NS::InstallInterfaces(*sm); | 238 | NS::InstallInterfaces(*sm); |
| 239 | Nvidia::InstallInterfaces(*sm, *nv_flinger); | 239 | Nvidia::InstallInterfaces(*sm, *nv_flinger, system); |
| 240 | PCIe::InstallInterfaces(*sm); | 240 | PCIe::InstallInterfaces(*sm); |
| 241 | PCTL::InstallInterfaces(*sm); | 241 | PCTL::InstallInterfaces(*sm); |
| 242 | PCV::InstallInterfaces(*sm); | 242 | PCV::InstallInterfaces(*sm); |
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index f1fa6ccd1..199b30635 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/hle/kernel/readable_event.h" | 21 | #include "core/hle/kernel/readable_event.h" |
| 22 | #include "core/hle/kernel/thread.h" | 22 | #include "core/hle/kernel/thread.h" |
| 23 | #include "core/hle/kernel/writable_event.h" | 23 | #include "core/hle/kernel/writable_event.h" |
| 24 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 24 | #include "core/hle/service/nvdrv/nvdrv.h" | 25 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 25 | #include "core/hle/service/nvflinger/buffer_queue.h" | 26 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 26 | #include "core/hle/service/nvflinger/nvflinger.h" | 27 | #include "core/hle/service/nvflinger/nvflinger.h" |
| @@ -328,32 +329,22 @@ public: | |||
| 328 | Data data; | 329 | Data data; |
| 329 | }; | 330 | }; |
| 330 | 331 | ||
| 331 | struct BufferProducerFence { | ||
| 332 | u32 is_valid; | ||
| 333 | std::array<Nvidia::IoctlFence, 4> fences; | ||
| 334 | }; | ||
| 335 | static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size"); | ||
| 336 | |||
| 337 | class IGBPDequeueBufferResponseParcel : public Parcel { | 332 | class IGBPDequeueBufferResponseParcel : public Parcel { |
| 338 | public: | 333 | public: |
| 339 | explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {} | 334 | explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence) |
| 335 | : slot(slot), multi_fence(multi_fence) {} | ||
| 340 | ~IGBPDequeueBufferResponseParcel() override = default; | 336 | ~IGBPDequeueBufferResponseParcel() override = default; |
| 341 | 337 | ||
| 342 | protected: | 338 | protected: |
| 343 | void SerializeData() override { | 339 | void SerializeData() override { |
| 344 | // TODO(Subv): Find out how this Fence is used. | ||
| 345 | BufferProducerFence fence = {}; | ||
| 346 | fence.is_valid = 1; | ||
| 347 | for (auto& fence_ : fence.fences) | ||
| 348 | fence_.id = -1; | ||
| 349 | |||
| 350 | Write(slot); | 340 | Write(slot); |
| 351 | Write<u32_le>(1); | 341 | Write<u32_le>(1); |
| 352 | WriteObject(fence); | 342 | WriteObject(multi_fence); |
| 353 | Write<u32_le>(0); | 343 | Write<u32_le>(0); |
| 354 | } | 344 | } |
| 355 | 345 | ||
| 356 | u32_le slot; | 346 | u32_le slot; |
| 347 | Service::Nvidia::MultiFence multi_fence; | ||
| 357 | }; | 348 | }; |
| 358 | 349 | ||
| 359 | class IGBPRequestBufferRequestParcel : public Parcel { | 350 | class IGBPRequestBufferRequestParcel : public Parcel { |
| @@ -400,12 +391,6 @@ public: | |||
| 400 | data = Read<Data>(); | 391 | data = Read<Data>(); |
| 401 | } | 392 | } |
| 402 | 393 | ||
| 403 | struct Fence { | ||
| 404 | u32_le id; | ||
| 405 | u32_le value; | ||
| 406 | }; | ||
| 407 | static_assert(sizeof(Fence) == 8, "Fence has wrong size"); | ||
| 408 | |||
| 409 | struct Data { | 394 | struct Data { |
| 410 | u32_le slot; | 395 | u32_le slot; |
| 411 | INSERT_PADDING_WORDS(3); | 396 | INSERT_PADDING_WORDS(3); |
| @@ -418,15 +403,15 @@ public: | |||
| 418 | s32_le scaling_mode; | 403 | s32_le scaling_mode; |
| 419 | NVFlinger::BufferQueue::BufferTransformFlags transform; | 404 | NVFlinger::BufferQueue::BufferTransformFlags transform; |
| 420 | u32_le sticky_transform; | 405 | u32_le sticky_transform; |
| 421 | INSERT_PADDING_WORDS(2); | 406 | INSERT_PADDING_WORDS(1); |
| 422 | u32_le fence_is_valid; | 407 | u32_le swap_interval; |
| 423 | std::array<Fence, 2> fences; | 408 | Service::Nvidia::MultiFence multi_fence; |
| 424 | 409 | ||
| 425 | Common::Rectangle<int> GetCropRect() const { | 410 | Common::Rectangle<int> GetCropRect() const { |
| 426 | return {crop_left, crop_top, crop_right, crop_bottom}; | 411 | return {crop_left, crop_top, crop_right, crop_bottom}; |
| 427 | } | 412 | } |
| 428 | }; | 413 | }; |
| 429 | static_assert(sizeof(Data) == 80, "ParcelData has wrong size"); | 414 | static_assert(sizeof(Data) == 96, "ParcelData has wrong size"); |
| 430 | 415 | ||
| 431 | Data data; | 416 | Data data; |
| 432 | }; | 417 | }; |
| @@ -547,11 +532,11 @@ private: | |||
| 547 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; | 532 | IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 548 | const u32 width{request.data.width}; | 533 | const u32 width{request.data.width}; |
| 549 | const u32 height{request.data.height}; | 534 | const u32 height{request.data.height}; |
| 550 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 535 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 551 | 536 | ||
| 552 | if (slot) { | 537 | if (result) { |
| 553 | // Buffer is available | 538 | // Buffer is available |
| 554 | IGBPDequeueBufferResponseParcel response{*slot}; | 539 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 555 | ctx.WriteBuffer(response.Serialize()); | 540 | ctx.WriteBuffer(response.Serialize()); |
| 556 | } else { | 541 | } else { |
| 557 | // Wait the current thread until a buffer becomes available | 542 | // Wait the current thread until a buffer becomes available |
| @@ -561,10 +546,10 @@ private: | |||
| 561 | Kernel::ThreadWakeupReason reason) { | 546 | Kernel::ThreadWakeupReason reason) { |
| 562 | // Repeat TransactParcel DequeueBuffer when a buffer is available | 547 | // Repeat TransactParcel DequeueBuffer when a buffer is available |
| 563 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); | 548 | auto& buffer_queue = nv_flinger->FindBufferQueue(id); |
| 564 | std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); | 549 | auto result = buffer_queue.DequeueBuffer(width, height); |
| 565 | ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); | 550 | ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); |
| 566 | 551 | ||
| 567 | IGBPDequeueBufferResponseParcel response{*slot}; | 552 | IGBPDequeueBufferResponseParcel response{result->first, *result->second}; |
| 568 | ctx.WriteBuffer(response.Serialize()); | 553 | ctx.WriteBuffer(response.Serialize()); |
| 569 | IPC::ResponseBuilder rb{ctx, 2}; | 554 | IPC::ResponseBuilder rb{ctx, 2}; |
| 570 | rb.Push(RESULT_SUCCESS); | 555 | rb.Push(RESULT_SUCCESS); |
| @@ -582,7 +567,8 @@ private: | |||
| 582 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; | 567 | IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; |
| 583 | 568 | ||
| 584 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, | 569 | buffer_queue.QueueBuffer(request.data.slot, request.data.transform, |
| 585 | request.data.GetCropRect()); | 570 | request.data.GetCropRect(), request.data.swap_interval, |
| 571 | request.data.multi_fence); | ||
| 586 | 572 | ||
| 587 | IGBPQueueBufferResponseParcel response{1280, 720}; | 573 | IGBPQueueBufferResponseParcel response{1280, 720}; |
| 588 | ctx.WriteBuffer(response.Serialize()); | 574 | ctx.WriteBuffer(response.Serialize()); |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 74c46ec04..125c53360 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -525,8 +525,9 @@ void Maxwell3D::ProcessSyncPoint() { | |||
| 525 | const u32 sync_point = regs.sync_info.sync_point.Value(); | 525 | const u32 sync_point = regs.sync_info.sync_point.Value(); |
| 526 | const u32 increment = regs.sync_info.increment.Value(); | 526 | const u32 increment = regs.sync_info.increment.Value(); |
| 527 | const u32 cache_flush = regs.sync_info.unknown.Value(); | 527 | const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 528 | LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, | 528 | if (increment) { |
| 529 | cache_flush); | 529 | system.GPU().IncrementSyncPoint(sync_point); |
| 530 | } | ||
| 530 | } | 531 | } |
| 531 | 532 | ||
| 532 | void Maxwell3D::DrawArrays() { | 533 | void Maxwell3D::DrawArrays() { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 21007d8b2..1622332a4 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -29,7 +29,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 29 | UNREACHABLE(); | 29 | UNREACHABLE(); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { | 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 33 | : system{system}, renderer{renderer}, is_async{is_async} { | ||
| 33 | auto& rasterizer{renderer.Rasterizer()}; | 34 | auto& rasterizer{renderer.Rasterizer()}; |
| 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); | 35 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 36 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| @@ -74,6 +75,51 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 74 | return *dma_pusher; | 75 | return *dma_pusher; |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 78 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||
| 79 | syncpoints[syncpoint_id]++; | ||
| 80 | std::lock_guard lock{sync_mutex}; | ||
| 81 | if (!syncpt_interrupts[syncpoint_id].empty()) { | ||
| 82 | u32 value = syncpoints[syncpoint_id].load(); | ||
| 83 | auto it = syncpt_interrupts[syncpoint_id].begin(); | ||
| 84 | while (it != syncpt_interrupts[syncpoint_id].end()) { | ||
| 85 | if (value >= *it) { | ||
| 86 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 87 | it = syncpt_interrupts[syncpoint_id].erase(it); | ||
| 88 | continue; | ||
| 89 | } | ||
| 90 | it++; | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | ||
| 96 | return syncpoints[syncpoint_id].load(); | ||
| 97 | } | ||
| 98 | |||
| 99 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 100 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 101 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 102 | [value](u32 in_value) { return in_value == value; }); | ||
| 103 | if (contains) { | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | syncpt_interrupts[syncpoint_id].emplace_back(value); | ||
| 107 | } | ||
| 108 | |||
| 109 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | ||
| 110 | std::lock_guard lock{sync_mutex}; | ||
| 111 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | ||
| 112 | const auto iter = | ||
| 113 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 114 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 115 | |||
| 116 | if (iter == interrupt.end()) { | ||
| 117 | return false; | ||
| 118 | } | ||
| 119 | interrupt.erase(iter); | ||
| 120 | return true; | ||
| 121 | } | ||
| 122 | |||
| 77 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | 123 | u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { |
| 78 | ASSERT(format != RenderTargetFormat::NONE); | 124 | ASSERT(format != RenderTargetFormat::NONE); |
| 79 | 125 | ||
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 0055e5326..87c96f46b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -5,8 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <list> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | ||
| 9 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 10 | #include "core/hle/service/nvflinger/buffer_queue.h" | 14 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 11 | #include "video_core/dma_pusher.h" | 15 | #include "video_core/dma_pusher.h" |
| 12 | 16 | ||
| @@ -127,7 +131,7 @@ class MemoryManager; | |||
| 127 | 131 | ||
| 128 | class GPU { | 132 | class GPU { |
| 129 | public: | 133 | public: |
| 130 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); | 134 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); |
| 131 | 135 | ||
| 132 | virtual ~GPU(); | 136 | virtual ~GPU(); |
| 133 | 137 | ||
| @@ -170,6 +174,22 @@ public: | |||
| 170 | /// Returns a reference to the GPU DMA pusher. | 174 | /// Returns a reference to the GPU DMA pusher. |
| 171 | Tegra::DmaPusher& DmaPusher(); | 175 | Tegra::DmaPusher& DmaPusher(); |
| 172 | 176 | ||
| 177 | void IncrementSyncPoint(u32 syncpoint_id); | ||
| 178 | |||
| 179 | u32 GetSyncpointValue(u32 syncpoint_id) const; | ||
| 180 | |||
| 181 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 182 | |||
| 183 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 184 | |||
| 185 | std::unique_lock<std::mutex> LockSync() { | ||
| 186 | return std::unique_lock{sync_mutex}; | ||
| 187 | } | ||
| 188 | |||
| 189 | bool IsAsync() const { | ||
| 190 | return is_async; | ||
| 191 | } | ||
| 192 | |||
| 173 | /// Returns a const reference to the GPU DMA pusher. | 193 | /// Returns a const reference to the GPU DMA pusher. |
| 174 | const Tegra::DmaPusher& DmaPusher() const; | 194 | const Tegra::DmaPusher& DmaPusher() const; |
| 175 | 195 | ||
| @@ -239,6 +259,9 @@ public: | |||
| 239 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 259 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 240 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 260 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 241 | 261 | ||
| 262 | protected: | ||
| 263 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; | ||
| 264 | |||
| 242 | private: | 265 | private: |
| 243 | void ProcessBindMethod(const MethodCall& method_call); | 266 | void ProcessBindMethod(const MethodCall& method_call); |
| 244 | void ProcessSemaphoreTriggerMethod(); | 267 | void ProcessSemaphoreTriggerMethod(); |
| @@ -257,6 +280,7 @@ private: | |||
| 257 | protected: | 280 | protected: |
| 258 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 281 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 259 | VideoCore::RendererBase& renderer; | 282 | VideoCore::RendererBase& renderer; |
| 283 | Core::System& system; | ||
| 260 | 284 | ||
| 261 | private: | 285 | private: |
| 262 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 286 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| @@ -273,6 +297,14 @@ private: | |||
| 273 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 297 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 274 | /// Inline memory engine | 298 | /// Inline memory engine |
| 275 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 299 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 300 | |||
| 301 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 302 | |||
| 303 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 304 | |||
| 305 | std::mutex sync_mutex; | ||
| 306 | |||
| 307 | const bool is_async; | ||
| 276 | }; | 308 | }; |
| 277 | 309 | ||
| 278 | #define ASSERT_REG_POSITION(field_name, position) \ | 310 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index d4e2553a9..ea67be831 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | ||
| 6 | #include "core/hardware_interrupt_manager.h" | ||
| 5 | #include "video_core/gpu_asynch.h" | 7 | #include "video_core/gpu_asynch.h" |
| 6 | #include "video_core/gpu_thread.h" | 8 | #include "video_core/gpu_thread.h" |
| 7 | #include "video_core/renderer_base.h" | 9 | #include "video_core/renderer_base.h" |
| @@ -9,7 +11,7 @@ | |||
| 9 | namespace VideoCommon { | 11 | namespace VideoCommon { |
| 10 | 12 | ||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | 13 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 12 | : GPU(system, renderer), gpu_thread{system} {} | 14 | : GPU(system, renderer, true), gpu_thread{system} {} |
| 13 | 15 | ||
| 14 | GPUAsynch::~GPUAsynch() = default; | 16 | GPUAsynch::~GPUAsynch() = default; |
| 15 | 17 | ||
| @@ -38,4 +40,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 38 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 40 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 39 | } | 41 | } |
| 40 | 42 | ||
| 43 | void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 44 | auto& interrupt_manager = system.InterruptManager(); | ||
| 45 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 46 | } | ||
| 47 | |||
| 41 | } // namespace VideoCommon | 48 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 30be74cba..36377d677 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -27,6 +27,9 @@ public: | |||
| 27 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 28 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 29 | 29 | ||
| 30 | protected: | ||
| 31 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||
| 32 | |||
| 30 | private: | 33 | private: |
| 31 | GPUThread::ThreadManager gpu_thread; | 34 | GPUThread::ThreadManager gpu_thread; |
| 32 | }; | 35 | }; |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 45e43b1dc..d4ead9c47 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | namespace VideoCommon { | 8 | namespace VideoCommon { |
| 9 | 9 | ||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 11 | : GPU(system, renderer) {} | 11 | : GPU(system, renderer, false) {} |
| 12 | 12 | ||
| 13 | GPUSynch::~GPUSynch() = default; | 13 | GPUSynch::~GPUSynch() = default; |
| 14 | 14 | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 3031fcf72..07bcc47f1 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -25,6 +25,10 @@ public: | |||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | |||
| 29 | protected: | ||
| 30 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | ||
| 31 | [[maybe_unused]] u32 value) const override {} | ||
| 28 | }; | 32 | }; |
| 29 | 33 | ||
| 30 | } // namespace VideoCommon | 34 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3f0939ec9..b441e92b0 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 21 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 22 | 22 | ||
| 23 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| 24 | state.WaitForCommands(); | 24 | while (state.queue.Empty()) |
| 25 | ; | ||
| 25 | 26 | ||
| 26 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 27 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 27 | if (!state.is_running) { | 28 | if (!state.is_running) { |
| @@ -32,7 +33,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 32 | 33 | ||
| 33 | CommandDataContainer next; | 34 | CommandDataContainer next; |
| 34 | while (state.is_running) { | 35 | while (state.is_running) { |
| 35 | state.WaitForCommands(); | ||
| 36 | while (!state.queue.Empty()) { | 36 | while (!state.queue.Empty()) { |
| 37 | state.queue.Pop(next); | 37 | state.queue.Pop(next); |
| 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| @@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 49 | } else { | 49 | } else { |
| 50 | UNREACHABLE(); | 50 | UNREACHABLE(); |
| 51 | } | 51 | } |
| 52 | state.signaled_fence = next.fence; | 52 | state.signaled_fence.store(next.fence); |
| 53 | state.TrySynchronize(); | ||
| 54 | } | 53 | } |
| 55 | } | 54 | } |
| 56 | } | 55 | } |
| @@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | |||
| 89 | } | 88 | } |
| 90 | 89 | ||
| 91 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { | 90 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { |
| 92 | if (state.queue.Empty()) { | 91 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); |
| 93 | // It's quicker to invalidate a single region on the CPU if the queue is already empty | ||
| 94 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | ||
| 95 | } else { | ||
| 96 | PushCommand(InvalidateRegionCommand(addr, size)); | ||
| 97 | } | ||
| 98 | } | 92 | } |
| 99 | 93 | ||
| 100 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 94 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 105 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 106 | const u64 fence{++state.last_fence}; | 100 | const u64 fence{++state.last_fence}; |
| 107 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 108 | state.SignalCommands(); | ||
| 109 | return fence; | 102 | return fence; |
| 110 | } | 103 | } |
| 111 | 104 | ||
| 112 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 113 | void SynchState::WaitForSynchronization(u64 fence) { | 106 | void SynchState::WaitForSynchronization(u64 fence) { |
| 114 | if (signaled_fence >= fence) { | 107 | while (signaled_fence.load() < fence) |
| 115 | return; | 108 | ; |
| 116 | } | ||
| 117 | |||
| 118 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 119 | { | ||
| 120 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 121 | std::unique_lock lock{synchronization_mutex}; | ||
| 122 | synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); | ||
| 123 | } | ||
| 124 | } | 109 | } |
| 125 | 110 | ||
| 126 | } // namespace VideoCommon::GPUThread | 111 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 05a168a72..1d9d0c39e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -88,41 +88,9 @@ struct CommandDataContainer { | |||
| 88 | /// Struct used to synchronize the GPU thread | 88 | /// Struct used to synchronize the GPU thread |
| 89 | struct SynchState final { | 89 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 90 | std::atomic_bool is_running{true}; |
| 91 | std::atomic_int queued_frame_count{}; | ||
| 92 | std::mutex synchronization_mutex; | ||
| 93 | std::mutex commands_mutex; | ||
| 94 | std::condition_variable commands_condition; | ||
| 95 | std::condition_variable synchronization_condition; | ||
| 96 | |||
| 97 | /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU | ||
| 98 | /// synchronized. This is entirely empirical. | ||
| 99 | bool IsSynchronized() const { | ||
| 100 | constexpr std::size_t max_queue_gap{5}; | ||
| 101 | return queue.Size() <= max_queue_gap; | ||
| 102 | } | ||
| 103 | |||
| 104 | void TrySynchronize() { | ||
| 105 | if (IsSynchronized()) { | ||
| 106 | std::lock_guard lock{synchronization_mutex}; | ||
| 107 | synchronization_condition.notify_one(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | 91 | ||
| 111 | void WaitForSynchronization(u64 fence); | 92 | void WaitForSynchronization(u64 fence); |
| 112 | 93 | ||
| 113 | void SignalCommands() { | ||
| 114 | if (queue.Empty()) { | ||
| 115 | return; | ||
| 116 | } | ||
| 117 | |||
| 118 | commands_condition.notify_one(); | ||
| 119 | } | ||
| 120 | |||
| 121 | void WaitForCommands() { | ||
| 122 | std::unique_lock lock{commands_mutex}; | ||
| 123 | commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||
| 124 | } | ||
| 125 | |||
| 126 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 127 | CommandQueue queue; | 95 | CommandQueue queue; |
| 128 | u64 last_fence{}; | 96 | u64 last_fence{}; |