diff options
| -rw-r--r-- | src/core/core.cpp | 12 | ||||
| -rw-r--r-- | src/core/core.h | 9 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 19 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 4 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 10 | ||||
| -rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 40 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.h | 15 | ||||
| -rw-r--r-- | src/video_core/control/channel_state.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/control/channel_state.h | 2 | ||||
| -rw-r--r-- | src/video_core/control/channel_state_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/control/scheduler.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/control/scheduler.h | 2 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 26 | ||||
| -rw-r--r-- | src/video_core/engines/puller.cpp | 65 | ||||
| -rw-r--r-- | src/video_core/engines/puller.h | 1 | ||||
| -rw-r--r-- | src/video_core/fence_manager.h | 12 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 197 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 19 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 2 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/codec.cpp (renamed from src/video_core/command_classes/codecs/codec.cpp) | 36 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/codec.h (renamed from src/video_core/command_classes/codecs/codec.h) | 14 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/h264.cpp (renamed from src/video_core/command_classes/codecs/h264.cpp) | 4 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/h264.h (renamed from src/video_core/command_classes/codecs/h264.h) | 6 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/vp8.cpp (renamed from src/video_core/command_classes/codecs/vp8.cpp) | 4 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/vp8.h (renamed from src/video_core/command_classes/codecs/vp8.h) | 5 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/vp9.cpp (renamed from src/video_core/command_classes/codecs/vp9.cpp) | 8 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/vp9.h (renamed from src/video_core/command_classes/codecs/vp9.h) | 12 | ||||
| -rw-r--r-- | src/video_core/host1x/codecs/vp9_types.h (renamed from src/video_core/command_classes/codecs/vp9_types.h) | 0 | ||||
| -rw-r--r-- | src/video_core/host1x/control.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/host1x/control.h (renamed from src/video_core/command_classes/host1x.h) | 17 | ||||
| -rw-r--r-- | src/video_core/host1x/host1x.h | 33 | ||||
| -rw-r--r-- | src/video_core/host1x/nvdec.cpp (renamed from src/video_core/command_classes/nvdec.cpp) | 6 | ||||
| -rw-r--r-- | src/video_core/host1x/nvdec.h (renamed from src/video_core/command_classes/nvdec.h) | 7 | ||||
| -rw-r--r-- | src/video_core/host1x/nvdec_common.h (renamed from src/video_core/command_classes/nvdec_common.h) | 4 | ||||
| -rw-r--r-- | src/video_core/host1x/sync_manager.cpp (renamed from src/video_core/command_classes/sync_manager.cpp) | 10 | ||||
| -rw-r--r-- | src/video_core/host1x/sync_manager.h (renamed from src/video_core/command_classes/sync_manager.h) | 6 | ||||
| -rw-r--r-- | src/video_core/host1x/syncpoint_manager.cpp | 93 | ||||
| -rw-r--r-- | src/video_core/host1x/syncpoint_manager.h | 99 | ||||
| -rw-r--r-- | src/video_core/host1x/vic.cpp (renamed from src/video_core/command_classes/vic.cpp) | 9 | ||||
| -rw-r--r-- | src/video_core/host1x/vic.h (renamed from src/video_core/command_classes/vic.h) | 7 |
44 files changed, 648 insertions, 252 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index 121092868..fa059a394 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include "core/telemetry_session.h" | 51 | #include "core/telemetry_session.h" |
| 52 | #include "core/tools/freezer.h" | 52 | #include "core/tools/freezer.h" |
| 53 | #include "network/network.h" | 53 | #include "network/network.h" |
| 54 | #include "video_core/host1x/host1x.h" | ||
| 54 | #include "video_core/renderer_base.h" | 55 | #include "video_core/renderer_base.h" |
| 55 | #include "video_core/video_core.h" | 56 | #include "video_core/video_core.h" |
| 56 | 57 | ||
| @@ -215,6 +216,7 @@ struct System::Impl { | |||
| 215 | 216 | ||
| 216 | telemetry_session = std::make_unique<Core::TelemetrySession>(); | 217 | telemetry_session = std::make_unique<Core::TelemetrySession>(); |
| 217 | 218 | ||
| 219 | host1x_core = std::make_unique<Tegra::Host1x::Host1x>(); | ||
| 218 | gpu_core = VideoCore::CreateGPU(emu_window, system); | 220 | gpu_core = VideoCore::CreateGPU(emu_window, system); |
| 219 | if (!gpu_core) { | 221 | if (!gpu_core) { |
| 220 | return SystemResultStatus::ErrorVideoCore; | 222 | return SystemResultStatus::ErrorVideoCore; |
| @@ -373,6 +375,7 @@ struct System::Impl { | |||
| 373 | app_loader.reset(); | 375 | app_loader.reset(); |
| 374 | audio_core.reset(); | 376 | audio_core.reset(); |
| 375 | gpu_core.reset(); | 377 | gpu_core.reset(); |
| 378 | host1x_core.reset(); | ||
| 376 | perf_stats.reset(); | 379 | perf_stats.reset(); |
| 377 | kernel.Shutdown(); | 380 | kernel.Shutdown(); |
| 378 | memory.Reset(); | 381 | memory.Reset(); |
| @@ -450,6 +453,7 @@ struct System::Impl { | |||
| 450 | /// AppLoader used to load the current executing application | 453 | /// AppLoader used to load the current executing application |
| 451 | std::unique_ptr<Loader::AppLoader> app_loader; | 454 | std::unique_ptr<Loader::AppLoader> app_loader; |
| 452 | std::unique_ptr<Tegra::GPU> gpu_core; | 455 | std::unique_ptr<Tegra::GPU> gpu_core; |
| 456 | std::unique_ptr<Tegra::Host1x::Host1x> host1x_core; | ||
| 453 | std::unique_ptr<Hardware::InterruptManager> interrupt_manager; | 457 | std::unique_ptr<Hardware::InterruptManager> interrupt_manager; |
| 454 | std::unique_ptr<Core::DeviceMemory> device_memory; | 458 | std::unique_ptr<Core::DeviceMemory> device_memory; |
| 455 | std::unique_ptr<AudioCore::AudioCore> audio_core; | 459 | std::unique_ptr<AudioCore::AudioCore> audio_core; |
| @@ -668,6 +672,14 @@ const Tegra::GPU& System::GPU() const { | |||
| 668 | return *impl->gpu_core; | 672 | return *impl->gpu_core; |
| 669 | } | 673 | } |
| 670 | 674 | ||
| 675 | Tegra::Host1x::Host1x& System::Host1x() { | ||
| 676 | return *impl->host1x_core; | ||
| 677 | } | ||
| 678 | |||
| 679 | const Tegra::Host1x::Host1x& System::Host1x() const { | ||
| 680 | return *impl->host1x_core; | ||
| 681 | } | ||
| 682 | |||
| 671 | Core::Hardware::InterruptManager& System::InterruptManager() { | 683 | Core::Hardware::InterruptManager& System::InterruptManager() { |
| 672 | return *impl->interrupt_manager; | 684 | return *impl->interrupt_manager; |
| 673 | } | 685 | } |
diff --git a/src/core/core.h b/src/core/core.h index 0ce3b1d60..e4168a921 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -74,6 +74,9 @@ class TimeManager; | |||
| 74 | namespace Tegra { | 74 | namespace Tegra { |
| 75 | class DebugContext; | 75 | class DebugContext; |
| 76 | class GPU; | 76 | class GPU; |
| 77 | namespace Host1x { | ||
| 78 | class Host1x; | ||
| 79 | } // namespace Host1x | ||
| 77 | } // namespace Tegra | 80 | } // namespace Tegra |
| 78 | 81 | ||
| 79 | namespace VideoCore { | 82 | namespace VideoCore { |
| @@ -260,6 +263,12 @@ public: | |||
| 260 | /// Gets an immutable reference to the GPU interface. | 263 | /// Gets an immutable reference to the GPU interface. |
| 261 | [[nodiscard]] const Tegra::GPU& GPU() const; | 264 | [[nodiscard]] const Tegra::GPU& GPU() const; |
| 262 | 265 | ||
| 266 | /// Gets a mutable reference to the Host1x interface | ||
| 267 | [[nodiscard]] Tegra::Host1x::Host1x& Host1x(); | ||
| 268 | |||
| 269 | /// Gets an immutable reference to the Host1x interface. | ||
| 270 | [[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const; | ||
| 271 | |||
| 263 | /// Gets a mutable reference to the renderer. | 272 | /// Gets a mutable reference to the renderer. |
| 264 | [[nodiscard]] VideoCore::RendererBase& Renderer(); | 273 | [[nodiscard]] VideoCore::RendererBase& Renderer(); |
| 265 | 274 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index b1c0e9eb2..e6a976714 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -50,7 +50,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
| 50 | stride, format, transform, crop_rect}; | 50 | stride, format, transform, crop_rect}; |
| 51 | 51 | ||
| 52 | system.GetPerfStats().EndSystemFrame(); | 52 | system.GetPerfStats().EndSystemFrame(); |
| 53 | system.GPU().SwapBuffers(&framebuffer); | 53 | system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0); |
| 54 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); | 54 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); |
| 55 | system.GetPerfStats().BeginSystemFrame(); | 55 | system.GetPerfStats().BeginSystemFrame(); |
| 56 | } | 56 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 54074af75..ffe42d423 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 18 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| 19 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" | 19 | #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" |
| 20 | #include "video_core/gpu.h" | 20 | #include "video_core/gpu.h" |
| 21 | #include "video_core/host1x/host1x.h" | ||
| 21 | 22 | ||
| 22 | namespace Service::Nvidia::Devices { | 23 | namespace Service::Nvidia::Devices { |
| 23 | 24 | ||
| @@ -129,7 +130,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector | |||
| 129 | return NvResult::Success; | 130 | return NvResult::Success; |
| 130 | } | 131 | } |
| 131 | 132 | ||
| 132 | auto& gpu = system.GPU(); | 133 | auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager(); |
| 133 | const u32 target_value = params.fence.value; | 134 | const u32 target_value = params.fence.value; |
| 134 | 135 | ||
| 135 | auto lock = NvEventsLock(); | 136 | auto lock = NvEventsLock(); |
| @@ -149,7 +150,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector | |||
| 149 | if (events[slot].fails > 2) { | 150 | if (events[slot].fails > 2) { |
| 150 | { | 151 | { |
| 151 | auto lk = system.StallProcesses(); | 152 | auto lk = system.StallProcesses(); |
| 152 | gpu.WaitFence(fence_id, target_value); | 153 | host1x_syncpoint_manager.WaitHost(fence_id, target_value); |
| 153 | system.UnstallProcesses(); | 154 | system.UnstallProcesses(); |
| 154 | } | 155 | } |
| 155 | params.value.raw = target_value; | 156 | params.value.raw = target_value; |
| @@ -198,7 +199,15 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector | |||
| 198 | } | 199 | } |
| 199 | params.value.raw |= slot; | 200 | params.value.raw |= slot; |
| 200 | 201 | ||
| 201 | gpu.RegisterSyncptInterrupt(fence_id, target_value); | 202 | event.wait_handle = |
| 203 | host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() { | ||
| 204 | auto& event = events[slot]; | ||
| 205 | if (event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) == | ||
| 206 | EventState::Waiting) { | ||
| 207 | event.kevent->GetWritableEvent().Signal(); | ||
| 208 | } | ||
| 209 | event.status.store(EventState::Signalled, std::memory_order_release); | ||
| 210 | }); | ||
| 202 | return NvResult::Timeout; | 211 | return NvResult::Timeout; |
| 203 | } | 212 | } |
| 204 | 213 | ||
| @@ -288,8 +297,10 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::v | |||
| 288 | auto& event = events[event_id]; | 297 | auto& event = events[event_id]; |
| 289 | if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) == | 298 | if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) == |
| 290 | EventState::Waiting) { | 299 | EventState::Waiting) { |
| 291 | system.GPU().CancelSyncptInterrupt(event.assigned_syncpt, event.assigned_value); | 300 | auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager(); |
| 301 | host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle); | ||
| 292 | syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt); | 302 | syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt); |
| 303 | event.wait_handle = {}; | ||
| 293 | } | 304 | } |
| 294 | event.fails++; | 305 | event.fails++; |
| 295 | event.status.store(EventState::Cancelled, std::memory_order_release); | 306 | event.status.store(EventState::Cancelled, std::memory_order_release); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index d56aea405..136a1e925 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "core/hle/service/nvdrv/nvdrv.h" | 13 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 14 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 14 | 15 | ||
| 15 | namespace Service::Nvidia::NvCore { | 16 | namespace Service::Nvidia::NvCore { |
| 16 | class Container; | 17 | class Container; |
| @@ -78,6 +79,9 @@ private: | |||
| 78 | // Tells if an NVEvent is registered or not | 79 | // Tells if an NVEvent is registered or not |
| 79 | bool registered{}; | 80 | bool registered{}; |
| 80 | 81 | ||
| 82 | // Used for waiting on a syncpoint & canceling it. | ||
| 83 | Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{}; | ||
| 84 | |||
| 81 | bool IsBeingUsed() { | 85 | bool IsBeingUsed() { |
| 82 | const auto current_status = status.load(std::memory_order_acquire); | 86 | const auto current_status = status.load(std::memory_order_acquire); |
| 83 | return current_status == EventState::Waiting || | 87 | return current_status == EventState::Waiting || |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 38d45cb79..db3e266ad 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -210,10 +210,10 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve | |||
| 210 | 210 | ||
| 211 | static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { | 211 | static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { |
| 212 | return { | 212 | return { |
| 213 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | 213 | Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, |
| 214 | Tegra::SubmissionMode::Increasing), | 214 | Tegra::SubmissionMode::Increasing), |
| 215 | {fence.value}, | 215 | {fence.value}, |
| 216 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | 216 | Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, |
| 217 | Tegra::SubmissionMode::Increasing), | 217 | Tegra::SubmissionMode::Increasing), |
| 218 | BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id), | 218 | BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id), |
| 219 | }; | 219 | }; |
| @@ -222,12 +222,12 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { | |||
| 222 | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence, | 222 | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence, |
| 223 | u32 add_increment) { | 223 | u32 add_increment) { |
| 224 | std::vector<Tegra::CommandHeader> result{ | 224 | std::vector<Tegra::CommandHeader> result{ |
| 225 | Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | 225 | Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, |
| 226 | Tegra::SubmissionMode::Increasing), | 226 | Tegra::SubmissionMode::Increasing), |
| 227 | {}}; | 227 | {}}; |
| 228 | 228 | ||
| 229 | for (u32 count = 0; count < add_increment; ++count) { | 229 | for (u32 count = 0; count < add_increment; ++count) { |
| 230 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | 230 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, |
| 231 | Tegra::SubmissionMode::Increasing)); | 231 | Tegra::SubmissionMode::Increasing)); |
| 232 | result.emplace_back( | 232 | result.emplace_back( |
| 233 | BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); | 233 | BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); |
| @@ -239,7 +239,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence | |||
| 239 | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence, | 239 | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence, |
| 240 | u32 add_increment) { | 240 | u32 add_increment) { |
| 241 | std::vector<Tegra::CommandHeader> result{ | 241 | std::vector<Tegra::CommandHeader> result{ |
| 242 | Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, | 242 | Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, |
| 243 | Tegra::SubmissionMode::Increasing), | 243 | Tegra::SubmissionMode::Increasing), |
| 244 | {}}; | 244 | {}}; |
| 245 | const std::vector<Tegra::CommandHeader> increment{ | 245 | const std::vector<Tegra::CommandHeader> increment{ |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 8c3013f83..aa112021d 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -24,6 +24,8 @@ | |||
| 24 | #include "core/hle/service/vi/layer/vi_layer.h" | 24 | #include "core/hle/service/vi/layer/vi_layer.h" |
| 25 | #include "core/hle/service/vi/vi_results.h" | 25 | #include "core/hle/service/vi/vi_results.h" |
| 26 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 27 | #include "video_core/host1x/host1x.h" | ||
| 28 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 27 | 29 | ||
| 28 | namespace Service::NVFlinger { | 30 | namespace Service::NVFlinger { |
| 29 | 31 | ||
| @@ -267,12 +269,12 @@ void NVFlinger::Compose() { | |||
| 267 | return; // We are likely shutting down | 269 | return; // We are likely shutting down |
| 268 | } | 270 | } |
| 269 | 271 | ||
| 270 | auto& gpu = system.GPU(); | 272 | auto& syncpoint_manager = system.Host1x().GetSyncpointManager(); |
| 271 | const auto& multi_fence = buffer.fence; | 273 | const auto& multi_fence = buffer.fence; |
| 272 | guard->unlock(); | 274 | guard->unlock(); |
| 273 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | 275 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { |
| 274 | const auto& fence = multi_fence.fences[fence_id]; | 276 | const auto& fence = multi_fence.fences[fence_id]; |
| 275 | gpu.WaitFence(fence.id, fence.value); | 277 | syncpoint_manager.WaitGuest(fence.id, fence.value); |
| 276 | } | 278 | } |
| 277 | guard->lock(); | 279 | guard->lock(); |
| 278 | 280 | ||
| @@ -284,6 +286,7 @@ void NVFlinger::Compose() { | |||
| 284 | auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); | 286 | auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); |
| 285 | ASSERT(nvdisp); | 287 | ASSERT(nvdisp); |
| 286 | 288 | ||
| 289 | guard->unlock(); | ||
| 287 | Common::Rectangle<int> crop_rect{ | 290 | Common::Rectangle<int> crop_rect{ |
| 288 | static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), | 291 | static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), |
| 289 | static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; | 292 | static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; |
| @@ -292,6 +295,8 @@ void NVFlinger::Compose() { | |||
| 292 | igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), | 295 | igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), |
| 293 | static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); | 296 | static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); |
| 294 | 297 | ||
| 298 | guard->lock(); | ||
| 299 | |||
| 295 | swap_interval = buffer.swap_interval; | 300 | swap_interval = buffer.swap_interval; |
| 296 | 301 | ||
| 297 | auto fence = android::Fence::NoFence(); | 302 | auto fence = android::Fence::NoFence(); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 35faa70a0..723f9b67c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | add_subdirectory(host_shaders) | 4 | add_subdirectory(host_shaders) |
| 5 | 5 | ||
| 6 | if(LIBVA_FOUND) | 6 | if(LIBVA_FOUND) |
| 7 | set_source_files_properties(command_classes/codecs/codec.cpp | 7 | set_source_files_properties(host1x/codecs/codec.cpp |
| 8 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) | 8 | PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) |
| 9 | list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) | 9 | list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) |
| 10 | endif() | 10 | endif() |
| @@ -15,24 +15,6 @@ add_library(video_core STATIC | |||
| 15 | buffer_cache/buffer_cache.h | 15 | buffer_cache/buffer_cache.h |
| 16 | cdma_pusher.cpp | 16 | cdma_pusher.cpp |
| 17 | cdma_pusher.h | 17 | cdma_pusher.h |
| 18 | command_classes/codecs/codec.cpp | ||
| 19 | command_classes/codecs/codec.h | ||
| 20 | command_classes/codecs/h264.cpp | ||
| 21 | command_classes/codecs/h264.h | ||
| 22 | command_classes/codecs/vp8.cpp | ||
| 23 | command_classes/codecs/vp8.h | ||
| 24 | command_classes/codecs/vp9.cpp | ||
| 25 | command_classes/codecs/vp9.h | ||
| 26 | command_classes/codecs/vp9_types.h | ||
| 27 | command_classes/host1x.cpp | ||
| 28 | command_classes/host1x.h | ||
| 29 | command_classes/nvdec.cpp | ||
| 30 | command_classes/nvdec.h | ||
| 31 | command_classes/nvdec_common.h | ||
| 32 | command_classes/sync_manager.cpp | ||
| 33 | command_classes/sync_manager.h | ||
| 34 | command_classes/vic.cpp | ||
| 35 | command_classes/vic.h | ||
| 36 | compatible_formats.cpp | 18 | compatible_formats.cpp |
| 37 | compatible_formats.h | 19 | compatible_formats.h |
| 38 | control/channel_state.cpp | 20 | control/channel_state.cpp |
| @@ -63,6 +45,26 @@ add_library(video_core STATIC | |||
| 63 | engines/puller.cpp | 45 | engines/puller.cpp |
| 64 | engines/puller.h | 46 | engines/puller.h |
| 65 | framebuffer_config.h | 47 | framebuffer_config.h |
| 48 | host1x/codecs/codec.cpp | ||
| 49 | host1x/codecs/codec.h | ||
| 50 | host1x/codecs/h264.cpp | ||
| 51 | host1x/codecs/h264.h | ||
| 52 | host1x/codecs/vp8.cpp | ||
| 53 | host1x/codecs/vp8.h | ||
| 54 | host1x/codecs/vp9.cpp | ||
| 55 | host1x/codecs/vp9.h | ||
| 56 | host1x/codecs/vp9_types.h | ||
| 57 | host1x/control.cpp | ||
| 58 | host1x/control.h | ||
| 59 | host1x/nvdec.cpp | ||
| 60 | host1x/nvdec.h | ||
| 61 | host1x/nvdec_common.h | ||
| 62 | host1x/sync_manager.cpp | ||
| 63 | host1x/sync_manager.h | ||
| 64 | host1x/syncpoint_manager.cpp | ||
| 65 | host1x/syncpoint_manager.h | ||
| 66 | host1x/vic.cpp | ||
| 67 | host1x/vic.h | ||
| 66 | macro/macro.cpp | 68 | macro/macro.cpp |
| 67 | macro/macro.h | 69 | macro/macro.h |
| 68 | macro/macro_hle.cpp | 70 | macro/macro_hle.cpp |
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 8e890a85e..148126347 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -2,20 +2,22 @@ | |||
| 2 | // SPDX-License-Identifier: MIT | 2 | // SPDX-License-Identifier: MIT |
| 3 | 3 | ||
| 4 | #include <bit> | 4 | #include <bit> |
| 5 | #include "command_classes/host1x.h" | ||
| 6 | #include "command_classes/nvdec.h" | ||
| 7 | #include "command_classes/vic.h" | ||
| 8 | #include "video_core/cdma_pusher.h" | 5 | #include "video_core/cdma_pusher.h" |
| 9 | #include "video_core/command_classes/sync_manager.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | 6 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/gpu.h" | 7 | #include "video_core/gpu.h" |
| 8 | #include "video_core/host1x/control.h" | ||
| 9 | #include "video_core/host1x/nvdec.h" | ||
| 10 | #include "video_core/host1x/nvdec_common.h" | ||
| 11 | #include "video_core/host1x/sync_manager.h" | ||
| 12 | #include "video_core/host1x/vic.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 12 | 14 | ||
| 13 | namespace Tegra { | 15 | namespace Tegra { |
| 14 | CDmaPusher::CDmaPusher(GPU& gpu_) | 16 | CDmaPusher::CDmaPusher(GPU& gpu_) |
| 15 | : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), | 17 | : gpu{gpu_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(gpu)), |
| 16 | vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), | 18 | vic_processor(std::make_unique<Host1x::Vic>(gpu, nvdec_processor)), |
| 17 | host1x_processor(std::make_unique<Host1x>(gpu)), | 19 | host1x_processor(std::make_unique<Host1x::Control>(gpu)), |
| 18 | sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {} | 20 | sync_manager(std::make_unique<Host1x::SyncptIncrManager>(gpu)) {} |
| 19 | 21 | ||
| 20 | CDmaPusher::~CDmaPusher() = default; | 22 | CDmaPusher::~CDmaPusher() = default; |
| 21 | 23 | ||
| @@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { | |||
| 109 | case ThiMethod::SetMethod1: | 111 | case ThiMethod::SetMethod1: |
| 110 | LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", | 112 | LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", |
| 111 | static_cast<u32>(vic_thi_state.method_0), data); | 113 | static_cast<u32>(vic_thi_state.method_0), data); |
| 112 | vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data); | 114 | vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0), |
| 115 | data); | ||
| 113 | break; | 116 | break; |
| 114 | default: | 117 | default: |
| 115 | break; | 118 | break; |
| 116 | } | 119 | } |
| 117 | break; | 120 | break; |
| 118 | case ChClassId::Host1x: | 121 | case ChClassId::Control: |
| 119 | // This device is mainly for syncpoint synchronization | 122 | // This device is mainly for syncpoint synchronization |
| 120 | LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); | 123 | LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); |
| 121 | host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data); | 124 | host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data); |
| 122 | break; | 125 | break; |
| 123 | default: | 126 | default: |
| 124 | UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); | 127 | UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); |
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index d6ffef95f..de17c2082 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -13,10 +13,13 @@ | |||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | 14 | ||
| 15 | class GPU; | 15 | class GPU; |
| 16 | class Host1x; | 16 | |
| 17 | namespace Host1x { | ||
| 18 | class Control; | ||
| 17 | class Nvdec; | 19 | class Nvdec; |
| 18 | class SyncptIncrManager; | 20 | class SyncptIncrManager; |
| 19 | class Vic; | 21 | class Vic; |
| 22 | } // namespace Host1x | ||
| 20 | 23 | ||
| 21 | enum class ChSubmissionMode : u32 { | 24 | enum class ChSubmissionMode : u32 { |
| 22 | SetClass = 0, | 25 | SetClass = 0, |
| @@ -30,7 +33,7 @@ enum class ChSubmissionMode : u32 { | |||
| 30 | 33 | ||
| 31 | enum class ChClassId : u32 { | 34 | enum class ChClassId : u32 { |
| 32 | NoClass = 0x0, | 35 | NoClass = 0x0, |
| 33 | Host1x = 0x1, | 36 | Control = 0x1, |
| 34 | VideoEncodeMpeg = 0x20, | 37 | VideoEncodeMpeg = 0x20, |
| 35 | VideoEncodeNvEnc = 0x21, | 38 | VideoEncodeNvEnc = 0x21, |
| 36 | VideoStreamingVi = 0x30, | 39 | VideoStreamingVi = 0x30, |
| @@ -102,10 +105,10 @@ private: | |||
| 102 | void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument); | 105 | void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument); |
| 103 | 106 | ||
| 104 | GPU& gpu; | 107 | GPU& gpu; |
| 105 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | 108 | std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; |
| 106 | std::unique_ptr<Tegra::Vic> vic_processor; | 109 | std::unique_ptr<Tegra::Host1x::Vic> vic_processor; |
| 107 | std::unique_ptr<Tegra::Host1x> host1x_processor; | 110 | std::unique_ptr<Tegra::Host1x::Control> host1x_processor; |
| 108 | std::unique_ptr<SyncptIncrManager> sync_manager; | 111 | std::unique_ptr<Host1x::SyncptIncrManager> sync_manager; |
| 109 | ChClassId current_class{}; | 112 | ChClassId current_class{}; |
| 110 | ThiRegisters vic_thi_state{}; | 113 | ThiRegisters vic_thi_state{}; |
| 111 | ThiRegisters nvdec_thi_state{}; | 114 | ThiRegisters nvdec_thi_state{}; |
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 67803fe94..3613c4992 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv3 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index 82808a6b8..08a7591e1 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv3 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 31d80e8b7..dbf833de7 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h | |||
| @@ -1,3 +1,7 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 1 | #pragma once | 5 | #pragma once |
| 2 | 6 | ||
| 3 | #include <deque> | 7 | #include <deque> |
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp index e1abcb188..a9bb00aa7 100644 --- a/src/video_core/control/scheduler.cpp +++ b/src/video_core/control/scheduler.cpp | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv3 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | 5 | #include <memory> |
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h index 802e9caff..c1a773946 100644 --- a/src/video_core/control/scheduler.h +++ b/src/video_core/control/scheduler.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv3 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index fd7c936c4..938f0f11c 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -37,24 +37,32 @@ enum class SubmissionMode : u32 { | |||
| 37 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | 37 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence |
| 38 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | 38 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. |
| 39 | // So the values you see in docs might be multiplied by 4. | 39 | // So the values you see in docs might be multiplied by 4. |
| 40 | // Register documentation: | ||
| 41 | // https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h | ||
| 42 | // | ||
| 43 | // Register Description (approx): | ||
| 44 | // https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt | ||
| 40 | enum class BufferMethods : u32 { | 45 | enum class BufferMethods : u32 { |
| 41 | BindObject = 0x0, | 46 | BindObject = 0x0, |
| 47 | Illegal = 0x1, | ||
| 42 | Nop = 0x2, | 48 | Nop = 0x2, |
| 43 | SemaphoreAddressHigh = 0x4, | 49 | SemaphoreAddressHigh = 0x4, |
| 44 | SemaphoreAddressLow = 0x5, | 50 | SemaphoreAddressLow = 0x5, |
| 45 | SemaphoreSequence = 0x6, | 51 | SemaphoreSequencePayload = 0x6, |
| 46 | SemaphoreTrigger = 0x7, | 52 | SemaphoreOperation = 0x7, |
| 47 | NotifyIntr = 0x8, | 53 | NonStallInterrupt = 0x8, |
| 48 | WrcacheFlush = 0x9, | 54 | WrcacheFlush = 0x9, |
| 49 | Unk28 = 0xA, | 55 | MemOpA = 0xA, |
| 50 | UnkCacheFlush = 0xB, | 56 | MemOpB = 0xB, |
| 57 | MemOpC = 0xC, | ||
| 58 | MemOpD = 0xD, | ||
| 51 | RefCnt = 0x14, | 59 | RefCnt = 0x14, |
| 52 | SemaphoreAcquire = 0x1A, | 60 | SemaphoreAcquire = 0x1A, |
| 53 | SemaphoreRelease = 0x1B, | 61 | SemaphoreRelease = 0x1B, |
| 54 | FenceValue = 0x1C, | 62 | SyncpointPayload = 0x1C, |
| 55 | FenceAction = 0x1D, | 63 | SyncpointOperation = 0x1D, |
| 56 | WaitForInterrupt = 0x1E, | 64 | WaitForIdle = 0x1E, |
| 57 | Unk7c = 0x1F, | 65 | CRCCheck = 0x1F, |
| 58 | Yield = 0x20, | 66 | Yield = 0x20, |
| 59 | NonPullerMethods = 0x40, | 67 | NonPullerMethods = 0x40, |
| 60 | }; | 68 | }; |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 3866c8746..8c17639e4 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -68,11 +68,6 @@ void Puller::ProcessFenceActionMethod() { | |||
| 68 | } | 68 | } |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void Puller::ProcessWaitForInterruptMethod() { | ||
| 72 | // TODO(bunnei) ImplementMe | ||
| 73 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 74 | } | ||
| 75 | |||
| 76 | void Puller::ProcessSemaphoreTriggerMethod() { | 71 | void Puller::ProcessSemaphoreTriggerMethod() { |
| 77 | const auto semaphoreOperationMask = 0xF; | 72 | const auto semaphoreOperationMask = 0xF; |
| 78 | const auto op = | 73 | const auto op = |
| @@ -91,29 +86,33 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 91 | block.timestamp = gpu.GetTicks(); | 86 | block.timestamp = gpu.GetTicks(); |
| 92 | memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); | 87 | memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); |
| 93 | } else { | 88 | } else { |
| 94 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | 89 | do { |
| 95 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | 90 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |
| 96 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 97 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 98 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 99 | // Nothing to do in this case | ||
| 100 | } else { | ||
| 101 | regs.acquire_source = true; | 91 | regs.acquire_source = true; |
| 102 | regs.acquire_value = regs.semaphore_sequence; | 92 | regs.acquire_value = regs.semaphore_sequence; |
| 103 | if (op == GpuSemaphoreOperation::AcquireEqual) { | 93 | if (op == GpuSemaphoreOperation::AcquireEqual) { |
| 104 | regs.acquire_active = true; | 94 | regs.acquire_active = true; |
| 105 | regs.acquire_mode = false; | 95 | regs.acquire_mode = false; |
| 96 | if (word != regs.acquire_value) { | ||
| 97 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 98 | continue; | ||
| 99 | } | ||
| 106 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | 100 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { |
| 107 | regs.acquire_active = true; | 101 | regs.acquire_active = true; |
| 108 | regs.acquire_mode = true; | 102 | regs.acquire_mode = true; |
| 103 | if (word < regs.acquire_value) { | ||
| 104 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 105 | continue; | ||
| 106 | } | ||
| 109 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | 107 | } else if (op == GpuSemaphoreOperation::AcquireMask) { |
| 110 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | 108 | if (word & regs.semaphore_sequence == 0) { |
| 111 | // semaphore_sequence, gives a non-0 result | 109 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| 112 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | 110 | continue; |
| 111 | } | ||
| 113 | } else { | 112 | } else { |
| 114 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | 113 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); |
| 115 | } | 114 | } |
| 116 | } | 115 | } while (false); |
| 117 | } | 116 | } |
| 118 | } | 117 | } |
| 119 | 118 | ||
| @@ -124,6 +123,7 @@ void Puller::ProcessSemaphoreRelease() { | |||
| 124 | void Puller::ProcessSemaphoreAcquire() { | 123 | void Puller::ProcessSemaphoreAcquire() { |
| 125 | const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 124 | const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); |
| 126 | const auto value = regs.semaphore_acquire; | 125 | const auto value = regs.semaphore_acquire; |
| 126 | std::this_thread::sleep_for(std::chrono::milliseconds(5)); | ||
| 127 | if (word != value) { | 127 | if (word != value) { |
| 128 | regs.acquire_active = true; | 128 | regs.acquire_active = true; |
| 129 | regs.acquire_value = value; | 129 | regs.acquire_value = value; |
| @@ -146,32 +146,39 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
| 146 | case BufferMethods::Nop: | 146 | case BufferMethods::Nop: |
| 147 | case BufferMethods::SemaphoreAddressHigh: | 147 | case BufferMethods::SemaphoreAddressHigh: |
| 148 | case BufferMethods::SemaphoreAddressLow: | 148 | case BufferMethods::SemaphoreAddressLow: |
| 149 | case BufferMethods::SemaphoreSequence: | 149 | case BufferMethods::SemaphoreSequencePayload: |
| 150 | case BufferMethods::UnkCacheFlush: | ||
| 151 | case BufferMethods::WrcacheFlush: | 150 | case BufferMethods::WrcacheFlush: |
| 152 | case BufferMethods::FenceValue: | 151 | case BufferMethods::SyncpointPayload: |
| 153 | break; | 152 | break; |
| 154 | case BufferMethods::RefCnt: | 153 | case BufferMethods::RefCnt: |
| 155 | rasterizer->SignalReference(); | 154 | rasterizer->SignalReference(); |
| 156 | break; | 155 | break; |
| 157 | case BufferMethods::FenceAction: | 156 | case BufferMethods::SyncpointOperation: |
| 158 | ProcessFenceActionMethod(); | 157 | ProcessFenceActionMethod(); |
| 159 | break; | 158 | break; |
| 160 | case BufferMethods::WaitForInterrupt: | 159 | case BufferMethods::WaitForIdle: |
| 161 | ProcessWaitForInterruptMethod(); | 160 | rasterizer->WaitForIdle(); |
| 162 | break; | 161 | break; |
| 163 | case BufferMethods::SemaphoreTrigger: { | 162 | case BufferMethods::SemaphoreOperation: { |
| 164 | ProcessSemaphoreTriggerMethod(); | 163 | ProcessSemaphoreTriggerMethod(); |
| 165 | break; | 164 | break; |
| 166 | } | 165 | } |
| 167 | case BufferMethods::NotifyIntr: { | 166 | case BufferMethods::NonStallInterrupt: { |
| 168 | // TODO(Kmather73): Research and implement this method. | 167 | LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented"); |
| 169 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 170 | break; | 168 | break; |
| 171 | } | 169 | } |
| 172 | case BufferMethods::Unk28: { | 170 | case BufferMethods::MemOpA: { |
| 173 | // TODO(Kmather73): Research and implement this method. | 171 | LOG_ERROR(HW_GPU, "Memory Operation A"); |
| 174 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | 172 | break; |
| 173 | } | ||
| 174 | case BufferMethods::MemOpB: { | ||
| 175 | // Implement this better. | ||
| 176 | rasterizer->SyncGuestHost(); | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | case BufferMethods::MemOpC: | ||
| 180 | case BufferMethods::MemOpD: { | ||
| 181 | LOG_ERROR(HW_GPU, "Memory Operation C,D"); | ||
| 175 | break; | 182 | break; |
| 176 | } | 183 | } |
| 177 | case BufferMethods::SemaphoreAcquire: { | 184 | case BufferMethods::SemaphoreAcquire: { |
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index d948ec790..b4619e9a8 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h | |||
| @@ -141,7 +141,6 @@ private: | |||
| 141 | void ProcessSemaphoreAcquire(); | 141 | void ProcessSemaphoreAcquire(); |
| 142 | void ProcessSemaphoreRelease(); | 142 | void ProcessSemaphoreRelease(); |
| 143 | void ProcessSemaphoreTriggerMethod(); | 143 | void ProcessSemaphoreTriggerMethod(); |
| 144 | void ProcessWaitForInterruptMethod(); | ||
| 145 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | 144 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); |
| 146 | 145 | ||
| 147 | /// Mapping of command subchannels to their bound engine ids | 146 | /// Mapping of command subchannels to their bound engine ids |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index d658e038d..03a70e5e0 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/delayed_destruction_ring.h" | 12 | #include "video_core/delayed_destruction_ring.h" |
| 13 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 14 | #include "video_core/host1x/host1x.h" | ||
| 15 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 14 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 15 | 17 | ||
| 16 | namespace VideoCommon { | 18 | namespace VideoCommon { |
| @@ -72,6 +74,7 @@ public: | |||
| 72 | } | 74 | } |
| 73 | 75 | ||
| 74 | void SignalSyncPoint(u32 value) { | 76 | void SignalSyncPoint(u32 value) { |
| 77 | syncpoint_manager.IncrementGuest(value); | ||
| 75 | TryReleasePendingFences(); | 78 | TryReleasePendingFences(); |
| 76 | const bool should_flush = ShouldFlush(); | 79 | const bool should_flush = ShouldFlush(); |
| 77 | CommitAsyncFlushes(); | 80 | CommitAsyncFlushes(); |
| @@ -96,7 +99,7 @@ public: | |||
| 96 | auto payload = current_fence->GetPayload(); | 99 | auto payload = current_fence->GetPayload(); |
| 97 | std::memcpy(address, &payload, sizeof(payload)); | 100 | std::memcpy(address, &payload, sizeof(payload)); |
| 98 | } else { | 101 | } else { |
| 99 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 102 | syncpoint_manager.IncrementHost(current_fence->GetPayload()); |
| 100 | } | 103 | } |
| 101 | PopFence(); | 104 | PopFence(); |
| 102 | } | 105 | } |
| @@ -106,8 +109,8 @@ protected: | |||
| 106 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 109 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 107 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, | 110 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, |
| 108 | TQueryCache& query_cache_) | 111 | TQueryCache& query_cache_) |
| 109 | : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_}, | 112 | : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, |
| 110 | buffer_cache{buffer_cache_}, query_cache{query_cache_} {} | 113 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} |
| 111 | 114 | ||
| 112 | virtual ~FenceManager() = default; | 115 | virtual ~FenceManager() = default; |
| 113 | 116 | ||
| @@ -125,6 +128,7 @@ protected: | |||
| 125 | 128 | ||
| 126 | VideoCore::RasterizerInterface& rasterizer; | 129 | VideoCore::RasterizerInterface& rasterizer; |
| 127 | Tegra::GPU& gpu; | 130 | Tegra::GPU& gpu; |
| 131 | Tegra::Host1x::SyncpointManager& syncpoint_manager; | ||
| 128 | TTextureCache& texture_cache; | 132 | TTextureCache& texture_cache; |
| 129 | TTBufferCache& buffer_cache; | 133 | TTBufferCache& buffer_cache; |
| 130 | TQueryCache& query_cache; | 134 | TQueryCache& query_cache; |
| @@ -142,7 +146,7 @@ private: | |||
| 142 | const auto payload = current_fence->GetPayload(); | 146 | const auto payload = current_fence->GetPayload(); |
| 143 | std::memcpy(address, &payload, sizeof(payload)); | 147 | std::memcpy(address, &payload, sizeof(payload)); |
| 144 | } else { | 148 | } else { |
| 145 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 149 | syncpoint_manager.IncrementHost(current_fence->GetPayload()); |
| 146 | } | 150 | } |
| 147 | PopFence(); | 151 | PopFence(); |
| 148 | } | 152 | } |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index eebd7f3ff..1097db08a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -28,6 +28,8 @@ | |||
| 28 | #include "video_core/engines/maxwell_dma.h" | 28 | #include "video_core/engines/maxwell_dma.h" |
| 29 | #include "video_core/gpu.h" | 29 | #include "video_core/gpu.h" |
| 30 | #include "video_core/gpu_thread.h" | 30 | #include "video_core/gpu_thread.h" |
| 31 | #include "video_core/host1x/host1x.h" | ||
| 32 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 31 | #include "video_core/memory_manager.h" | 33 | #include "video_core/memory_manager.h" |
| 32 | #include "video_core/renderer_base.h" | 34 | #include "video_core/renderer_base.h" |
| 33 | #include "video_core/shader_notify.h" | 35 | #include "video_core/shader_notify.h" |
| @@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | |||
| 38 | 40 | ||
| 39 | struct GPU::Impl { | 41 | struct GPU::Impl { |
| 40 | explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) | 42 | explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) |
| 41 | : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, | 43 | : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, |
| 42 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, | 44 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, |
| 43 | gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {} | 45 | gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {} |
| 44 | 46 | ||
| @@ -115,31 +117,35 @@ struct GPU::Impl { | |||
| 115 | } | 117 | } |
| 116 | 118 | ||
| 117 | /// Request a host GPU memory flush from the CPU. | 119 | /// Request a host GPU memory flush from the CPU. |
| 118 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { | 120 | template <typename Func> |
| 119 | std::unique_lock lck{flush_request_mutex}; | 121 | [[nodiscard]] u64 RequestSyncOperation(Func&& action) { |
| 120 | const u64 fence = ++last_flush_fence; | 122 | std::unique_lock lck{sync_request_mutex}; |
| 121 | flush_requests.emplace_back(fence, addr, size); | 123 | const u64 fence = ++last_sync_fence; |
| 124 | sync_requests.emplace_back(action); | ||
| 122 | return fence; | 125 | return fence; |
| 123 | } | 126 | } |
| 124 | 127 | ||
| 125 | /// Obtains current flush request fence id. | 128 | /// Obtains current flush request fence id. |
| 126 | [[nodiscard]] u64 CurrentFlushRequestFence() const { | 129 | [[nodiscard]] u64 CurrentSyncRequestFence() const { |
| 127 | return current_flush_fence.load(std::memory_order_relaxed); | 130 | return current_sync_fence.load(std::memory_order_relaxed); |
| 131 | } | ||
| 132 | |||
| 133 | void WaitForSyncOperation(const u64 fence) { | ||
| 134 | std::unique_lock lck{sync_request_mutex}; | ||
| 135 | sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); | ||
| 128 | } | 136 | } |
| 129 | 137 | ||
| 130 | /// Tick pending requests within the GPU. | 138 | /// Tick pending requests within the GPU. |
| 131 | void TickWork() { | 139 | void TickWork() { |
| 132 | std::unique_lock lck{flush_request_mutex}; | 140 | std::unique_lock lck{sync_request_mutex}; |
| 133 | while (!flush_requests.empty()) { | 141 | while (!sync_requests.empty()) { |
| 134 | auto& request = flush_requests.front(); | 142 | auto request = std::move(sync_requests.front()); |
| 135 | const u64 fence = request.fence; | 143 | sync_requests.pop_front(); |
| 136 | const VAddr addr = request.addr; | 144 | sync_request_mutex.unlock(); |
| 137 | const std::size_t size = request.size; | 145 | request(); |
| 138 | flush_requests.pop_front(); | 146 | current_sync_fence.fetch_add(1, std::memory_order_release); |
| 139 | flush_request_mutex.unlock(); | 147 | sync_request_mutex.lock(); |
| 140 | rasterizer->FlushRegion(addr, size); | 148 | sync_request_cv.notify_all(); |
| 141 | current_flush_fence.store(fence); | ||
| 142 | flush_request_mutex.lock(); | ||
| 143 | } | 149 | } |
| 144 | } | 150 | } |
| 145 | 151 | ||
| @@ -207,78 +213,26 @@ struct GPU::Impl { | |||
| 207 | 213 | ||
| 208 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 214 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 209 | void WaitFence(u32 syncpoint_id, u32 value) { | 215 | void WaitFence(u32 syncpoint_id, u32 value) { |
| 210 | // Synced GPU, is always in sync | ||
| 211 | if (!is_async) { | ||
| 212 | return; | ||
| 213 | } | ||
| 214 | if (syncpoint_id == UINT32_MAX) { | 216 | if (syncpoint_id == UINT32_MAX) { |
| 215 | // TODO: Research what this does. | ||
| 216 | LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); | ||
| 217 | return; | 217 | return; |
| 218 | } | 218 | } |
| 219 | MICROPROFILE_SCOPE(GPU_wait); | 219 | MICROPROFILE_SCOPE(GPU_wait); |
| 220 | std::unique_lock lock{sync_mutex}; | 220 | host1x.GetSyncpointManager().WaitHost(syncpoint_id, value); |
| 221 | sync_cv.wait(lock, [=, this] { | ||
| 222 | if (shutting_down.load(std::memory_order_relaxed)) { | ||
| 223 | // We're shutting down, ensure no threads continue to wait for the next syncpoint | ||
| 224 | return true; | ||
| 225 | } | ||
| 226 | return syncpoints.at(syncpoint_id).load() >= value; | ||
| 227 | }); | ||
| 228 | } | 221 | } |
| 229 | 222 | ||
| 230 | void IncrementSyncPoint(u32 syncpoint_id) { | 223 | void IncrementSyncPoint(u32 syncpoint_id) { |
| 231 | auto& syncpoint = syncpoints.at(syncpoint_id); | 224 | host1x.GetSyncpointManager().IncrementHost(syncpoint_id); |
| 232 | syncpoint++; | ||
| 233 | std::scoped_lock lock{sync_mutex}; | ||
| 234 | sync_cv.notify_all(); | ||
| 235 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 236 | if (!interrupt.empty()) { | ||
| 237 | u32 value = syncpoint.load(); | ||
| 238 | auto it = interrupt.begin(); | ||
| 239 | while (it != interrupt.end()) { | ||
| 240 | if (value >= *it) { | ||
| 241 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 242 | it = interrupt.erase(it); | ||
| 243 | continue; | ||
| 244 | } | ||
| 245 | it++; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | } | 225 | } |
| 249 | 226 | ||
| 250 | [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { | 227 | [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { |
| 251 | return syncpoints.at(syncpoint_id).load(); | 228 | return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); |
| 252 | } | 229 | } |
| 253 | 230 | ||
| 254 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { | 231 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { |
| 255 | std::scoped_lock lock{sync_mutex}; | 232 | auto& syncpoint_manager = host1x.GetSyncpointManager(); |
| 256 | u32 current_value = syncpoints.at(syncpoint_id).load(); | 233 | syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() { |
| 257 | if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) { | ||
| 258 | TriggerCpuInterrupt(syncpoint_id, value); | 234 | TriggerCpuInterrupt(syncpoint_id, value); |
| 259 | return; | 235 | }); |
| 260 | } | ||
| 261 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 262 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 263 | [value](u32 in_value) { return in_value == value; }); | ||
| 264 | if (contains) { | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | interrupt.emplace_back(value); | ||
| 268 | } | ||
| 269 | |||
| 270 | [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { | ||
| 271 | std::scoped_lock lock{sync_mutex}; | ||
| 272 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 273 | const auto iter = | ||
| 274 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 275 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 276 | |||
| 277 | if (iter == interrupt.end()) { | ||
| 278 | return false; | ||
| 279 | } | ||
| 280 | interrupt.erase(iter); | ||
| 281 | return true; | ||
| 282 | } | 236 | } |
| 283 | 237 | ||
| 284 | [[nodiscard]] u64 GetTicks() const { | 238 | [[nodiscard]] u64 GetTicks() const { |
| @@ -387,8 +341,48 @@ struct GPU::Impl { | |||
| 387 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 341 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 388 | } | 342 | } |
| 389 | 343 | ||
| 344 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||
| 345 | Service::Nvidia::NvFence* fences, size_t num_fences) { | ||
| 346 | size_t current_request_counter{}; | ||
| 347 | { | ||
| 348 | std::unique_lock<std::mutex> lk(request_swap_mutex); | ||
| 349 | if (free_swap_counters.empty()) { | ||
| 350 | current_request_counter = request_swap_counters.size(); | ||
| 351 | request_swap_counters.emplace_back(num_fences); | ||
| 352 | } else { | ||
| 353 | current_request_counter = free_swap_counters.front(); | ||
| 354 | request_swap_counters[current_request_counter] = num_fences; | ||
| 355 | free_swap_counters.pop_front(); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | const auto wait_fence = | ||
| 359 | RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { | ||
| 360 | auto& syncpoint_manager = host1x.GetSyncpointManager(); | ||
| 361 | if (num_fences == 0) { | ||
| 362 | renderer->SwapBuffers(framebuffer); | ||
| 363 | } | ||
| 364 | const auto executer = [this, current_request_counter, | ||
| 365 | framebuffer_copy = *framebuffer]() { | ||
| 366 | { | ||
| 367 | std::unique_lock<std::mutex> lk(request_swap_mutex); | ||
| 368 | if (--request_swap_counters[current_request_counter] != 0) { | ||
| 369 | return; | ||
| 370 | } | ||
| 371 | free_swap_counters.push_back(current_request_counter); | ||
| 372 | } | ||
| 373 | renderer->SwapBuffers(&framebuffer_copy); | ||
| 374 | }; | ||
| 375 | for (size_t i = 0; i < num_fences; i++) { | ||
| 376 | syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); | ||
| 377 | } | ||
| 378 | }); | ||
| 379 | gpu_thread.TickGPU(); | ||
| 380 | WaitForSyncOperation(wait_fence); | ||
| 381 | } | ||
| 382 | |||
| 390 | GPU& gpu; | 383 | GPU& gpu; |
| 391 | Core::System& system; | 384 | Core::System& system; |
| 385 | Host1x::Host1x& host1x; | ||
| 392 | 386 | ||
| 393 | std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; | 387 | std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; |
| 394 | std::unique_ptr<VideoCore::RendererBase> renderer; | 388 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| @@ -411,18 +405,11 @@ struct GPU::Impl { | |||
| 411 | 405 | ||
| 412 | std::condition_variable sync_cv; | 406 | std::condition_variable sync_cv; |
| 413 | 407 | ||
| 414 | struct FlushRequest { | 408 | std::list<std::function<void(void)>> sync_requests; |
| 415 | explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | 409 | std::atomic<u64> current_sync_fence{}; |
| 416 | : fence{fence_}, addr{addr_}, size{size_} {} | 410 | u64 last_sync_fence{}; |
| 417 | u64 fence; | 411 | std::mutex sync_request_mutex; |
| 418 | VAddr addr; | 412 | std::condition_variable sync_request_cv; |
| 419 | std::size_t size; | ||
| 420 | }; | ||
| 421 | |||
| 422 | std::list<FlushRequest> flush_requests; | ||
| 423 | std::atomic<u64> current_flush_fence{}; | ||
| 424 | u64 last_flush_fence{}; | ||
| 425 | std::mutex flush_request_mutex; | ||
| 426 | 413 | ||
| 427 | const bool is_async; | 414 | const bool is_async; |
| 428 | 415 | ||
| @@ -433,6 +420,10 @@ struct GPU::Impl { | |||
| 433 | std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels; | 420 | std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels; |
| 434 | Tegra::Control::ChannelState* current_channel; | 421 | Tegra::Control::ChannelState* current_channel; |
| 435 | s32 bound_channel{-1}; | 422 | s32 bound_channel{-1}; |
| 423 | |||
| 424 | std::deque<size_t> free_swap_counters; | ||
| 425 | std::deque<size_t> request_swap_counters; | ||
| 426 | std::mutex request_swap_mutex; | ||
| 436 | }; | 427 | }; |
| 437 | 428 | ||
| 438 | GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) | 429 | GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) |
| @@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() { | |||
| 477 | } | 468 | } |
| 478 | 469 | ||
| 479 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | 470 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { |
| 480 | return impl->RequestFlush(addr, size); | 471 | return impl->RequestSyncOperation( |
| 472 | [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); | ||
| 473 | } | ||
| 474 | |||
| 475 | u64 GPU::CurrentSyncRequestFence() const { | ||
| 476 | return impl->CurrentSyncRequestFence(); | ||
| 481 | } | 477 | } |
| 482 | 478 | ||
| 483 | u64 GPU::CurrentFlushRequestFence() const { | 479 | void GPU::WaitForSyncOperation(u64 fence) { |
| 484 | return impl->CurrentFlushRequestFence(); | 480 | return impl->WaitForSyncOperation(fence); |
| 485 | } | 481 | } |
| 486 | 482 | ||
| 487 | void GPU::TickWork() { | 483 | void GPU::TickWork() { |
| 488 | impl->TickWork(); | 484 | impl->TickWork(); |
| 489 | } | 485 | } |
| 490 | 486 | ||
| 487 | /// Gets a mutable reference to the Host1x interface | ||
| 488 | Host1x::Host1x& GPU::Host1x() { | ||
| 489 | return impl->host1x; | ||
| 490 | } | ||
| 491 | |||
| 492 | /// Gets an immutable reference to the Host1x interface. | ||
| 493 | const Host1x::Host1x& GPU::Host1x() const { | ||
| 494 | return impl->host1x; | ||
| 495 | } | ||
| 496 | |||
| 491 | Engines::Maxwell3D& GPU::Maxwell3D() { | 497 | Engines::Maxwell3D& GPU::Maxwell3D() { |
| 492 | return impl->Maxwell3D(); | 498 | return impl->Maxwell3D(); |
| 493 | } | 499 | } |
| @@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { | |||
| 536 | return impl->ShaderNotify(); | 542 | return impl->ShaderNotify(); |
| 537 | } | 543 | } |
| 538 | 544 | ||
| 545 | void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||
| 546 | Service::Nvidia::NvFence* fences, size_t num_fences) { | ||
| 547 | impl->RequestSwapBuffers(framebuffer, fences, num_fences); | ||
| 548 | } | ||
| 549 | |||
| 539 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { | 550 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { |
| 540 | impl->WaitFence(syncpoint_id, value); | 551 | impl->WaitFence(syncpoint_id, value); |
| 541 | } | 552 | } |
| @@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { | |||
| 552 | impl->RegisterSyncptInterrupt(syncpoint_id, value); | 563 | impl->RegisterSyncptInterrupt(syncpoint_id, value); |
| 553 | } | 564 | } |
| 554 | 565 | ||
| 555 | bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { | ||
| 556 | return impl->CancelSyncptInterrupt(syncpoint_id, value); | ||
| 557 | } | ||
| 558 | |||
| 559 | u64 GPU::GetTicks() const { | 566 | u64 GPU::GetTicks() const { |
| 560 | return impl->GetTicks(); | 567 | return impl->GetTicks(); |
| 561 | } | 568 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 7e84b0d2f..c1a538257 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -93,6 +93,10 @@ namespace Control { | |||
| 93 | struct ChannelState; | 93 | struct ChannelState; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | namespace Host1x { | ||
| 97 | class Host1x; | ||
| 98 | } // namespace Host1x | ||
| 99 | |||
| 96 | class MemoryManager; | 100 | class MemoryManager; |
| 97 | 101 | ||
| 98 | class GPU final { | 102 | class GPU final { |
| @@ -124,11 +128,19 @@ public: | |||
| 124 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 128 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| 125 | 129 | ||
| 126 | /// Obtains current flush request fence id. | 130 | /// Obtains current flush request fence id. |
| 127 | [[nodiscard]] u64 CurrentFlushRequestFence() const; | 131 | [[nodiscard]] u64 CurrentSyncRequestFence() const; |
| 132 | |||
| 133 | void WaitForSyncOperation(u64 fence); | ||
| 128 | 134 | ||
| 129 | /// Tick pending requests within the GPU. | 135 | /// Tick pending requests within the GPU. |
| 130 | void TickWork(); | 136 | void TickWork(); |
| 131 | 137 | ||
| 138 | /// Gets a mutable reference to the Host1x interface | ||
| 139 | [[nodiscard]] Host1x::Host1x& Host1x(); | ||
| 140 | |||
| 141 | /// Gets an immutable reference to the Host1x interface. | ||
| 142 | [[nodiscard]] const Host1x::Host1x& Host1x() const; | ||
| 143 | |||
| 132 | /// Returns a reference to the Maxwell3D GPU engine. | 144 | /// Returns a reference to the Maxwell3D GPU engine. |
| 133 | [[nodiscard]] Engines::Maxwell3D& Maxwell3D(); | 145 | [[nodiscard]] Engines::Maxwell3D& Maxwell3D(); |
| 134 | 146 | ||
| @@ -174,8 +186,6 @@ public: | |||
| 174 | 186 | ||
| 175 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); | 187 | void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); |
| 176 | 188 | ||
| 177 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | ||
| 178 | |||
| 179 | [[nodiscard]] u64 GetTicks() const; | 189 | [[nodiscard]] u64 GetTicks() const; |
| 180 | 190 | ||
| 181 | [[nodiscard]] bool IsAsync() const; | 191 | [[nodiscard]] bool IsAsync() const; |
| @@ -184,6 +194,9 @@ public: | |||
| 184 | 194 | ||
| 185 | void RendererFrameEndNotify(); | 195 | void RendererFrameEndNotify(); |
| 186 | 196 | ||
| 197 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||
| 198 | Service::Nvidia::NvFence* fences, size_t num_fences); | ||
| 199 | |||
| 187 | /// Performs any additional setup necessary in order to begin GPU emulation. | 200 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 188 | /// This can be used to launch any necessary threads and register any necessary | 201 | /// This can be used to launch any necessary threads and register any necessary |
| 189 | /// core timing events. | 202 | /// core timing events. |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 9844cde43..2c03545bf 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -93,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 93 | } | 93 | } |
| 94 | auto& gpu = system.GPU(); | 94 | auto& gpu = system.GPU(); |
| 95 | u64 fence = gpu.RequestFlush(addr, size); | 95 | u64 fence = gpu.RequestFlush(addr, size); |
| 96 | TickGPU(); | ||
| 97 | gpu.WaitForSyncOperation(fence); | ||
| 98 | } | ||
| 99 | |||
| 100 | void ThreadManager::TickGPU() { | ||
| 96 | PushCommand(GPUTickCommand(), true); | 101 | PushCommand(GPUTickCommand(), true); |
| 97 | ASSERT(fence <= gpu.CurrentFlushRequestFence()); | ||
| 98 | } | 102 | } |
| 99 | 103 | ||
| 100 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index c5078a2b3..64628d3e3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -135,6 +135,8 @@ public: | |||
| 135 | 135 | ||
| 136 | void OnCommandListEnd(); | 136 | void OnCommandListEnd(); |
| 137 | 137 | ||
| 138 | void TickGPU(); | ||
| 139 | |||
| 138 | private: | 140 | private: |
| 139 | /// Pushes a command to be executed by the GPU thread | 141 | /// Pushes a command to be executed by the GPU thread |
| 140 | u64 PushCommand(CommandData&& command_data, bool block = false); | 142 | u64 PushCommand(CommandData&& command_data, bool block = false); |
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index a5eb97b7f..70c47ae03 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp | |||
| @@ -6,11 +6,11 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/settings.h" | 8 | #include "common/settings.h" |
| 9 | #include "video_core/command_classes/codecs/codec.h" | ||
| 10 | #include "video_core/command_classes/codecs/h264.h" | ||
| 11 | #include "video_core/command_classes/codecs/vp8.h" | ||
| 12 | #include "video_core/command_classes/codecs/vp9.h" | ||
| 13 | #include "video_core/gpu.h" | 9 | #include "video_core/gpu.h" |
| 10 | #include "video_core/host1x/codecs/codec.h" | ||
| 11 | #include "video_core/host1x/codecs/h264.h" | ||
| 12 | #include "video_core/host1x/codecs/vp8.h" | ||
| 13 | #include "video_core/host1x/codecs/vp9.h" | ||
| 14 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 15 | 15 | ||
| 16 | extern "C" { | 16 | extern "C" { |
| @@ -73,7 +73,7 @@ void AVFrameDeleter(AVFrame* ptr) { | |||
| 73 | av_frame_free(&ptr); | 73 | av_frame_free(&ptr); |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) | 76 | Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs) |
| 77 | : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), | 77 | : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), |
| 78 | vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), | 78 | vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), |
| 79 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} | 79 | vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} |
| @@ -168,11 +168,11 @@ void Codec::InitializeGpuDecoder() { | |||
| 168 | void Codec::Initialize() { | 168 | void Codec::Initialize() { |
| 169 | const AVCodecID codec = [&] { | 169 | const AVCodecID codec = [&] { |
| 170 | switch (current_codec) { | 170 | switch (current_codec) { |
| 171 | case NvdecCommon::VideoCodec::H264: | 171 | case Host1x::NvdecCommon::VideoCodec::H264: |
| 172 | return AV_CODEC_ID_H264; | 172 | return AV_CODEC_ID_H264; |
| 173 | case NvdecCommon::VideoCodec::VP8: | 173 | case Host1x::NvdecCommon::VideoCodec::VP8: |
| 174 | return AV_CODEC_ID_VP8; | 174 | return AV_CODEC_ID_VP8; |
| 175 | case NvdecCommon::VideoCodec::VP9: | 175 | case Host1x::NvdecCommon::VideoCodec::VP9: |
| 176 | return AV_CODEC_ID_VP9; | 176 | return AV_CODEC_ID_VP9; |
| 177 | default: | 177 | default: |
| 178 | UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); | 178 | UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); |
| @@ -197,7 +197,7 @@ void Codec::Initialize() { | |||
| 197 | initialized = true; | 197 | initialized = true; |
| 198 | } | 198 | } |
| 199 | 199 | ||
| 200 | void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { | 200 | void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { |
| 201 | if (current_codec != codec) { | 201 | if (current_codec != codec) { |
| 202 | current_codec = codec; | 202 | current_codec = codec; |
| 203 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); | 203 | LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); |
| @@ -215,11 +215,11 @@ void Codec::Decode() { | |||
| 215 | bool vp9_hidden_frame = false; | 215 | bool vp9_hidden_frame = false; |
| 216 | const auto& frame_data = [&]() { | 216 | const auto& frame_data = [&]() { |
| 217 | switch (current_codec) { | 217 | switch (current_codec) { |
| 218 | case Tegra::NvdecCommon::VideoCodec::H264: | 218 | case Tegra::Host1x::NvdecCommon::VideoCodec::H264: |
| 219 | return h264_decoder->ComposeFrame(state, is_first_frame); | 219 | return h264_decoder->ComposeFrame(state, is_first_frame); |
| 220 | case Tegra::NvdecCommon::VideoCodec::VP8: | 220 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: |
| 221 | return vp8_decoder->ComposeFrame(state); | 221 | return vp8_decoder->ComposeFrame(state); |
| 222 | case Tegra::NvdecCommon::VideoCodec::VP9: | 222 | case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: |
| 223 | vp9_decoder->ComposeFrame(state); | 223 | vp9_decoder->ComposeFrame(state); |
| 224 | vp9_hidden_frame = vp9_decoder->WasFrameHidden(); | 224 | vp9_hidden_frame = vp9_decoder->WasFrameHidden(); |
| 225 | return vp9_decoder->GetFrameBytes(); | 225 | return vp9_decoder->GetFrameBytes(); |
| @@ -287,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() { | |||
| 287 | return frame; | 287 | return frame; |
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { | 290 | Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { |
| 291 | return current_codec; | 291 | return current_codec; |
| 292 | } | 292 | } |
| 293 | 293 | ||
| 294 | std::string_view Codec::GetCurrentCodecName() const { | 294 | std::string_view Codec::GetCurrentCodecName() const { |
| 295 | switch (current_codec) { | 295 | switch (current_codec) { |
| 296 | case NvdecCommon::VideoCodec::None: | 296 | case Host1x::NvdecCommon::VideoCodec::None: |
| 297 | return "None"; | 297 | return "None"; |
| 298 | case NvdecCommon::VideoCodec::H264: | 298 | case Host1x::NvdecCommon::VideoCodec::H264: |
| 299 | return "H264"; | 299 | return "H264"; |
| 300 | case NvdecCommon::VideoCodec::VP8: | 300 | case Host1x::NvdecCommon::VideoCodec::VP8: |
| 301 | return "VP8"; | 301 | return "VP8"; |
| 302 | case NvdecCommon::VideoCodec::H265: | 302 | case Host1x::NvdecCommon::VideoCodec::H265: |
| 303 | return "H265"; | 303 | return "H265"; |
| 304 | case NvdecCommon::VideoCodec::VP9: | 304 | case Host1x::NvdecCommon::VideoCodec::VP9: |
| 305 | return "VP9"; | 305 | return "VP9"; |
| 306 | default: | 306 | default: |
| 307 | return "Unknown"; | 307 | return "Unknown"; |
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 0c2405465..117cb3ccd 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h | |||
| @@ -6,8 +6,8 @@ | |||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <string_view> | 7 | #include <string_view> |
| 8 | #include <queue> | 8 | #include <queue> |
| 9 | 9 | #include "common/common_types.h" | |
| 10 | #include "video_core/command_classes/nvdec_common.h" | 10 | #include "video_core/host1x/nvdec_common.h" |
| 11 | 11 | ||
| 12 | extern "C" { | 12 | extern "C" { |
| 13 | #if defined(__GNUC__) || defined(__clang__) | 13 | #if defined(__GNUC__) || defined(__clang__) |
| @@ -34,14 +34,14 @@ class VP9; | |||
| 34 | 34 | ||
| 35 | class Codec { | 35 | class Codec { |
| 36 | public: | 36 | public: |
| 37 | explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); | 37 | explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs); |
| 38 | ~Codec(); | 38 | ~Codec(); |
| 39 | 39 | ||
| 40 | /// Initialize the codec, returning success or failure | 40 | /// Initialize the codec, returning success or failure |
| 41 | void Initialize(); | 41 | void Initialize(); |
| 42 | 42 | ||
| 43 | /// Sets NVDEC video stream codec | 43 | /// Sets NVDEC video stream codec |
| 44 | void SetTargetCodec(NvdecCommon::VideoCodec codec); | 44 | void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); |
| 45 | 45 | ||
| 46 | /// Call decoders to construct headers, decode AVFrame with ffmpeg | 46 | /// Call decoders to construct headers, decode AVFrame with ffmpeg |
| 47 | void Decode(); | 47 | void Decode(); |
| @@ -50,7 +50,7 @@ public: | |||
| 50 | [[nodiscard]] AVFramePtr GetCurrentFrame(); | 50 | [[nodiscard]] AVFramePtr GetCurrentFrame(); |
| 51 | 51 | ||
| 52 | /// Returns the value of current_codec | 52 | /// Returns the value of current_codec |
| 53 | [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; | 53 | [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; |
| 54 | 54 | ||
| 55 | /// Return name of the current codec | 55 | /// Return name of the current codec |
| 56 | [[nodiscard]] std::string_view GetCurrentCodecName() const; | 56 | [[nodiscard]] std::string_view GetCurrentCodecName() const; |
| @@ -63,14 +63,14 @@ private: | |||
| 63 | bool CreateGpuAvDevice(); | 63 | bool CreateGpuAvDevice(); |
| 64 | 64 | ||
| 65 | bool initialized{}; | 65 | bool initialized{}; |
| 66 | NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; | 66 | Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; |
| 67 | 67 | ||
| 68 | const AVCodec* av_codec{nullptr}; | 68 | const AVCodec* av_codec{nullptr}; |
| 69 | AVCodecContext* av_codec_ctx{nullptr}; | 69 | AVCodecContext* av_codec_ctx{nullptr}; |
| 70 | AVBufferRef* av_gpu_decoder{nullptr}; | 70 | AVBufferRef* av_gpu_decoder{nullptr}; |
| 71 | 71 | ||
| 72 | GPU& gpu; | 72 | GPU& gpu; |
| 73 | const NvdecCommon::NvdecRegisters& state; | 73 | const Host1x::NvdecCommon::NvdecRegisters& state; |
| 74 | std::unique_ptr<Decoder::H264> h264_decoder; | 74 | std::unique_ptr<Decoder::H264> h264_decoder; |
| 75 | std::unique_ptr<Decoder::VP8> vp8_decoder; | 75 | std::unique_ptr<Decoder::VP8> vp8_decoder; |
| 76 | std::unique_ptr<Decoder::VP9> vp9_decoder; | 76 | std::unique_ptr<Decoder::VP9> vp9_decoder; |
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index e2acd54d4..95534bc85 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -5,8 +5,8 @@ | |||
| 5 | #include <bit> | 5 | #include <bit> |
| 6 | 6 | ||
| 7 | #include "common/settings.h" | 7 | #include "common/settings.h" |
| 8 | #include "video_core/command_classes/codecs/h264.h" | ||
| 9 | #include "video_core/gpu.h" | 8 | #include "video_core/gpu.h" |
| 9 | #include "video_core/host1x/codecs/h264.h" | ||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | 11 | ||
| 12 | namespace Tegra::Decoder { | 12 | namespace Tegra::Decoder { |
| @@ -28,7 +28,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} | |||
| 28 | 28 | ||
| 29 | H264::~H264() = default; | 29 | H264::~H264() = default; |
| 30 | 30 | ||
| 31 | const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state, | 31 | const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, |
| 32 | bool is_first_frame) { | 32 | bool is_first_frame) { |
| 33 | H264DecoderContext context; | 33 | H264DecoderContext context; |
| 34 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); | 34 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); |
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index 261574364..a98730474 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 9 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/command_classes/nvdec_common.h" | 11 | #include "video_core/host1x/nvdec_common.h" |
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | class GPU; | 14 | class GPU; |
| @@ -59,8 +59,8 @@ public: | |||
| 59 | ~H264(); | 59 | ~H264(); |
| 60 | 60 | ||
| 61 | /// Compose the H264 frame for FFmpeg decoding | 61 | /// Compose the H264 frame for FFmpeg decoding |
| 62 | [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state, | 62 | [[nodiscard]] const std::vector<u8>& ComposeFrame( |
| 63 | bool is_first_frame = false); | 63 | const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); |
| 64 | 64 | ||
| 65 | private: | 65 | private: |
| 66 | std::vector<u8> frame; | 66 | std::vector<u8> frame; |
diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index c83b9bbc2..aac026e17 100644 --- a/src/video_core/command_classes/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp | |||
| @@ -3,8 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #include <vector> | 4 | #include <vector> |
| 5 | 5 | ||
| 6 | #include "video_core/command_classes/codecs/vp8.h" | ||
| 7 | #include "video_core/gpu.h" | 6 | #include "video_core/gpu.h" |
| 7 | #include "video_core/host1x/codecs/vp8.h" | ||
| 8 | #include "video_core/memory_manager.h" | 8 | #include "video_core/memory_manager.h" |
| 9 | 9 | ||
| 10 | namespace Tegra::Decoder { | 10 | namespace Tegra::Decoder { |
| @@ -12,7 +12,7 @@ VP8::VP8(GPU& gpu_) : gpu(gpu_) {} | |||
| 12 | 12 | ||
| 13 | VP8::~VP8() = default; | 13 | VP8::~VP8() = default; |
| 14 | 14 | ||
| 15 | const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { | 15 | const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 16 | VP8PictureInfo info; | 16 | VP8PictureInfo info; |
| 17 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); | 17 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); |
| 18 | 18 | ||
diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h index 3357667b0..a1dfa5f03 100644 --- a/src/video_core/command_classes/codecs/vp8.h +++ b/src/video_core/host1x/codecs/vp8.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_funcs.h" | 9 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/command_classes/nvdec_common.h" | 11 | #include "video_core/host1x/nvdec_common.h" |
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | class GPU; | 14 | class GPU; |
| @@ -20,7 +20,8 @@ public: | |||
| 20 | ~VP8(); | 20 | ~VP8(); |
| 21 | 21 | ||
| 22 | /// Compose the VP8 frame for FFmpeg decoding | 22 | /// Compose the VP8 frame for FFmpeg decoding |
| 23 | [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state); | 23 | [[nodiscard]] const std::vector<u8>& ComposeFrame( |
| 24 | const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 24 | 25 | ||
| 25 | private: | 26 | private: |
| 26 | std::vector<u8> frame; | 27 | std::vector<u8> frame; |
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index c01431441..bc50c6ba4 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp | |||
| @@ -4,8 +4,8 @@ | |||
| 4 | #include <algorithm> // for std::copy | 4 | #include <algorithm> // for std::copy |
| 5 | #include <numeric> | 5 | #include <numeric> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "video_core/command_classes/codecs/vp9.h" | ||
| 8 | #include "video_core/gpu.h" | 7 | #include "video_core/gpu.h" |
| 8 | #include "video_core/host1x/codecs/vp9.h" | ||
| 9 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| 10 | 10 | ||
| 11 | namespace Tegra::Decoder { | 11 | namespace Tegra::Decoder { |
| @@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ | |||
| 355 | } | 355 | } |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { | 358 | Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 359 | PictureInfo picture_info; | 359 | PictureInfo picture_info; |
| 360 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | 360 | gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); |
| 361 | Vp9PictureInfo vp9_info = picture_info.Convert(); | 361 | Vp9PictureInfo vp9_info = picture_info.Convert(); |
| @@ -376,7 +376,7 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | |||
| 376 | entropy.Convert(dst); | 376 | entropy.Convert(dst); |
| 377 | } | 377 | } |
| 378 | 378 | ||
| 379 | Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { | 379 | Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 380 | Vp9FrameContainer current_frame{}; | 380 | Vp9FrameContainer current_frame{}; |
| 381 | { | 381 | { |
| 382 | gpu.SyncGuestHost(); | 382 | gpu.SyncGuestHost(); |
| @@ -769,7 +769,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { | |||
| 769 | return uncomp_writer; | 769 | return uncomp_writer; |
| 770 | } | 770 | } |
| 771 | 771 | ||
| 772 | void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { | 772 | void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 773 | std::vector<u8> bitstream; | 773 | std::vector<u8> bitstream; |
| 774 | { | 774 | { |
| 775 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); | 775 | Vp9FrameContainer curr_frame = GetCurrentFrame(state); |
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h index ecc40e8b1..a425c0fa4 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/host1x/codecs/vp9.h | |||
| @@ -8,8 +8,8 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/stream.h" | 10 | #include "common/stream.h" |
| 11 | #include "video_core/command_classes/codecs/vp9_types.h" | 11 | #include "video_core/host1x/codecs/vp9_types.h" |
| 12 | #include "video_core/command_classes/nvdec_common.h" | 12 | #include "video_core/host1x/nvdec_common.h" |
| 13 | 13 | ||
| 14 | namespace Tegra { | 14 | namespace Tegra { |
| 15 | class GPU; | 15 | class GPU; |
| @@ -117,7 +117,7 @@ public: | |||
| 117 | 117 | ||
| 118 | /// Composes the VP9 frame from the GPU state information. | 118 | /// Composes the VP9 frame from the GPU state information. |
| 119 | /// Based on the official VP9 spec documentation | 119 | /// Based on the official VP9 spec documentation |
| 120 | void ComposeFrame(const NvdecCommon::NvdecRegisters& state); | 120 | void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state); |
| 121 | 121 | ||
| 122 | /// Returns true if the most recent frame was a hidden frame. | 122 | /// Returns true if the most recent frame was a hidden frame. |
| 123 | [[nodiscard]] bool WasFrameHidden() const { | 123 | [[nodiscard]] bool WasFrameHidden() const { |
| @@ -162,13 +162,15 @@ private: | |||
| 162 | void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); | 162 | void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); |
| 163 | 163 | ||
| 164 | /// Returns VP9 information from NVDEC provided offset and size | 164 | /// Returns VP9 information from NVDEC provided offset and size |
| 165 | [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); | 165 | [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo( |
| 166 | const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 166 | 167 | ||
| 167 | /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct | 168 | /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct |
| 168 | void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); | 169 | void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); |
| 169 | 170 | ||
| 170 | /// Returns frame to be decoded after buffering | 171 | /// Returns frame to be decoded after buffering |
| 171 | [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); | 172 | [[nodiscard]] Vp9FrameContainer GetCurrentFrame( |
| 173 | const Host1x::NvdecCommon::NvdecRegisters& state); | ||
| 172 | 174 | ||
| 173 | /// Use NVDEC providied information to compose the headers for the current frame | 175 | /// Use NVDEC providied information to compose the headers for the current frame |
| 174 | [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); | 176 | [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); |
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h index bb3d8df6e..bb3d8df6e 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/host1x/codecs/vp9_types.h | |||
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp new file mode 100644 index 000000000..b72b01aa3 --- /dev/null +++ b/src/video_core/host1x/control.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2022 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/gpu.h" | ||
| 7 | #include "video_core/host1x/control.h" | ||
| 8 | #include "video_core/host1x/host1x.h" | ||
| 9 | |||
| 10 | namespace Tegra::Host1x { | ||
| 11 | |||
| 12 | Control::Control(GPU& gpu_) : gpu(gpu_) {} | ||
| 13 | |||
| 14 | Control::~Control() = default; | ||
| 15 | |||
| 16 | void Control::ProcessMethod(Method method, u32 argument) { | ||
| 17 | switch (method) { | ||
| 18 | case Method::LoadSyncptPayload32: | ||
| 19 | syncpoint_value = argument; | ||
| 20 | break; | ||
| 21 | case Method::WaitSyncpt: | ||
| 22 | case Method::WaitSyncpt32: | ||
| 23 | Execute(argument); | ||
| 24 | break; | ||
| 25 | default: | ||
| 26 | UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method)); | ||
| 27 | break; | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void Control::Execute(u32 data) { | ||
| 32 | gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Tegra::Host1x | ||
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/host1x/control.h index bb48a4381..04dac7d51 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/host1x/control.h | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // (https://github.com/skyline-emu/) |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 | ||
| 4 | // or any later version Refer to the license.txt file included. | ||
| 3 | 5 | ||
| 4 | #pragma once | 6 | #pragma once |
| 5 | 7 | ||
| @@ -7,9 +9,12 @@ | |||
| 7 | 9 | ||
| 8 | namespace Tegra { | 10 | namespace Tegra { |
| 9 | class GPU; | 11 | class GPU; |
| 12 | |||
| 13 | namespace Host1x { | ||
| 14 | |||
| 10 | class Nvdec; | 15 | class Nvdec; |
| 11 | 16 | ||
| 12 | class Host1x { | 17 | class Control { |
| 13 | public: | 18 | public: |
| 14 | enum class Method : u32 { | 19 | enum class Method : u32 { |
| 15 | WaitSyncpt = 0x8, | 20 | WaitSyncpt = 0x8, |
| @@ -17,8 +22,8 @@ public: | |||
| 17 | WaitSyncpt32 = 0x50, | 22 | WaitSyncpt32 = 0x50, |
| 18 | }; | 23 | }; |
| 19 | 24 | ||
| 20 | explicit Host1x(GPU& gpu); | 25 | explicit Control(GPU& gpu); |
| 21 | ~Host1x(); | 26 | ~Control(); |
| 22 | 27 | ||
| 23 | /// Writes the method into the state, Invoke Execute() if encountered | 28 | /// Writes the method into the state, Invoke Execute() if encountered |
| 24 | void ProcessMethod(Method method, u32 argument); | 29 | void ProcessMethod(Method method, u32 argument); |
| @@ -31,4 +36,6 @@ private: | |||
| 31 | GPU& gpu; | 36 | GPU& gpu; |
| 32 | }; | 37 | }; |
| 33 | 38 | ||
| 39 | } // namespace Host1x | ||
| 40 | |||
| 34 | } // namespace Tegra | 41 | } // namespace Tegra |
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h new file mode 100644 index 000000000..2971be286 --- /dev/null +++ b/src/video_core/host1x/host1x.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2022 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | |||
| 13 | namespace Host1x { | ||
| 14 | |||
| 15 | class Host1x { | ||
| 16 | public: | ||
| 17 | Host1x() : syncpoint_manager{} {} | ||
| 18 | |||
| 19 | SyncpointManager& GetSyncpointManager() { | ||
| 20 | return syncpoint_manager; | ||
| 21 | } | ||
| 22 | |||
| 23 | const SyncpointManager& GetSyncpointManager() const { | ||
| 24 | return syncpoint_manager; | ||
| 25 | } | ||
| 26 | |||
| 27 | private: | ||
| 28 | SyncpointManager syncpoint_manager; | ||
| 29 | }; | ||
| 30 | |||
| 31 | } // namespace Host1x | ||
| 32 | |||
| 33 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/host1x/nvdec.cpp index 4fbbe3da6..5f6decd0d 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/host1x/nvdec.cpp | |||
| @@ -2,10 +2,10 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/assert.h" | 4 | #include "common/assert.h" |
| 5 | #include "video_core/command_classes/nvdec.h" | ||
| 6 | #include "video_core/gpu.h" | 5 | #include "video_core/gpu.h" |
| 6 | #include "video_core/host1x/nvdec.h" | ||
| 7 | 7 | ||
| 8 | namespace Tegra { | 8 | namespace Tegra::Host1x { |
| 9 | 9 | ||
| 10 | #define NVDEC_REG_INDEX(field_name) \ | 10 | #define NVDEC_REG_INDEX(field_name) \ |
| 11 | (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) | 11 | (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) |
| @@ -44,4 +44,4 @@ void Nvdec::Execute() { | |||
| 44 | } | 44 | } |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | } // namespace Tegra | 47 | } // namespace Tegra::Host1x |
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/host1x/nvdec.h index 488531fc6..41ba1f7a0 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/host1x/nvdec.h | |||
| @@ -6,11 +6,13 @@ | |||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/command_classes/codecs/codec.h" | 9 | #include "video_core/host1x/codecs/codec.h" |
| 10 | 10 | ||
| 11 | namespace Tegra { | 11 | namespace Tegra { |
| 12 | class GPU; | 12 | class GPU; |
| 13 | 13 | ||
| 14 | namespace Host1x { | ||
| 15 | |||
| 14 | class Nvdec { | 16 | class Nvdec { |
| 15 | public: | 17 | public: |
| 16 | explicit Nvdec(GPU& gpu); | 18 | explicit Nvdec(GPU& gpu); |
| @@ -30,4 +32,7 @@ private: | |||
| 30 | NvdecCommon::NvdecRegisters state; | 32 | NvdecCommon::NvdecRegisters state; |
| 31 | std::unique_ptr<Codec> codec; | 33 | std::unique_ptr<Codec> codec; |
| 32 | }; | 34 | }; |
| 35 | |||
| 36 | } // namespace Host1x | ||
| 37 | |||
| 33 | } // namespace Tegra | 38 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/host1x/nvdec_common.h index 521e5b52b..49d67ebbe 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/host1x/nvdec_common.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include "common/common_funcs.h" | 7 | #include "common/common_funcs.h" |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | 9 | ||
| 10 | namespace Tegra::NvdecCommon { | 10 | namespace Tegra::Host1x::NvdecCommon { |
| 11 | 11 | ||
| 12 | enum class VideoCodec : u64 { | 12 | enum class VideoCodec : u64 { |
| 13 | None = 0x0, | 13 | None = 0x0, |
| @@ -94,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); | |||
| 94 | 94 | ||
| 95 | #undef ASSERT_REG_POSITION | 95 | #undef ASSERT_REG_POSITION |
| 96 | 96 | ||
| 97 | } // namespace Tegra::NvdecCommon | 97 | } // namespace Tegra::Host1x::NvdecCommon |
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp index 67e58046f..8694f77e2 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/host1x/sync_manager.cpp | |||
| @@ -4,8 +4,12 @@ | |||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include "sync_manager.h" | 5 | #include "sync_manager.h" |
| 6 | #include "video_core/gpu.h" | 6 | #include "video_core/gpu.h" |
| 7 | #include "video_core/host1x/host1x.h" | ||
| 8 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 7 | 9 | ||
| 8 | namespace Tegra { | 10 | namespace Tegra { |
| 11 | namespace Host1x { | ||
| 12 | |||
| 9 | SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} | 13 | SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} |
| 10 | SyncptIncrManager::~SyncptIncrManager() = default; | 14 | SyncptIncrManager::~SyncptIncrManager() = default; |
| 11 | 15 | ||
| @@ -36,8 +40,12 @@ void SyncptIncrManager::IncrementAllDone() { | |||
| 36 | if (!increments[done_count].complete) { | 40 | if (!increments[done_count].complete) { |
| 37 | break; | 41 | break; |
| 38 | } | 42 | } |
| 39 | gpu.IncrementSyncPoint(increments[done_count].syncpt_id); | 43 | auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager(); |
| 44 | syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id); | ||
| 45 | syncpoint_manager.IncrementHost(increments[done_count].syncpt_id); | ||
| 40 | } | 46 | } |
| 41 | increments.erase(increments.begin(), increments.begin() + done_count); | 47 | increments.erase(increments.begin(), increments.begin() + done_count); |
| 42 | } | 48 | } |
| 49 | |||
| 50 | } // namespace Host1x | ||
| 43 | } // namespace Tegra | 51 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/host1x/sync_manager.h index 6dfaae080..aba72d5c5 100644 --- a/src/video_core/command_classes/sync_manager.h +++ b/src/video_core/host1x/sync_manager.h | |||
| @@ -8,7 +8,11 @@ | |||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | 9 | ||
| 10 | namespace Tegra { | 10 | namespace Tegra { |
| 11 | |||
| 11 | class GPU; | 12 | class GPU; |
| 13 | |||
| 14 | namespace Host1x { | ||
| 15 | |||
| 12 | struct SyncptIncr { | 16 | struct SyncptIncr { |
| 13 | u32 id; | 17 | u32 id; |
| 14 | u32 class_id; | 18 | u32 class_id; |
| @@ -44,4 +48,6 @@ private: | |||
| 44 | GPU& gpu; | 48 | GPU& gpu; |
| 45 | }; | 49 | }; |
| 46 | 50 | ||
| 51 | } // namespace Host1x | ||
| 52 | |||
| 47 | } // namespace Tegra | 53 | } // namespace Tegra |
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp new file mode 100644 index 000000000..c606b8bd0 --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.cpp | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/host1x/syncpoint_manager.h" | ||
| 6 | |||
| 7 | namespace Tegra { | ||
| 8 | |||
| 9 | namespace Host1x { | ||
| 10 | |||
| 11 | SyncpointManager::ActionHandle SyncpointManager::RegisterAction( | ||
| 12 | std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value, | ||
| 13 | std::function<void(void)>& action) { | ||
| 14 | if (syncpoint.load(std::memory_order_acquire) >= expected_value) { | ||
| 15 | action(); | ||
| 16 | return {}; | ||
| 17 | } | ||
| 18 | |||
| 19 | std::unique_lock<std::mutex> lk(guard); | ||
| 20 | if (syncpoint.load(std::memory_order_relaxed) >= expected_value) { | ||
| 21 | action(); | ||
| 22 | return {}; | ||
| 23 | } | ||
| 24 | auto it = action_storage.begin(); | ||
| 25 | while (it != action_storage.end()) { | ||
| 26 | if (it->expected_value >= expected_value) { | ||
| 27 | break; | ||
| 28 | } | ||
| 29 | ++it; | ||
| 30 | } | ||
| 31 | return action_storage.emplace(it, expected_value, action); | ||
| 32 | } | ||
| 33 | |||
| 34 | void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage, | ||
| 35 | ActionHandle& handle) { | ||
| 36 | std::unique_lock<std::mutex> lk(guard); | ||
| 37 | action_storage.erase(handle); | ||
| 38 | } | ||
| 39 | |||
| 40 | void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) { | ||
| 41 | DeregisterAction(guest_action_storage[syncpoint_id], handle); | ||
| 42 | } | ||
| 43 | |||
| 44 | void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) { | ||
| 45 | DeregisterAction(host_action_storage[syncpoint_id], handle); | ||
| 46 | } | ||
| 47 | |||
| 48 | void SyncpointManager::IncrementGuest(u32 syncpoint_id) { | ||
| 49 | Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]); | ||
| 50 | } | ||
| 51 | |||
| 52 | void SyncpointManager::IncrementHost(u32 syncpoint_id) { | ||
| 53 | Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]); | ||
| 54 | } | ||
| 55 | |||
| 56 | void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) { | ||
| 57 | Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value); | ||
| 58 | } | ||
| 59 | |||
| 60 | void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) { | ||
| 61 | Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value); | ||
| 62 | } | ||
| 63 | |||
| 64 | void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 65 | std::list<RegisteredAction>& action_storage) { | ||
| 66 | auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1}; | ||
| 67 | |||
| 68 | std::unique_lock<std::mutex> lk(guard); | ||
| 69 | auto it = action_storage.begin(); | ||
| 70 | while (it != action_storage.end()) { | ||
| 71 | if (it->expected_value > new_value) { | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | it->action(); | ||
| 75 | it = action_storage.erase(it); | ||
| 76 | } | ||
| 77 | wait_cv.notify_all(); | ||
| 78 | } | ||
| 79 | |||
| 80 | void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 81 | u32 expected_value) { | ||
| 82 | const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; }; | ||
| 83 | if (pred()) { | ||
| 84 | return; | ||
| 85 | } | ||
| 86 | |||
| 87 | std::unique_lock<std::mutex> lk(guard); | ||
| 88 | wait_cv.wait(lk, pred); | ||
| 89 | } | ||
| 90 | |||
| 91 | } // namespace Host1x | ||
| 92 | |||
| 93 | } // namespace Tegra | ||
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h new file mode 100644 index 000000000..0ecc040ab --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.h | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv3 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <array>
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <list>
#include <mutex>
#include <utility>
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | |||
| 18 | namespace Host1x { | ||
| 19 | |||
| 20 | class SyncpointManager { | ||
| 21 | public: | ||
| 22 | u32 GetGuestSyncpointValue(u32 id) { | ||
| 23 | return syncpoints_guest[id].load(std::memory_order_acquire); | ||
| 24 | } | ||
| 25 | |||
| 26 | u32 GetHostSyncpointValue(u32 id) { | ||
| 27 | return syncpoints_host[id].load(std::memory_order_acquire); | ||
| 28 | } | ||
| 29 | |||
| 30 | struct RegisteredAction { | ||
| 31 | RegisteredAction(u32 expected_value_, std::function<void(void)>& action_) | ||
| 32 | : expected_value{expected_value_}, action{action_} {} | ||
| 33 | u32 expected_value; | ||
| 34 | std::function<void(void)> action; | ||
| 35 | }; | ||
| 36 | using ActionHandle = std::list<RegisteredAction>::iterator; | ||
| 37 | |||
| 38 | template <typename Func> | ||
| 39 | ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) { | ||
| 40 | std::function<void(void)> func(action); | ||
| 41 | return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id], | ||
| 42 | expected_value, func); | ||
| 43 | } | ||
| 44 | |||
| 45 | template <typename Func> | ||
| 46 | ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) { | ||
| 47 | std::function<void(void)> func(action); | ||
| 48 | return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id], | ||
| 49 | expected_value, func); | ||
| 50 | } | ||
| 51 | |||
| 52 | void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle); | ||
| 53 | |||
| 54 | void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle); | ||
| 55 | |||
| 56 | void IncrementGuest(u32 syncpoint_id); | ||
| 57 | |||
| 58 | void IncrementHost(u32 syncpoint_id); | ||
| 59 | |||
| 60 | void WaitGuest(u32 syncpoint_id, u32 expected_value); | ||
| 61 | |||
| 62 | void WaitHost(u32 syncpoint_id, u32 expected_value); | ||
| 63 | |||
| 64 | bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) { | ||
| 65 | return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value; | ||
| 66 | } | ||
| 67 | |||
| 68 | bool IsReadyHost(u32 syncpoint_id, u32 expected_value) { | ||
| 69 | return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value; | ||
| 70 | } | ||
| 71 | |||
| 72 | private: | ||
| 73 | void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, | ||
| 74 | std::list<RegisteredAction>& action_storage); | ||
| 75 | |||
| 76 | ActionHandle RegisterAction(std::atomic<u32>& syncpoint, | ||
| 77 | std::list<RegisteredAction>& action_storage, u32 expected_value, | ||
| 78 | std::function<void(void)>& action); | ||
| 79 | |||
| 80 | void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle); | ||
| 81 | |||
| 82 | void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value); | ||
| 83 | |||
| 84 | static constexpr size_t NUM_MAX_SYNCPOINTS = 192; | ||
| 85 | |||
| 86 | std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{}; | ||
| 87 | std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{}; | ||
| 88 | |||
| 89 | std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage; | ||
| 90 | std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage; | ||
| 91 | |||
| 92 | std::mutex guard; | ||
| 93 | std::condition_variable wait_guest_cv; | ||
| 94 | std::condition_variable wait_host_cv; | ||
| 95 | }; | ||
| 96 | |||
| 97 | } // namespace Host1x | ||
| 98 | |||
| 99 | } // namespace Tegra | ||
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/host1x/vic.cpp index 7c17df353..a9422670a 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -18,14 +18,17 @@ extern "C" { | |||
| 18 | #include "common/bit_field.h" | 18 | #include "common/bit_field.h" |
| 19 | #include "common/logging/log.h" | 19 | #include "common/logging/log.h" |
| 20 | 20 | ||
| 21 | #include "video_core/command_classes/nvdec.h" | ||
| 22 | #include "video_core/command_classes/vic.h" | ||
| 23 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 24 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 23 | #include "video_core/host1x/nvdec.h" | ||
| 24 | #include "video_core/host1x/vic.h" | ||
| 25 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 26 | #include "video_core/textures/decoders.h" | 26 | #include "video_core/textures/decoders.h" |
| 27 | 27 | ||
| 28 | namespace Tegra { | 28 | namespace Tegra { |
| 29 | |||
| 30 | namespace Host1x { | ||
| 31 | |||
| 29 | namespace { | 32 | namespace { |
| 30 | enum class VideoPixelFormat : u64_le { | 33 | enum class VideoPixelFormat : u64_le { |
| 31 | RGBA8 = 0x1f, | 34 | RGBA8 = 0x1f, |
| @@ -235,4 +238,6 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 235 | chroma_buffer.size()); | 238 | chroma_buffer.size()); |
| 236 | } | 239 | } |
| 237 | 240 | ||
| 241 | } // namespace Host1x | ||
| 242 | |||
| 238 | } // namespace Tegra | 243 | } // namespace Tegra |
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/host1x/vic.h index 010daa6b6..c51f8af7e 100644 --- a/src/video_core/command_classes/vic.h +++ b/src/video_core/host1x/vic.h | |||
| @@ -11,6 +11,9 @@ struct SwsContext; | |||
| 11 | 11 | ||
| 12 | namespace Tegra { | 12 | namespace Tegra { |
| 13 | class GPU; | 13 | class GPU; |
| 14 | |||
| 15 | namespace Host1x { | ||
| 16 | |||
| 14 | class Nvdec; | 17 | class Nvdec; |
| 15 | union VicConfig; | 18 | union VicConfig; |
| 16 | 19 | ||
| @@ -40,7 +43,7 @@ private: | |||
| 40 | void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); | 43 | void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); |
| 41 | 44 | ||
| 42 | GPU& gpu; | 45 | GPU& gpu; |
| 43 | std::shared_ptr<Tegra::Nvdec> nvdec_processor; | 46 | std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; |
| 44 | 47 | ||
| 45 | /// Avoid reallocation of the following buffers every frame, as their | 48 | /// Avoid reallocation of the following buffers every frame, as their |
| 46 | /// size does not change during a stream | 49 | /// size does not change during a stream |
| @@ -58,4 +61,6 @@ private: | |||
| 58 | s32 scaler_height{}; | 61 | s32 scaler_height{}; |
| 59 | }; | 62 | }; |
| 60 | 63 | ||
| 64 | } // namespace Host1x | ||
| 65 | |||
| 61 | } // namespace Tegra | 66 | } // namespace Tegra |