| | |
|---|---|
| author | 2022-02-07 07:52:04 +0100 |
| committer | 2022-10-06 21:00:53 +0200 |
| commit | a9ca39f8591532ba6d37f7a3e068d5eefe416464 (patch) |
| tree | 0c469d2c853592c7f23095bfea40784fed471ae9 /src |
| parent | Buffer Cache: Basic fixes. (diff) |
NVDRV: Further improvements.
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/nvdrv/core/container.cpp | 8 |
| -rw-r--r-- | src/core/hle/service/nvdrv/core/container.h | 8 |
| -rw-r--r-- | src/core/hle/service/nvdrv/core/nvmap.cpp | 7 |
| -rw-r--r-- | src/core/hle/service/nvdrv/core/nvmap.h | 7 |
| -rw-r--r-- | src/core/hle/service/nvdrv/core/syncpoint_manager.cpp | 112 |
| -rw-r--r-- | src/core/hle/service/nvdrv/core/syncpoint_manager.h | 120 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 18 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 59 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 19 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 2 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 15 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 6 |
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 2 |
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 18 |
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 18 |
| -rw-r--r-- | src/video_core/engines/puller.cpp | 18 |
16 files changed, 278 insertions, 159 deletions
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index fbd66f001..4175d3d9c 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | // Copyright 2021 yuzu emulator team | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | 2 | // (https://github.com/skyline-emu/) |
| 3 | // Licensed under GPLv2 or any later version | 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 |
| 4 | // Refer to the license.txt file included. | 4 | // or any later version Refer to the license.txt file included. |
| 5 | 5 | ||
| 6 | #include "core/hle/service/nvdrv/core/container.h" | 6 | #include "core/hle/service/nvdrv/core/container.h" |
| 7 | #include "core/hle/service/nvdrv/core/nvmap.h" | 7 | #include "core/hle/service/nvdrv/core/nvmap.h" |
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index da75d74ff..e069ade4e 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | // Copyright 2021 yuzu emulator team | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | 2 | // (https://github.com/skyline-emu/) |
| 3 | // Licensed under GPLv2 or any later version | 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 |
| 4 | // Refer to the license.txt file included. | 4 | // or any later version Refer to the license.txt file included. |
| 5 | 5 | ||
| 6 | #pragma once | 6 | #pragma once |
| 7 | 7 | ||
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 9acec7ba6..86d825af9 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // Licensed under GPLv2 or any later version | 2 | // (https://github.com/skyline-emu/) |
| 3 | // Refer to the license.txt file included. | 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 |
| 4 | // or any later version Refer to the license.txt file included. | ||
| 4 | 5 | ||
| 5 | #include "common/alignment.h" | 6 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 7 | #include "common/assert.h" |
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 5acdc961e..4f37dcf43 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // Copyright 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // Licensed under GPLv2 or any later version | 2 | // (https://github.com/skyline-emu/) |
| 3 | // Refer to the license.txt file included. | 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 |
| 4 | // or any later version Refer to the license.txt file included. | ||
| 4 | 5 | ||
| 5 | #pragma once | 6 | #pragma once |
| 6 | 7 | ||
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp index 61e00448c..b34481b48 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.cpp | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // (https://github.com/skyline-emu/) |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 | ||
| 4 | // or any later version Refer to the license.txt file included. | ||
| 3 | 5 | ||
| 4 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 5 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 7 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| @@ -7,32 +9,108 @@ | |||
| 7 | 9 | ||
| 8 | namespace Service::Nvidia::NvCore { | 10 | namespace Service::Nvidia::NvCore { |
| 9 | 11 | ||
| 10 | SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {} | 12 | SyncpointManager::SyncpointManager(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} { |
| 13 | constexpr u32 VBlank0SyncpointId{26}; | ||
| 14 | constexpr u32 VBlank1SyncpointId{27}; | ||
| 15 | |||
| 16 | // Reserve both vblank syncpoints as client managed as they use Continuous Mode | ||
| 17 | // Refer to section 14.3.5.3 of the TRM for more information on Continuous Mode | ||
| 18 | // https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/drm/dc.c#L660 | ||
| 19 | ReserveSyncpoint(VBlank0SyncpointId, true); | ||
| 20 | ReserveSyncpoint(VBlank1SyncpointId, true); | ||
| 21 | |||
| 22 | for (u32 syncpointId : channel_syncpoints) { | ||
| 23 | if (syncpointId) { | ||
| 24 | ReserveSyncpoint(syncpointId, false); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | } | ||
| 11 | 28 | ||
| 12 | SyncpointManager::~SyncpointManager() = default; | 29 | SyncpointManager::~SyncpointManager() = default; |
| 13 | 30 | ||
| 14 | u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { | 31 | u32 SyncpointManager::ReserveSyncpoint(u32 id, bool clientManaged) { |
| 15 | syncpoints[syncpoint_id].min = host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); | 32 | if (syncpoints.at(id).reserved) { |
| 16 | return GetSyncpointMin(syncpoint_id); | 33 | UNREACHABLE_MSG("Requested syncpoint is in use"); |
| 34 | return 0; | ||
| 35 | } | ||
| 36 | |||
| 37 | syncpoints.at(id).reserved = true; | ||
| 38 | syncpoints.at(id).interfaceManaged = clientManaged; | ||
| 39 | |||
| 40 | return id; | ||
| 17 | } | 41 | } |
| 18 | 42 | ||
| 19 | u32 SyncpointManager::AllocateSyncpoint() { | 43 | u32 SyncpointManager::FindFreeSyncpoint() { |
| 20 | for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { | 44 | for (u32 i{1}; i < syncpoints.size(); i++) { |
| 21 | if (!syncpoints[syncpoint_id].is_allocated) { | 45 | if (!syncpoints[i].reserved) { |
| 22 | syncpoints[syncpoint_id].is_allocated = true; | 46 | return i; |
| 23 | return syncpoint_id; | ||
| 24 | } | 47 | } |
| 25 | } | 48 | } |
| 26 | ASSERT_MSG(false, "No more available syncpoints!"); | 49 | UNREACHABLE_MSG("Failed to find a free syncpoint!"); |
| 27 | return {}; | 50 | return 0; |
| 51 | } | ||
| 52 | |||
| 53 | u32 SyncpointManager::AllocateSyncpoint(bool clientManaged) { | ||
| 54 | std::lock_guard lock(reservation_lock); | ||
| 55 | return ReserveSyncpoint(FindFreeSyncpoint(), clientManaged); | ||
| 56 | } | ||
| 57 | |||
| 58 | bool SyncpointManager::IsSyncpointAllocated(u32 id) { | ||
| 59 | return (id <= SyncpointCount) && syncpoints[id].reserved; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool SyncpointManager::HasSyncpointExpired(u32 id, u32 threshold) { | ||
| 63 | const SyncpointInfo& syncpoint{syncpoints.at(id)}; | ||
| 64 | |||
| 65 | if (!syncpoint.reserved) { | ||
| 66 | UNREACHABLE(); | ||
| 67 | return 0; | ||
| 68 | } | ||
| 69 | |||
| 70 | // If the interface manages counters then we don't keep track of the maximum value as it handles | ||
| 71 | // sanity checking the values then | ||
| 72 | if (syncpoint.interfaceManaged) { | ||
| 73 | return static_cast<s32>(syncpoint.counterMin - threshold) >= 0; | ||
| 74 | } else { | ||
| 75 | return (syncpoint.counterMax - threshold) >= (syncpoint.counterMin - threshold); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | u32 SyncpointManager::IncrementSyncpointMaxExt(u32 id, u32 amount) { | ||
| 80 | if (!syncpoints.at(id).reserved) { | ||
| 81 | UNREACHABLE(); | ||
| 82 | return 0; | ||
| 83 | } | ||
| 84 | |||
| 85 | return syncpoints.at(id).counterMax += amount; | ||
| 86 | } | ||
| 87 | |||
| 88 | u32 SyncpointManager::ReadSyncpointMinValue(u32 id) { | ||
| 89 | if (!syncpoints.at(id).reserved) { | ||
| 90 | UNREACHABLE(); | ||
| 91 | return 0; | ||
| 92 | } | ||
| 93 | |||
| 94 | return syncpoints.at(id).counterMin; | ||
| 95 | } | ||
| 96 | |||
| 97 | u32 SyncpointManager::UpdateMin(u32 id) { | ||
| 98 | if (!syncpoints.at(id).reserved) { | ||
| 99 | UNREACHABLE(); | ||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | syncpoints.at(id).counterMin = host1x.GetSyncpointManager().GetHostSyncpointValue(id); | ||
| 104 | return syncpoints.at(id).counterMin; | ||
| 28 | } | 105 | } |
| 29 | 106 | ||
| 30 | u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { | 107 | NvFence SyncpointManager::GetSyncpointFence(u32 id) { |
| 31 | for (u32 index = 0; index < value; ++index) { | 108 | if (!syncpoints.at(id).reserved) { |
| 32 | syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); | 109 | UNREACHABLE(); |
| 110 | return NvFence{}; | ||
| 33 | } | 111 | } |
| 34 | 112 | ||
| 35 | return GetSyncpointMax(syncpoint_id); | 113 | return {.id = static_cast<s32>(id), .value = syncpoints.at(id).counterMax}; |
| 36 | } | 114 | } |
| 37 | 115 | ||
| 38 | } // namespace Service::Nvidia::NvCore | 116 | } // namespace Service::Nvidia::NvCore |
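
The rewritten expiry check above relies on unsigned wrap-around arithmetic rather than a plain comparison of the raw counters. A small, self-contained illustration of both branches of `HasSyncpointExpired`, with assumed example values (this sketches only the comparison, not the yuzu class):

```cpp
#include <cstdint>
#include <iostream>

// Client-managed syncpoints: only the cached minimum is tracked, so the check
// is simply "has the counter passed the threshold", evaluated modulo 2^32.
bool ExpiredClientManaged(std::uint32_t counter_min, std::uint32_t threshold) {
    return static_cast<std::int32_t>(counter_min - threshold) >= 0;
}

// Everything else: the fence is still pending only while the threshold sits in
// the (min, max] window; comparing both counters relative to the threshold
// keeps the test correct across 32-bit overflow.
bool ExpiredHostManaged(std::uint32_t counter_min, std::uint32_t counter_max,
                        std::uint32_t threshold) {
    return (counter_max - threshold) >= (counter_min - threshold);
}

int main() {
    // Near the wrap-around point a naive `counter_min >= threshold` would say
    // "not expired", but the subtraction-based form reports it as signalled.
    constexpr std::uint32_t threshold = 0xFFFFFFF0u;
    std::cout << ExpiredClientManaged(0x00000005u, threshold) << '\n';            // 1
    std::cout << ExpiredHostManaged(0x00000005u, 0x00000010u, threshold) << '\n'; // 1
}
```
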
diff --git a/src/core/hle/service/nvdrv/core/syncpoint_manager.h b/src/core/hle/service/nvdrv/core/syncpoint_manager.h index f332edc6e..bfc8ba84b 100644 --- a/src/core/hle/service/nvdrv/core/syncpoint_manager.h +++ b/src/core/hle/service/nvdrv/core/syncpoint_manager.h | |||
| @@ -1,10 +1,13 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2022 yuzu emulator team and Skyline Team and Contributors |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // (https://github.com/skyline-emu/) |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 | ||
| 4 | // or any later version Refer to the license.txt file included. | ||
| 3 | 5 | ||
| 4 | #pragma once | 6 | #pragma once |
| 5 | 7 | ||
| 6 | #include <array> | 8 | #include <array> |
| 7 | #include <atomic> | 9 | #include <atomic> |
| 10 | #include <mutex> | ||
| 8 | 11 | ||
| 9 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvdrv/nvdata.h" | 13 | #include "core/hle/service/nvdrv/nvdata.h" |
| @@ -19,68 +22,111 @@ class Host1x; | |||
| 19 | 22 | ||
| 20 | namespace Service::Nvidia::NvCore { | 23 | namespace Service::Nvidia::NvCore { |
| 21 | 24 | ||
| 25 | enum class ChannelType : u32 { | ||
| 26 | MsEnc = 0, | ||
| 27 | VIC = 1, | ||
| 28 | GPU = 2, | ||
| 29 | NvDec = 3, | ||
| 30 | Display = 4, | ||
| 31 | NvJpg = 5, | ||
| 32 | TSec = 6, | ||
| 33 | Max = 7 | ||
| 34 | }; | ||
| 35 | |||
| 36 | /** | ||
| 37 | * @brief SyncpointManager handles allocating and accessing host1x syncpoints, these are cached | ||
| 38 | * versions of the HW syncpoints which are intermittently synced | ||
| 39 | * @note Refer to Chapter 14 of the Tegra X1 TRM for an exhaustive overview of them | ||
| 40 | * @url https://http.download.nvidia.com/tegra-public-appnotes/host1x.html | ||
| 41 | * @url | ||
| 42 | * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/jetson-tx1/drivers/video/tegra/host/nvhost_syncpt.c | ||
| 43 | */ | ||
| 22 | class SyncpointManager final { | 44 | class SyncpointManager final { |
| 23 | public: | 45 | public: |
| 24 | explicit SyncpointManager(Tegra::Host1x::Host1x& host1x); | 46 | explicit SyncpointManager(Tegra::Host1x::Host1x& host1x); |
| 25 | ~SyncpointManager(); | 47 | ~SyncpointManager(); |
| 26 | 48 | ||
| 27 | /** | 49 | /** |
| 28 | * Returns true if the specified syncpoint is expired for the given value. | 50 | * @brief Checks if the given syncpoint is both allocated and below the number of HW syncpoints |
| 29 | * @param syncpoint_id Syncpoint ID to check. | ||
| 30 | * @param value Value to check against the specified syncpoint. | ||
| 31 | * @returns True if the specified syncpoint is expired for the given value, otherwise False. | ||
| 32 | */ | 51 | */ |
| 33 | bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { | 52 | bool IsSyncpointAllocated(u32 id); |
| 34 | return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); | ||
| 35 | } | ||
| 36 | 53 | ||
| 37 | /** | 54 | /** |
| 38 | * Gets the lower bound for the specified syncpoint. | 55 | * @brief Finds a free syncpoint and reserves it |
| 39 | * @param syncpoint_id Syncpoint ID to get the lower bound for. | 56 | * @return The ID of the reserved syncpoint |
| 40 | * @returns The lower bound for the specified syncpoint. | ||
| 41 | */ | 57 | */ |
| 42 | u32 GetSyncpointMin(u32 syncpoint_id) const { | 58 | u32 AllocateSyncpoint(bool clientManaged); |
| 43 | return syncpoints.at(syncpoint_id).min.load(std::memory_order_relaxed); | ||
| 44 | } | ||
| 45 | 59 | ||
| 46 | /** | 60 | /** |
| 47 | * Gets the uper bound for the specified syncpoint. | 61 | * @url |
| 48 | * @param syncpoint_id Syncpoint ID to get the upper bound for. | 62 | * https://github.com/Jetson-TX1-AndroidTV/android_kernel_jetson_tx1_hdmi_primary/blob/8f74a72394efb871cb3f886a3de2998cd7ff2990/drivers/gpu/host1x/syncpt.c#L259 |
| 49 | * @returns The upper bound for the specified syncpoint. | ||
| 50 | */ | 63 | */ |
| 51 | u32 GetSyncpointMax(u32 syncpoint_id) const { | 64 | bool HasSyncpointExpired(u32 id, u32 threshold); |
| 52 | return syncpoints.at(syncpoint_id).max.load(std::memory_order_relaxed); | 65 | |
| 66 | bool IsFenceSignalled(NvFence fence) { | ||
| 67 | return HasSyncpointExpired(fence.id, fence.value); | ||
| 53 | } | 68 | } |
| 54 | 69 | ||
| 55 | /** | 70 | /** |
| 56 | * Refreshes the minimum value for the specified syncpoint. | 71 | * @brief Atomically increments the maximum value of a syncpoint by the given amount |
| 57 | * @param syncpoint_id Syncpoint ID to be refreshed. | 72 | * @return The new max value of the syncpoint |
| 58 | * @returns The new syncpoint minimum value. | ||
| 59 | */ | 73 | */ |
| 60 | u32 RefreshSyncpoint(u32 syncpoint_id); | 74 | u32 IncrementSyncpointMaxExt(u32 id, u32 amount); |
| 61 | 75 | ||
| 62 | /** | 76 | /** |
| 63 | * Allocates a new syncoint. | 77 | * @return The minimum value of the syncpoint |
| 64 | * @returns The syncpoint ID for the newly allocated syncpoint. | ||
| 65 | */ | 78 | */ |
| 66 | u32 AllocateSyncpoint(); | 79 | u32 ReadSyncpointMinValue(u32 id); |
| 67 | 80 | ||
| 68 | /** | 81 | /** |
| 69 | * Increases the maximum value for the specified syncpoint. | 82 | * @brief Synchronises the minimum value of the syncpoint to with the GPU |
| 70 | * @param syncpoint_id Syncpoint ID to be increased. | 83 | * @return The new minimum value of the syncpoint |
| 71 | * @param value Value to increase the specified syncpoint by. | ||
| 72 | * @returns The new syncpoint maximum value. | ||
| 73 | */ | 84 | */ |
| 74 | u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); | 85 | u32 UpdateMin(u32 id); |
| 86 | |||
| 87 | /** | ||
| 88 | * @return A fence that will be signalled once this syncpoint hits its maximum value | ||
| 89 | */ | ||
| 90 | NvFence GetSyncpointFence(u32 id); | ||
| 91 | |||
| 92 | static constexpr std::array<u32, static_cast<u32>(ChannelType::Max)> channel_syncpoints{ | ||
| 93 | 0x0, // `MsEnc` is unimplemented | ||
| 94 | 0xC, // `VIC` | ||
| 95 | 0x0, // `GPU` syncpoints are allocated per-channel instead | ||
| 96 | 0x36, // `NvDec` | ||
| 97 | 0x0, // `Display` is unimplemented | ||
| 98 | 0x37, // `NvJpg` | ||
| 99 | 0x0, // `TSec` is unimplemented | ||
| 100 | }; //!< Maps each channel ID to a constant syncpoint | ||
| 75 | 101 | ||
| 76 | private: | 102 | private: |
| 77 | struct Syncpoint { | 103 | /** |
| 78 | std::atomic<u32> min; | 104 | * @note reservation_lock should be locked when calling this |
| 79 | std::atomic<u32> max; | 105 | */ |
| 80 | std::atomic<bool> is_allocated; | 106 | u32 ReserveSyncpoint(u32 id, bool clientManaged); |
| 107 | |||
| 108 | /** | ||
| 109 | * @return The ID of the first free syncpoint | ||
| 110 | */ | ||
| 111 | u32 FindFreeSyncpoint(); | ||
| 112 | |||
| 113 | struct SyncpointInfo { | ||
| 114 | std::atomic<u32> counterMin; //!< The least value the syncpoint can be (The value it was | ||
| 115 | //!< when it was last synchronized with host1x) | ||
| 116 | std::atomic<u32> counterMax; //!< The maximum value the syncpoint can reach according to the | ||
| 117 | //!< current usage | ||
| 118 | bool interfaceManaged; //!< If the syncpoint is managed by a host1x client interface, a | ||
| 119 | //!< client interface is a HW block that can handle host1x | ||
| 120 | //!< transactions on behalf of a host1x client (Which would otherwise | ||
| 121 | //!< need to be manually synced using PIO which is synchronous and | ||
| 122 | //!< requires direct cooperation of the CPU) | ||
| 123 | bool reserved; //!< If the syncpoint is reserved or not, not to be confused with a reserved | ||
| 124 | //!< value | ||
| 81 | }; | 125 | }; |
| 82 | 126 | ||
| 83 | std::array<Syncpoint, MaxSyncPoints> syncpoints{}; | 127 | constexpr static std::size_t SyncpointCount{192}; |
| 128 | std::array<SyncpointInfo, SyncpointCount> syncpoints{}; | ||
| 129 | std::mutex reservation_lock; | ||
| 84 | 130 | ||
| 85 | Tegra::Host1x::Host1x& host1x; | 131 | Tegra::Host1x::Host1x& host1x; |
| 86 | }; | 132 | }; |
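
For orientation, a minimal model of the reservation flow this header and its implementation describe: a fixed-size table of `SyncpointInfo` entries and a mutex-guarded `AllocateSyncpoint`. Names mirror the commit, but this is an illustrative sketch only (the real code reports errors through `UNREACHABLE_MSG` and returns 0 instead of throwing):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <stdexcept>

struct SyncpointInfo {
    std::uint32_t counter_min{};  // value at the last sync with host1x
    std::uint32_t counter_max{};  // highest value queued so far
    bool interface_managed{};     // managed by a host1x client interface
    bool reserved{};              // slot is in use
};

class SyncpointTable {
public:
    std::uint32_t AllocateSyncpoint(bool client_managed) {
        std::scoped_lock lock{reservation_lock};
        return Reserve(FindFree(), client_managed);
    }

private:
    std::uint32_t Reserve(std::uint32_t id, bool client_managed) {
        if (syncpoints.at(id).reserved) {
            throw std::runtime_error{"Requested syncpoint is in use"};
        }
        syncpoints.at(id).reserved = true;
        syncpoints.at(id).interface_managed = client_managed;
        return id;
    }

    std::uint32_t FindFree() const {
        // Syncpoint 0 is never handed out, matching the loop starting at 1.
        for (std::uint32_t i = 1; i < syncpoints.size(); ++i) {
            if (!syncpoints[i].reserved) {
                return i;
            }
        }
        throw std::runtime_error{"Failed to find a free syncpoint"};
    }

    static constexpr std::size_t SyncpointCount = 192;
    std::array<SyncpointInfo, SyncpointCount> syncpoints{};
    std::mutex reservation_lock;
};
```
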
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 076edb02f..a84e4d425 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -112,17 +112,23 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector | |||
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | if (params.fence.value == 0) { | 114 | if (params.fence.value == 0) { |
| 115 | params.value.raw = syncpoint_manager.GetSyncpointMin(fence_id); | 115 | if (!syncpoint_manager.IsSyncpointAllocated(params.fence.id)) { |
| 116 | LOG_WARNING(Service_NVDRV, | ||
| 117 | "Unallocated syncpt_id={}, threshold={}, timeout={}, is_allocation={}", | ||
| 118 | params.fence.id, params.fence.value, params.timeout, is_allocation); | ||
| 119 | } else { | ||
| 120 | params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id); | ||
| 121 | } | ||
| 116 | return NvResult::Success; | 122 | return NvResult::Success; |
| 117 | } | 123 | } |
| 118 | 124 | ||
| 119 | if (syncpoint_manager.IsSyncpointExpired(fence_id, params.fence.value)) { | 125 | if (syncpoint_manager.IsFenceSignalled(params.fence)) { |
| 120 | params.value.raw = syncpoint_manager.GetSyncpointMin(fence_id); | 126 | params.value.raw = syncpoint_manager.ReadSyncpointMinValue(fence_id); |
| 121 | return NvResult::Success; | 127 | return NvResult::Success; |
| 122 | } | 128 | } |
| 123 | 129 | ||
| 124 | if (const auto new_value = syncpoint_manager.RefreshSyncpoint(fence_id); | 130 | if (const auto new_value = syncpoint_manager.UpdateMin(fence_id); |
| 125 | syncpoint_manager.IsSyncpointExpired(fence_id, params.fence.value)) { | 131 | syncpoint_manager.IsFenceSignalled(params.fence)) { |
| 126 | params.value.raw = new_value; | 132 | params.value.raw = new_value; |
| 127 | return NvResult::Success; | 133 | return NvResult::Success; |
| 128 | } | 134 | } |
| @@ -296,7 +302,7 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::v | |||
| 296 | EventState::Waiting) { | 302 | EventState::Waiting) { |
| 297 | auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager(); | 303 | auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager(); |
| 298 | host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle); | 304 | host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle); |
| 299 | syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt); | 305 | syncpoint_manager.UpdateMin(event.assigned_syncpt); |
| 300 | event.wait_handle = {}; | 306 | event.wait_handle = {}; |
| 301 | } | 307 | } |
| 302 | event.fails++; | 308 | event.fails++; |
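
A condensed model of the fast path `IocCtrlEventWait` now takes: a zero-valued fence just reports the cached minimum, an already-signalled fence returns immediately, and otherwise the cached minimum is refreshed once via `UpdateMin` before the caller falls back to a real wait. The types below are simplified stand-ins for the yuzu classes, and the unallocated-syncpoint logging branch is omitted:

```cpp
#include <cstdint>
#include <optional>

struct Fence {
    std::uint32_t id;
    std::uint32_t value;
};

struct SyncpointView {
    std::uint32_t min;
    std::uint32_t max;
    bool Signalled(std::uint32_t threshold) const {
        return (max - threshold) >= (min - threshold);
    }
};

// Returns the value to report immediately, or std::nullopt when the caller has
// to register a host1x wait (or start timeout handling) for the fence.
std::optional<std::uint32_t> TryCompleteWait(SyncpointView& sp, const Fence& fence,
                                             std::uint32_t fresh_host_value) {
    if (fence.value == 0) {
        return sp.min;  // threshold of zero: just report the current counter
    }
    if (sp.Signalled(fence.value)) {
        return sp.min;
    }
    sp.min = fresh_host_value;  // UpdateMin(): re-sync the cached minimum once
    if (sp.Signalled(fence.value)) {
        return sp.min;
    }
    return std::nullopt;
}
```
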
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 3f981af5a..c2cc09993 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -31,9 +31,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_, | |||
| 31 | : nvdevice{system_}, events_interface{events_interface_}, core{core_}, | 31 | : nvdevice{system_}, events_interface{events_interface_}, core{core_}, |
| 32 | syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()}, | 32 | syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()}, |
| 33 | channel_state{system.GPU().AllocateChannel()} { | 33 | channel_state{system.GPU().AllocateChannel()} { |
| 34 | channel_fence.id = syncpoint_manager.AllocateSyncpoint(); | 34 | channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false); |
| 35 | channel_fence.value = | ||
| 36 | system_.Host1x().GetSyncpointManager().GetGuestSyncpointValue(channel_fence.id); | ||
| 37 | sm_exception_breakpoint_int_report_event = | 35 | sm_exception_breakpoint_int_report_event = |
| 38 | events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt"); | 36 | events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt"); |
| 39 | sm_exception_breakpoint_pause_report_event = | 37 | sm_exception_breakpoint_pause_report_event = |
| @@ -191,10 +189,8 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8 | |||
| 191 | } | 189 | } |
| 192 | 190 | ||
| 193 | system.GPU().InitChannel(*channel_state); | 191 | system.GPU().InitChannel(*channel_state); |
| 194 | channel_fence.value = | ||
| 195 | system.Host1x().GetSyncpointManager().GetGuestSyncpointValue(channel_fence.id); | ||
| 196 | 192 | ||
| 197 | params.fence_out = channel_fence; | 193 | params.fence_out = syncpoint_manager.GetSyncpointFence(channel_syncpoint); |
| 198 | 194 | ||
| 199 | std::memcpy(output.data(), &params, output.size()); | 195 | std::memcpy(output.data(), &params, output.size()); |
| 200 | return NvResult::Success; | 196 | return NvResult::Success; |
| @@ -222,14 +218,13 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { | |||
| 222 | }; | 218 | }; |
| 223 | } | 219 | } |
| 224 | 220 | ||
| 225 | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence, | 221 | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) { |
| 226 | u32 add_increment) { | ||
| 227 | std::vector<Tegra::CommandHeader> result{ | 222 | std::vector<Tegra::CommandHeader> result{ |
| 228 | Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, | 223 | Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, |
| 229 | Tegra::SubmissionMode::Increasing), | 224 | Tegra::SubmissionMode::Increasing), |
| 230 | {}}; | 225 | {}}; |
| 231 | 226 | ||
| 232 | for (u32 count = 0; count < add_increment; ++count) { | 227 | for (u32 count = 0; count < 2; ++count) { |
| 233 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, | 228 | result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, |
| 234 | Tegra::SubmissionMode::Increasing)); | 229 | Tegra::SubmissionMode::Increasing)); |
| 235 | result.emplace_back( | 230 | result.emplace_back( |
| @@ -239,14 +234,12 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence | |||
| 239 | return result; | 234 | return result; |
| 240 | } | 235 | } |
| 241 | 236 | ||
| 242 | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence, | 237 | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) { |
| 243 | u32 add_increment) { | ||
| 244 | std::vector<Tegra::CommandHeader> result{ | 238 | std::vector<Tegra::CommandHeader> result{ |
| 245 | Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, | 239 | Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, |
| 246 | Tegra::SubmissionMode::Increasing), | 240 | Tegra::SubmissionMode::Increasing), |
| 247 | {}}; | 241 | {}}; |
| 248 | const std::vector<Tegra::CommandHeader> increment{ | 242 | const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)}; |
| 249 | BuildIncrementCommandList(fence, add_increment)}; | ||
| 250 | 243 | ||
| 251 | result.insert(result.end(), increment.begin(), increment.end()); | 244 | result.insert(result.end(), increment.begin(), increment.end()); |
| 252 | 245 | ||
| @@ -260,35 +253,41 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8> | |||
| 260 | 253 | ||
| 261 | auto& gpu = system.GPU(); | 254 | auto& gpu = system.GPU(); |
| 262 | 255 | ||
| 256 | std::scoped_lock lock(channel_mutex); | ||
| 257 | |||
| 263 | const auto bind_id = channel_state->bind_id; | 258 | const auto bind_id = channel_state->bind_id; |
| 264 | 259 | ||
| 265 | params.fence_out.id = channel_fence.id; | 260 | auto& flags = params.flags; |
| 266 | 261 | ||
| 267 | if (params.flags.add_wait.Value() && | 262 | if (flags.fence_wait.Value()) { |
| 268 | !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { | 263 | if (flags.increment_value.Value()) { |
| 269 | gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); | 264 | return NvResult::BadParameter; |
| 270 | } | 265 | } |
| 271 | 266 | ||
| 272 | if (params.flags.add_increment.Value() || params.flags.increment.Value()) { | 267 | if (!syncpoint_manager.IsFenceSignalled(params.fence)) { |
| 273 | const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; | 268 | gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence)}); |
| 274 | params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( | 269 | } |
| 275 | params.fence_out.id, params.AddIncrementValue() + increment_value); | ||
| 276 | } else { | ||
| 277 | params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); | ||
| 278 | } | 270 | } |
| 279 | 271 | ||
| 280 | gpu.PushGPUEntries(bind_id, std::move(entries)); | 272 | gpu.PushGPUEntries(bind_id, std::move(entries)); |
| 273 | params.fence.id = channel_syncpoint; | ||
| 274 | |||
| 275 | u32 increment{(flags.fence_increment.Value() != 0 ? 2 : 0) + | ||
| 276 | (flags.increment_value.Value() != 0 ? params.fence.value : 0)}; | ||
| 277 | params.fence.value = syncpoint_manager.IncrementSyncpointMaxExt(channel_syncpoint, increment); | ||
| 281 | 278 | ||
| 282 | if (params.flags.add_increment.Value()) { | 279 | if (flags.fence_increment.Value()) { |
| 283 | if (params.flags.suppress_wfi) { | 280 | if (flags.suppress_wfi.Value()) { |
| 284 | gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList( | 281 | gpu.PushGPUEntries(bind_id, |
| 285 | params.fence_out, params.AddIncrementValue())}); | 282 | Tegra::CommandList{BuildIncrementCommandList(params.fence)}); |
| 286 | } else { | 283 | } else { |
| 287 | gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList( | 284 | gpu.PushGPUEntries(bind_id, |
| 288 | params.fence_out, params.AddIncrementValue())}); | 285 | Tegra::CommandList{BuildIncrementWithWfiCommandList(params.fence)}); |
| 289 | } | 286 | } |
| 290 | } | 287 | } |
| 291 | 288 | ||
| 289 | flags.raw = 0; | ||
| 290 | |||
| 292 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | 291 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |
| 293 | return NvResult::Success; | 292 | return NvResult::Success; |
| 294 | } | 293 | } |
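
The submit path now derives the syncpoint bump directly from the ioctl flags: `fence_wait` combined with `increment_value` is rejected, each `fence_increment` queues two increments (one per entry in `BuildIncrementCommandList`), and `increment_value` adds the caller-supplied fence value on top. A self-contained sketch of just that computation, with assumed stand-in types:

```cpp
#include <cstdint>

struct SubmitFlags {
    bool fence_wait;       // wait on the passed-in fence before the entries
    bool fence_increment;  // append a syncpoint increment after the entries
    bool increment_value;  // treat fence.value as an extra increment amount
    bool suppress_wfi;     // skip the wait-for-idle before the increment
};

// Returns the amount added to the channel syncpoint's maximum, or -1 for the
// invalid fence_wait + increment_value combination (NvResult::BadParameter).
std::int64_t IncrementAmount(const SubmitFlags& flags, std::uint32_t fence_value) {
    if (flags.fence_wait && flags.increment_value) {
        return -1;
    }
    return (flags.fence_increment ? 2 : 0) + (flags.increment_value ? fence_value : 0);
}
```
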
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 3a65ed06d..1e4ecd55b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -163,17 +163,13 @@ private: | |||
| 163 | u32_le num_entries{}; // number of fence objects being submitted | 163 | u32_le num_entries{}; // number of fence objects being submitted |
| 164 | union { | 164 | union { |
| 165 | u32_le raw; | 165 | u32_le raw; |
| 166 | BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list | 166 | BitField<0, 1, u32_le> fence_wait; // append a wait sync_point to the list |
| 167 | BitField<1, 1, u32_le> add_increment; // append an increment to the list | 167 | BitField<1, 1, u32_le> fence_increment; // append an increment to the list |
| 168 | BitField<2, 1, u32_le> new_hw_format; // mostly ignored | 168 | BitField<2, 1, u32_le> new_hw_format; // mostly ignored |
| 169 | BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt | 169 | BitField<4, 1, u32_le> suppress_wfi; // suppress wait for interrupt |
| 170 | BitField<8, 1, u32_le> increment; // increment the returned fence | 170 | BitField<8, 1, u32_le> increment_value; // increment the returned fence |
| 171 | } flags; | 171 | } flags; |
| 172 | NvFence fence_out{}; // returned new fence object for others to wait on | 172 | NvFence fence{}; // returned new fence object for others to wait on |
| 173 | |||
| 174 | u32 AddIncrementValue() const { | ||
| 175 | return flags.add_increment.Value() << 1; | ||
| 176 | } | ||
| 177 | }; | 173 | }; |
| 178 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(NvFence), | 174 | static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(NvFence), |
| 179 | "IoctlSubmitGpfifo is incorrect size"); | 175 | "IoctlSubmitGpfifo is incorrect size"); |
| @@ -213,7 +209,8 @@ private: | |||
| 213 | NvCore::SyncpointManager& syncpoint_manager; | 209 | NvCore::SyncpointManager& syncpoint_manager; |
| 214 | NvCore::NvMap& nvmap; | 210 | NvCore::NvMap& nvmap; |
| 215 | std::shared_ptr<Tegra::Control::ChannelState> channel_state; | 211 | std::shared_ptr<Tegra::Control::ChannelState> channel_state; |
| 216 | NvFence channel_fence; | 212 | u32 channel_syncpoint; |
| 213 | std::mutex channel_mutex; | ||
| 217 | 214 | ||
| 218 | // Events | 215 | // Events |
| 219 | Kernel::KEvent* sm_exception_breakpoint_int_report_event; | 216 | Kernel::KEvent* sm_exception_breakpoint_int_report_event; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 00947ea19..5e3820085 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -13,7 +13,7 @@ namespace Service::Nvidia::Devices { | |||
| 13 | u32 nvhost_nvdec::next_id{}; | 13 | u32 nvhost_nvdec::next_id{}; |
| 14 | 14 | ||
| 15 | nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core) | 15 | nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core) |
| 16 | : nvhost_nvdec_common{system_, core} {} | 16 | : nvhost_nvdec_common{system_, core, NvCore::ChannelType::NvDec} {} |
| 17 | nvhost_nvdec::~nvhost_nvdec() = default; | 17 | nvhost_nvdec::~nvhost_nvdec() = default; |
| 18 | 18 | ||
| 19 | NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, | 19 | NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index b17589aa3..008092dbb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -48,9 +48,10 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s | |||
| 48 | 48 | ||
| 49 | std::unordered_map<DeviceFD, u32> nvhost_nvdec_common::fd_to_id{}; | 49 | std::unordered_map<DeviceFD, u32> nvhost_nvdec_common::fd_to_id{}; |
| 50 | 50 | ||
| 51 | nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_) | 51 | nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system_, NvCore::Container& core_, |
| 52 | : nvdevice{system_}, core{core_}, | 52 | NvCore::ChannelType channel_type_) |
| 53 | syncpoint_manager{core.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {} | 53 | : nvdevice{system_}, core{core_}, syncpoint_manager{core.GetSyncpointManager()}, |
| 54 | nvmap{core.GetNvMapFile()}, channel_type{channel_type_} {} | ||
| 54 | nvhost_nvdec_common::~nvhost_nvdec_common() = default; | 55 | nvhost_nvdec_common::~nvhost_nvdec_common() = default; |
| 55 | 56 | ||
| 56 | NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { | 57 | NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { |
| @@ -88,7 +89,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector<u8>& input, | |||
| 88 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | 89 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { |
| 89 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; | 90 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; |
| 90 | fence_thresholds[i] = | 91 | fence_thresholds[i] = |
| 91 | syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments); | 92 | syncpoint_manager.IncrementSyncpointMaxExt(syncpt_incr.id, syncpt_incr.increments); |
| 92 | } | 93 | } |
| 93 | } | 94 | } |
| 94 | for (const auto& cmd_buffer : command_buffers) { | 95 | for (const auto& cmd_buffer : command_buffers) { |
| @@ -116,10 +117,8 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve | |||
| 116 | std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); | 117 | std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); |
| 117 | LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); | 118 | LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); |
| 118 | 119 | ||
| 119 | if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) { | 120 | const u32 id{NvCore::SyncpointManager::channel_syncpoints[static_cast<u32>(channel_type)]}; |
| 120 | device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint(); | 121 | params.value = id; |
| 121 | } | ||
| 122 | params.value = device_syncpoints[params.param]; | ||
| 123 | std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); | 122 | std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); |
| 124 | 123 | ||
| 125 | return NvResult::Success; | 124 | return NvResult::Success; |
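
`GetSyncpoint` now resolves a multimedia channel to its fixed host1x syncpoint through the `channel_syncpoints` table instead of allocating one on demand. The lookup, reduced to a standalone constexpr sketch using the constants from the new header:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

enum class ChannelType : std::uint32_t {
    MsEnc = 0, VIC = 1, GPU = 2, NvDec = 3, Display = 4, NvJpg = 5, TSec = 6, Max = 7,
};

constexpr std::array<std::uint32_t, static_cast<std::size_t>(ChannelType::Max)>
    channel_syncpoints{0x0, 0xC, 0x0, 0x36, 0x0, 0x37, 0x0};

constexpr std::uint32_t FixedSyncpointFor(ChannelType type) {
    return channel_syncpoints[static_cast<std::size_t>(type)];
}

static_assert(FixedSyncpointFor(ChannelType::NvDec) == 0x36);
static_assert(FixedSyncpointFor(ChannelType::VIC) == 0xC);
```
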
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 53029af6a..51bb7c2cb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "common/swap.h" | 8 | #include "common/swap.h" |
| 9 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | ||
| 9 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 10 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 10 | 11 | ||
| 11 | namespace Service::Nvidia { | 12 | namespace Service::Nvidia { |
| @@ -13,14 +14,14 @@ namespace Service::Nvidia { | |||
| 13 | namespace NvCore { | 14 | namespace NvCore { |
| 14 | class Container; | 15 | class Container; |
| 15 | class NvMap; | 16 | class NvMap; |
| 16 | class SyncpointManager; | ||
| 17 | } // namespace NvCore | 17 | } // namespace NvCore |
| 18 | 18 | ||
| 19 | namespace Devices { | 19 | namespace Devices { |
| 20 | 20 | ||
| 21 | class nvhost_nvdec_common : public nvdevice { | 21 | class nvhost_nvdec_common : public nvdevice { |
| 22 | public: | 22 | public: |
| 23 | explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core); | 23 | explicit nvhost_nvdec_common(Core::System& system_, NvCore::Container& core, |
| 24 | NvCore::ChannelType channel_type); | ||
| 24 | ~nvhost_nvdec_common() override; | 25 | ~nvhost_nvdec_common() override; |
| 25 | 26 | ||
| 26 | protected: | 27 | protected: |
| @@ -121,6 +122,7 @@ protected: | |||
| 121 | NvCore::Container& core; | 122 | NvCore::Container& core; |
| 122 | NvCore::SyncpointManager& syncpoint_manager; | 123 | NvCore::SyncpointManager& syncpoint_manager; |
| 123 | NvCore::NvMap& nvmap; | 124 | NvCore::NvMap& nvmap; |
| 125 | NvCore::ChannelType channel_type; | ||
| 124 | std::array<u32, MaxSyncPoints> device_syncpoints{}; | 126 | std::array<u32, MaxSyncPoints> device_syncpoints{}; |
| 125 | }; | 127 | }; |
| 126 | }; // namespace Devices | 128 | }; // namespace Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index c89ff6b27..490e399f4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -12,7 +12,7 @@ namespace Service::Nvidia::Devices { | |||
| 12 | u32 nvhost_vic::next_id{}; | 12 | u32 nvhost_vic::next_id{}; |
| 13 | 13 | ||
| 14 | nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core) | 14 | nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core) |
| 15 | : nvhost_nvdec_common{system_, core} {} | 15 | : nvhost_nvdec_common{system_, core, NvCore::ChannelType::VIC} {} |
| 16 | 16 | ||
| 17 | nvhost_vic::~nvhost_vic() = default; | 17 | nvhost_vic::~nvhost_vic() = default; |
| 18 | 18 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 632052c53..3c6e44a25 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -453,18 +453,10 @@ void Maxwell3D::ProcessFirmwareCall4() { | |||
| 453 | } | 453 | } |
| 454 | 454 | ||
| 455 | void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | 455 | void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { |
| 456 | struct LongQueryResult { | ||
| 457 | u64_le value; | ||
| 458 | u64_le timestamp; | ||
| 459 | }; | ||
| 460 | static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); | ||
| 461 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | 456 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 462 | if (long_query) { | 457 | if (long_query) { |
| 463 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | 458 | memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); |
| 464 | // GPU, this command may actually take a while to complete in real hardware due to GPU | 459 | memory_manager.Write<u64>(sequence_address, payload); |
| 465 | // wait queues. | ||
| 466 | LongQueryResult query_result{payload, system.GPU().GetTicks()}; | ||
| 467 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 468 | } else { | 460 | } else { |
| 469 | memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); | 461 | memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); |
| 470 | } | 462 | } |
| @@ -493,10 +485,10 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 493 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | 485 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 494 | const u32 payload = regs.query.query_sequence; | 486 | const u32 payload = regs.query.query_sequence; |
| 495 | std::function<void()> operation([this, sequence_address, payload] { | 487 | std::function<void()> operation([this, sequence_address, payload] { |
| 496 | LongQueryResult query_result{payload, system.GPU().GetTicks()}; | 488 | memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); |
| 497 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | 489 | memory_manager.Write<u64>(sequence_address, payload); |
| 498 | }); | 490 | }); |
| 499 | rasterizer->SignalFence(std::move(operation)); | 491 | rasterizer->SyncOperation(std::move(operation)); |
| 500 | } | 492 | } |
| 501 | break; | 493 | break; |
| 502 | case Regs::QueryOperation::Acquire: | 494 | case Regs::QueryOperation::Acquire: |
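
The long-query path keeps the same 16-byte guest layout the removed `LongQueryResult` struct described (64-bit payload, then the GPU timestamp 8 bytes later); it is now produced by two `Write<u64>` calls and deferred through `SyncOperation` instead of an immediate block write. A minimal model of that layout, using a plain buffer in place of the GPU memory manager:

```cpp
#include <cstdint>
#include <cstring>

// Writes the payload at the query address and the timestamp at address + 8,
// matching the order of the two Write<u64> calls in the diff above.
void WriteLongQueryResult(std::uint8_t* guest_memory, std::uint64_t payload,
                          std::uint64_t gpu_ticks) {
    std::memcpy(guest_memory, &payload, sizeof(payload));
    std::memcpy(guest_memory + sizeof(std::uint64_t), &gpu_ticks, sizeof(gpu_ticks));
}
```
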
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a12a95ce2..bcffd1862 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -274,16 +274,24 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 274 | void MaxwellDMA::ReleaseSemaphore() { | 274 | void MaxwellDMA::ReleaseSemaphore() { |
| 275 | const auto type = regs.launch_dma.semaphore_type; | 275 | const auto type = regs.launch_dma.semaphore_type; |
| 276 | const GPUVAddr address = regs.semaphore.address; | 276 | const GPUVAddr address = regs.semaphore.address; |
| 277 | const u32 payload = regs.semaphore.payload; | ||
| 277 | switch (type) { | 278 | switch (type) { |
| 278 | case LaunchDMA::SemaphoreType::NONE: | 279 | case LaunchDMA::SemaphoreType::NONE: |
| 279 | break; | 280 | break; |
| 280 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: | 281 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { |
| 281 | memory_manager.Write<u32>(address, regs.semaphore.payload); | 282 | std::function<void()> operation( |
| 283 | [this, address, payload] { memory_manager.Write<u32>(address, payload); }); | ||
| 284 | rasterizer->SignalFence(std::move(operation)); | ||
| 282 | break; | 285 | break; |
| 283 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: | 286 | } |
| 284 | memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload)); | 287 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { |
| 285 | memory_manager.Write<u64>(address + 8, system.GPU().GetTicks()); | 288 | std::function<void()> operation([this, address, payload] { |
| 289 | memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); | ||
| 290 | memory_manager.Write<u64>(address, payload); | ||
| 291 | }); | ||
| 292 | rasterizer->SignalFence(std::move(operation)); | ||
| 286 | break; | 293 | break; |
| 294 | } | ||
| 287 | default: | 295 | default: |
| 288 | ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value())); | 296 | ASSERT_MSG(false, "Unknown semaphore type: {}", static_cast<u32>(type.Value())); |
| 289 | } | 297 | } |
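
The DMA engine's semaphore release is now queued behind the rasterizer's fence machinery rather than written to guest memory immediately. A toy model of that deferral, with a plain callback queue standing in for `SignalFence` (an assumption for illustration only):

```cpp
#include <cstdint>
#include <functional>
#include <queue>

std::queue<std::function<void()>> pending_fences;

// Queue the one-word semaphore write instead of performing it right away.
void ReleaseOneWordSemaphore(std::uint32_t* address, std::uint32_t payload) {
    pending_fences.push([address, payload] { *address = payload; });
}

// Later, once prior GPU work is known to have completed, flush the queue.
void SignalPendingFences() {
    while (!pending_fences.empty()) {
        pending_fences.front()();
        pending_fences.pop();
    }
}
```
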
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index dd9494efa..c3ed11c13 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -59,6 +59,7 @@ void Puller::ProcessFenceActionMethod() { | |||
| 59 | case Puller::FenceOperation::Acquire: | 59 | case Puller::FenceOperation::Acquire: |
| 60 | // UNIMPLEMENTED_MSG("Channel Scheduling pending."); | 60 | // UNIMPLEMENTED_MSG("Channel Scheduling pending."); |
| 61 | // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | 61 | // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); |
| 62 | rasterizer->ReleaseFences(); | ||
| 62 | break; | 63 | break; |
| 63 | case Puller::FenceOperation::Increment: | 64 | case Puller::FenceOperation::Increment: |
| 64 | rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); | 65 | rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); |
| @@ -73,19 +74,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 73 | const auto op = | 74 | const auto op = |
| 74 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | 75 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); |
| 75 | if (op == GpuSemaphoreOperation::WriteLong) { | 76 | if (op == GpuSemaphoreOperation::WriteLong) { |
| 76 | struct Block { | ||
| 77 | u32 sequence; | ||
| 78 | u32 zeros = 0; | ||
| 79 | u64 timestamp; | ||
| 80 | }; | ||
| 81 | |||
| 82 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 77 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 83 | const u32 payload = regs.semaphore_sequence; | 78 | const u32 payload = regs.semaphore_sequence; |
| 84 | std::function<void()> operation([this, sequence_address, payload] { | 79 | std::function<void()> operation([this, sequence_address, payload] { |
| 85 | Block block{}; | 80 | memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); |
| 86 | block.sequence = payload; | 81 | memory_manager.Write<u64>(sequence_address, payload); |
| 87 | block.timestamp = gpu.GetTicks(); | ||
| 88 | memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); | ||
| 89 | }); | 82 | }); |
| 90 | rasterizer->SignalFence(std::move(operation)); | 83 | rasterizer->SignalFence(std::move(operation)); |
| 91 | } else { | 84 | } else { |
| @@ -98,7 +91,6 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 98 | regs.acquire_mode = false; | 91 | regs.acquire_mode = false; |
| 99 | if (word != regs.acquire_value) { | 92 | if (word != regs.acquire_value) { |
| 100 | rasterizer->ReleaseFences(); | 93 | rasterizer->ReleaseFences(); |
| 101 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 102 | continue; | 94 | continue; |
| 103 | } | 95 | } |
| 104 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | 96 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { |
| @@ -106,13 +98,11 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 106 | regs.acquire_mode = true; | 98 | regs.acquire_mode = true; |
| 107 | if (word < regs.acquire_value) { | 99 | if (word < regs.acquire_value) { |
| 108 | rasterizer->ReleaseFences(); | 100 | rasterizer->ReleaseFences(); |
| 109 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 110 | continue; | 101 | continue; |
| 111 | } | 102 | } |
| 112 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | 103 | } else if (op == GpuSemaphoreOperation::AcquireMask) { |
| 113 | if (word && regs.semaphore_sequence == 0) { | 104 | if (word && regs.semaphore_sequence == 0) { |
| 114 | rasterizer->ReleaseFences(); | 105 | rasterizer->ReleaseFences(); |
| 115 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 116 | continue; | 106 | continue; |
| 117 | } | 107 | } |
| 118 | } else { | 108 | } else { |
| @@ -128,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() { | |||
| 128 | std::function<void()> operation([this, sequence_address, payload] { | 118 | std::function<void()> operation([this, sequence_address, payload] { |
| 129 | memory_manager.Write<u32>(sequence_address, payload); | 119 | memory_manager.Write<u32>(sequence_address, payload); |
| 130 | }); | 120 | }); |
| 131 | rasterizer->SignalFence(std::move(operation)); | 121 | rasterizer->SyncOperation(std::move(operation)); |
| 132 | } | 122 | } |
| 133 | 123 | ||
| 134 | void Puller::ProcessSemaphoreAcquire() { | 124 | void Puller::ProcessSemaphoreAcquire() { |