summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ameerj, 2020-12-28 01:02:06 -0500
committer: ameerj, 2021-01-07 14:33:45 -0500
commit: 2c27127d04a155fe0f893e84263d58f14473785d (patch)
tree: e72b7d973f5c0dd4a553f815a632bf8fcc687998
parent: Merge pull request #5306 from MerryMage/ignore-library-Open (diff)
download: yuzu-2c27127d04a155fe0f893e84263d58f14473785d.tar.gz
download: yuzu-2c27127d04a155fe0f893e84263d58f14473785d.tar.xz
download: yuzu-2c27127d04a155fe0f893e84263d58f14473785d.zip
nvdec syncpt incorporation
laying the groundwork for async gpu, although this does not fully implement async nvdec operations
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp 5
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec.h 3
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp 26
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h 14
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_vic.cpp 5
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_vic.h 4
-rw-r--r-- src/core/hle/service/nvdrv/nvdrv.cpp 6
-rw-r--r-- src/video_core/cdma_pusher.cpp 15
-rw-r--r-- src/video_core/cdma_pusher.h 10
-rw-r--r-- src/video_core/command_classes/host1x.cpp 6
-rw-r--r-- src/video_core/command_classes/sync_manager.cpp 2
11 files changed, 59 insertions, 37 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index d8735491c..36970f828 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -11,8 +11,9 @@
11 11
12namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
13 13
14nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 14nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
15 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} 15 SyncpointManager& syncpoint_manager)
16 : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
16nvhost_nvdec::~nvhost_nvdec() = default; 17nvhost_nvdec::~nvhost_nvdec() = default;
17 18
18NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, 19NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 79b8b6de1..77ef53cdd 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {
11 11
12class nvhost_nvdec final : public nvhost_nvdec_common { 12class nvhost_nvdec final : public nvhost_nvdec_common {
13public: 13public:
14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
15 SyncpointManager& syncpoint_manager);
15 ~nvhost_nvdec() override; 16 ~nvhost_nvdec() override;
16 17
17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 18 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index b49cecb42..64370ad4c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -11,6 +11,7 @@
11#include "core/core.h" 11#include "core/core.h"
12#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" 12#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
13#include "core/hle/service/nvdrv/devices/nvmap.h" 13#include "core/hle/service/nvdrv/devices/nvmap.h"
14#include "core/hle/service/nvdrv/syncpoint_manager.h"
14#include "core/memory.h" 15#include "core/memory.h"
15#include "video_core/memory_manager.h" 16#include "video_core/memory_manager.h"
16#include "video_core/renderer_base.h" 17#include "video_core/renderer_base.h"
@@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
36} 37}
37} // Anonymous namespace 38} // Anonymous namespace
38 39
39nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 40nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
40 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 41 SyncpointManager& syncpoint_manager)
42 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
41nvhost_nvdec_common::~nvhost_nvdec_common() = default; 43nvhost_nvdec_common::~nvhost_nvdec_common() = default;
42 44
43NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { 45NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
71 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); 73 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
72 offset = SpliceVectors(input, fences, params.fence_count, offset); 74 offset = SpliceVectors(input, fences, params.fence_count, offset);
73 75
74 // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment 76 for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
77 SyncptIncr syncpt_incr = syncpt_increments[i];
75 78
79 fences[i].id = syncpt_incr.id;
80 fences[i].value =
81 syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
82 }
76 auto& gpu = system.GPU(); 83 auto& gpu = system.GPU();
77
78 for (const auto& cmd_buffer : command_buffers) { 84 for (const auto& cmd_buffer : command_buffers) {
79 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); 85 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
80 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 86 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
@@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
89 cmdlist.size() * sizeof(u32)); 95 cmdlist.size() * sizeof(u32));
90 gpu.PushCommandBuffer(cmdlist); 96 gpu.PushCommandBuffer(cmdlist);
91 } 97 }
98 fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
99
100 Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
101 gpu.PushCommandBuffer(cmdlist);
92 102
93 std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); 103 std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
94 // Some games expect command_buffers to be written back 104 // Some games expect command_buffers to be written back
@@ -98,6 +108,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
98 offset = WriteVectors(output, reloc_shifts, offset); 108 offset = WriteVectors(output, reloc_shifts, offset);
99 offset = WriteVectors(output, syncpt_increments, offset); 109 offset = WriteVectors(output, syncpt_increments, offset);
100 offset = WriteVectors(output, wait_checks, offset); 110 offset = WriteVectors(output, wait_checks, offset);
111 offset = WriteVectors(output, fences, offset);
101 112
102 return NvResult::Success; 113 return NvResult::Success;
103} 114}
@@ -107,9 +118,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
107 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); 118 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
108 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); 119 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
109 120
110 // We found that implementing this causes deadlocks with async gpu, along with degraded 121 if (device_syncpoints[params.param] == 0) {
111 // performance. TODO: RE the nvdec async implementation 122 device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
112 params.value = 0; 123 }
124 params.value = device_syncpoints[params.param];
113 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); 125 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
114 126
115 return NvResult::Success; 127 return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index d9f95ba58..4c9d4ba41 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -10,12 +10,16 @@
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/hle/service/nvdrv/devices/nvdevice.h" 11#include "core/hle/service/nvdrv/devices/nvdevice.h"
12 12
13namespace Service::Nvidia::Devices { 13namespace Service::Nvidia {
14class SyncpointManager;
15
16namespace Devices {
14class nvmap; 17class nvmap;
15 18
16class nvhost_nvdec_common : public nvdevice { 19class nvhost_nvdec_common : public nvdevice {
17public: 20public:
18 explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 21 explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
22 SyncpointManager& syncpoint_manager);
19 ~nvhost_nvdec_common() override; 23 ~nvhost_nvdec_common() override;
20 24
21protected: 25protected:
@@ -157,8 +161,10 @@ protected:
157 s32_le nvmap_fd{}; 161 s32_le nvmap_fd{};
158 u32_le submit_timeout{}; 162 u32_le submit_timeout{};
159 std::shared_ptr<nvmap> nvmap_dev; 163 std::shared_ptr<nvmap> nvmap_dev;
160 164 SyncpointManager& syncpoint_manager;
165 std::array<u32, MaxSyncPoints> device_syncpoints{};
161 // This is expected to be ordered, therefore we must use a map, not unordered_map 166 // This is expected to be ordered, therefore we must use a map, not unordered_map
162 std::map<GPUVAddr, BufferMap> buffer_mappings; 167 std::map<GPUVAddr, BufferMap> buffer_mappings;
163}; 168};
164}; // namespace Service::Nvidia::Devices 169}; // namespace Devices
170} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 805fe86ae..72499654c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,8 +10,9 @@
10#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
11 11
12namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
13nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 13nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
14 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} 14 SyncpointManager& syncpoint_manager)
15 : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
15 16
16nvhost_vic::~nvhost_vic() = default; 17nvhost_vic::~nvhost_vic() = default;
17 18
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index b2e11f4d4..f401c61fa 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -7,11 +7,11 @@
7#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" 7#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
8 8
9namespace Service::Nvidia::Devices { 9namespace Service::Nvidia::Devices {
10class nvmap;
11 10
12class nvhost_vic final : public nvhost_nvdec_common { 11class nvhost_vic final : public nvhost_nvdec_common {
13public: 12public:
14 explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 13 explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
14 SyncpointManager& syncpoint_manager);
15 ~nvhost_vic(); 15 ~nvhost_vic();
16 16
17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index e03195afe..620c18728 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
55 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); 55 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
56 devices["/dev/nvhost-ctrl"] = 56 devices["/dev/nvhost-ctrl"] =
57 std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); 57 std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
58 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); 58 devices["/dev/nvhost-nvdec"] =
59 std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
59 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); 60 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
60 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); 61 devices["/dev/nvhost-vic"] =
62 std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
61} 63}
62 64
63Module::~Module() = default; 65Module::~Module() = default;
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index e3e7432f7..94679d5d1 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
33 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), 33 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), 34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
35 host1x_processor(std::make_unique<Host1x>(gpu)), 35 host1x_processor(std::make_unique<Host1x>(gpu)),
36 nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), 36 sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
37 vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
38 37
39CDmaPusher::~CDmaPusher() = default; 38CDmaPusher::~CDmaPusher() = default;
40 39
@@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
110 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 109 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
111 const auto cond = static_cast<u32>((data >> 8) & 0xFF); 110 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
112 if (cond == 0) { 111 if (cond == 0) {
113 nvdec_sync->Increment(syncpoint_id); 112 sync_manager->Increment(syncpoint_id);
114 } else { 113 } else {
115 nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); 114 sync_manager->SignalDone(
116 nvdec_sync->SignalDone(syncpoint_id); 115 sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
117 } 116 }
118 break; 117 break;
119 } 118 }
@@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
135 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 134 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
136 const auto cond = static_cast<u32>((data >> 8) & 0xFF); 135 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
137 if (cond == 0) { 136 if (cond == 0) {
138 vic_sync->Increment(syncpoint_id); 137 sync_manager->Increment(syncpoint_id);
139 } else { 138 } else {
140 vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); 139 sync_manager->SignalDone(
141 vic_sync->SignalDone(syncpoint_id); 140 sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
142 } 141 }
143 break; 142 break;
144 } 143 }
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 0db1cd646..8ca70b6dd 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -116,12 +116,10 @@ private:
116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); 116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
117 117
118 GPU& gpu; 118 GPU& gpu;
119 119 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
120 std::shared_ptr<Nvdec> nvdec_processor; 120 std::unique_ptr<Tegra::Vic> vic_processor;
121 std::unique_ptr<Vic> vic_processor; 121 std::unique_ptr<Tegra::Host1x> host1x_processor;
122 std::unique_ptr<Host1x> host1x_processor; 122 std::unique_ptr<SyncptIncrManager> sync_manager;
123 std::unique_ptr<SyncptIncrManager> nvdec_sync;
124 std::unique_ptr<SyncptIncrManager> vic_sync;
125 ChClassId current_class{}; 123 ChClassId current_class{};
126 ThiRegisters vic_thi_state{}; 124 ThiRegisters vic_thi_state{};
127 ThiRegisters nvdec_thi_state{}; 125 ThiRegisters nvdec_thi_state{};
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index c4dd4881a..9d0a1b4d9 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -34,6 +34,8 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen
34} 34}
35 35
36void Tegra::Host1x::Execute(u32 data) { 36void Tegra::Host1x::Execute(u32 data) {
37 // This method waits on a valid syncpoint. 37 u32 syncpointId = (data & 0xFF);
38 // TODO: Implement when proper Async is in place 38 u32 threshold = state.load_syncpoint_payload32;
39
40 gpu.WaitFence(syncpointId, threshold);
39} 41}
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/command_classes/sync_manager.cpp
index 19dc9e0ab..579857766 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/command_classes/sync_manager.cpp
@@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
38} 38}
39 39
40void SyncptIncrManager::SignalDone(u32 handle) { 40void SyncptIncrManager::SignalDone(u32 handle) {
41 const auto done_incr = 41 const auto& done_incr =
42 std::find_if(increments.begin(), increments.end(), 42 std::find_if(increments.begin(), increments.end(),
43 [handle](const SyncptIncr& incr) { return incr.id == handle; }); 43 [handle](const SyncptIncr& incr) { return incr.id == handle; });
44 if (done_incr != increments.cend()) { 44 if (done_incr != increments.cend()) {