summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2022-01-30 10:31:13 +0100
committerGravatar Fernando Sahmkow2022-10-06 21:00:52 +0200
commit668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 (patch)
treea1c668d6c3d00eade849b1d31dba4116095e4c12
parentTexture Cache: Fix GC and GPU Modified on Joins. (diff)
downloadyuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.gz
yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.xz
yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.zip
VideoCore: Refactor syncing.
-rw-r--r--src/core/core.cpp12
-rw-r--r--src/core/core.h9
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp2
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp19
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp10
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp9
-rw-r--r--src/video_core/CMakeLists.txt40
-rw-r--r--src/video_core/cdma_pusher.cpp25
-rw-r--r--src/video_core/cdma_pusher.h15
-rw-r--r--src/video_core/control/channel_state.cpp2
-rw-r--r--src/video_core/control/channel_state.h2
-rw-r--r--src/video_core/control/channel_state_cache.h4
-rw-r--r--src/video_core/control/scheduler.cpp2
-rw-r--r--src/video_core/control/scheduler.h2
-rw-r--r--src/video_core/dma_pusher.h26
-rw-r--r--src/video_core/engines/puller.cpp65
-rw-r--r--src/video_core/engines/puller.h1
-rw-r--r--src/video_core/fence_manager.h12
-rw-r--r--src/video_core/gpu.cpp197
-rw-r--r--src/video_core/gpu.h19
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/gpu_thread.h2
-rw-r--r--src/video_core/host1x/codecs/codec.cpp (renamed from src/video_core/command_classes/codecs/codec.cpp)36
-rw-r--r--src/video_core/host1x/codecs/codec.h (renamed from src/video_core/command_classes/codecs/codec.h)14
-rw-r--r--src/video_core/host1x/codecs/h264.cpp (renamed from src/video_core/command_classes/codecs/h264.cpp)4
-rw-r--r--src/video_core/host1x/codecs/h264.h (renamed from src/video_core/command_classes/codecs/h264.h)6
-rw-r--r--src/video_core/host1x/codecs/vp8.cpp (renamed from src/video_core/command_classes/codecs/vp8.cpp)4
-rw-r--r--src/video_core/host1x/codecs/vp8.h (renamed from src/video_core/command_classes/codecs/vp8.h)5
-rw-r--r--src/video_core/host1x/codecs/vp9.cpp (renamed from src/video_core/command_classes/codecs/vp9.cpp)8
-rw-r--r--src/video_core/host1x/codecs/vp9.h (renamed from src/video_core/command_classes/codecs/vp9.h)12
-rw-r--r--src/video_core/host1x/codecs/vp9_types.h (renamed from src/video_core/command_classes/codecs/vp9_types.h)0
-rw-r--r--src/video_core/host1x/control.cpp35
-rw-r--r--src/video_core/host1x/control.h (renamed from src/video_core/command_classes/host1x.h)17
-rw-r--r--src/video_core/host1x/host1x.h33
-rw-r--r--src/video_core/host1x/nvdec.cpp (renamed from src/video_core/command_classes/nvdec.cpp)6
-rw-r--r--src/video_core/host1x/nvdec.h (renamed from src/video_core/command_classes/nvdec.h)7
-rw-r--r--src/video_core/host1x/nvdec_common.h (renamed from src/video_core/command_classes/nvdec_common.h)4
-rw-r--r--src/video_core/host1x/sync_manager.cpp (renamed from src/video_core/command_classes/sync_manager.cpp)10
-rw-r--r--src/video_core/host1x/sync_manager.h (renamed from src/video_core/command_classes/sync_manager.h)6
-rw-r--r--src/video_core/host1x/syncpoint_manager.cpp93
-rw-r--r--src/video_core/host1x/syncpoint_manager.h99
-rw-r--r--src/video_core/host1x/vic.cpp (renamed from src/video_core/command_classes/vic.cpp)9
-rw-r--r--src/video_core/host1x/vic.h (renamed from src/video_core/command_classes/vic.h)7
44 files changed, 648 insertions, 252 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 121092868..fa059a394 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,6 +51,7 @@
51#include "core/telemetry_session.h" 51#include "core/telemetry_session.h"
52#include "core/tools/freezer.h" 52#include "core/tools/freezer.h"
53#include "network/network.h" 53#include "network/network.h"
54#include "video_core/host1x/host1x.h"
54#include "video_core/renderer_base.h" 55#include "video_core/renderer_base.h"
55#include "video_core/video_core.h" 56#include "video_core/video_core.h"
56 57
@@ -215,6 +216,7 @@ struct System::Impl {
215 216
216 telemetry_session = std::make_unique<Core::TelemetrySession>(); 217 telemetry_session = std::make_unique<Core::TelemetrySession>();
217 218
219 host1x_core = std::make_unique<Tegra::Host1x::Host1x>();
218 gpu_core = VideoCore::CreateGPU(emu_window, system); 220 gpu_core = VideoCore::CreateGPU(emu_window, system);
219 if (!gpu_core) { 221 if (!gpu_core) {
220 return SystemResultStatus::ErrorVideoCore; 222 return SystemResultStatus::ErrorVideoCore;
@@ -373,6 +375,7 @@ struct System::Impl {
373 app_loader.reset(); 375 app_loader.reset();
374 audio_core.reset(); 376 audio_core.reset();
375 gpu_core.reset(); 377 gpu_core.reset();
378 host1x_core.reset();
376 perf_stats.reset(); 379 perf_stats.reset();
377 kernel.Shutdown(); 380 kernel.Shutdown();
378 memory.Reset(); 381 memory.Reset();
@@ -450,6 +453,7 @@ struct System::Impl {
450 /// AppLoader used to load the current executing application 453 /// AppLoader used to load the current executing application
451 std::unique_ptr<Loader::AppLoader> app_loader; 454 std::unique_ptr<Loader::AppLoader> app_loader;
452 std::unique_ptr<Tegra::GPU> gpu_core; 455 std::unique_ptr<Tegra::GPU> gpu_core;
456 std::unique_ptr<Tegra::Host1x::Host1x> host1x_core;
453 std::unique_ptr<Hardware::InterruptManager> interrupt_manager; 457 std::unique_ptr<Hardware::InterruptManager> interrupt_manager;
454 std::unique_ptr<Core::DeviceMemory> device_memory; 458 std::unique_ptr<Core::DeviceMemory> device_memory;
455 std::unique_ptr<AudioCore::AudioCore> audio_core; 459 std::unique_ptr<AudioCore::AudioCore> audio_core;
@@ -668,6 +672,14 @@ const Tegra::GPU& System::GPU() const {
668 return *impl->gpu_core; 672 return *impl->gpu_core;
669} 673}
670 674
675Tegra::Host1x::Host1x& System::Host1x() {
676 return *impl->host1x_core;
677}
678
679const Tegra::Host1x::Host1x& System::Host1x() const {
680 return *impl->host1x_core;
681}
682
671Core::Hardware::InterruptManager& System::InterruptManager() { 683Core::Hardware::InterruptManager& System::InterruptManager() {
672 return *impl->interrupt_manager; 684 return *impl->interrupt_manager;
673} 685}
diff --git a/src/core/core.h b/src/core/core.h
index 0ce3b1d60..e4168a921 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -74,6 +74,9 @@ class TimeManager;
74namespace Tegra { 74namespace Tegra {
75class DebugContext; 75class DebugContext;
76class GPU; 76class GPU;
77namespace Host1x {
78class Host1x;
79} // namespace Host1x
77} // namespace Tegra 80} // namespace Tegra
78 81
79namespace VideoCore { 82namespace VideoCore {
@@ -260,6 +263,12 @@ public:
260 /// Gets an immutable reference to the GPU interface. 263 /// Gets an immutable reference to the GPU interface.
261 [[nodiscard]] const Tegra::GPU& GPU() const; 264 [[nodiscard]] const Tegra::GPU& GPU() const;
262 265
266 /// Gets a mutable reference to the Host1x interface
267 [[nodiscard]] Tegra::Host1x::Host1x& Host1x();
268
269 /// Gets an immutable reference to the Host1x interface.
270 [[nodiscard]] const Tegra::Host1x::Host1x& Host1x() const;
271
263 /// Gets a mutable reference to the renderer. 272 /// Gets a mutable reference to the renderer.
264 [[nodiscard]] VideoCore::RendererBase& Renderer(); 273 [[nodiscard]] VideoCore::RendererBase& Renderer();
265 274
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index b1c0e9eb2..e6a976714 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -50,7 +50,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
50 stride, format, transform, crop_rect}; 50 stride, format, transform, crop_rect};
51 51
52 system.GetPerfStats().EndSystemFrame(); 52 system.GetPerfStats().EndSystemFrame();
53 system.GPU().SwapBuffers(&framebuffer); 53 system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
54 system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); 54 system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
55 system.GetPerfStats().BeginSystemFrame(); 55 system.GetPerfStats().BeginSystemFrame();
56} 56}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 54074af75..ffe42d423 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -18,6 +18,7 @@
18#include "core/hle/service/nvdrv/core/syncpoint_manager.h" 18#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
19#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" 19#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
20#include "video_core/gpu.h" 20#include "video_core/gpu.h"
21#include "video_core/host1x/host1x.h"
21 22
22namespace Service::Nvidia::Devices { 23namespace Service::Nvidia::Devices {
23 24
@@ -129,7 +130,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
129 return NvResult::Success; 130 return NvResult::Success;
130 } 131 }
131 132
132 auto& gpu = system.GPU(); 133 auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
133 const u32 target_value = params.fence.value; 134 const u32 target_value = params.fence.value;
134 135
135 auto lock = NvEventsLock(); 136 auto lock = NvEventsLock();
@@ -149,7 +150,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
149 if (events[slot].fails > 2) { 150 if (events[slot].fails > 2) {
150 { 151 {
151 auto lk = system.StallProcesses(); 152 auto lk = system.StallProcesses();
152 gpu.WaitFence(fence_id, target_value); 153 host1x_syncpoint_manager.WaitHost(fence_id, target_value);
153 system.UnstallProcesses(); 154 system.UnstallProcesses();
154 } 155 }
155 params.value.raw = target_value; 156 params.value.raw = target_value;
@@ -198,7 +199,15 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
198 } 199 }
199 params.value.raw |= slot; 200 params.value.raw |= slot;
200 201
201 gpu.RegisterSyncptInterrupt(fence_id, target_value); 202 event.wait_handle =
203 host1x_syncpoint_manager.RegisterHostAction(fence_id, target_value, [this, slot]() {
204 auto& event = events[slot];
205 if (event.status.exchange(EventState::Signalling, std::memory_order_acq_rel) ==
206 EventState::Waiting) {
207 event.kevent->GetWritableEvent().Signal();
208 }
209 event.status.store(EventState::Signalled, std::memory_order_release);
210 });
202 return NvResult::Timeout; 211 return NvResult::Timeout;
203} 212}
204 213
@@ -288,8 +297,10 @@ NvResult nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::v
288 auto& event = events[event_id]; 297 auto& event = events[event_id];
289 if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) == 298 if (event.status.exchange(EventState::Cancelling, std::memory_order_acq_rel) ==
290 EventState::Waiting) { 299 EventState::Waiting) {
291 system.GPU().CancelSyncptInterrupt(event.assigned_syncpt, event.assigned_value); 300 auto& host1x_syncpoint_manager = system.Host1x().GetSyncpointManager();
301 host1x_syncpoint_manager.DeregisterHostAction(event.assigned_syncpt, event.wait_handle);
292 syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt); 302 syncpoint_manager.RefreshSyncpoint(event.assigned_syncpt);
303 event.wait_handle = {};
293 } 304 }
294 event.fails++; 305 event.fails++;
295 event.status.store(EventState::Cancelled, std::memory_order_release); 306 event.status.store(EventState::Cancelled, std::memory_order_release);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index d56aea405..136a1e925 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/service/nvdrv/devices/nvdevice.h" 12#include "core/hle/service/nvdrv/devices/nvdevice.h"
13#include "core/hle/service/nvdrv/nvdrv.h" 13#include "core/hle/service/nvdrv/nvdrv.h"
14#include "video_core/host1x/syncpoint_manager.h"
14 15
15namespace Service::Nvidia::NvCore { 16namespace Service::Nvidia::NvCore {
16class Container; 17class Container;
@@ -78,6 +79,9 @@ private:
78 // Tells if an NVEvent is registered or not 79 // Tells if an NVEvent is registered or not
79 bool registered{}; 80 bool registered{};
80 81
82 // Used for waiting on a syncpoint & canceling it.
83 Tegra::Host1x::SyncpointManager::ActionHandle wait_handle{};
84
81 bool IsBeingUsed() { 85 bool IsBeingUsed() {
82 const auto current_status = status.load(std::memory_order_acquire); 86 const auto current_status = status.load(std::memory_order_acquire);
83 return current_status == EventState::Waiting || 87 return current_status == EventState::Waiting ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 38d45cb79..db3e266ad 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -210,10 +210,10 @@ NvResult nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::ve
210 210
211static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { 211static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
212 return { 212 return {
213 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, 213 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
214 Tegra::SubmissionMode::Increasing), 214 Tegra::SubmissionMode::Increasing),
215 {fence.value}, 215 {fence.value},
216 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 216 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
217 Tegra::SubmissionMode::Increasing), 217 Tegra::SubmissionMode::Increasing),
218 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id), 218 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
219 }; 219 };
@@ -222,12 +222,12 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
222static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence, 222static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence,
223 u32 add_increment) { 223 u32 add_increment) {
224 std::vector<Tegra::CommandHeader> result{ 224 std::vector<Tegra::CommandHeader> result{
225 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, 225 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
226 Tegra::SubmissionMode::Increasing), 226 Tegra::SubmissionMode::Increasing),
227 {}}; 227 {}};
228 228
229 for (u32 count = 0; count < add_increment; ++count) { 229 for (u32 count = 0; count < add_increment; ++count) {
230 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 230 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
231 Tegra::SubmissionMode::Increasing)); 231 Tegra::SubmissionMode::Increasing));
232 result.emplace_back( 232 result.emplace_back(
233 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); 233 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
@@ -239,7 +239,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
239static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence, 239static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence,
240 u32 add_increment) { 240 u32 add_increment) {
241 std::vector<Tegra::CommandHeader> result{ 241 std::vector<Tegra::CommandHeader> result{
242 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, 242 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
243 Tegra::SubmissionMode::Increasing), 243 Tegra::SubmissionMode::Increasing),
244 {}}; 244 {}};
245 const std::vector<Tegra::CommandHeader> increment{ 245 const std::vector<Tegra::CommandHeader> increment{
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 8c3013f83..aa112021d 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -24,6 +24,8 @@
24#include "core/hle/service/vi/layer/vi_layer.h" 24#include "core/hle/service/vi/layer/vi_layer.h"
25#include "core/hle/service/vi/vi_results.h" 25#include "core/hle/service/vi/vi_results.h"
26#include "video_core/gpu.h" 26#include "video_core/gpu.h"
27#include "video_core/host1x/host1x.h"
28#include "video_core/host1x/syncpoint_manager.h"
27 29
28namespace Service::NVFlinger { 30namespace Service::NVFlinger {
29 31
@@ -267,12 +269,12 @@ void NVFlinger::Compose() {
267 return; // We are likely shutting down 269 return; // We are likely shutting down
268 } 270 }
269 271
270 auto& gpu = system.GPU(); 272 auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
271 const auto& multi_fence = buffer.fence; 273 const auto& multi_fence = buffer.fence;
272 guard->unlock(); 274 guard->unlock();
273 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { 275 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
274 const auto& fence = multi_fence.fences[fence_id]; 276 const auto& fence = multi_fence.fences[fence_id];
275 gpu.WaitFence(fence.id, fence.value); 277 syncpoint_manager.WaitGuest(fence.id, fence.value);
276 } 278 }
277 guard->lock(); 279 guard->lock();
278 280
@@ -284,6 +286,7 @@ void NVFlinger::Compose() {
284 auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); 286 auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd);
285 ASSERT(nvdisp); 287 ASSERT(nvdisp);
286 288
289 guard->unlock();
287 Common::Rectangle<int> crop_rect{ 290 Common::Rectangle<int> crop_rect{
288 static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), 291 static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()),
289 static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; 292 static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())};
@@ -292,6 +295,8 @@ void NVFlinger::Compose() {
292 igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), 295 igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
293 static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); 296 static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
294 297
298 guard->lock();
299
295 swap_interval = buffer.swap_interval; 300 swap_interval = buffer.swap_interval;
296 301
297 auto fence = android::Fence::NoFence(); 302 auto fence = android::Fence::NoFence();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 35faa70a0..723f9b67c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -4,7 +4,7 @@
4add_subdirectory(host_shaders) 4add_subdirectory(host_shaders)
5 5
6if(LIBVA_FOUND) 6if(LIBVA_FOUND)
7 set_source_files_properties(command_classes/codecs/codec.cpp 7 set_source_files_properties(host1x/codecs/codec.cpp
8 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) 8 PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
9 list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) 9 list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES})
10endif() 10endif()
@@ -15,24 +15,6 @@ add_library(video_core STATIC
15 buffer_cache/buffer_cache.h 15 buffer_cache/buffer_cache.h
16 cdma_pusher.cpp 16 cdma_pusher.cpp
17 cdma_pusher.h 17 cdma_pusher.h
18 command_classes/codecs/codec.cpp
19 command_classes/codecs/codec.h
20 command_classes/codecs/h264.cpp
21 command_classes/codecs/h264.h
22 command_classes/codecs/vp8.cpp
23 command_classes/codecs/vp8.h
24 command_classes/codecs/vp9.cpp
25 command_classes/codecs/vp9.h
26 command_classes/codecs/vp9_types.h
27 command_classes/host1x.cpp
28 command_classes/host1x.h
29 command_classes/nvdec.cpp
30 command_classes/nvdec.h
31 command_classes/nvdec_common.h
32 command_classes/sync_manager.cpp
33 command_classes/sync_manager.h
34 command_classes/vic.cpp
35 command_classes/vic.h
36 compatible_formats.cpp 18 compatible_formats.cpp
37 compatible_formats.h 19 compatible_formats.h
38 control/channel_state.cpp 20 control/channel_state.cpp
@@ -63,6 +45,26 @@ add_library(video_core STATIC
63 engines/puller.cpp 45 engines/puller.cpp
64 engines/puller.h 46 engines/puller.h
65 framebuffer_config.h 47 framebuffer_config.h
48 host1x/codecs/codec.cpp
49 host1x/codecs/codec.h
50 host1x/codecs/h264.cpp
51 host1x/codecs/h264.h
52 host1x/codecs/vp8.cpp
53 host1x/codecs/vp8.h
54 host1x/codecs/vp9.cpp
55 host1x/codecs/vp9.h
56 host1x/codecs/vp9_types.h
57 host1x/control.cpp
58 host1x/control.h
59 host1x/nvdec.cpp
60 host1x/nvdec.h
61 host1x/nvdec_common.h
62 host1x/sync_manager.cpp
63 host1x/sync_manager.h
64 host1x/syncpoint_manager.cpp
65 host1x/syncpoint_manager.h
66 host1x/vic.cpp
67 host1x/vic.h
66 macro/macro.cpp 68 macro/macro.cpp
67 macro/macro.h 69 macro/macro.h
68 macro/macro_hle.cpp 70 macro/macro_hle.cpp
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 8e890a85e..148126347 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -2,20 +2,22 @@
2// SPDX-License-Identifier: MIT 2// SPDX-License-Identifier: MIT
3 3
4#include <bit> 4#include <bit>
5#include "command_classes/host1x.h"
6#include "command_classes/nvdec.h"
7#include "command_classes/vic.h"
8#include "video_core/cdma_pusher.h" 5#include "video_core/cdma_pusher.h"
9#include "video_core/command_classes/sync_manager.h"
10#include "video_core/engines/maxwell_3d.h" 6#include "video_core/engines/maxwell_3d.h"
11#include "video_core/gpu.h" 7#include "video_core/gpu.h"
8#include "video_core/host1x/control.h"
9#include "video_core/host1x/nvdec.h"
10#include "video_core/host1x/nvdec_common.h"
11#include "video_core/host1x/sync_manager.h"
12#include "video_core/host1x/vic.h"
13#include "video_core/memory_manager.h"
12 14
13namespace Tegra { 15namespace Tegra {
14CDmaPusher::CDmaPusher(GPU& gpu_) 16CDmaPusher::CDmaPusher(GPU& gpu_)
15 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), 17 : gpu{gpu_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(gpu)),
16 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), 18 vic_processor(std::make_unique<Host1x::Vic>(gpu, nvdec_processor)),
17 host1x_processor(std::make_unique<Host1x>(gpu)), 19 host1x_processor(std::make_unique<Host1x::Control>(gpu)),
18 sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {} 20 sync_manager(std::make_unique<Host1x::SyncptIncrManager>(gpu)) {}
19 21
20CDmaPusher::~CDmaPusher() = default; 22CDmaPusher::~CDmaPusher() = default;
21 23
@@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
109 case ThiMethod::SetMethod1: 111 case ThiMethod::SetMethod1:
110 LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", 112 LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
111 static_cast<u32>(vic_thi_state.method_0), data); 113 static_cast<u32>(vic_thi_state.method_0), data);
112 vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data); 114 vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0),
115 data);
113 break; 116 break;
114 default: 117 default:
115 break; 118 break;
116 } 119 }
117 break; 120 break;
118 case ChClassId::Host1x: 121 case ChClassId::Control:
119 // This device is mainly for syncpoint synchronization 122 // This device is mainly for syncpoint synchronization
120 LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); 123 LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
121 host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data); 124 host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data);
122 break; 125 break;
123 default: 126 default:
124 UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); 127 UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index d6ffef95f..de17c2082 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -13,10 +13,13 @@
13namespace Tegra { 13namespace Tegra {
14 14
15class GPU; 15class GPU;
16class Host1x; 16
17namespace Host1x {
18class Control;
17class Nvdec; 19class Nvdec;
18class SyncptIncrManager; 20class SyncptIncrManager;
19class Vic; 21class Vic;
22} // namespace Host1x
20 23
21enum class ChSubmissionMode : u32 { 24enum class ChSubmissionMode : u32 {
22 SetClass = 0, 25 SetClass = 0,
@@ -30,7 +33,7 @@ enum class ChSubmissionMode : u32 {
30 33
31enum class ChClassId : u32 { 34enum class ChClassId : u32 {
32 NoClass = 0x0, 35 NoClass = 0x0,
33 Host1x = 0x1, 36 Control = 0x1,
34 VideoEncodeMpeg = 0x20, 37 VideoEncodeMpeg = 0x20,
35 VideoEncodeNvEnc = 0x21, 38 VideoEncodeNvEnc = 0x21,
36 VideoStreamingVi = 0x30, 39 VideoStreamingVi = 0x30,
@@ -102,10 +105,10 @@ private:
102 void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument); 105 void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument);
103 106
104 GPU& gpu; 107 GPU& gpu;
105 std::shared_ptr<Tegra::Nvdec> nvdec_processor; 108 std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
106 std::unique_ptr<Tegra::Vic> vic_processor; 109 std::unique_ptr<Tegra::Host1x::Vic> vic_processor;
107 std::unique_ptr<Tegra::Host1x> host1x_processor; 110 std::unique_ptr<Tegra::Host1x::Control> host1x_processor;
108 std::unique_ptr<SyncptIncrManager> sync_manager; 111 std::unique_ptr<Host1x::SyncptIncrManager> sync_manager;
109 ChClassId current_class{}; 112 ChClassId current_class{};
110 ThiRegisters vic_thi_state{}; 113 ThiRegisters vic_thi_state{};
111 ThiRegisters nvdec_thi_state{}; 114 ThiRegisters nvdec_thi_state{};
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index 67803fe94..3613c4992 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -1,5 +1,5 @@
1// Copyright 2021 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index 82808a6b8..08a7591e1 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -1,5 +1,5 @@
1// Copyright 2021 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 31d80e8b7..dbf833de7 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -1,3 +1,7 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
1#pragma once 5#pragma once
2 6
3#include <deque> 7#include <deque>
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
index e1abcb188..a9bb00aa7 100644
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -1,5 +1,5 @@
1// Copyright 2021 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <memory>
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
index 802e9caff..c1a773946 100644
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -1,5 +1,5 @@
1// Copyright 2021 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index fd7c936c4..938f0f11c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -37,24 +37,32 @@ enum class SubmissionMode : u32 {
37// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence 37// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
38// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. 38// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
39// So the values you see in docs might be multiplied by 4. 39// So the values you see in docs might be multiplied by 4.
40// Register documentation:
41// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h
42//
43// Register Description (approx):
44// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt
40enum class BufferMethods : u32 { 45enum class BufferMethods : u32 {
41 BindObject = 0x0, 46 BindObject = 0x0,
47 Illegal = 0x1,
42 Nop = 0x2, 48 Nop = 0x2,
43 SemaphoreAddressHigh = 0x4, 49 SemaphoreAddressHigh = 0x4,
44 SemaphoreAddressLow = 0x5, 50 SemaphoreAddressLow = 0x5,
45 SemaphoreSequence = 0x6, 51 SemaphoreSequencePayload = 0x6,
46 SemaphoreTrigger = 0x7, 52 SemaphoreOperation = 0x7,
47 NotifyIntr = 0x8, 53 NonStallInterrupt = 0x8,
48 WrcacheFlush = 0x9, 54 WrcacheFlush = 0x9,
49 Unk28 = 0xA, 55 MemOpA = 0xA,
50 UnkCacheFlush = 0xB, 56 MemOpB = 0xB,
57 MemOpC = 0xC,
58 MemOpD = 0xD,
51 RefCnt = 0x14, 59 RefCnt = 0x14,
52 SemaphoreAcquire = 0x1A, 60 SemaphoreAcquire = 0x1A,
53 SemaphoreRelease = 0x1B, 61 SemaphoreRelease = 0x1B,
54 FenceValue = 0x1C, 62 SyncpointPayload = 0x1C,
55 FenceAction = 0x1D, 63 SyncpointOperation = 0x1D,
56 WaitForInterrupt = 0x1E, 64 WaitForIdle = 0x1E,
57 Unk7c = 0x1F, 65 CRCCheck = 0x1F,
58 Yield = 0x20, 66 Yield = 0x20,
59 NonPullerMethods = 0x40, 67 NonPullerMethods = 0x40,
60}; 68};
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 3866c8746..8c17639e4 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -68,11 +68,6 @@ void Puller::ProcessFenceActionMethod() {
68 } 68 }
69} 69}
70 70
71void Puller::ProcessWaitForInterruptMethod() {
72 // TODO(bunnei) ImplementMe
73 LOG_WARNING(HW_GPU, "(STUBBED) called");
74}
75
76void Puller::ProcessSemaphoreTriggerMethod() { 71void Puller::ProcessSemaphoreTriggerMethod() {
77 const auto semaphoreOperationMask = 0xF; 72 const auto semaphoreOperationMask = 0xF;
78 const auto op = 73 const auto op =
@@ -91,29 +86,33 @@ void Puller::ProcessSemaphoreTriggerMethod() {
91 block.timestamp = gpu.GetTicks(); 86 block.timestamp = gpu.GetTicks();
92 memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); 87 memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
93 } else { 88 } else {
94 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; 89 do {
95 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || 90 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
96 (op == GpuSemaphoreOperation::AcquireGequal &&
97 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
98 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
99 // Nothing to do in this case
100 } else {
101 regs.acquire_source = true; 91 regs.acquire_source = true;
102 regs.acquire_value = regs.semaphore_sequence; 92 regs.acquire_value = regs.semaphore_sequence;
103 if (op == GpuSemaphoreOperation::AcquireEqual) { 93 if (op == GpuSemaphoreOperation::AcquireEqual) {
104 regs.acquire_active = true; 94 regs.acquire_active = true;
105 regs.acquire_mode = false; 95 regs.acquire_mode = false;
96 if (word != regs.acquire_value) {
97 std::this_thread::sleep_for(std::chrono::milliseconds(1));
98 continue;
99 }
106 } else if (op == GpuSemaphoreOperation::AcquireGequal) { 100 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
107 regs.acquire_active = true; 101 regs.acquire_active = true;
108 regs.acquire_mode = true; 102 regs.acquire_mode = true;
103 if (word < regs.acquire_value) {
104 std::this_thread::sleep_for(std::chrono::milliseconds(1));
105 continue;
106 }
109 } else if (op == GpuSemaphoreOperation::AcquireMask) { 107 } else if (op == GpuSemaphoreOperation::AcquireMask) {
110 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with 108 if (word & regs.semaphore_sequence == 0) {
111 // semaphore_sequence, gives a non-0 result 109 std::this_thread::sleep_for(std::chrono::milliseconds(1));
112 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); 110 continue;
111 }
113 } else { 112 } else {
114 LOG_ERROR(HW_GPU, "Invalid semaphore operation"); 113 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
115 } 114 }
116 } 115 } while (false);
117 } 116 }
118} 117}
119 118
@@ -124,6 +123,7 @@ void Puller::ProcessSemaphoreRelease() {
124void Puller::ProcessSemaphoreAcquire() { 123void Puller::ProcessSemaphoreAcquire() {
125 const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); 124 const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
126 const auto value = regs.semaphore_acquire; 125 const auto value = regs.semaphore_acquire;
126 std::this_thread::sleep_for(std::chrono::milliseconds(5));
127 if (word != value) { 127 if (word != value) {
128 regs.acquire_active = true; 128 regs.acquire_active = true;
129 regs.acquire_value = value; 129 regs.acquire_value = value;
@@ -146,32 +146,39 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
146 case BufferMethods::Nop: 146 case BufferMethods::Nop:
147 case BufferMethods::SemaphoreAddressHigh: 147 case BufferMethods::SemaphoreAddressHigh:
148 case BufferMethods::SemaphoreAddressLow: 148 case BufferMethods::SemaphoreAddressLow:
149 case BufferMethods::SemaphoreSequence: 149 case BufferMethods::SemaphoreSequencePayload:
150 case BufferMethods::UnkCacheFlush:
151 case BufferMethods::WrcacheFlush: 150 case BufferMethods::WrcacheFlush:
152 case BufferMethods::FenceValue: 151 case BufferMethods::SyncpointPayload:
153 break; 152 break;
154 case BufferMethods::RefCnt: 153 case BufferMethods::RefCnt:
155 rasterizer->SignalReference(); 154 rasterizer->SignalReference();
156 break; 155 break;
157 case BufferMethods::FenceAction: 156 case BufferMethods::SyncpointOperation:
158 ProcessFenceActionMethod(); 157 ProcessFenceActionMethod();
159 break; 158 break;
160 case BufferMethods::WaitForInterrupt: 159 case BufferMethods::WaitForIdle:
161 ProcessWaitForInterruptMethod(); 160 rasterizer->WaitForIdle();
162 break; 161 break;
163 case BufferMethods::SemaphoreTrigger: { 162 case BufferMethods::SemaphoreOperation: {
164 ProcessSemaphoreTriggerMethod(); 163 ProcessSemaphoreTriggerMethod();
165 break; 164 break;
166 } 165 }
167 case BufferMethods::NotifyIntr: { 166 case BufferMethods::NonStallInterrupt: {
168 // TODO(Kmather73): Research and implement this method. 167 LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented");
169 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
170 break; 168 break;
171 } 169 }
172 case BufferMethods::Unk28: { 170 case BufferMethods::MemOpA: {
173 // TODO(Kmather73): Research and implement this method. 171 LOG_ERROR(HW_GPU, "Memory Operation A");
174 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); 172 break;
173 }
174 case BufferMethods::MemOpB: {
175 // Implement this better.
176 rasterizer->SyncGuestHost();
177 break;
178 }
179 case BufferMethods::MemOpC:
180 case BufferMethods::MemOpD: {
181 LOG_ERROR(HW_GPU, "Memory Operation C,D");
175 break; 182 break;
176 } 183 }
177 case BufferMethods::SemaphoreAcquire: { 184 case BufferMethods::SemaphoreAcquire: {
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
index d948ec790..b4619e9a8 100644
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -141,7 +141,6 @@ private:
141 void ProcessSemaphoreAcquire(); 141 void ProcessSemaphoreAcquire();
142 void ProcessSemaphoreRelease(); 142 void ProcessSemaphoreRelease();
143 void ProcessSemaphoreTriggerMethod(); 143 void ProcessSemaphoreTriggerMethod();
144 void ProcessWaitForInterruptMethod();
145 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); 144 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
146 145
147 /// Mapping of command subchannels to their bound engine ids 146 /// Mapping of command subchannels to their bound engine ids
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index d658e038d..03a70e5e0 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -11,6 +11,8 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/delayed_destruction_ring.h" 12#include "video_core/delayed_destruction_ring.h"
13#include "video_core/gpu.h" 13#include "video_core/gpu.h"
14#include "video_core/host1x/host1x.h"
15#include "video_core/host1x/syncpoint_manager.h"
14#include "video_core/rasterizer_interface.h" 16#include "video_core/rasterizer_interface.h"
15 17
16namespace VideoCommon { 18namespace VideoCommon {
@@ -72,6 +74,7 @@ public:
72 } 74 }
73 75
74 void SignalSyncPoint(u32 value) { 76 void SignalSyncPoint(u32 value) {
77 syncpoint_manager.IncrementGuest(value);
75 TryReleasePendingFences(); 78 TryReleasePendingFences();
76 const bool should_flush = ShouldFlush(); 79 const bool should_flush = ShouldFlush();
77 CommitAsyncFlushes(); 80 CommitAsyncFlushes();
@@ -96,7 +99,7 @@ public:
96 auto payload = current_fence->GetPayload(); 99 auto payload = current_fence->GetPayload();
97 std::memcpy(address, &payload, sizeof(payload)); 100 std::memcpy(address, &payload, sizeof(payload));
98 } else { 101 } else {
99 gpu.IncrementSyncPoint(current_fence->GetPayload()); 102 syncpoint_manager.IncrementHost(current_fence->GetPayload());
100 } 103 }
101 PopFence(); 104 PopFence();
102 } 105 }
@@ -106,8 +109,8 @@ protected:
106 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 109 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
107 TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, 110 TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
108 TQueryCache& query_cache_) 111 TQueryCache& query_cache_)
109 : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_}, 112 : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
110 buffer_cache{buffer_cache_}, query_cache{query_cache_} {} 113 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
111 114
112 virtual ~FenceManager() = default; 115 virtual ~FenceManager() = default;
113 116
@@ -125,6 +128,7 @@ protected:
125 128
126 VideoCore::RasterizerInterface& rasterizer; 129 VideoCore::RasterizerInterface& rasterizer;
127 Tegra::GPU& gpu; 130 Tegra::GPU& gpu;
131 Tegra::Host1x::SyncpointManager& syncpoint_manager;
128 TTextureCache& texture_cache; 132 TTextureCache& texture_cache;
129 TTBufferCache& buffer_cache; 133 TTBufferCache& buffer_cache;
130 TQueryCache& query_cache; 134 TQueryCache& query_cache;
@@ -142,7 +146,7 @@ private:
142 const auto payload = current_fence->GetPayload(); 146 const auto payload = current_fence->GetPayload();
143 std::memcpy(address, &payload, sizeof(payload)); 147 std::memcpy(address, &payload, sizeof(payload));
144 } else { 148 } else {
145 gpu.IncrementSyncPoint(current_fence->GetPayload()); 149 syncpoint_manager.IncrementHost(current_fence->GetPayload());
146 } 150 }
147 PopFence(); 151 PopFence();
148 } 152 }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index eebd7f3ff..1097db08a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -28,6 +28,8 @@
28#include "video_core/engines/maxwell_dma.h" 28#include "video_core/engines/maxwell_dma.h"
29#include "video_core/gpu.h" 29#include "video_core/gpu.h"
30#include "video_core/gpu_thread.h" 30#include "video_core/gpu_thread.h"
31#include "video_core/host1x/host1x.h"
32#include "video_core/host1x/syncpoint_manager.h"
31#include "video_core/memory_manager.h" 33#include "video_core/memory_manager.h"
32#include "video_core/renderer_base.h" 34#include "video_core/renderer_base.h"
33#include "video_core/shader_notify.h" 35#include "video_core/shader_notify.h"
@@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
38 40
39struct GPU::Impl { 41struct GPU::Impl {
40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) 42 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
41 : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, 43 : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
42 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 44 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
43 gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {} 45 gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
44 46
@@ -115,31 +117,35 @@ struct GPU::Impl {
115 } 117 }
116 118
117 /// Request a host GPU memory flush from the CPU. 119 /// Request a host GPU memory flush from the CPU.
118 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { 120 template <typename Func>
119 std::unique_lock lck{flush_request_mutex}; 121 [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
120 const u64 fence = ++last_flush_fence; 122 std::unique_lock lck{sync_request_mutex};
121 flush_requests.emplace_back(fence, addr, size); 123 const u64 fence = ++last_sync_fence;
124 sync_requests.emplace_back(action);
122 return fence; 125 return fence;
123 } 126 }
124 127
125 /// Obtains current flush request fence id. 128 /// Obtains current flush request fence id.
126 [[nodiscard]] u64 CurrentFlushRequestFence() const { 129 [[nodiscard]] u64 CurrentSyncRequestFence() const {
127 return current_flush_fence.load(std::memory_order_relaxed); 130 return current_sync_fence.load(std::memory_order_relaxed);
131 }
132
133 void WaitForSyncOperation(const u64 fence) {
134 std::unique_lock lck{sync_request_mutex};
135 sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
128 } 136 }
129 137
130 /// Tick pending requests within the GPU. 138 /// Tick pending requests within the GPU.
131 void TickWork() { 139 void TickWork() {
132 std::unique_lock lck{flush_request_mutex}; 140 std::unique_lock lck{sync_request_mutex};
133 while (!flush_requests.empty()) { 141 while (!sync_requests.empty()) {
134 auto& request = flush_requests.front(); 142 auto request = std::move(sync_requests.front());
135 const u64 fence = request.fence; 143 sync_requests.pop_front();
136 const VAddr addr = request.addr; 144 sync_request_mutex.unlock();
137 const std::size_t size = request.size; 145 request();
138 flush_requests.pop_front(); 146 current_sync_fence.fetch_add(1, std::memory_order_release);
139 flush_request_mutex.unlock(); 147 sync_request_mutex.lock();
140 rasterizer->FlushRegion(addr, size); 148 sync_request_cv.notify_all();
141 current_flush_fence.store(fence);
142 flush_request_mutex.lock();
143 } 149 }
144 } 150 }
145 151
@@ -207,78 +213,26 @@ struct GPU::Impl {
207 213
208 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 214 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
209 void WaitFence(u32 syncpoint_id, u32 value) { 215 void WaitFence(u32 syncpoint_id, u32 value) {
210 // Synced GPU, is always in sync
211 if (!is_async) {
212 return;
213 }
214 if (syncpoint_id == UINT32_MAX) { 216 if (syncpoint_id == UINT32_MAX) {
215 // TODO: Research what this does.
216 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
217 return; 217 return;
218 } 218 }
219 MICROPROFILE_SCOPE(GPU_wait); 219 MICROPROFILE_SCOPE(GPU_wait);
220 std::unique_lock lock{sync_mutex}; 220 host1x.GetSyncpointManager().WaitHost(syncpoint_id, value);
221 sync_cv.wait(lock, [=, this] {
222 if (shutting_down.load(std::memory_order_relaxed)) {
223 // We're shutting down, ensure no threads continue to wait for the next syncpoint
224 return true;
225 }
226 return syncpoints.at(syncpoint_id).load() >= value;
227 });
228 } 221 }
229 222
230 void IncrementSyncPoint(u32 syncpoint_id) { 223 void IncrementSyncPoint(u32 syncpoint_id) {
231 auto& syncpoint = syncpoints.at(syncpoint_id); 224 host1x.GetSyncpointManager().IncrementHost(syncpoint_id);
232 syncpoint++;
233 std::scoped_lock lock{sync_mutex};
234 sync_cv.notify_all();
235 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
236 if (!interrupt.empty()) {
237 u32 value = syncpoint.load();
238 auto it = interrupt.begin();
239 while (it != interrupt.end()) {
240 if (value >= *it) {
241 TriggerCpuInterrupt(syncpoint_id, *it);
242 it = interrupt.erase(it);
243 continue;
244 }
245 it++;
246 }
247 }
248 } 225 }
249 226
250 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { 227 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
251 return syncpoints.at(syncpoint_id).load(); 228 return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id);
252 } 229 }
253 230
254 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { 231 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
255 std::scoped_lock lock{sync_mutex}; 232 auto& syncpoint_manager = host1x.GetSyncpointManager();
256 u32 current_value = syncpoints.at(syncpoint_id).load(); 233 syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() {
257 if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) {
258 TriggerCpuInterrupt(syncpoint_id, value); 234 TriggerCpuInterrupt(syncpoint_id, value);
259 return; 235 });
260 }
261 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
262 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
263 [value](u32 in_value) { return in_value == value; });
264 if (contains) {
265 return;
266 }
267 interrupt.emplace_back(value);
268 }
269
270 [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
271 std::scoped_lock lock{sync_mutex};
272 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
273 const auto iter =
274 std::find_if(interrupt.begin(), interrupt.end(),
275 [value](u32 interrupt_value) { return value == interrupt_value; });
276
277 if (iter == interrupt.end()) {
278 return false;
279 }
280 interrupt.erase(iter);
281 return true;
282 } 236 }
283 237
284 [[nodiscard]] u64 GetTicks() const { 238 [[nodiscard]] u64 GetTicks() const {
@@ -387,8 +341,48 @@ struct GPU::Impl {
387 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); 341 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
388 } 342 }
389 343
344 void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
345 Service::Nvidia::NvFence* fences, size_t num_fences) {
346 size_t current_request_counter{};
347 {
348 std::unique_lock<std::mutex> lk(request_swap_mutex);
349 if (free_swap_counters.empty()) {
350 current_request_counter = request_swap_counters.size();
351 request_swap_counters.emplace_back(num_fences);
352 } else {
353 current_request_counter = free_swap_counters.front();
354 request_swap_counters[current_request_counter] = num_fences;
355 free_swap_counters.pop_front();
356 }
357 }
358 const auto wait_fence =
359 RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
360 auto& syncpoint_manager = host1x.GetSyncpointManager();
361 if (num_fences == 0) {
362 renderer->SwapBuffers(framebuffer);
363 }
364 const auto executer = [this, current_request_counter,
365 framebuffer_copy = *framebuffer]() {
366 {
367 std::unique_lock<std::mutex> lk(request_swap_mutex);
368 if (--request_swap_counters[current_request_counter] != 0) {
369 return;
370 }
371 free_swap_counters.push_back(current_request_counter);
372 }
373 renderer->SwapBuffers(&framebuffer_copy);
374 };
375 for (size_t i = 0; i < num_fences; i++) {
376 syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
377 }
378 });
379 gpu_thread.TickGPU();
380 WaitForSyncOperation(wait_fence);
381 }
382
390 GPU& gpu; 383 GPU& gpu;
391 Core::System& system; 384 Core::System& system;
385 Host1x::Host1x& host1x;
392 386
393 std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; 387 std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
394 std::unique_ptr<VideoCore::RendererBase> renderer; 388 std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -411,18 +405,11 @@ struct GPU::Impl {
411 405
412 std::condition_variable sync_cv; 406 std::condition_variable sync_cv;
413 407
414 struct FlushRequest { 408 std::list<std::function<void(void)>> sync_requests;
415 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) 409 std::atomic<u64> current_sync_fence{};
416 : fence{fence_}, addr{addr_}, size{size_} {} 410 u64 last_sync_fence{};
417 u64 fence; 411 std::mutex sync_request_mutex;
418 VAddr addr; 412 std::condition_variable sync_request_cv;
419 std::size_t size;
420 };
421
422 std::list<FlushRequest> flush_requests;
423 std::atomic<u64> current_flush_fence{};
424 u64 last_flush_fence{};
425 std::mutex flush_request_mutex;
426 413
427 const bool is_async; 414 const bool is_async;
428 415
@@ -433,6 +420,10 @@ struct GPU::Impl {
433 std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels; 420 std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
434 Tegra::Control::ChannelState* current_channel; 421 Tegra::Control::ChannelState* current_channel;
435 s32 bound_channel{-1}; 422 s32 bound_channel{-1};
423
424 std::deque<size_t> free_swap_counters;
425 std::deque<size_t> request_swap_counters;
426 std::mutex request_swap_mutex;
436}; 427};
437 428
438GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) 429GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() {
477} 468}
478 469
479u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 470u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
480 return impl->RequestFlush(addr, size); 471 return impl->RequestSyncOperation(
472 [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
473}
474
475u64 GPU::CurrentSyncRequestFence() const {
476 return impl->CurrentSyncRequestFence();
481} 477}
482 478
483u64 GPU::CurrentFlushRequestFence() const { 479void GPU::WaitForSyncOperation(u64 fence) {
484 return impl->CurrentFlushRequestFence(); 480 return impl->WaitForSyncOperation(fence);
485} 481}
486 482
487void GPU::TickWork() { 483void GPU::TickWork() {
488 impl->TickWork(); 484 impl->TickWork();
489} 485}
490 486
487/// Gets a mutable reference to the Host1x interface
488Host1x::Host1x& GPU::Host1x() {
489 return impl->host1x;
490}
491
492/// Gets an immutable reference to the Host1x interface.
493const Host1x::Host1x& GPU::Host1x() const {
494 return impl->host1x;
495}
496
491Engines::Maxwell3D& GPU::Maxwell3D() { 497Engines::Maxwell3D& GPU::Maxwell3D() {
492 return impl->Maxwell3D(); 498 return impl->Maxwell3D();
493} 499}
@@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
536 return impl->ShaderNotify(); 542 return impl->ShaderNotify();
537} 543}
538 544
545void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
546 Service::Nvidia::NvFence* fences, size_t num_fences) {
547 impl->RequestSwapBuffers(framebuffer, fences, num_fences);
548}
549
539void GPU::WaitFence(u32 syncpoint_id, u32 value) { 550void GPU::WaitFence(u32 syncpoint_id, u32 value) {
540 impl->WaitFence(syncpoint_id, value); 551 impl->WaitFence(syncpoint_id, value);
541} 552}
@@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
552 impl->RegisterSyncptInterrupt(syncpoint_id, value); 563 impl->RegisterSyncptInterrupt(syncpoint_id, value);
553} 564}
554 565
555bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
556 return impl->CancelSyncptInterrupt(syncpoint_id, value);
557}
558
559u64 GPU::GetTicks() const { 566u64 GPU::GetTicks() const {
560 return impl->GetTicks(); 567 return impl->GetTicks();
561} 568}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 7e84b0d2f..c1a538257 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -93,6 +93,10 @@ namespace Control {
93struct ChannelState; 93struct ChannelState;
94} 94}
95 95
96namespace Host1x {
97class Host1x;
98} // namespace Host1x
99
96class MemoryManager; 100class MemoryManager;
97 101
98class GPU final { 102class GPU final {
@@ -124,11 +128,19 @@ public:
124 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 128 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
125 129
126 /// Obtains current flush request fence id. 130 /// Obtains current flush request fence id.
127 [[nodiscard]] u64 CurrentFlushRequestFence() const; 131 [[nodiscard]] u64 CurrentSyncRequestFence() const;
132
133 void WaitForSyncOperation(u64 fence);
128 134
129 /// Tick pending requests within the GPU. 135 /// Tick pending requests within the GPU.
130 void TickWork(); 136 void TickWork();
131 137
138 /// Gets a mutable reference to the Host1x interface
139 [[nodiscard]] Host1x::Host1x& Host1x();
140
141 /// Gets an immutable reference to the Host1x interface.
142 [[nodiscard]] const Host1x::Host1x& Host1x() const;
143
132 /// Returns a reference to the Maxwell3D GPU engine. 144 /// Returns a reference to the Maxwell3D GPU engine.
133 [[nodiscard]] Engines::Maxwell3D& Maxwell3D(); 145 [[nodiscard]] Engines::Maxwell3D& Maxwell3D();
134 146
@@ -174,8 +186,6 @@ public:
174 186
175 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); 187 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
176 188
177 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
178
179 [[nodiscard]] u64 GetTicks() const; 189 [[nodiscard]] u64 GetTicks() const;
180 190
181 [[nodiscard]] bool IsAsync() const; 191 [[nodiscard]] bool IsAsync() const;
@@ -184,6 +194,9 @@ public:
184 194
185 void RendererFrameEndNotify(); 195 void RendererFrameEndNotify();
186 196
197 void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
198 Service::Nvidia::NvFence* fences, size_t num_fences);
199
187 /// Performs any additional setup necessary in order to begin GPU emulation. 200 /// Performs any additional setup necessary in order to begin GPU emulation.
188 /// This can be used to launch any necessary threads and register any necessary 201 /// This can be used to launch any necessary threads and register any necessary
189 /// core timing events. 202 /// core timing events.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9844cde43..2c03545bf 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -93,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
93 } 93 }
94 auto& gpu = system.GPU(); 94 auto& gpu = system.GPU();
95 u64 fence = gpu.RequestFlush(addr, size); 95 u64 fence = gpu.RequestFlush(addr, size);
96 TickGPU();
97 gpu.WaitForSyncOperation(fence);
98}
99
100void ThreadManager::TickGPU() {
96 PushCommand(GPUTickCommand(), true); 101 PushCommand(GPUTickCommand(), true);
97 ASSERT(fence <= gpu.CurrentFlushRequestFence());
98} 102}
99 103
100void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 104void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index c5078a2b3..64628d3e3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -135,6 +135,8 @@ public:
135 135
136 void OnCommandListEnd(); 136 void OnCommandListEnd();
137 137
138 void TickGPU();
139
138private: 140private:
139 /// Pushes a command to be executed by the GPU thread 141 /// Pushes a command to be executed by the GPU thread
140 u64 PushCommand(CommandData&& command_data, bool block = false); 142 u64 PushCommand(CommandData&& command_data, bool block = false);
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index a5eb97b7f..70c47ae03 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -6,11 +6,11 @@
6#include <vector> 6#include <vector>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/settings.h" 8#include "common/settings.h"
9#include "video_core/command_classes/codecs/codec.h"
10#include "video_core/command_classes/codecs/h264.h"
11#include "video_core/command_classes/codecs/vp8.h"
12#include "video_core/command_classes/codecs/vp9.h"
13#include "video_core/gpu.h" 9#include "video_core/gpu.h"
10#include "video_core/host1x/codecs/codec.h"
11#include "video_core/host1x/codecs/h264.h"
12#include "video_core/host1x/codecs/vp8.h"
13#include "video_core/host1x/codecs/vp9.h"
14#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
15 15
16extern "C" { 16extern "C" {
@@ -73,7 +73,7 @@ void AVFrameDeleter(AVFrame* ptr) {
73 av_frame_free(&ptr); 73 av_frame_free(&ptr);
74} 74}
75 75
76Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) 76Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs)
77 : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), 77 : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
78 vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), 78 vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
79 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} 79 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
@@ -168,11 +168,11 @@ void Codec::InitializeGpuDecoder() {
168void Codec::Initialize() { 168void Codec::Initialize() {
169 const AVCodecID codec = [&] { 169 const AVCodecID codec = [&] {
170 switch (current_codec) { 170 switch (current_codec) {
171 case NvdecCommon::VideoCodec::H264: 171 case Host1x::NvdecCommon::VideoCodec::H264:
172 return AV_CODEC_ID_H264; 172 return AV_CODEC_ID_H264;
173 case NvdecCommon::VideoCodec::VP8: 173 case Host1x::NvdecCommon::VideoCodec::VP8:
174 return AV_CODEC_ID_VP8; 174 return AV_CODEC_ID_VP8;
175 case NvdecCommon::VideoCodec::VP9: 175 case Host1x::NvdecCommon::VideoCodec::VP9:
176 return AV_CODEC_ID_VP9; 176 return AV_CODEC_ID_VP9;
177 default: 177 default:
178 UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); 178 UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
@@ -197,7 +197,7 @@ void Codec::Initialize() {
197 initialized = true; 197 initialized = true;
198} 198}
199 199
200void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { 200void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
201 if (current_codec != codec) { 201 if (current_codec != codec) {
202 current_codec = codec; 202 current_codec = codec;
203 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); 203 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
@@ -215,11 +215,11 @@ void Codec::Decode() {
215 bool vp9_hidden_frame = false; 215 bool vp9_hidden_frame = false;
216 const auto& frame_data = [&]() { 216 const auto& frame_data = [&]() {
217 switch (current_codec) { 217 switch (current_codec) {
218 case Tegra::NvdecCommon::VideoCodec::H264: 218 case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
219 return h264_decoder->ComposeFrame(state, is_first_frame); 219 return h264_decoder->ComposeFrame(state, is_first_frame);
220 case Tegra::NvdecCommon::VideoCodec::VP8: 220 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
221 return vp8_decoder->ComposeFrame(state); 221 return vp8_decoder->ComposeFrame(state);
222 case Tegra::NvdecCommon::VideoCodec::VP9: 222 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
223 vp9_decoder->ComposeFrame(state); 223 vp9_decoder->ComposeFrame(state);
224 vp9_hidden_frame = vp9_decoder->WasFrameHidden(); 224 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
225 return vp9_decoder->GetFrameBytes(); 225 return vp9_decoder->GetFrameBytes();
@@ -287,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() {
287 return frame; 287 return frame;
288} 288}
289 289
290NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { 290Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
291 return current_codec; 291 return current_codec;
292} 292}
293 293
294std::string_view Codec::GetCurrentCodecName() const { 294std::string_view Codec::GetCurrentCodecName() const {
295 switch (current_codec) { 295 switch (current_codec) {
296 case NvdecCommon::VideoCodec::None: 296 case Host1x::NvdecCommon::VideoCodec::None:
297 return "None"; 297 return "None";
298 case NvdecCommon::VideoCodec::H264: 298 case Host1x::NvdecCommon::VideoCodec::H264:
299 return "H264"; 299 return "H264";
300 case NvdecCommon::VideoCodec::VP8: 300 case Host1x::NvdecCommon::VideoCodec::VP8:
301 return "VP8"; 301 return "VP8";
302 case NvdecCommon::VideoCodec::H265: 302 case Host1x::NvdecCommon::VideoCodec::H265:
303 return "H265"; 303 return "H265";
304 case NvdecCommon::VideoCodec::VP9: 304 case Host1x::NvdecCommon::VideoCodec::VP9:
305 return "VP9"; 305 return "VP9";
306 default: 306 default:
307 return "Unknown"; 307 return "Unknown";
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
index 0c2405465..117cb3ccd 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/host1x/codecs/codec.h
@@ -6,8 +6,8 @@
6#include <memory> 6#include <memory>
7#include <string_view> 7#include <string_view>
8#include <queue> 8#include <queue>
9 9#include "common/common_types.h"
10#include "video_core/command_classes/nvdec_common.h" 10#include "video_core/host1x/nvdec_common.h"
11 11
12extern "C" { 12extern "C" {
13#if defined(__GNUC__) || defined(__clang__) 13#if defined(__GNUC__) || defined(__clang__)
@@ -34,14 +34,14 @@ class VP9;
34 34
35class Codec { 35class Codec {
36public: 36public:
37 explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); 37 explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs);
38 ~Codec(); 38 ~Codec();
39 39
40 /// Initialize the codec, returning success or failure 40 /// Initialize the codec, returning success or failure
41 void Initialize(); 41 void Initialize();
42 42
43 /// Sets NVDEC video stream codec 43 /// Sets NVDEC video stream codec
44 void SetTargetCodec(NvdecCommon::VideoCodec codec); 44 void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
45 45
46 /// Call decoders to construct headers, decode AVFrame with ffmpeg 46 /// Call decoders to construct headers, decode AVFrame with ffmpeg
47 void Decode(); 47 void Decode();
@@ -50,7 +50,7 @@ public:
50 [[nodiscard]] AVFramePtr GetCurrentFrame(); 50 [[nodiscard]] AVFramePtr GetCurrentFrame();
51 51
52 /// Returns the value of current_codec 52 /// Returns the value of current_codec
53 [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; 53 [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
54 54
55 /// Return name of the current codec 55 /// Return name of the current codec
56 [[nodiscard]] std::string_view GetCurrentCodecName() const; 56 [[nodiscard]] std::string_view GetCurrentCodecName() const;
@@ -63,14 +63,14 @@ private:
63 bool CreateGpuAvDevice(); 63 bool CreateGpuAvDevice();
64 64
65 bool initialized{}; 65 bool initialized{};
66 NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; 66 Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
67 67
68 const AVCodec* av_codec{nullptr}; 68 const AVCodec* av_codec{nullptr};
69 AVCodecContext* av_codec_ctx{nullptr}; 69 AVCodecContext* av_codec_ctx{nullptr};
70 AVBufferRef* av_gpu_decoder{nullptr}; 70 AVBufferRef* av_gpu_decoder{nullptr};
71 71
72 GPU& gpu; 72 GPU& gpu;
73 const NvdecCommon::NvdecRegisters& state; 73 const Host1x::NvdecCommon::NvdecRegisters& state;
74 std::unique_ptr<Decoder::H264> h264_decoder; 74 std::unique_ptr<Decoder::H264> h264_decoder;
75 std::unique_ptr<Decoder::VP8> vp8_decoder; 75 std::unique_ptr<Decoder::VP8> vp8_decoder;
76 std::unique_ptr<Decoder::VP9> vp9_decoder; 76 std::unique_ptr<Decoder::VP9> vp9_decoder;
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index e2acd54d4..95534bc85 100644
--- a/src/video_core/command_classes/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -5,8 +5,8 @@
5#include <bit> 5#include <bit>
6 6
7#include "common/settings.h" 7#include "common/settings.h"
8#include "video_core/command_classes/codecs/h264.h"
9#include "video_core/gpu.h" 8#include "video_core/gpu.h"
9#include "video_core/host1x/codecs/h264.h"
10#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11 11
12namespace Tegra::Decoder { 12namespace Tegra::Decoder {
@@ -28,7 +28,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {}
28 28
29H264::~H264() = default; 29H264::~H264() = default;
30 30
31const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state, 31const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
32 bool is_first_frame) { 32 bool is_first_frame) {
33 H264DecoderContext context; 33 H264DecoderContext context;
34 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); 34 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
index 261574364..a98730474 100644
--- a/src/video_core/command_classes/codecs/h264.h
+++ b/src/video_core/host1x/codecs/h264.h
@@ -8,7 +8,7 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_funcs.h" 9#include "common/common_funcs.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/command_classes/nvdec_common.h" 11#include "video_core/host1x/nvdec_common.h"
12 12
13namespace Tegra { 13namespace Tegra {
14class GPU; 14class GPU;
@@ -59,8 +59,8 @@ public:
59 ~H264(); 59 ~H264();
60 60
61 /// Compose the H264 frame for FFmpeg decoding 61 /// Compose the H264 frame for FFmpeg decoding
62 [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state, 62 [[nodiscard]] const std::vector<u8>& ComposeFrame(
63 bool is_first_frame = false); 63 const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
64 64
65private: 65private:
66 std::vector<u8> frame; 66 std::vector<u8> frame;
diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index c83b9bbc2..aac026e17 100644
--- a/src/video_core/command_classes/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -3,8 +3,8 @@
3 3
4#include <vector> 4#include <vector>
5 5
6#include "video_core/command_classes/codecs/vp8.h"
7#include "video_core/gpu.h" 6#include "video_core/gpu.h"
7#include "video_core/host1x/codecs/vp8.h"
8#include "video_core/memory_manager.h" 8#include "video_core/memory_manager.h"
9 9
10namespace Tegra::Decoder { 10namespace Tegra::Decoder {
@@ -12,7 +12,7 @@ VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
12 12
13VP8::~VP8() = default; 13VP8::~VP8() = default;
14 14
15const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { 15const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
16 VP8PictureInfo info; 16 VP8PictureInfo info;
17 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); 17 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
18 18
diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
index 3357667b0..a1dfa5f03 100644
--- a/src/video_core/command_classes/codecs/vp8.h
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -8,7 +8,7 @@
8 8
9#include "common/common_funcs.h" 9#include "common/common_funcs.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/command_classes/nvdec_common.h" 11#include "video_core/host1x/nvdec_common.h"
12 12
13namespace Tegra { 13namespace Tegra {
14class GPU; 14class GPU;
@@ -20,7 +20,8 @@ public:
20 ~VP8(); 20 ~VP8();
21 21
22 /// Compose the VP8 frame for FFmpeg decoding 22 /// Compose the VP8 frame for FFmpeg decoding
23 [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state); 23 [[nodiscard]] const std::vector<u8>& ComposeFrame(
24 const Host1x::NvdecCommon::NvdecRegisters& state);
24 25
25private: 26private:
26 std::vector<u8> frame; 27 std::vector<u8> frame;
diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index c01431441..bc50c6ba4 100644
--- a/src/video_core/command_classes/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -4,8 +4,8 @@
4#include <algorithm> // for std::copy 4#include <algorithm> // for std::copy
5#include <numeric> 5#include <numeric>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "video_core/command_classes/codecs/vp9.h"
8#include "video_core/gpu.h" 7#include "video_core/gpu.h"
8#include "video_core/host1x/codecs/vp9.h"
9#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
10 10
11namespace Tegra::Decoder { 11namespace Tegra::Decoder {
@@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
355 } 355 }
356} 356}
357 357
358Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { 358Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
359 PictureInfo picture_info; 359 PictureInfo picture_info;
360 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); 360 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
361 Vp9PictureInfo vp9_info = picture_info.Convert(); 361 Vp9PictureInfo vp9_info = picture_info.Convert();
@@ -376,7 +376,7 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
376 entropy.Convert(dst); 376 entropy.Convert(dst);
377} 377}
378 378
379Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { 379Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
380 Vp9FrameContainer current_frame{}; 380 Vp9FrameContainer current_frame{};
381 { 381 {
382 gpu.SyncGuestHost(); 382 gpu.SyncGuestHost();
@@ -769,7 +769,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
769 return uncomp_writer; 769 return uncomp_writer;
770} 770}
771 771
772void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { 772void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
773 std::vector<u8> bitstream; 773 std::vector<u8> bitstream;
774 { 774 {
775 Vp9FrameContainer curr_frame = GetCurrentFrame(state); 775 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
index ecc40e8b1..a425c0fa4 100644
--- a/src/video_core/command_classes/codecs/vp9.h
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -8,8 +8,8 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/stream.h" 10#include "common/stream.h"
11#include "video_core/command_classes/codecs/vp9_types.h" 11#include "video_core/host1x/codecs/vp9_types.h"
12#include "video_core/command_classes/nvdec_common.h" 12#include "video_core/host1x/nvdec_common.h"
13 13
14namespace Tegra { 14namespace Tegra {
15class GPU; 15class GPU;
@@ -117,7 +117,7 @@ public:
117 117
118 /// Composes the VP9 frame from the GPU state information. 118 /// Composes the VP9 frame from the GPU state information.
119 /// Based on the official VP9 spec documentation 119 /// Based on the official VP9 spec documentation
120 void ComposeFrame(const NvdecCommon::NvdecRegisters& state); 120 void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
121 121
122 /// Returns true if the most recent frame was a hidden frame. 122 /// Returns true if the most recent frame was a hidden frame.
123 [[nodiscard]] bool WasFrameHidden() const { 123 [[nodiscard]] bool WasFrameHidden() const {
@@ -162,13 +162,15 @@ private:
162 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); 162 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
163 163
164 /// Returns VP9 information from NVDEC provided offset and size 164 /// Returns VP9 information from NVDEC provided offset and size
165 [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); 165 [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
166 const Host1x::NvdecCommon::NvdecRegisters& state);
166 167
167 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct 168 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
168 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); 169 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
169 170
170 /// Returns frame to be decoded after buffering 171 /// Returns frame to be decoded after buffering
171 [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); 172 [[nodiscard]] Vp9FrameContainer GetCurrentFrame(
173 const Host1x::NvdecCommon::NvdecRegisters& state);
172 174
173 /// Use NVDEC provided information to compose the headers for the current frame 175 /// Use NVDEC provided information to compose the headers for the current frame
174 [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); 176 [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
index bb3d8df6e..bb3d8df6e 100644
--- a/src/video_core/command_classes/codecs/vp9_types.h
+++ b/src/video_core/host1x/codecs/vp9_types.h
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp
new file mode 100644
index 000000000..b72b01aa3
--- /dev/null
+++ b/src/video_core/host1x/control.cpp
@@ -0,0 +1,35 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/gpu.h"
7#include "video_core/host1x/control.h"
8#include "video_core/host1x/host1x.h"
9
10namespace Tegra::Host1x {
11
12Control::Control(GPU& gpu_) : gpu(gpu_) {}
13
14Control::~Control() = default;
15
16void Control::ProcessMethod(Method method, u32 argument) {
17 switch (method) {
18 case Method::LoadSyncptPayload32:
19 syncpoint_value = argument;
20 break;
21 case Method::WaitSyncpt:
22 case Method::WaitSyncpt32:
23 Execute(argument);
24 break;
25 default:
26 UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
27 break;
28 }
29}
30
31void Control::Execute(u32 data) {
32 gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value);
33}
34
35} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/host1x/control.h
index bb48a4381..04dac7d51 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/host1x/control.h
@@ -1,5 +1,7 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
2// SPDX-License-Identifier: GPL-2.0-or-later 2// (https://github.com/skyline-emu/)
3// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
4// or any later version Refer to the license.txt file included.
3 5
4#pragma once 6#pragma once
5 7
@@ -7,9 +9,12 @@
7 9
8namespace Tegra { 10namespace Tegra {
9class GPU; 11class GPU;
12
13namespace Host1x {
14
10class Nvdec; 15class Nvdec;
11 16
12class Host1x { 17class Control {
13public: 18public:
14 enum class Method : u32 { 19 enum class Method : u32 {
15 WaitSyncpt = 0x8, 20 WaitSyncpt = 0x8,
@@ -17,8 +22,8 @@ public:
17 WaitSyncpt32 = 0x50, 22 WaitSyncpt32 = 0x50,
18 }; 23 };
19 24
20 explicit Host1x(GPU& gpu); 25 explicit Control(GPU& gpu);
21 ~Host1x(); 26 ~Control();
22 27
23 /// Writes the method into the state, Invoke Execute() if encountered 28 /// Writes the method into the state, Invoke Execute() if encountered
24 void ProcessMethod(Method method, u32 argument); 29 void ProcessMethod(Method method, u32 argument);
@@ -31,4 +36,6 @@ private:
31 GPU& gpu; 36 GPU& gpu;
32}; 37};
33 38
39} // namespace Host1x
40
34} // namespace Tegra 41} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
new file mode 100644
index 000000000..2971be286
--- /dev/null
+++ b/src/video_core/host1x/host1x.h
@@ -0,0 +1,33 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9#include "video_core/host1x/syncpoint_manager.h"
10
11namespace Tegra {
12
13namespace Host1x {
14
15class Host1x {
16public:
17 Host1x() : syncpoint_manager{} {}
18
19 SyncpointManager& GetSyncpointManager() {
20 return syncpoint_manager;
21 }
22
23 const SyncpointManager& GetSyncpointManager() const {
24 return syncpoint_manager;
25 }
26
27private:
28 SyncpointManager syncpoint_manager;
29};
30
31} // namespace Host1x
32
33} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/host1x/nvdec.cpp
index 4fbbe3da6..5f6decd0d 100644
--- a/src/video_core/command_classes/nvdec.cpp
+++ b/src/video_core/host1x/nvdec.cpp
@@ -2,10 +2,10 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/assert.h" 4#include "common/assert.h"
5#include "video_core/command_classes/nvdec.h"
6#include "video_core/gpu.h" 5#include "video_core/gpu.h"
6#include "video_core/host1x/nvdec.h"
7 7
8namespace Tegra { 8namespace Tegra::Host1x {
9 9
10#define NVDEC_REG_INDEX(field_name) \ 10#define NVDEC_REG_INDEX(field_name) \
11 (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) 11 (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
@@ -44,4 +44,4 @@ void Nvdec::Execute() {
44 } 44 }
45} 45}
46 46
47} // namespace Tegra 47} // namespace Tegra::Host1x
diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/host1x/nvdec.h
index 488531fc6..41ba1f7a0 100644
--- a/src/video_core/command_classes/nvdec.h
+++ b/src/video_core/host1x/nvdec.h
@@ -6,11 +6,13 @@
6#include <memory> 6#include <memory>
7#include <vector> 7#include <vector>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/command_classes/codecs/codec.h" 9#include "video_core/host1x/codecs/codec.h"
10 10
11namespace Tegra { 11namespace Tegra {
12class GPU; 12class GPU;
13 13
14namespace Host1x {
15
14class Nvdec { 16class Nvdec {
15public: 17public:
16 explicit Nvdec(GPU& gpu); 18 explicit Nvdec(GPU& gpu);
@@ -30,4 +32,7 @@ private:
30 NvdecCommon::NvdecRegisters state; 32 NvdecCommon::NvdecRegisters state;
31 std::unique_ptr<Codec> codec; 33 std::unique_ptr<Codec> codec;
32}; 34};
35
36} // namespace Host1x
37
33} // namespace Tegra 38} // namespace Tegra
diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/host1x/nvdec_common.h
index 521e5b52b..49d67ebbe 100644
--- a/src/video_core/command_classes/nvdec_common.h
+++ b/src/video_core/host1x/nvdec_common.h
@@ -7,7 +7,7 @@
7#include "common/common_funcs.h" 7#include "common/common_funcs.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9 9
10namespace Tegra::NvdecCommon { 10namespace Tegra::Host1x::NvdecCommon {
11 11
12enum class VideoCodec : u64 { 12enum class VideoCodec : u64 {
13 None = 0x0, 13 None = 0x0,
@@ -94,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
94 94
95#undef ASSERT_REG_POSITION 95#undef ASSERT_REG_POSITION
96 96
97} // namespace Tegra::NvdecCommon 97} // namespace Tegra::Host1x::NvdecCommon
diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp
index 67e58046f..8694f77e2 100644
--- a/src/video_core/command_classes/sync_manager.cpp
+++ b/src/video_core/host1x/sync_manager.cpp
@@ -4,8 +4,12 @@
4#include <algorithm> 4#include <algorithm>
5#include "sync_manager.h" 5#include "sync_manager.h"
6#include "video_core/gpu.h" 6#include "video_core/gpu.h"
7#include "video_core/host1x/host1x.h"
8#include "video_core/host1x/syncpoint_manager.h"
7 9
8namespace Tegra { 10namespace Tegra {
11namespace Host1x {
12
9SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} 13SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
10SyncptIncrManager::~SyncptIncrManager() = default; 14SyncptIncrManager::~SyncptIncrManager() = default;
11 15
@@ -36,8 +40,12 @@ void SyncptIncrManager::IncrementAllDone() {
36 if (!increments[done_count].complete) { 40 if (!increments[done_count].complete) {
37 break; 41 break;
38 } 42 }
39 gpu.IncrementSyncPoint(increments[done_count].syncpt_id); 43 auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
44 syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
45 syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
40 } 46 }
41 increments.erase(increments.begin(), increments.begin() + done_count); 47 increments.erase(increments.begin(), increments.begin() + done_count);
42} 48}
49
50} // namespace Host1x
43} // namespace Tegra 51} // namespace Tegra
diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/host1x/sync_manager.h
index 6dfaae080..aba72d5c5 100644
--- a/src/video_core/command_classes/sync_manager.h
+++ b/src/video_core/host1x/sync_manager.h
@@ -8,7 +8,11 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9 9
10namespace Tegra { 10namespace Tegra {
11
11class GPU; 12class GPU;
13
14namespace Host1x {
15
12struct SyncptIncr { 16struct SyncptIncr {
13 u32 id; 17 u32 id;
14 u32 class_id; 18 u32 class_id;
@@ -44,4 +48,6 @@ private:
44 GPU& gpu; 48 GPU& gpu;
45}; 49};
46 50
51} // namespace Host1x
52
47} // namespace Tegra 53} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
new file mode 100644
index 000000000..c606b8bd0
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -0,0 +1,93 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/host1x/syncpoint_manager.h"
6
7namespace Tegra {
8
9namespace Host1x {
10
11SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
12 std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
13 std::function<void(void)>& action) {
14 if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
15 action();
16 return {};
17 }
18
19 std::unique_lock<std::mutex> lk(guard);
20 if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
21 action();
22 return {};
23 }
24 auto it = action_storage.begin();
25 while (it != action_storage.end()) {
26 if (it->expected_value >= expected_value) {
27 break;
28 }
29 ++it;
30 }
31 return action_storage.emplace(it, expected_value, action);
32}
33
34void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
35 ActionHandle& handle) {
36 std::unique_lock<std::mutex> lk(guard);
37 action_storage.erase(handle);
38}
39
40void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
41 DeregisterAction(guest_action_storage[syncpoint_id], handle);
42}
43
44void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
45 DeregisterAction(host_action_storage[syncpoint_id], handle);
46}
47
48void SyncpointManager::IncrementGuest(u32 syncpoint_id) {
49 Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]);
50}
51
52void SyncpointManager::IncrementHost(u32 syncpoint_id) {
53 Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]);
54}
55
56void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) {
57 Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value);
58}
59
60void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) {
61 Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value);
62}
63
64void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
65 std::list<RegisteredAction>& action_storage) {
66 auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
67
68 std::unique_lock<std::mutex> lk(guard);
69 auto it = action_storage.begin();
70 while (it != action_storage.end()) {
71 if (it->expected_value > new_value) {
72 break;
73 }
74 it->action();
75 it = action_storage.erase(it);
76 }
77 wait_cv.notify_all();
78}
79
80void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
81 u32 expected_value) {
82 const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; };
83 if (pred()) {
84 return;
85 }
86
87 std::unique_lock<std::mutex> lk(guard);
88 wait_cv.wait(lk, pred);
89}
90
91} // namespace Host1x
92
93} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
new file mode 100644
index 000000000..0ecc040ab
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <functional>
11#include <list>
12#include <mutex>
13
14#include "common/common_types.h"
15
16namespace Tegra {
17
18namespace Host1x {
19
20class SyncpointManager {
21public:
22 u32 GetGuestSyncpointValue(u32 id) {
23 return syncpoints_guest[id].load(std::memory_order_acquire);
24 }
25
26 u32 GetHostSyncpointValue(u32 id) {
27 return syncpoints_host[id].load(std::memory_order_acquire);
28 }
29
30 struct RegisteredAction {
31 RegisteredAction(u32 expected_value_, std::function<void(void)>& action_)
32 : expected_value{expected_value_}, action{action_} {}
33 u32 expected_value;
34 std::function<void(void)> action;
35 };
36 using ActionHandle = std::list<RegisteredAction>::iterator;
37
38 template <typename Func>
39 ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
40 std::function<void(void)> func(action);
41 return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
42 expected_value, func);
43 }
44
45 template <typename Func>
46 ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
47 std::function<void(void)> func(action);
48 return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
49 expected_value, func);
50 }
51
52 void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle);
53
54 void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle);
55
56 void IncrementGuest(u32 syncpoint_id);
57
58 void IncrementHost(u32 syncpoint_id);
59
60 void WaitGuest(u32 syncpoint_id, u32 expected_value);
61
62 void WaitHost(u32 syncpoint_id, u32 expected_value);
63
64 bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) {
65 return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
66 }
67
68 bool IsReadyHost(u32 syncpoint_id, u32 expected_value) {
69 return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
70 }
71
72private:
73 void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
74 std::list<RegisteredAction>& action_storage);
75
76 ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
77 std::list<RegisteredAction>& action_storage, u32 expected_value,
78 std::function<void(void)>& action);
79
80 void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
81
82 void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
83
84 static constexpr size_t NUM_MAX_SYNCPOINTS = 192;
85
86 std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{};
87 std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{};
88
89 std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage;
90 std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage;
91
92 std::mutex guard;
93 std::condition_variable wait_guest_cv;
94 std::condition_variable wait_host_cv;
95};
96
97} // namespace Host1x
98
99} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/host1x/vic.cpp
index 7c17df353..a9422670a 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -18,14 +18,17 @@ extern "C" {
18#include "common/bit_field.h" 18#include "common/bit_field.h"
19#include "common/logging/log.h" 19#include "common/logging/log.h"
20 20
21#include "video_core/command_classes/nvdec.h"
22#include "video_core/command_classes/vic.h"
23#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
24#include "video_core/gpu.h" 22#include "video_core/gpu.h"
23#include "video_core/host1x/nvdec.h"
24#include "video_core/host1x/vic.h"
25#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
26#include "video_core/textures/decoders.h" 26#include "video_core/textures/decoders.h"
27 27
28namespace Tegra { 28namespace Tegra {
29
30namespace Host1x {
31
29namespace { 32namespace {
30enum class VideoPixelFormat : u64_le { 33enum class VideoPixelFormat : u64_le {
31 RGBA8 = 0x1f, 34 RGBA8 = 0x1f,
@@ -235,4 +238,6 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
235 chroma_buffer.size()); 238 chroma_buffer.size());
236} 239}
237 240
241} // namespace Host1x
242
238} // namespace Tegra 243} // namespace Tegra
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/host1x/vic.h
index 010daa6b6..c51f8af7e 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/host1x/vic.h
@@ -11,6 +11,9 @@ struct SwsContext;
11 11
12namespace Tegra { 12namespace Tegra {
13class GPU; 13class GPU;
14
15namespace Host1x {
16
14class Nvdec; 17class Nvdec;
15union VicConfig; 18union VicConfig;
16 19
@@ -40,7 +43,7 @@ private:
40 void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); 43 void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
41 44
42 GPU& gpu; 45 GPU& gpu;
43 std::shared_ptr<Tegra::Nvdec> nvdec_processor; 46 std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
44 47
45 /// Avoid reallocation of the following buffers every frame, as their 48 /// Avoid reallocation of the following buffers every frame, as their
46 /// size does not change during a stream 49 /// size does not change during a stream
@@ -58,4 +61,6 @@ private:
58 s32 scaler_height{}; 61 s32 scaler_height{};
59}; 62};
60 63
64} // namespace Host1x
65
61} // namespace Tegra 66} // namespace Tegra