path: root/src/video_core/gpu.cpp
author     Fernando S    2021-10-06 20:02:31 +0200
committer  GitHub        2021-10-06 20:02:31 +0200
commit     f84328934f5e09894a69d9fa1d2f6a34c715321b (patch)
tree       eafe64da6ca2bb195cf5523d3fa13f89f117f897 /src/video_core/gpu.cpp
parent     Merge pull request #7090 from Moonlacer/tas_spacing_addition (diff)
parent     nvflinger: Use jthread and stop_token for VSync thread (diff)
download   yuzu-f84328934f5e09894a69d9fa1d2f6a34c715321b.tar.gz
           yuzu-f84328934f5e09894a69d9fa1d2f6a34c715321b.tar.xz
           yuzu-f84328934f5e09894a69d9fa1d2f6a34c715321b.zip
Merge pull request #7118 from ameerj/vc-gpu-impl
gpu: Migrate implementation to the cpp file
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r--  src/video_core/gpu.cpp  1215
1 file changed, 794 insertions, 421 deletions
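
This merge applies the pimpl (pointer-to-implementation) idiom to Tegra::GPU: the class's state and engine objects move into a GPU::Impl struct defined only in gpu.cpp, and the public GPU methods become thin forwarders through a std::unique_ptr<Impl>. A minimal sketch of that shape follows; the Widget/Impl/DoWork names are illustrative only, not the real yuzu interface:

    // widget.h -- public header exposes no implementation details.
    #include <memory>

    class Widget {
    public:
        Widget();
        ~Widget();                 // defined in the .cpp, where Impl is complete
        int DoWork(int value);

    private:
        struct Impl;               // forward declaration only
        std::unique_ptr<Impl> impl;
    };

    // widget.cpp -- all state and logic live behind Impl.
    struct Widget::Impl {
        int accumulated = 0;

        int DoWork(int value) {
            accumulated += value;
            return accumulated;
        }
    };

    Widget::Widget() : impl{std::make_unique<Impl>()} {}
    Widget::~Widget() = default;   // unique_ptr<Impl> is destroyed where Impl is known

    int Widget::DoWork(int value) {
        return impl->DoWork(value); // public method is a thin forwarder
    }

The payoff is that the headers needed by the implementation stay local to gpu.cpp instead of leaking through gpu.h, which is consistent with the block of extra includes this file gains at the top of the diff below.
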
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2ae3639b5..ab7c21a49 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,540 +2,913 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <atomic>
5#include <chrono> 7#include <chrono>
8#include <condition_variable>
9#include <list>
10#include <memory>
6 11
7#include "common/assert.h" 12#include "common/assert.h"
8#include "common/microprofile.h" 13#include "common/microprofile.h"
9#include "common/settings.h" 14#include "common/settings.h"
10#include "core/core.h" 15#include "core/core.h"
11#include "core/core_timing.h" 16#include "core/core_timing.h"
12#include "core/core_timing_util.h"
13#include "core/frontend/emu_window.h" 17#include "core/frontend/emu_window.h"
14#include "core/hardware_interrupt_manager.h" 18#include "core/hardware_interrupt_manager.h"
15#include "core/memory.h" 19#include "core/hle/service/nvdrv/nvdata.h"
20#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "core/perf_stats.h" 21#include "core/perf_stats.h"
22#include "video_core/cdma_pusher.h"
23#include "video_core/dma_pusher.h"
17#include "video_core/engines/fermi_2d.h" 24#include "video_core/engines/fermi_2d.h"
18#include "video_core/engines/kepler_compute.h" 25#include "video_core/engines/kepler_compute.h"
19#include "video_core/engines/kepler_memory.h" 26#include "video_core/engines/kepler_memory.h"
20#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/maxwell_dma.h" 28#include "video_core/engines/maxwell_dma.h"
22#include "video_core/gpu.h" 29#include "video_core/gpu.h"
30#include "video_core/gpu_thread.h"
23#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
24#include "video_core/renderer_base.h" 32#include "video_core/renderer_base.h"
25#include "video_core/shader_notify.h" 33#include "video_core/shader_notify.h"
26#include "video_core/video_core.h"
27 34
28namespace Tegra { 35namespace Tegra {
29 36
30MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 37MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
31 38
32GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) 39struct GPU::Impl {
33 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
34 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, 41 : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 42 system)},
36 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 43 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 44 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 45 fermi_2d{std::make_unique<Engines::Fermi2D>()},
39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 46 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 47 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
41 gpu_thread{system_, is_async_} {} 48 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
49 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
50 gpu_thread{system_, is_async_} {}
51
52 ~Impl() = default;
53
54 /// Binds a renderer to the GPU.
55 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
56 renderer = std::move(renderer_);
57 rasterizer = renderer->ReadRasterizer();
58
59 memory_manager->BindRasterizer(rasterizer);
60 maxwell_3d->BindRasterizer(rasterizer);
61 fermi_2d->BindRasterizer(rasterizer);
62 kepler_compute->BindRasterizer(rasterizer);
63 maxwell_dma->BindRasterizer(rasterizer);
64 }
42 65
43GPU::~GPU() = default; 66 /// Calls a GPU method.
67 void CallMethod(const GPU::MethodCall& method_call) {
68 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
69 method_call.subchannel);
70
71 ASSERT(method_call.subchannel < bound_engines.size());
72
73 if (ExecuteMethodOnEngine(method_call.method)) {
74 CallEngineMethod(method_call);
75 } else {
76 CallPullerMethod(method_call);
77 }
78 }
79
80 /// Calls a GPU multivalue method.
81 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
82 u32 methods_pending) {
83 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
44 84
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 85 ASSERT(subchannel < bound_engines.size());
46 renderer = std::move(renderer_); 86
47 rasterizer = renderer->ReadRasterizer(); 87 if (ExecuteMethodOnEngine(method)) {
88 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
89 } else {
90 for (std::size_t i = 0; i < amount; i++) {
91 CallPullerMethod(GPU::MethodCall{
92 method,
93 base_start[i],
94 subchannel,
95 methods_pending - static_cast<u32>(i),
96 });
97 }
98 }
99 }
100
101 /// Flush all current written commands into the host GPU for execution.
102 void FlushCommands() {
103 rasterizer->FlushCommands();
104 }
105
106 /// Synchronizes CPU writes with Host GPU memory.
107 void SyncGuestHost() {
108 rasterizer->SyncGuestHost();
109 }
110
111 /// Signal the ending of command list.
112 void OnCommandListEnd() {
113 if (is_async) {
114 // This command only applies to asynchronous GPU mode
115 gpu_thread.OnCommandListEnd();
116 }
117 }
118
119 /// Request a host GPU memory flush from the CPU.
120 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
121 std::unique_lock lck{flush_request_mutex};
122 const u64 fence = ++last_flush_fence;
123 flush_requests.emplace_back(fence, addr, size);
124 return fence;
125 }
126
127 /// Obtains current flush request fence id.
128 [[nodiscard]] u64 CurrentFlushRequestFence() const {
129 return current_flush_fence.load(std::memory_order_relaxed);
130 }
131
132 /// Tick pending requests within the GPU.
133 void TickWork() {
134 std::unique_lock lck{flush_request_mutex};
135 while (!flush_requests.empty()) {
136 auto& request = flush_requests.front();
137 const u64 fence = request.fence;
138 const VAddr addr = request.addr;
139 const std::size_t size = request.size;
140 flush_requests.pop_front();
141 flush_request_mutex.unlock();
142 rasterizer->FlushRegion(addr, size);
143 current_flush_fence.store(fence);
144 flush_request_mutex.lock();
145 }
146 }
147
148 /// Returns a reference to the Maxwell3D GPU engine.
149 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
150 return *maxwell_3d;
151 }
152
153 /// Returns a const reference to the Maxwell3D GPU engine.
154 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
155 return *maxwell_3d;
156 }
157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
160 return *kepler_compute;
161 }
162
163 /// Returns a reference to the KeplerCompute GPU engine.
164 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
165 return *kepler_compute;
166 }
167
168 /// Returns a reference to the GPU memory manager.
169 [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
170 return *memory_manager;
171 }
172
173 /// Returns a const reference to the GPU memory manager.
174 [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
175 return *memory_manager;
176 }
177
178 /// Returns a reference to the GPU DMA pusher.
179 [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
180 return *dma_pusher;
181 }
182
183 /// Returns a const reference to the GPU DMA pusher.
184 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
185 return *dma_pusher;
186 }
187
188 /// Returns a reference to the GPU CDMA pusher.
189 [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
190 return *cdma_pusher;
191 }
192
193 /// Returns a const reference to the GPU CDMA pusher.
194 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
195 return *cdma_pusher;
196 }
197
198 /// Returns a reference to the underlying renderer.
199 [[nodiscard]] VideoCore::RendererBase& Renderer() {
200 return *renderer;
201 }
202
203 /// Returns a const reference to the underlying renderer.
204 [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
205 return *renderer;
206 }
207
208 /// Returns a reference to the shader notifier.
209 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
210 return *shader_notify;
211 }
212
213 /// Returns a const reference to the shader notifier.
214 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
215 return *shader_notify;
216 }
217
218 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
219 void WaitFence(u32 syncpoint_id, u32 value) {
220 // Synced GPU, is always in sync
221 if (!is_async) {
222 return;
223 }
224 if (syncpoint_id == UINT32_MAX) {
225 // TODO: Research what this does.
226 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
227 return;
228 }
229 MICROPROFILE_SCOPE(GPU_wait);
230 std::unique_lock lock{sync_mutex};
231 sync_cv.wait(lock, [=, this] {
232 if (shutting_down.load(std::memory_order_relaxed)) {
233 // We're shutting down, ensure no threads continue to wait for the next syncpoint
234 return true;
235 }
236 return syncpoints.at(syncpoint_id).load() >= value;
237 });
238 }
239
240 void IncrementSyncPoint(u32 syncpoint_id) {
241 auto& syncpoint = syncpoints.at(syncpoint_id);
242 syncpoint++;
243 std::lock_guard lock{sync_mutex};
244 sync_cv.notify_all();
245 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
246 if (!interrupt.empty()) {
247 u32 value = syncpoint.load();
248 auto it = interrupt.begin();
249 while (it != interrupt.end()) {
250 if (value >= *it) {
251 TriggerCpuInterrupt(syncpoint_id, *it);
252 it = interrupt.erase(it);
253 continue;
254 }
255 it++;
256 }
257 }
258 }
259
260 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
261 return syncpoints.at(syncpoint_id).load();
262 }
263
264 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
265 std::lock_guard lock{sync_mutex};
266 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
267 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
268 [value](u32 in_value) { return in_value == value; });
269 if (contains) {
270 return;
271 }
272 interrupt.emplace_back(value);
273 }
274
275 [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
276 std::lock_guard lock{sync_mutex};
277 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
278 const auto iter =
279 std::find_if(interrupt.begin(), interrupt.end(),
280 [value](u32 interrupt_value) { return value == interrupt_value; });
281
282 if (iter == interrupt.end()) {
283 return false;
284 }
285 interrupt.erase(iter);
286 return true;
287 }
288
289 [[nodiscard]] u64 GetTicks() const {
290 // This values were reversed engineered by fincs from NVN
291 // The gpu clock is reported in units of 385/625 nanoseconds
292 constexpr u64 gpu_ticks_num = 384;
293 constexpr u64 gpu_ticks_den = 625;
294
295 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
296 if (Settings::values.use_fast_gpu_time.GetValue()) {
297 nanoseconds /= 256;
298 }
299 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
300 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
301 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
302 }
303
304 [[nodiscard]] bool IsAsync() const {
305 return is_async;
306 }
307
308 [[nodiscard]] bool UseNvdec() const {
309 return use_nvdec;
310 }
311
312 void RendererFrameEndNotify() {
313 system.GetPerfStats().EndGameFrame();
314 }
315
316 /// Performs any additional setup necessary in order to begin GPU emulation.
317 /// This can be used to launch any necessary threads and register any necessary
318 /// core timing events.
319 void Start() {
320 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
321 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
322 cpu_context->MakeCurrent();
323 }
324
325 /// Obtain the CPU Context
326 void ObtainContext() {
327 cpu_context->MakeCurrent();
328 }
329
330 /// Release the CPU Context
331 void ReleaseContext() {
332 cpu_context->DoneCurrent();
333 }
334
335 /// Push GPU command entries to be processed
336 void PushGPUEntries(Tegra::CommandList&& entries) {
337 gpu_thread.SubmitList(std::move(entries));
338 }
339
340 /// Push GPU command buffer entries to be processed
341 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
342 if (!use_nvdec) {
343 return;
344 }
345
346 if (!cdma_pusher) {
347 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
348 }
349
350 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
351 // TODO(ameerj): RE proper async nvdec operation
352 // gpu_thread.SubmitCommandBuffer(std::move(entries));
353
354 cdma_pusher->ProcessEntries(std::move(entries));
355 }
356
357 /// Frees the CDMAPusher instance to free up resources
358 void ClearCdmaInstance() {
359 cdma_pusher.reset();
360 }
361
362 /// Swap buffers (render frame)
363 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
364 gpu_thread.SwapBuffers(framebuffer);
365 }
366
367 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
368 void FlushRegion(VAddr addr, u64 size) {
369 gpu_thread.FlushRegion(addr, size);
370 }
371
372 /// Notify rasterizer that any caches of the specified region should be invalidated
373 void InvalidateRegion(VAddr addr, u64 size) {
374 gpu_thread.InvalidateRegion(addr, size);
375 }
376
377 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
378 void FlushAndInvalidateRegion(VAddr addr, u64 size) {
379 gpu_thread.FlushAndInvalidateRegion(addr, size);
380 }
381
382 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
383 auto& interrupt_manager = system.InterruptManager();
384 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
385 }
386
387 void ProcessBindMethod(const GPU::MethodCall& method_call) {
388 // Bind the current subchannel to the desired engine id.
389 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
390 method_call.argument);
391 const auto engine_id = static_cast<EngineID>(method_call.argument);
392 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
393 switch (engine_id) {
394 case EngineID::FERMI_TWOD_A:
395 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
396 break;
397 case EngineID::MAXWELL_B:
398 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
399 break;
400 case EngineID::KEPLER_COMPUTE_B:
401 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
402 break;
403 case EngineID::MAXWELL_DMA_COPY_A:
404 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
405 break;
406 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
407 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
408 break;
409 default:
410 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
411 }
412 }
48 413
49 memory_manager->BindRasterizer(rasterizer); 414 void ProcessFenceActionMethod() {
50 maxwell_3d->BindRasterizer(rasterizer); 415 switch (regs.fence_action.op) {
51 fermi_2d->BindRasterizer(rasterizer); 416 case GPU::FenceOperation::Acquire:
52 kepler_compute->BindRasterizer(rasterizer); 417 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
53 maxwell_dma->BindRasterizer(rasterizer); 418 break;
419 case GPU::FenceOperation::Increment:
420 IncrementSyncPoint(regs.fence_action.syncpoint_id);
421 break;
422 default:
423 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
424 }
425 }
426
427 void ProcessWaitForInterruptMethod() {
428 // TODO(bunnei) ImplementMe
429 LOG_WARNING(HW_GPU, "(STUBBED) called");
430 }
431
432 void ProcessSemaphoreTriggerMethod() {
433 const auto semaphoreOperationMask = 0xF;
434 const auto op =
435 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
436 if (op == GpuSemaphoreOperation::WriteLong) {
437 struct Block {
438 u32 sequence;
439 u32 zeros = 0;
440 u64 timestamp;
441 };
442
443 Block block{};
444 block.sequence = regs.semaphore_sequence;
445 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
446 // CoreTiming
447 block.timestamp = GetTicks();
448 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
449 sizeof(block));
450 } else {
451 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
452 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
453 (op == GpuSemaphoreOperation::AcquireGequal &&
454 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
455 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
456 // Nothing to do in this case
457 } else {
458 regs.acquire_source = true;
459 regs.acquire_value = regs.semaphore_sequence;
460 if (op == GpuSemaphoreOperation::AcquireEqual) {
461 regs.acquire_active = true;
462 regs.acquire_mode = false;
463 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
464 regs.acquire_active = true;
465 regs.acquire_mode = true;
466 } else if (op == GpuSemaphoreOperation::AcquireMask) {
467 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
468 // semaphore_sequence, gives a non-0 result
469 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
470 } else {
471 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
472 }
473 }
474 }
475 }
476
477 void ProcessSemaphoreRelease() {
478 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
479 regs.semaphore_release);
480 }
481
482 void ProcessSemaphoreAcquire() {
483 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
484 const auto value = regs.semaphore_acquire;
485 if (word != value) {
486 regs.acquire_active = true;
487 regs.acquire_value = value;
488 // TODO(kemathe73) figure out how to do the acquire_timeout
489 regs.acquire_mode = false;
490 regs.acquire_source = false;
491 }
492 }
493
494 /// Calls a GPU puller method.
495 void CallPullerMethod(const GPU::MethodCall& method_call) {
496 regs.reg_array[method_call.method] = method_call.argument;
497 const auto method = static_cast<BufferMethods>(method_call.method);
498
499 switch (method) {
500 case BufferMethods::BindObject: {
501 ProcessBindMethod(method_call);
502 break;
503 }
504 case BufferMethods::Nop:
505 case BufferMethods::SemaphoreAddressHigh:
506 case BufferMethods::SemaphoreAddressLow:
507 case BufferMethods::SemaphoreSequence:
508 case BufferMethods::UnkCacheFlush:
509 case BufferMethods::WrcacheFlush:
510 case BufferMethods::FenceValue:
511 break;
512 case BufferMethods::RefCnt:
513 rasterizer->SignalReference();
514 break;
515 case BufferMethods::FenceAction:
516 ProcessFenceActionMethod();
517 break;
518 case BufferMethods::WaitForInterrupt:
519 ProcessWaitForInterruptMethod();
520 break;
521 case BufferMethods::SemaphoreTrigger: {
522 ProcessSemaphoreTriggerMethod();
523 break;
524 }
525 case BufferMethods::NotifyIntr: {
526 // TODO(Kmather73): Research and implement this method.
527 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
528 break;
529 }
530 case BufferMethods::Unk28: {
531 // TODO(Kmather73): Research and implement this method.
532 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
533 break;
534 }
535 case BufferMethods::SemaphoreAcquire: {
536 ProcessSemaphoreAcquire();
537 break;
538 }
539 case BufferMethods::SemaphoreRelease: {
540 ProcessSemaphoreRelease();
541 break;
542 }
543 case BufferMethods::Yield: {
544 // TODO(Kmather73): Research and implement this method.
545 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
546 break;
547 }
548 default:
549 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
550 break;
551 }
552 }
553
554 /// Calls a GPU engine method.
555 void CallEngineMethod(const GPU::MethodCall& method_call) {
556 const EngineID engine = bound_engines[method_call.subchannel];
557
558 switch (engine) {
559 case EngineID::FERMI_TWOD_A:
560 fermi_2d->CallMethod(method_call.method, method_call.argument,
561 method_call.IsLastCall());
562 break;
563 case EngineID::MAXWELL_B:
564 maxwell_3d->CallMethod(method_call.method, method_call.argument,
565 method_call.IsLastCall());
566 break;
567 case EngineID::KEPLER_COMPUTE_B:
568 kepler_compute->CallMethod(method_call.method, method_call.argument,
569 method_call.IsLastCall());
570 break;
571 case EngineID::MAXWELL_DMA_COPY_A:
572 maxwell_dma->CallMethod(method_call.method, method_call.argument,
573 method_call.IsLastCall());
574 break;
575 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
576 kepler_memory->CallMethod(method_call.method, method_call.argument,
577 method_call.IsLastCall());
578 break;
579 default:
580 UNIMPLEMENTED_MSG("Unimplemented engine");
581 }
582 }
583
584 /// Calls a GPU engine multivalue method.
585 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
586 u32 methods_pending) {
587 const EngineID engine = bound_engines[subchannel];
588
589 switch (engine) {
590 case EngineID::FERMI_TWOD_A:
591 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
592 break;
593 case EngineID::MAXWELL_B:
594 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
595 break;
596 case EngineID::KEPLER_COMPUTE_B:
597 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
598 break;
599 case EngineID::MAXWELL_DMA_COPY_A:
600 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
601 break;
602 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
603 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
604 break;
605 default:
606 UNIMPLEMENTED_MSG("Unimplemented engine");
607 }
608 }
609
610 /// Determines where the method should be executed.
611 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
612 const auto buffer_method = static_cast<BufferMethods>(method);
613 return buffer_method >= BufferMethods::NonPullerMethods;
614 }
615
616 struct Regs {
617 static constexpr size_t NUM_REGS = 0x40;
618
619 union {
620 struct {
621 INSERT_PADDING_WORDS_NOINIT(0x4);
622 struct {
623 u32 address_high;
624 u32 address_low;
625
626 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
627 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
628 address_low);
629 }
630 } semaphore_address;
631
632 u32 semaphore_sequence;
633 u32 semaphore_trigger;
634 INSERT_PADDING_WORDS_NOINIT(0xC);
635
636 // The pusher and the puller share the reference counter, the pusher only has read
637 // access
638 u32 reference_count;
639 INSERT_PADDING_WORDS_NOINIT(0x5);
640
641 u32 semaphore_acquire;
642 u32 semaphore_release;
643 u32 fence_value;
644 GPU::FenceAction fence_action;
645 INSERT_PADDING_WORDS_NOINIT(0xE2);
646
647 // Puller state
648 u32 acquire_mode;
649 u32 acquire_source;
650 u32 acquire_active;
651 u32 acquire_timeout;
652 u32 acquire_value;
653 };
654 std::array<u32, NUM_REGS> reg_array;
655 };
656 } regs{};
657
658 GPU& gpu;
659 Core::System& system;
660 std::unique_ptr<Tegra::MemoryManager> memory_manager;
661 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
662 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
663 std::unique_ptr<VideoCore::RendererBase> renderer;
664 VideoCore::RasterizerInterface* rasterizer = nullptr;
665 const bool use_nvdec;
666
667 /// Mapping of command subchannels to their bound engine ids
668 std::array<EngineID, 8> bound_engines{};
669 /// 3D engine
670 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
671 /// 2D engine
672 std::unique_ptr<Engines::Fermi2D> fermi_2d;
673 /// Compute engine
674 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
675 /// DMA engine
676 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
677 /// Inline memory engine
678 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
679 /// Shader build notifier
680 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
681 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
682 std::atomic_bool shutting_down{};
683
684 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
685
686 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
687
688 std::mutex sync_mutex;
689 std::mutex device_mutex;
690
691 std::condition_variable sync_cv;
692
693 struct FlushRequest {
694 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
695 : fence{fence_}, addr{addr_}, size{size_} {}
696 u64 fence;
697 VAddr addr;
698 std::size_t size;
699 };
700
701 std::list<FlushRequest> flush_requests;
702 std::atomic<u64> current_flush_fence{};
703 u64 last_flush_fence{};
704 std::mutex flush_request_mutex;
705
706 const bool is_async;
707
708 VideoCommon::GPUThread::ThreadManager gpu_thread;
709 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
710
711#define ASSERT_REG_POSITION(field_name, position) \
712 static_assert(offsetof(Regs, field_name) == position * 4, \
713 "Field " #field_name " has invalid position")
714
715 ASSERT_REG_POSITION(semaphore_address, 0x4);
716 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
717 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
718 ASSERT_REG_POSITION(reference_count, 0x14);
719 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
720 ASSERT_REG_POSITION(semaphore_release, 0x1B);
721 ASSERT_REG_POSITION(fence_value, 0x1C);
722 ASSERT_REG_POSITION(fence_action, 0x1D);
723
724 ASSERT_REG_POSITION(acquire_mode, 0x100);
725 ASSERT_REG_POSITION(acquire_source, 0x101);
726 ASSERT_REG_POSITION(acquire_active, 0x102);
727 ASSERT_REG_POSITION(acquire_timeout, 0x103);
728 ASSERT_REG_POSITION(acquire_value, 0x104);
729
730#undef ASSERT_REG_POSITION
731
732 enum class GpuSemaphoreOperation {
733 AcquireEqual = 0x1,
734 WriteLong = 0x2,
735 AcquireGequal = 0x4,
736 AcquireMask = 0x8,
737 };
738};
739
740GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
741 : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
742
743GPU::~GPU() = default;
744
745void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
746 impl->BindRenderer(std::move(renderer));
54} 747}
55 748
56Engines::Maxwell3D& GPU::Maxwell3D() { 749void GPU::CallMethod(const MethodCall& method_call) {
57 return *maxwell_3d; 750 impl->CallMethod(method_call);
58} 751}
59 752
60const Engines::Maxwell3D& GPU::Maxwell3D() const { 753void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
61 return *maxwell_3d; 754 u32 methods_pending) {
755 impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
62} 756}
63 757
64Engines::KeplerCompute& GPU::KeplerCompute() { 758void GPU::FlushCommands() {
65 return *kepler_compute; 759 impl->FlushCommands();
66} 760}
67 761
68const Engines::KeplerCompute& GPU::KeplerCompute() const { 762void GPU::SyncGuestHost() {
69 return *kepler_compute; 763 impl->SyncGuestHost();
70} 764}
71 765
72MemoryManager& GPU::MemoryManager() { 766void GPU::OnCommandListEnd() {
73 return *memory_manager; 767 impl->OnCommandListEnd();
74} 768}
75 769
76const MemoryManager& GPU::MemoryManager() const { 770u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
77 return *memory_manager; 771 return impl->RequestFlush(addr, size);
78} 772}
79 773
80DmaPusher& GPU::DmaPusher() { 774u64 GPU::CurrentFlushRequestFence() const {
81 return *dma_pusher; 775 return impl->CurrentFlushRequestFence();
82} 776}
83 777
84Tegra::CDmaPusher& GPU::CDmaPusher() { 778void GPU::TickWork() {
85 return *cdma_pusher; 779 impl->TickWork();
86} 780}
87 781
88const DmaPusher& GPU::DmaPusher() const { 782Engines::Maxwell3D& GPU::Maxwell3D() {
89 return *dma_pusher; 783 return impl->Maxwell3D();
90} 784}
91 785
92const Tegra::CDmaPusher& GPU::CDmaPusher() const { 786const Engines::Maxwell3D& GPU::Maxwell3D() const {
93 return *cdma_pusher; 787 return impl->Maxwell3D();
94} 788}
95 789
96void GPU::WaitFence(u32 syncpoint_id, u32 value) { 790Engines::KeplerCompute& GPU::KeplerCompute() {
97 // Synced GPU, is always in sync 791 return impl->KeplerCompute();
98 if (!is_async) {
99 return;
100 }
101 if (syncpoint_id == UINT32_MAX) {
102 // TODO: Research what this does.
103 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
104 return;
105 }
106 MICROPROFILE_SCOPE(GPU_wait);
107 std::unique_lock lock{sync_mutex};
108 sync_cv.wait(lock, [=, this] {
109 if (shutting_down.load(std::memory_order_relaxed)) {
110 // We're shutting down, ensure no threads continue to wait for the next syncpoint
111 return true;
112 }
113 return syncpoints.at(syncpoint_id).load() >= value;
114 });
115}
116
117void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
118 auto& syncpoint = syncpoints.at(syncpoint_id);
119 syncpoint++;
120 std::lock_guard lock{sync_mutex};
121 sync_cv.notify_all();
122 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
123 if (!interrupt.empty()) {
124 u32 value = syncpoint.load();
125 auto it = interrupt.begin();
126 while (it != interrupt.end()) {
127 if (value >= *it) {
128 TriggerCpuInterrupt(syncpoint_id, *it);
129 it = interrupt.erase(it);
130 continue;
131 }
132 it++;
133 }
134 }
135} 792}
136 793
137u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { 794const Engines::KeplerCompute& GPU::KeplerCompute() const {
138 return syncpoints.at(syncpoint_id).load(); 795 return impl->KeplerCompute();
139} 796}
140 797
141void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 798Tegra::MemoryManager& GPU::MemoryManager() {
142 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 799 return impl->MemoryManager();
143 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
144 [value](u32 in_value) { return in_value == value; });
145 if (contains) {
146 return;
147 }
148 interrupt.emplace_back(value);
149} 800}
150 801
151bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 802const Tegra::MemoryManager& GPU::MemoryManager() const {
152 std::lock_guard lock{sync_mutex}; 803 return impl->MemoryManager();
153 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 804}
154 const auto iter =
155 std::find_if(interrupt.begin(), interrupt.end(),
156 [value](u32 interrupt_value) { return value == interrupt_value; });
157 805
158 if (iter == interrupt.end()) { 806Tegra::DmaPusher& GPU::DmaPusher() {
159 return false; 807 return impl->DmaPusher();
160 }
161 interrupt.erase(iter);
162 return true;
163} 808}
164 809
165u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 810const Tegra::DmaPusher& GPU::DmaPusher() const {
166 std::unique_lock lck{flush_request_mutex}; 811 return impl->DmaPusher();
167 const u64 fence = ++last_flush_fence;
168 flush_requests.emplace_back(fence, addr, size);
169 return fence;
170} 812}
171 813
172void GPU::TickWork() { 814Tegra::CDmaPusher& GPU::CDmaPusher() {
173 std::unique_lock lck{flush_request_mutex}; 815 return impl->CDmaPusher();
174 while (!flush_requests.empty()) {
175 auto& request = flush_requests.front();
176 const u64 fence = request.fence;
177 const VAddr addr = request.addr;
178 const std::size_t size = request.size;
179 flush_requests.pop_front();
180 flush_request_mutex.unlock();
181 rasterizer->FlushRegion(addr, size);
182 current_flush_fence.store(fence);
183 flush_request_mutex.lock();
184 }
185} 816}
186 817
187u64 GPU::GetTicks() const { 818const Tegra::CDmaPusher& GPU::CDmaPusher() const {
188 // This values were reversed engineered by fincs from NVN 819 return impl->CDmaPusher();
189 // The gpu clock is reported in units of 385/625 nanoseconds 820}
190 constexpr u64 gpu_ticks_num = 384;
191 constexpr u64 gpu_ticks_den = 625;
192 821
193 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); 822VideoCore::RendererBase& GPU::Renderer() {
194 if (Settings::values.use_fast_gpu_time.GetValue()) { 823 return impl->Renderer();
195 nanoseconds /= 256;
196 }
197 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
198 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
199 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
200} 824}
201 825
202void GPU::RendererFrameEndNotify() { 826const VideoCore::RendererBase& GPU::Renderer() const {
203 system.GetPerfStats().EndGameFrame(); 827 return impl->Renderer();
204} 828}
205 829
206void GPU::FlushCommands() { 830VideoCore::ShaderNotify& GPU::ShaderNotify() {
207 rasterizer->FlushCommands(); 831 return impl->ShaderNotify();
208} 832}
209 833
210void GPU::SyncGuestHost() { 834const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
211 rasterizer->SyncGuestHost(); 835 return impl->ShaderNotify();
212} 836}
213 837
214enum class GpuSemaphoreOperation { 838void GPU::WaitFence(u32 syncpoint_id, u32 value) {
215 AcquireEqual = 0x1, 839 impl->WaitFence(syncpoint_id, value);
216 WriteLong = 0x2, 840}
217 AcquireGequal = 0x4,
218 AcquireMask = 0x8,
219};
220 841
221void GPU::CallMethod(const MethodCall& method_call) { 842void GPU::IncrementSyncPoint(u32 syncpoint_id) {
222 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, 843 impl->IncrementSyncPoint(syncpoint_id);
223 method_call.subchannel); 844}
224 845
225 ASSERT(method_call.subchannel < bound_engines.size()); 846u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
847 return impl->GetSyncpointValue(syncpoint_id);
848}
226 849
227 if (ExecuteMethodOnEngine(method_call.method)) { 850void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
228 CallEngineMethod(method_call); 851 impl->RegisterSyncptInterrupt(syncpoint_id, value);
229 } else {
230 CallPullerMethod(method_call);
231 }
232} 852}
233 853
234void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 854bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
235 u32 methods_pending) { 855 return impl->CancelSyncptInterrupt(syncpoint_id, value);
236 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
237
238 ASSERT(subchannel < bound_engines.size());
239
240 if (ExecuteMethodOnEngine(method)) {
241 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
242 } else {
243 for (std::size_t i = 0; i < amount; i++) {
244 CallPullerMethod(MethodCall{
245 method,
246 base_start[i],
247 subchannel,
248 methods_pending - static_cast<u32>(i),
249 });
250 }
251 }
252} 856}
253 857
254bool GPU::ExecuteMethodOnEngine(u32 method) { 858u64 GPU::GetTicks() const {
255 const auto buffer_method = static_cast<BufferMethods>(method); 859 return impl->GetTicks();
256 return buffer_method >= BufferMethods::NonPullerMethods; 860}
257}
258
259void GPU::CallPullerMethod(const MethodCall& method_call) {
260 regs.reg_array[method_call.method] = method_call.argument;
261 const auto method = static_cast<BufferMethods>(method_call.method);
262
263 switch (method) {
264 case BufferMethods::BindObject: {
265 ProcessBindMethod(method_call);
266 break;
267 }
268 case BufferMethods::Nop:
269 case BufferMethods::SemaphoreAddressHigh:
270 case BufferMethods::SemaphoreAddressLow:
271 case BufferMethods::SemaphoreSequence:
272 case BufferMethods::UnkCacheFlush:
273 case BufferMethods::WrcacheFlush:
274 case BufferMethods::FenceValue:
275 break;
276 case BufferMethods::RefCnt:
277 rasterizer->SignalReference();
278 break;
279 case BufferMethods::FenceAction:
280 ProcessFenceActionMethod();
281 break;
282 case BufferMethods::WaitForInterrupt:
283 ProcessWaitForInterruptMethod();
284 break;
285 case BufferMethods::SemaphoreTrigger: {
286 ProcessSemaphoreTriggerMethod();
287 break;
288 }
289 case BufferMethods::NotifyIntr: {
290 // TODO(Kmather73): Research and implement this method.
291 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
292 break;
293 }
294 case BufferMethods::Unk28: {
295 // TODO(Kmather73): Research and implement this method.
296 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
297 break;
298 }
299 case BufferMethods::SemaphoreAcquire: {
300 ProcessSemaphoreAcquire();
301 break;
302 }
303 case BufferMethods::SemaphoreRelease: {
304 ProcessSemaphoreRelease();
305 break;
306 }
307 case BufferMethods::Yield: {
308 // TODO(Kmather73): Research and implement this method.
309 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
310 break;
311 }
312 default:
313 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
314 break;
315 }
316}
317
318void GPU::CallEngineMethod(const MethodCall& method_call) {
319 const EngineID engine = bound_engines[method_call.subchannel];
320
321 switch (engine) {
322 case EngineID::FERMI_TWOD_A:
323 fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
324 break;
325 case EngineID::MAXWELL_B:
326 maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
327 break;
328 case EngineID::KEPLER_COMPUTE_B:
329 kepler_compute->CallMethod(method_call.method, method_call.argument,
330 method_call.IsLastCall());
331 break;
332 case EngineID::MAXWELL_DMA_COPY_A:
333 maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
334 break;
335 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
336 kepler_memory->CallMethod(method_call.method, method_call.argument,
337 method_call.IsLastCall());
338 break;
339 default:
340 UNIMPLEMENTED_MSG("Unimplemented engine");
341 }
342}
343
344void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
345 u32 methods_pending) {
346 const EngineID engine = bound_engines[subchannel];
347
348 switch (engine) {
349 case EngineID::FERMI_TWOD_A:
350 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
351 break;
352 case EngineID::MAXWELL_B:
353 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
354 break;
355 case EngineID::KEPLER_COMPUTE_B:
356 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
357 break;
358 case EngineID::MAXWELL_DMA_COPY_A:
359 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
360 break;
361 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
362 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
363 break;
364 default:
365 UNIMPLEMENTED_MSG("Unimplemented engine");
366 }
367}
368
369void GPU::ProcessBindMethod(const MethodCall& method_call) {
370 // Bind the current subchannel to the desired engine id.
371 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
372 method_call.argument);
373 const auto engine_id = static_cast<EngineID>(method_call.argument);
374 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
375 switch (engine_id) {
376 case EngineID::FERMI_TWOD_A:
377 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
378 break;
379 case EngineID::MAXWELL_B:
380 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
381 break;
382 case EngineID::KEPLER_COMPUTE_B:
383 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
384 break;
385 case EngineID::MAXWELL_DMA_COPY_A:
386 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
387 break;
388 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
389 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
390 break;
391 default:
392 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
393 }
394}
395
396void GPU::ProcessFenceActionMethod() {
397 switch (regs.fence_action.op) {
398 case FenceOperation::Acquire:
399 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
400 break;
401 case FenceOperation::Increment:
402 IncrementSyncPoint(regs.fence_action.syncpoint_id);
403 break;
404 default:
405 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
406 }
407}
408
409void GPU::ProcessWaitForInterruptMethod() {
410 // TODO(bunnei) ImplementMe
411 LOG_WARNING(HW_GPU, "(STUBBED) called");
412}
413
414void GPU::ProcessSemaphoreTriggerMethod() {
415 const auto semaphoreOperationMask = 0xF;
416 const auto op =
417 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
418 if (op == GpuSemaphoreOperation::WriteLong) {
419 struct Block {
420 u32 sequence;
421 u32 zeros = 0;
422 u64 timestamp;
423 };
424 861
425 Block block{}; 862bool GPU::IsAsync() const {
426 block.sequence = regs.semaphore_sequence; 863 return impl->IsAsync();
427 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
428 // CoreTiming
429 block.timestamp = GetTicks();
430 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
431 sizeof(block));
432 } else {
433 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
434 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
435 (op == GpuSemaphoreOperation::AcquireGequal &&
436 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
437 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
438 // Nothing to do in this case
439 } else {
440 regs.acquire_source = true;
441 regs.acquire_value = regs.semaphore_sequence;
442 if (op == GpuSemaphoreOperation::AcquireEqual) {
443 regs.acquire_active = true;
444 regs.acquire_mode = false;
445 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
446 regs.acquire_active = true;
447 regs.acquire_mode = true;
448 } else if (op == GpuSemaphoreOperation::AcquireMask) {
449 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
450 // semaphore_sequence, gives a non-0 result
451 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
452 } else {
453 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
454 }
455 }
456 }
457} 864}
458 865
459void GPU::ProcessSemaphoreRelease() { 866bool GPU::UseNvdec() const {
460 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); 867 return impl->UseNvdec();
461} 868}
462 869
463void GPU::ProcessSemaphoreAcquire() { 870void GPU::RendererFrameEndNotify() {
464 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); 871 impl->RendererFrameEndNotify();
465 const auto value = regs.semaphore_acquire;
466 if (word != value) {
467 regs.acquire_active = true;
468 regs.acquire_value = value;
469 // TODO(kemathe73) figure out how to do the acquire_timeout
470 regs.acquire_mode = false;
471 regs.acquire_source = false;
472 }
473} 872}
474 873
475void GPU::Start() { 874void GPU::Start() {
476 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 875 impl->Start();
477 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
478 cpu_context->MakeCurrent();
479} 876}
480 877
481void GPU::ObtainContext() { 878void GPU::ObtainContext() {
482 cpu_context->MakeCurrent(); 879 impl->ObtainContext();
483} 880}
484 881
485void GPU::ReleaseContext() { 882void GPU::ReleaseContext() {
486 cpu_context->DoneCurrent(); 883 impl->ReleaseContext();
487} 884}
488 885
489void GPU::PushGPUEntries(Tegra::CommandList&& entries) { 886void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
490 gpu_thread.SubmitList(std::move(entries)); 887 impl->PushGPUEntries(std::move(entries));
491} 888}
492 889
493void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 890void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
494 if (!use_nvdec) { 891 impl->PushCommandBuffer(entries);
495 return;
496 }
497
498 if (!cdma_pusher) {
499 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
500 }
501
502 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
503 // TODO(ameerj): RE proper async nvdec operation
504 // gpu_thread.SubmitCommandBuffer(std::move(entries));
505
506 cdma_pusher->ProcessEntries(std::move(entries));
507} 892}
508 893
509void GPU::ClearCdmaInstance() { 894void GPU::ClearCdmaInstance() {
510 cdma_pusher.reset(); 895 impl->ClearCdmaInstance();
511} 896}
512 897
513void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 898void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
514 gpu_thread.SwapBuffers(framebuffer); 899 impl->SwapBuffers(framebuffer);
515} 900}
516 901
517void GPU::FlushRegion(VAddr addr, u64 size) { 902void GPU::FlushRegion(VAddr addr, u64 size) {
518 gpu_thread.FlushRegion(addr, size); 903 impl->FlushRegion(addr, size);
519} 904}
520 905
521void GPU::InvalidateRegion(VAddr addr, u64 size) { 906void GPU::InvalidateRegion(VAddr addr, u64 size) {
522 gpu_thread.InvalidateRegion(addr, size); 907 impl->InvalidateRegion(addr, size);
523} 908}
524 909
525void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 910void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
526 gpu_thread.FlushAndInvalidateRegion(addr, size); 911 impl->FlushAndInvalidateRegion(addr, size);
527}
528
529void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
530 auto& interrupt_manager = system.InterruptManager();
531 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
532}
533
534void GPU::OnCommandListEnd() {
535 if (is_async) {
536 // This command only applies to asynchronous GPU mode
537 gpu_thread.OnCommandListEnd();
538 }
539} 912}
540 913
541} // namespace Tegra 914} // namespace Tegra
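
A note on one detail in the diff above: GetTicks converts guest nanoseconds into GPU ticks at a ratio of 384/625 (the in-code comment mentions 385/625), splitting the value into a quotient and remainder so the intermediate multiplication cannot overflow 64 bits; a naive nanoseconds * 384 would overflow u64 after roughly a year and a half of guest time. A small standalone sketch of that arithmetic only (illustrative, not part of the commit, and omitting the use_fast_gpu_time adjustment):

    #include <cstdint>
    #include <cstdio>

    // Quotient/remainder split as used by GPU::Impl::GetTicks: convert
    // nanoseconds to GPU ticks at 384/625 without overflowing 64-bit math.
    std::uint64_t NsToGpuTicks(std::uint64_t nanoseconds) {
        constexpr std::uint64_t gpu_ticks_num = 384;
        constexpr std::uint64_t gpu_ticks_den = 625;
        const std::uint64_t quotient = nanoseconds / gpu_ticks_den;
        const std::uint64_t remainder = nanoseconds % gpu_ticks_den;
        return quotient * gpu_ticks_num + (remainder * gpu_ticks_num) / gpu_ticks_den;
    }

    int main() {
        // One second of guest time: 1'000'000'000 * 384 / 625 = 614'400'000 ticks.
        std::printf("%llu\n",
                    static_cast<unsigned long long>(NsToGpuTicks(1'000'000'000ULL)));
        return 0;
    }

Only the remainder (at most 624) is multiplied before the final division, so the result stays exact while both intermediate products fit comfortably in 64 bits.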