author     ameerj  2021-10-01 00:57:02 -0400
committer  ameerj  2021-10-03 00:35:57 -0400
commit     427bf76e621cf0833bc1bbec7d8be891297223e7 (patch)
tree       5cc146d21972e1a7c424219482ef3393787afe6a /src/video_core/gpu.cpp
parent     Merge pull request #7061 from ameerj/dma-buffer-misc (diff)
download   yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.tar.gz
           yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.tar.xz
           yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.zip
gpu: Migrate implementation to the cpp file
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r--  src/video_core/gpu.cpp | 1220
 1 file changed, 800 insertions(+), 420 deletions(-)
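
This commit converts Tegra::GPU to the pimpl idiom: the private state and helper methods move into a GPU::Impl struct defined only in gpu.cpp, and the public member functions in the diff below become thin forwarders into it. A minimal sketch of that pattern, using hypothetical Widget/Impl names rather than yuzu's actual classes, looks like this:

// pimpl_sketch.cpp -- a minimal sketch of the idiom this commit applies to Tegra::GPU
// (hypothetical Widget/Impl names; not yuzu code). The public class keeps only a
// std::unique_ptr<Impl>; all data members and helpers live in the .cpp file, so the
// header no longer drags in the implementation's includes.
#include <iostream>
#include <memory>
#include <string>
#include <utility>

class Widget {
public:
    explicit Widget(std::string name);
    ~Widget();          // defined out of line, where Impl is a complete type
    void Frobnicate();  // public API forwards into the Impl

private:
    struct Impl;        // only declared in the header
    std::unique_ptr<Impl> impl;
};

// --- everything below would normally live in widget.cpp ---
struct Widget::Impl {
    explicit Impl(std::string name_) : name{std::move(name_)} {}

    void Frobnicate() {
        ++counter;
        std::cout << name << " frobnicated " << counter << " time(s)\n";
    }

    std::string name;  // state that would otherwise be visible in the header
    int counter = 0;
};

Widget::Widget(std::string name) : impl{std::make_unique<Impl>(std::move(name))} {}
Widget::~Widget() = default;  // unique_ptr<Impl> is destroyed here, where Impl is complete

void Widget::Frobnicate() {
    impl->Frobnicate();
}

int main() {
    Widget w{"gpu"};
    w.Frobnicate();
    w.Frobnicate();
    return 0;
}

The explicitly defaulted ~Impl() and the out-of-line GPU::~GPU() in the real patch exist for the same reason as Widget::~Widget() here: a std::unique_ptr<Impl> member can only be destroyed where Impl is a complete type.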
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2ae3639b5..520675873 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,540 +2,920 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <atomic>
5#include <chrono> 7#include <chrono>
8#include <condition_variable>
9#include <list>
10#include <memory>
6 11
7#include "common/assert.h" 12#include "common/assert.h"
8#include "common/microprofile.h" 13#include "common/microprofile.h"
9#include "common/settings.h" 14#include "common/settings.h"
10#include "core/core.h" 15#include "core/core.h"
11#include "core/core_timing.h" 16#include "core/core_timing.h"
12#include "core/core_timing_util.h"
13#include "core/frontend/emu_window.h" 17#include "core/frontend/emu_window.h"
14#include "core/hardware_interrupt_manager.h" 18#include "core/hardware_interrupt_manager.h"
15#include "core/memory.h" 19#include "core/hle/service/nvdrv/nvdata.h"
20#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "core/perf_stats.h" 21#include "core/perf_stats.h"
22#include "video_core/cdma_pusher.h"
23#include "video_core/dma_pusher.h"
17#include "video_core/engines/fermi_2d.h" 24#include "video_core/engines/fermi_2d.h"
18#include "video_core/engines/kepler_compute.h" 25#include "video_core/engines/kepler_compute.h"
19#include "video_core/engines/kepler_memory.h" 26#include "video_core/engines/kepler_memory.h"
20#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/maxwell_dma.h" 28#include "video_core/engines/maxwell_dma.h"
22#include "video_core/gpu.h" 29#include "video_core/gpu.h"
30#include "video_core/gpu_thread.h"
23#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
24#include "video_core/renderer_base.h" 32#include "video_core/renderer_base.h"
25#include "video_core/shader_notify.h" 33#include "video_core/shader_notify.h"
26#include "video_core/video_core.h"
27 34
28namespace Tegra { 35namespace Tegra {
29 36
30MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 37MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
31 38
32GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) 39struct GPU::Impl {
33 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
34 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, 41 : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 42 system)},
36 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 43 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 44 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 45 fermi_2d{std::make_unique<Engines::Fermi2D>()},
39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 46 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 47 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
41 gpu_thread{system_, is_async_} {} 48 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
49 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
50 gpu_thread{system_, is_async_} {}
51
52 ~Impl() = default;
53
54 /// Binds a renderer to the GPU.
55 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
56 renderer = std::move(renderer_);
57 rasterizer = renderer->ReadRasterizer();
58
59 memory_manager->BindRasterizer(rasterizer);
60 maxwell_3d->BindRasterizer(rasterizer);
61 fermi_2d->BindRasterizer(rasterizer);
62 kepler_compute->BindRasterizer(rasterizer);
63 maxwell_dma->BindRasterizer(rasterizer);
64 }
65
66 /// Calls a GPU method.
67 void CallMethod(const GPU::MethodCall& method_call) {
68 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
69 method_call.subchannel);
70
71 ASSERT(method_call.subchannel < bound_engines.size());
72
73 if (ExecuteMethodOnEngine(method_call.method)) {
74 CallEngineMethod(method_call);
75 } else {
76 CallPullerMethod(method_call);
77 }
78 }
79
80 /// Calls a GPU multivalue method.
81 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
82 u32 methods_pending) {
83 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
84
85 ASSERT(subchannel < bound_engines.size());
86
87 if (ExecuteMethodOnEngine(method)) {
88 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
89 } else {
90 for (std::size_t i = 0; i < amount; i++) {
91 CallPullerMethod(GPU::MethodCall{
92 method,
93 base_start[i],
94 subchannel,
95 methods_pending - static_cast<u32>(i),
96 });
97 }
98 }
99 }
100
101 /// Flush all current written commands into the host GPU for execution.
102 void FlushCommands() {
103 rasterizer->FlushCommands();
104 }
105
106 /// Synchronizes CPU writes with Host GPU memory.
107 void SyncGuestHost() {
108 rasterizer->SyncGuestHost();
109 }
110
111 /// Signal the ending of command list.
112 void OnCommandListEnd() {
113 if (is_async) {
114 // This command only applies to asynchronous GPU mode
115 gpu_thread.OnCommandListEnd();
116 }
117 }
118
119 /// Request a host GPU memory flush from the CPU.
120 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
121 std::unique_lock lck{flush_request_mutex};
122 const u64 fence = ++last_flush_fence;
123 flush_requests.emplace_back(fence, addr, size);
124 return fence;
125 }
126
127 /// Obtains current flush request fence id.
128 [[nodiscard]] u64 CurrentFlushRequestFence() const {
129 return current_flush_fence.load(std::memory_order_relaxed);
130 }
131
132 /// Tick pending requests within the GPU.
133 void TickWork() {
134 std::unique_lock lck{flush_request_mutex};
135 while (!flush_requests.empty()) {
136 auto& request = flush_requests.front();
137 const u64 fence = request.fence;
138 const VAddr addr = request.addr;
139 const std::size_t size = request.size;
140 flush_requests.pop_front();
141 flush_request_mutex.unlock();
142 rasterizer->FlushRegion(addr, size);
143 current_flush_fence.store(fence);
144 flush_request_mutex.lock();
145 }
146 }
147
148 /// Returns a reference to the Maxwell3D GPU engine.
149 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
150 return *maxwell_3d;
151 }
152
153 /// Returns a const reference to the Maxwell3D GPU engine.
154 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
155 return *maxwell_3d;
156 }
157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
160 return *kepler_compute;
161 }
162
163 /// Returns a reference to the KeplerCompute GPU engine.
164 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
165 return *kepler_compute;
166 }
167
168 /// Returns a reference to the GPU memory manager.
169 [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
170 return *memory_manager;
171 }
172
173 /// Returns a const reference to the GPU memory manager.
174 [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
175 return *memory_manager;
176 }
177
178 /// Returns a reference to the GPU DMA pusher.
179 [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
180 return *dma_pusher;
181 }
182
183 /// Returns a const reference to the GPU DMA pusher.
184 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
185 return *dma_pusher;
186 }
187
188 /// Returns a reference to the GPU CDMA pusher.
189 [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
190 return *cdma_pusher;
191 }
192
193 /// Returns a const reference to the GPU CDMA pusher.
194 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
195 return *cdma_pusher;
196 }
197
198 /// Returns a reference to the underlying renderer.
199 [[nodiscard]] VideoCore::RendererBase& Renderer() {
200 return *renderer;
201 }
202
203 /// Returns a const reference to the underlying renderer.
204 [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
205 return *renderer;
206 }
207
208 /// Returns a reference to the shader notifier.
209 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
210 return *shader_notify;
211 }
212
213 /// Returns a const reference to the shader notifier.
214 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
215 return *shader_notify;
216 }
217
218 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
219 void WaitFence(u32 syncpoint_id, u32 value) {
220 // Synced GPU, is always in sync
221 if (!is_async) {
222 return;
223 }
224 if (syncpoint_id == UINT32_MAX) {
225 // TODO: Research what this does.
226 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
227 return;
228 }
229 MICROPROFILE_SCOPE(GPU_wait);
230 std::unique_lock lock{sync_mutex};
231 sync_cv.wait(lock, [=, this] {
232 if (shutting_down.load(std::memory_order_relaxed)) {
233 // We're shutting down, ensure no threads continue to wait for the next syncpoint
234 return true;
235 }
236 return syncpoints.at(syncpoint_id).load() >= value;
237 });
238 }
239
240 void IncrementSyncPoint(u32 syncpoint_id) {
241 auto& syncpoint = syncpoints.at(syncpoint_id);
242 syncpoint++;
243 std::lock_guard lock{sync_mutex};
244 sync_cv.notify_all();
245 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
246 if (!interrupt.empty()) {
247 u32 value = syncpoint.load();
248 auto it = interrupt.begin();
249 while (it != interrupt.end()) {
250 if (value >= *it) {
251 TriggerCpuInterrupt(syncpoint_id, *it);
252 it = interrupt.erase(it);
253 continue;
254 }
255 it++;
256 }
257 }
258 }
259
260 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
261 return syncpoints.at(syncpoint_id).load();
262 }
263
264 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
265 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
266 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
267 [value](u32 in_value) { return in_value == value; });
268 if (contains) {
269 return;
270 }
271 interrupt.emplace_back(value);
272 }
273
274 [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
275 std::lock_guard lock{sync_mutex};
276 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
277 const auto iter =
278 std::find_if(interrupt.begin(), interrupt.end(),
279 [value](u32 interrupt_value) { return value == interrupt_value; });
280
281 if (iter == interrupt.end()) {
282 return false;
283 }
284 interrupt.erase(iter);
285 return true;
286 }
287
288 [[nodiscard]] u64 GetTicks() const {
289 // This values were reversed engineered by fincs from NVN
290 // The gpu clock is reported in units of 385/625 nanoseconds
291 constexpr u64 gpu_ticks_num = 384;
292 constexpr u64 gpu_ticks_den = 625;
293
294 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
295 if (Settings::values.use_fast_gpu_time.GetValue()) {
296 nanoseconds /= 256;
297 }
298 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
299 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
300 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
301 }
302
303 [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
304 return std::unique_lock{sync_mutex};
305 }
306
307 [[nodiscard]] bool IsAsync() const {
308 return is_async;
309 }
310
311 [[nodiscard]] bool UseNvdec() const {
312 return use_nvdec;
313 }
314
315 void RendererFrameEndNotify() {
316 system.GetPerfStats().EndGameFrame();
317 }
318
319 /// Performs any additional setup necessary in order to begin GPU emulation.
320 /// This can be used to launch any necessary threads and register any necessary
321 /// core timing events.
322 void Start() {
323 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
324 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
325 cpu_context->MakeCurrent();
326 }
327
328 /// Obtain the CPU Context
329 void ObtainContext() {
330 cpu_context->MakeCurrent();
331 }
332
333 /// Release the CPU Context
334 void ReleaseContext() {
335 cpu_context->DoneCurrent();
336 }
337
338 /// Push GPU command entries to be processed
339 void PushGPUEntries(Tegra::CommandList&& entries) {
340 gpu_thread.SubmitList(std::move(entries));
341 }
342
343 /// Push GPU command buffer entries to be processed
344 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
345 if (!use_nvdec) {
346 return;
347 }
348
349 if (!cdma_pusher) {
350 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
351 }
352
353 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
354 // TODO(ameerj): RE proper async nvdec operation
355 // gpu_thread.SubmitCommandBuffer(std::move(entries));
356
357 cdma_pusher->ProcessEntries(std::move(entries));
358 }
359
360 /// Frees the CDMAPusher instance to free up resources
361 void ClearCdmaInstance() {
362 cdma_pusher.reset();
363 }
364
365 /// Swap buffers (render frame)
366 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
367 gpu_thread.SwapBuffers(framebuffer);
368 }
369
370 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
371 void FlushRegion(VAddr addr, u64 size) {
372 gpu_thread.FlushRegion(addr, size);
373 }
374
375 /// Notify rasterizer that any caches of the specified region should be invalidated
376 void InvalidateRegion(VAddr addr, u64 size) {
377 gpu_thread.InvalidateRegion(addr, size);
378 }
379
380 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
381 void FlushAndInvalidateRegion(VAddr addr, u64 size) {
382 gpu_thread.FlushAndInvalidateRegion(addr, size);
383 }
384
385 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
386 auto& interrupt_manager = system.InterruptManager();
387 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
388 }
389
390 void ProcessBindMethod(const GPU::MethodCall& method_call) {
391 // Bind the current subchannel to the desired engine id.
392 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
393 method_call.argument);
394 const auto engine_id = static_cast<EngineID>(method_call.argument);
395 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
396 switch (engine_id) {
397 case EngineID::FERMI_TWOD_A:
398 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
399 break;
400 case EngineID::MAXWELL_B:
401 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
402 break;
403 case EngineID::KEPLER_COMPUTE_B:
404 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
405 break;
406 case EngineID::MAXWELL_DMA_COPY_A:
407 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
408 break;
409 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
410 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
411 break;
412 default:
413 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
414 }
415 }
416
417 void ProcessFenceActionMethod() {
418 switch (regs.fence_action.op) {
419 case GPU::FenceOperation::Acquire:
420 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
421 break;
422 case GPU::FenceOperation::Increment:
423 IncrementSyncPoint(regs.fence_action.syncpoint_id);
424 break;
425 default:
426 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
427 }
428 }
429
430 void ProcessWaitForInterruptMethod() {
431 // TODO(bunnei) ImplementMe
432 LOG_WARNING(HW_GPU, "(STUBBED) called");
433 }
434
435 void ProcessSemaphoreTriggerMethod() {
436 const auto semaphoreOperationMask = 0xF;
437 const auto op =
438 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
439 if (op == GpuSemaphoreOperation::WriteLong) {
440 struct Block {
441 u32 sequence;
442 u32 zeros = 0;
443 u64 timestamp;
444 };
445
446 Block block{};
447 block.sequence = regs.semaphore_sequence;
448 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
449 // CoreTiming
450 block.timestamp = GetTicks();
451 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
452 sizeof(block));
453 } else {
454 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
455 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
456 (op == GpuSemaphoreOperation::AcquireGequal &&
457 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
458 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
459 // Nothing to do in this case
460 } else {
461 regs.acquire_source = true;
462 regs.acquire_value = regs.semaphore_sequence;
463 if (op == GpuSemaphoreOperation::AcquireEqual) {
464 regs.acquire_active = true;
465 regs.acquire_mode = false;
466 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
467 regs.acquire_active = true;
468 regs.acquire_mode = true;
469 } else if (op == GpuSemaphoreOperation::AcquireMask) {
470 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
471 // semaphore_sequence, gives a non-0 result
472 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
473 } else {
474 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
475 }
476 }
477 }
478 }
479
480 void ProcessSemaphoreRelease() {
481 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
482 regs.semaphore_release);
483 }
484
485 void ProcessSemaphoreAcquire() {
486 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
487 const auto value = regs.semaphore_acquire;
488 if (word != value) {
489 regs.acquire_active = true;
490 regs.acquire_value = value;
491 // TODO(kemathe73) figure out how to do the acquire_timeout
492 regs.acquire_mode = false;
493 regs.acquire_source = false;
494 }
495 }
496
497 /// Calls a GPU puller method.
498 void CallPullerMethod(const GPU::MethodCall& method_call) {
499 regs.reg_array[method_call.method] = method_call.argument;
500 const auto method = static_cast<BufferMethods>(method_call.method);
501
502 switch (method) {
503 case BufferMethods::BindObject: {
504 ProcessBindMethod(method_call);
505 break;
506 }
507 case BufferMethods::Nop:
508 case BufferMethods::SemaphoreAddressHigh:
509 case BufferMethods::SemaphoreAddressLow:
510 case BufferMethods::SemaphoreSequence:
511 case BufferMethods::UnkCacheFlush:
512 case BufferMethods::WrcacheFlush:
513 case BufferMethods::FenceValue:
514 break;
515 case BufferMethods::RefCnt:
516 rasterizer->SignalReference();
517 break;
518 case BufferMethods::FenceAction:
519 ProcessFenceActionMethod();
520 break;
521 case BufferMethods::WaitForInterrupt:
522 ProcessWaitForInterruptMethod();
523 break;
524 case BufferMethods::SemaphoreTrigger: {
525 ProcessSemaphoreTriggerMethod();
526 break;
527 }
528 case BufferMethods::NotifyIntr: {
529 // TODO(Kmather73): Research and implement this method.
530 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
531 break;
532 }
533 case BufferMethods::Unk28: {
534 // TODO(Kmather73): Research and implement this method.
535 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
536 break;
537 }
538 case BufferMethods::SemaphoreAcquire: {
539 ProcessSemaphoreAcquire();
540 break;
541 }
542 case BufferMethods::SemaphoreRelease: {
543 ProcessSemaphoreRelease();
544 break;
545 }
546 case BufferMethods::Yield: {
547 // TODO(Kmather73): Research and implement this method.
548 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
549 break;
550 }
551 default:
552 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
553 break;
554 }
555 }
556
557 /// Calls a GPU engine method.
558 void CallEngineMethod(const GPU::MethodCall& method_call) {
559 const EngineID engine = bound_engines[method_call.subchannel];
560
561 switch (engine) {
562 case EngineID::FERMI_TWOD_A:
563 fermi_2d->CallMethod(method_call.method, method_call.argument,
564 method_call.IsLastCall());
565 break;
566 case EngineID::MAXWELL_B:
567 maxwell_3d->CallMethod(method_call.method, method_call.argument,
568 method_call.IsLastCall());
569 break;
570 case EngineID::KEPLER_COMPUTE_B:
571 kepler_compute->CallMethod(method_call.method, method_call.argument,
572 method_call.IsLastCall());
573 break;
574 case EngineID::MAXWELL_DMA_COPY_A:
575 maxwell_dma->CallMethod(method_call.method, method_call.argument,
576 method_call.IsLastCall());
577 break;
578 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
579 kepler_memory->CallMethod(method_call.method, method_call.argument,
580 method_call.IsLastCall());
581 break;
582 default:
583 UNIMPLEMENTED_MSG("Unimplemented engine");
584 }
585 }
586
587 /// Calls a GPU engine multivalue method.
588 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
589 u32 methods_pending) {
590 const EngineID engine = bound_engines[subchannel];
591
592 switch (engine) {
593 case EngineID::FERMI_TWOD_A:
594 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
595 break;
596 case EngineID::MAXWELL_B:
597 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
598 break;
599 case EngineID::KEPLER_COMPUTE_B:
600 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
601 break;
602 case EngineID::MAXWELL_DMA_COPY_A:
603 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
604 break;
605 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
606 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
607 break;
608 default:
609 UNIMPLEMENTED_MSG("Unimplemented engine");
610 }
611 }
612
613 /// Determines where the method should be executed.
614 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
615 const auto buffer_method = static_cast<BufferMethods>(method);
616 return buffer_method >= BufferMethods::NonPullerMethods;
617 }
618
619 struct Regs {
620 static constexpr size_t NUM_REGS = 0x40;
621
622 union {
623 struct {
624 INSERT_PADDING_WORDS_NOINIT(0x4);
625 struct {
626 u32 address_high;
627 u32 address_low;
628
629 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
630 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
631 address_low);
632 }
633 } semaphore_address;
634
635 u32 semaphore_sequence;
636 u32 semaphore_trigger;
637 INSERT_PADDING_WORDS_NOINIT(0xC);
638
639 // The pusher and the puller share the reference counter, the pusher only has read
640 // access
641 u32 reference_count;
642 INSERT_PADDING_WORDS_NOINIT(0x5);
643
644 u32 semaphore_acquire;
645 u32 semaphore_release;
646 u32 fence_value;
647 GPU::FenceAction fence_action;
648 INSERT_PADDING_WORDS_NOINIT(0xE2);
649
650 // Puller state
651 u32 acquire_mode;
652 u32 acquire_source;
653 u32 acquire_active;
654 u32 acquire_timeout;
655 u32 acquire_value;
656 };
657 std::array<u32, NUM_REGS> reg_array;
658 };
659 } regs{};
660
661 GPU& gpu;
662 Core::System& system;
663 std::unique_ptr<Tegra::MemoryManager> memory_manager;
664 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
665 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
666 std::unique_ptr<VideoCore::RendererBase> renderer;
667 VideoCore::RasterizerInterface* rasterizer = nullptr;
668 const bool use_nvdec;
669
670 /// Mapping of command subchannels to their bound engine ids
671 std::array<EngineID, 8> bound_engines{};
672 /// 3D engine
673 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
674 /// 2D engine
675 std::unique_ptr<Engines::Fermi2D> fermi_2d;
676 /// Compute engine
677 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
678 /// DMA engine
679 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
680 /// Inline memory engine
681 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
682 /// Shader build notifier
683 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
684 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
685 std::atomic_bool shutting_down{};
686
687 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
688
689 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
690
691 std::mutex sync_mutex;
692 std::mutex device_mutex;
693
694 std::condition_variable sync_cv;
695
696 struct FlushRequest {
697 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
698 : fence{fence_}, addr{addr_}, size{size_} {}
699 u64 fence;
700 VAddr addr;
701 std::size_t size;
702 };
703
704 std::list<FlushRequest> flush_requests;
705 std::atomic<u64> current_flush_fence{};
706 u64 last_flush_fence{};
707 std::mutex flush_request_mutex;
708
709 const bool is_async;
710
711 VideoCommon::GPUThread::ThreadManager gpu_thread;
712 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
713
714#define ASSERT_REG_POSITION(field_name, position) \
715 static_assert(offsetof(Regs, field_name) == position * 4, \
716 "Field " #field_name " has invalid position")
717
718 ASSERT_REG_POSITION(semaphore_address, 0x4);
719 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
720 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
721 ASSERT_REG_POSITION(reference_count, 0x14);
722 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
723 ASSERT_REG_POSITION(semaphore_release, 0x1B);
724 ASSERT_REG_POSITION(fence_value, 0x1C);
725 ASSERT_REG_POSITION(fence_action, 0x1D);
726
727 ASSERT_REG_POSITION(acquire_mode, 0x100);
728 ASSERT_REG_POSITION(acquire_source, 0x101);
729 ASSERT_REG_POSITION(acquire_active, 0x102);
730 ASSERT_REG_POSITION(acquire_timeout, 0x103);
731 ASSERT_REG_POSITION(acquire_value, 0x104);
732
733#undef ASSERT_REG_POSITION
734
735 enum class GpuSemaphoreOperation {
736 AcquireEqual = 0x1,
737 WriteLong = 0x2,
738 AcquireGequal = 0x4,
739 AcquireMask = 0x8,
740 };
741};
742
743GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
744 : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
42 745
43GPU::~GPU() = default; 746GPU::~GPU() = default;
44 747
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 748void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
46 renderer = std::move(renderer_); 749 impl->BindRenderer(std::move(renderer));
47 rasterizer = renderer->ReadRasterizer(); 750}
48 751
49 memory_manager->BindRasterizer(rasterizer); 752void GPU::CallMethod(const MethodCall& method_call) {
50 maxwell_3d->BindRasterizer(rasterizer); 753 impl->CallMethod(method_call);
51 fermi_2d->BindRasterizer(rasterizer);
52 kepler_compute->BindRasterizer(rasterizer);
53 maxwell_dma->BindRasterizer(rasterizer);
54} 754}
55 755
56Engines::Maxwell3D& GPU::Maxwell3D() { 756void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
57 return *maxwell_3d; 757 u32 methods_pending) {
758 impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
58} 759}
59 760
60const Engines::Maxwell3D& GPU::Maxwell3D() const { 761void GPU::FlushCommands() {
61 return *maxwell_3d; 762 impl->FlushCommands();
62} 763}
63 764
64Engines::KeplerCompute& GPU::KeplerCompute() { 765void GPU::SyncGuestHost() {
65 return *kepler_compute; 766 impl->SyncGuestHost();
66} 767}
67 768
68const Engines::KeplerCompute& GPU::KeplerCompute() const { 769void GPU::OnCommandListEnd() {
69 return *kepler_compute; 770 impl->OnCommandListEnd();
70} 771}
71 772
72MemoryManager& GPU::MemoryManager() { 773u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
73 return *memory_manager; 774 return impl->RequestFlush(addr, size);
74} 775}
75 776
76const MemoryManager& GPU::MemoryManager() const { 777u64 GPU::CurrentFlushRequestFence() const {
77 return *memory_manager; 778 return impl->CurrentFlushRequestFence();
78} 779}
79 780
80DmaPusher& GPU::DmaPusher() { 781void GPU::TickWork() {
81 return *dma_pusher; 782 impl->TickWork();
82} 783}
83 784
84Tegra::CDmaPusher& GPU::CDmaPusher() { 785Engines::Maxwell3D& GPU::Maxwell3D() {
85 return *cdma_pusher; 786 return impl->Maxwell3D();
86} 787}
87 788
88const DmaPusher& GPU::DmaPusher() const { 789const Engines::Maxwell3D& GPU::Maxwell3D() const {
89 return *dma_pusher; 790 return impl->Maxwell3D();
90} 791}
91 792
92const Tegra::CDmaPusher& GPU::CDmaPusher() const { 793Engines::KeplerCompute& GPU::KeplerCompute() {
93 return *cdma_pusher; 794 return impl->KeplerCompute();
94} 795}
95 796
96void GPU::WaitFence(u32 syncpoint_id, u32 value) { 797const Engines::KeplerCompute& GPU::KeplerCompute() const {
97 // Synced GPU, is always in sync 798 return impl->KeplerCompute();
98 if (!is_async) {
99 return;
100 }
101 if (syncpoint_id == UINT32_MAX) {
102 // TODO: Research what this does.
103 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
104 return;
105 }
106 MICROPROFILE_SCOPE(GPU_wait);
107 std::unique_lock lock{sync_mutex};
108 sync_cv.wait(lock, [=, this] {
109 if (shutting_down.load(std::memory_order_relaxed)) {
110 // We're shutting down, ensure no threads continue to wait for the next syncpoint
111 return true;
112 }
113 return syncpoints.at(syncpoint_id).load() >= value;
114 });
115}
116
117void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
118 auto& syncpoint = syncpoints.at(syncpoint_id);
119 syncpoint++;
120 std::lock_guard lock{sync_mutex};
121 sync_cv.notify_all();
122 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
123 if (!interrupt.empty()) {
124 u32 value = syncpoint.load();
125 auto it = interrupt.begin();
126 while (it != interrupt.end()) {
127 if (value >= *it) {
128 TriggerCpuInterrupt(syncpoint_id, *it);
129 it = interrupt.erase(it);
130 continue;
131 }
132 it++;
133 }
134 }
135} 799}
136 800
137u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { 801Tegra::MemoryManager& GPU::MemoryManager() {
138 return syncpoints.at(syncpoint_id).load(); 802 return impl->MemoryManager();
139} 803}
140 804
141void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 805const Tegra::MemoryManager& GPU::MemoryManager() const {
142 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 806 return impl->MemoryManager();
143 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
144 [value](u32 in_value) { return in_value == value; });
145 if (contains) {
146 return;
147 }
148 interrupt.emplace_back(value);
149} 807}
150 808
151bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 809Tegra::DmaPusher& GPU::DmaPusher() {
152 std::lock_guard lock{sync_mutex}; 810 return impl->DmaPusher();
153 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 811}
154 const auto iter =
155 std::find_if(interrupt.begin(), interrupt.end(),
156 [value](u32 interrupt_value) { return value == interrupt_value; });
157 812
158 if (iter == interrupt.end()) { 813const Tegra::DmaPusher& GPU::DmaPusher() const {
159 return false; 814 return impl->DmaPusher();
160 }
161 interrupt.erase(iter);
162 return true;
163} 815}
164 816
165u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 817Tegra::CDmaPusher& GPU::CDmaPusher() {
166 std::unique_lock lck{flush_request_mutex}; 818 return impl->CDmaPusher();
167 const u64 fence = ++last_flush_fence;
168 flush_requests.emplace_back(fence, addr, size);
169 return fence;
170} 819}
171 820
172void GPU::TickWork() { 821const Tegra::CDmaPusher& GPU::CDmaPusher() const {
173 std::unique_lock lck{flush_request_mutex}; 822 return impl->CDmaPusher();
174 while (!flush_requests.empty()) {
175 auto& request = flush_requests.front();
176 const u64 fence = request.fence;
177 const VAddr addr = request.addr;
178 const std::size_t size = request.size;
179 flush_requests.pop_front();
180 flush_request_mutex.unlock();
181 rasterizer->FlushRegion(addr, size);
182 current_flush_fence.store(fence);
183 flush_request_mutex.lock();
184 }
185} 823}
186 824
187u64 GPU::GetTicks() const { 825VideoCore::RendererBase& GPU::Renderer() {
188 // This values were reversed engineered by fincs from NVN 826 return impl->Renderer();
189 // The gpu clock is reported in units of 385/625 nanoseconds 827}
190 constexpr u64 gpu_ticks_num = 384;
191 constexpr u64 gpu_ticks_den = 625;
192 828
193 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); 829const VideoCore::RendererBase& GPU::Renderer() const {
194 if (Settings::values.use_fast_gpu_time.GetValue()) { 830 return impl->Renderer();
195 nanoseconds /= 256;
196 }
197 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
198 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
199 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
200} 831}
201 832
202void GPU::RendererFrameEndNotify() { 833VideoCore::ShaderNotify& GPU::ShaderNotify() {
203 system.GetPerfStats().EndGameFrame(); 834 return impl->ShaderNotify();
204} 835}
205 836
206void GPU::FlushCommands() { 837const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
207 rasterizer->FlushCommands(); 838 return impl->ShaderNotify();
208} 839}
209 840
210void GPU::SyncGuestHost() { 841void GPU::WaitFence(u32 syncpoint_id, u32 value) {
211 rasterizer->SyncGuestHost(); 842 impl->WaitFence(syncpoint_id, value);
212} 843}
213 844
214enum class GpuSemaphoreOperation { 845void GPU::IncrementSyncPoint(u32 syncpoint_id) {
215 AcquireEqual = 0x1, 846 impl->IncrementSyncPoint(syncpoint_id);
216 WriteLong = 0x2, 847}
217 AcquireGequal = 0x4,
218 AcquireMask = 0x8,
219};
220 848
221void GPU::CallMethod(const MethodCall& method_call) { 849u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
222 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, 850 return impl->GetSyncpointValue(syncpoint_id);
223 method_call.subchannel); 851}
224 852
225 ASSERT(method_call.subchannel < bound_engines.size()); 853void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
854 impl->RegisterSyncptInterrupt(syncpoint_id, value);
855}
226 856
227 if (ExecuteMethodOnEngine(method_call.method)) { 857bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
228 CallEngineMethod(method_call); 858 return impl->CancelSyncptInterrupt(syncpoint_id, value);
229 } else {
230 CallPullerMethod(method_call);
231 }
232} 859}
233 860
234void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 861u64 GPU::GetTicks() const {
235 u32 methods_pending) { 862 return impl->GetTicks();
236 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
237
238 ASSERT(subchannel < bound_engines.size());
239
240 if (ExecuteMethodOnEngine(method)) {
241 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
242 } else {
243 for (std::size_t i = 0; i < amount; i++) {
244 CallPullerMethod(MethodCall{
245 method,
246 base_start[i],
247 subchannel,
248 methods_pending - static_cast<u32>(i),
249 });
250 }
251 }
252} 863}
253 864
254bool GPU::ExecuteMethodOnEngine(u32 method) { 865std::unique_lock<std::mutex> GPU::LockSync() {
255 const auto buffer_method = static_cast<BufferMethods>(method); 866 return impl->LockSync();
256 return buffer_method >= BufferMethods::NonPullerMethods; 867}
257}
258
259void GPU::CallPullerMethod(const MethodCall& method_call) {
260 regs.reg_array[method_call.method] = method_call.argument;
261 const auto method = static_cast<BufferMethods>(method_call.method);
262
263 switch (method) {
264 case BufferMethods::BindObject: {
265 ProcessBindMethod(method_call);
266 break;
267 }
268 case BufferMethods::Nop:
269 case BufferMethods::SemaphoreAddressHigh:
270 case BufferMethods::SemaphoreAddressLow:
271 case BufferMethods::SemaphoreSequence:
272 case BufferMethods::UnkCacheFlush:
273 case BufferMethods::WrcacheFlush:
274 case BufferMethods::FenceValue:
275 break;
276 case BufferMethods::RefCnt:
277 rasterizer->SignalReference();
278 break;
279 case BufferMethods::FenceAction:
280 ProcessFenceActionMethod();
281 break;
282 case BufferMethods::WaitForInterrupt:
283 ProcessWaitForInterruptMethod();
284 break;
285 case BufferMethods::SemaphoreTrigger: {
286 ProcessSemaphoreTriggerMethod();
287 break;
288 }
289 case BufferMethods::NotifyIntr: {
290 // TODO(Kmather73): Research and implement this method.
291 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
292 break;
293 }
294 case BufferMethods::Unk28: {
295 // TODO(Kmather73): Research and implement this method.
296 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
297 break;
298 }
299 case BufferMethods::SemaphoreAcquire: {
300 ProcessSemaphoreAcquire();
301 break;
302 }
303 case BufferMethods::SemaphoreRelease: {
304 ProcessSemaphoreRelease();
305 break;
306 }
307 case BufferMethods::Yield: {
308 // TODO(Kmather73): Research and implement this method.
309 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
310 break;
311 }
312 default:
313 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
314 break;
315 }
316}
317
318void GPU::CallEngineMethod(const MethodCall& method_call) {
319 const EngineID engine = bound_engines[method_call.subchannel];
320
321 switch (engine) {
322 case EngineID::FERMI_TWOD_A:
323 fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
324 break;
325 case EngineID::MAXWELL_B:
326 maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
327 break;
328 case EngineID::KEPLER_COMPUTE_B:
329 kepler_compute->CallMethod(method_call.method, method_call.argument,
330 method_call.IsLastCall());
331 break;
332 case EngineID::MAXWELL_DMA_COPY_A:
333 maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
334 break;
335 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
336 kepler_memory->CallMethod(method_call.method, method_call.argument,
337 method_call.IsLastCall());
338 break;
339 default:
340 UNIMPLEMENTED_MSG("Unimplemented engine");
341 }
342}
343
344void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
345 u32 methods_pending) {
346 const EngineID engine = bound_engines[subchannel];
347
348 switch (engine) {
349 case EngineID::FERMI_TWOD_A:
350 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
351 break;
352 case EngineID::MAXWELL_B:
353 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
354 break;
355 case EngineID::KEPLER_COMPUTE_B:
356 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
357 break;
358 case EngineID::MAXWELL_DMA_COPY_A:
359 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
360 break;
361 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
362 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
363 break;
364 default:
365 UNIMPLEMENTED_MSG("Unimplemented engine");
366 }
367}
368
369void GPU::ProcessBindMethod(const MethodCall& method_call) {
370 // Bind the current subchannel to the desired engine id.
371 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
372 method_call.argument);
373 const auto engine_id = static_cast<EngineID>(method_call.argument);
374 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
375 switch (engine_id) {
376 case EngineID::FERMI_TWOD_A:
377 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
378 break;
379 case EngineID::MAXWELL_B:
380 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
381 break;
382 case EngineID::KEPLER_COMPUTE_B:
383 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
384 break;
385 case EngineID::MAXWELL_DMA_COPY_A:
386 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
387 break;
388 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
389 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
390 break;
391 default:
392 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
393 }
394}
395
396void GPU::ProcessFenceActionMethod() {
397 switch (regs.fence_action.op) {
398 case FenceOperation::Acquire:
399 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
400 break;
401 case FenceOperation::Increment:
402 IncrementSyncPoint(regs.fence_action.syncpoint_id);
403 break;
404 default:
405 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
406 }
407}
408
409void GPU::ProcessWaitForInterruptMethod() {
410 // TODO(bunnei) ImplementMe
411 LOG_WARNING(HW_GPU, "(STUBBED) called");
412}
413
414void GPU::ProcessSemaphoreTriggerMethod() {
415 const auto semaphoreOperationMask = 0xF;
416 const auto op =
417 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
418 if (op == GpuSemaphoreOperation::WriteLong) {
419 struct Block {
420 u32 sequence;
421 u32 zeros = 0;
422 u64 timestamp;
423 };
424 868
425 Block block{}; 869bool GPU::IsAsync() const {
426 block.sequence = regs.semaphore_sequence; 870 return impl->IsAsync();
427 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
428 // CoreTiming
429 block.timestamp = GetTicks();
430 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
431 sizeof(block));
432 } else {
433 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
434 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
435 (op == GpuSemaphoreOperation::AcquireGequal &&
436 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
437 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
438 // Nothing to do in this case
439 } else {
440 regs.acquire_source = true;
441 regs.acquire_value = regs.semaphore_sequence;
442 if (op == GpuSemaphoreOperation::AcquireEqual) {
443 regs.acquire_active = true;
444 regs.acquire_mode = false;
445 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
446 regs.acquire_active = true;
447 regs.acquire_mode = true;
448 } else if (op == GpuSemaphoreOperation::AcquireMask) {
449 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
450 // semaphore_sequence, gives a non-0 result
451 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
452 } else {
453 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
454 }
455 }
456 }
457} 871}
458 872
459void GPU::ProcessSemaphoreRelease() { 873bool GPU::UseNvdec() const {
460 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); 874 return impl->UseNvdec();
461} 875}
462 876
463void GPU::ProcessSemaphoreAcquire() { 877void GPU::RendererFrameEndNotify() {
464 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); 878 impl->RendererFrameEndNotify();
465 const auto value = regs.semaphore_acquire;
466 if (word != value) {
467 regs.acquire_active = true;
468 regs.acquire_value = value;
469 // TODO(kemathe73) figure out how to do the acquire_timeout
470 regs.acquire_mode = false;
471 regs.acquire_source = false;
472 }
473} 879}
474 880
475void GPU::Start() { 881void GPU::Start() {
476 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 882 impl->Start();
477 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
478 cpu_context->MakeCurrent();
479} 883}
480 884
481void GPU::ObtainContext() { 885void GPU::ObtainContext() {
482 cpu_context->MakeCurrent(); 886 impl->ObtainContext();
483} 887}
484 888
485void GPU::ReleaseContext() { 889void GPU::ReleaseContext() {
486 cpu_context->DoneCurrent(); 890 impl->ReleaseContext();
487} 891}
488 892
489void GPU::PushGPUEntries(Tegra::CommandList&& entries) { 893void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
490 gpu_thread.SubmitList(std::move(entries)); 894 impl->PushGPUEntries(std::move(entries));
491} 895}
492 896
493void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 897void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
494 if (!use_nvdec) { 898 impl->PushCommandBuffer(entries);
495 return;
496 }
497
498 if (!cdma_pusher) {
499 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
500 }
501
502 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
503 // TODO(ameerj): RE proper async nvdec operation
504 // gpu_thread.SubmitCommandBuffer(std::move(entries));
505
506 cdma_pusher->ProcessEntries(std::move(entries));
507} 899}
508 900
509void GPU::ClearCdmaInstance() { 901void GPU::ClearCdmaInstance() {
510 cdma_pusher.reset(); 902 impl->ClearCdmaInstance();
511} 903}
512 904
513void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 905void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
514 gpu_thread.SwapBuffers(framebuffer); 906 impl->SwapBuffers(framebuffer);
515} 907}
516 908
517void GPU::FlushRegion(VAddr addr, u64 size) { 909void GPU::FlushRegion(VAddr addr, u64 size) {
518 gpu_thread.FlushRegion(addr, size); 910 impl->FlushRegion(addr, size);
519} 911}
520 912
521void GPU::InvalidateRegion(VAddr addr, u64 size) { 913void GPU::InvalidateRegion(VAddr addr, u64 size) {
522 gpu_thread.InvalidateRegion(addr, size); 914 impl->InvalidateRegion(addr, size);
523} 915}
524 916
525void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 917void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
526 gpu_thread.FlushAndInvalidateRegion(addr, size); 918 impl->FlushAndInvalidateRegion(addr, size);
527}
528
529void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
530 auto& interrupt_manager = system.InterruptManager();
531 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
532}
533
534void GPU::OnCommandListEnd() {
535 if (is_async) {
536 // This command only applies to asynchronous GPU mode
537 gpu_thread.OnCommandListEnd();
538 }
539} 919}
540 920
541} // namespace Tegra 921} // namespace Tegra