summary | refs | log | tree | commit | diff
path: root/src/video_core/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r-- src/video_core/gpu.cpp 179
1 files changed, 133 insertions, 46 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4bb9256e9..6ab06775f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core_timing.h" 10#include "core/core_timing.h"
11#include "core/core_timing_util.h" 11#include "core/core_timing_util.h"
12#include "core/frontend/emu_window.h" 12#include "core/frontend/emu_window.h"
13#include "core/hardware_interrupt_manager.h"
13#include "core/memory.h" 14#include "core/memory.h"
14#include "core/settings.h" 15#include "core/settings.h"
15#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
@@ -27,15 +28,17 @@ namespace Tegra {
27 28
28MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 29MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
29 30
// Constructs the GPU: stores the system reference and creates the memory manager, the DMA
// pusher and the Maxwell3D / Fermi2D / KeplerCompute / MaxwellDMA / KeplerMemory engines plus
// the shader notifier. (Side-by-side diff: old revision on the left, new on the right.)
// The new revision adds the use_nvdec_ parameter and additionally initializes cdma_pusher,
// use_nvdec and gpu_thread.
30GPU::GPU(Core::System& system_, bool is_async_) 31GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
31 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 32 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
32 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, 33 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
34 cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_},
33 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
34 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 36 fermi_2d{std::make_unique<Engines::Fermi2D>()},
35 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
36 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
37 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
38 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} 40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
41 gpu_thread{system_, is_async_} {}
39 42
// Defaulted destructor, defined out of line in this translation unit (unchanged by the diff).
40GPU::~GPU() = default; 43GPU::~GPU() = default;
41 44
@@ -77,31 +80,46 @@ DmaPusher& GPU::DmaPusher() {
77 return *dma_pusher; 80 return *dma_pusher;
78} 81}
79 82
83Tegra::CDmaPusher& GPU::CDmaPusher() {
84 return *cdma_pusher;
85}
86
// Read-only accessor: returns a const reference to the DMA pusher held in dma_pusher.
// (Side-by-side diff line pair; the function is unchanged, only its line number moved.)
80const DmaPusher& GPU::DmaPusher() const { 87const DmaPusher& GPU::DmaPusher() const {
81 return *dma_pusher; 88 return *dma_pusher;
82} 89}
83 90
91const Tegra::CDmaPusher& GPU::CDmaPusher() const {
92 return *cdma_pusher;
93}
94
// Blocks the caller until the counter of syncpoint `syncpoint_id` is at least `value`.
// Returns immediately when the GPU is not asynchronous. Otherwise it waits on sync_cv while
// holding sync_mutex, re-checking the syncpoint counter each time it is woken.
// (Side-by-side diff: old revision on the left, new on the right.) The new revision also logs
// an error and returns for syncpoint_id == UINT32_MAX, and indexes syncpoints with .at()
// instead of operator[] (presumably for bounds checking — container type not visible here).
84void GPU::WaitFence(u32 syncpoint_id, u32 value) { 95void GPU::WaitFence(u32 syncpoint_id, u32 value) {
85 // Synced GPU, is always in sync 96 // Synced GPU, is always in sync
86 if (!is_async) { 97 if (!is_async) {
87 return; 98 return;
88 } 99 }
100 if (syncpoint_id == UINT32_MAX) {
101 // TODO: Research what this does.
102 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
103 return;
104 }
89 MICROPROFILE_SCOPE(GPU_wait); 105 MICROPROFILE_SCOPE(GPU_wait);
90 std::unique_lock lock{sync_mutex}; 106 std::unique_lock lock{sync_mutex};
91 sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); 107 sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
92} 108}
93 109
94void GPU::IncrementSyncPoint(const u32 syncpoint_id) { 110void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
95 syncpoints[syncpoint_id]++; 111 auto& syncpoint = syncpoints.at(syncpoint_id);
112 syncpoint++;
96 std::lock_guard lock{sync_mutex}; 113 std::lock_guard lock{sync_mutex};
97 sync_cv.notify_all(); 114 sync_cv.notify_all();
98 if (!syncpt_interrupts[syncpoint_id].empty()) { 115 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
99 u32 value = syncpoints[syncpoint_id].load(); 116 if (!interrupt.empty()) {
100 auto it = syncpt_interrupts[syncpoint_id].begin(); 117 u32 value = syncpoint.load();
101 while (it != syncpt_interrupts[syncpoint_id].end()) { 118 auto it = interrupt.begin();
119 while (it != interrupt.end()) {
102 if (value >= *it) { 120 if (value >= *it) {
103 TriggerCpuInterrupt(syncpoint_id, *it); 121 TriggerCpuInterrupt(syncpoint_id, *it);
104 it = syncpt_interrupts[syncpoint_id].erase(it); 122 it = interrupt.erase(it);
105 continue; 123 continue;
106 } 124 }
107 it++; 125 it++;
@@ -110,22 +128,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
110} 128}
111 129
// Returns the current counter of syncpoint `syncpoint_id` via load().
// (Side-by-side diff: the new revision on the right indexes with .at() instead of operator[].)
112u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { 130u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
113 return syncpoints[syncpoint_id].load(); 131 return syncpoints.at(syncpoint_id).load();
114} 132}
115 133
// Records `value` as a pending interrupt threshold for syncpoint `syncpoint_id`, unless the
// same value is already registered for that syncpoint (duplicates are ignored).
// (Side-by-side diff: the new revision uses .at() and reuses the `interrupt` reference.)
// NOTE(review): IncrementSyncPoint and CancelSyncptInterrupt in this file hold sync_mutex while
// touching syncpt_interrupts; this function does not — confirm callers serialize access.
116void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 134void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
117 auto& interrupt = syncpt_interrupts[syncpoint_id]; 135 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
118 bool contains = std::any_of(interrupt.begin(), interrupt.end(), 136 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
119 [value](u32 in_value) { return in_value == value; }); 137 [value](u32 in_value) { return in_value == value; });
120 if (contains) { 138 if (contains) {
121 return; 139 return;
122 } 140 }
123 syncpt_interrupts[syncpoint_id].emplace_back(value); 141 interrupt.emplace_back(value);
124} 142}
125 143
126bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 144bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
127 std::lock_guard lock{sync_mutex}; 145 std::lock_guard lock{sync_mutex};
128 auto& interrupt = syncpt_interrupts[syncpoint_id]; 146 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
129 const auto iter = 147 const auto iter =
130 std::find_if(interrupt.begin(), interrupt.end(), 148 std::find_if(interrupt.begin(), interrupt.end(),
131 [value](u32 interrupt_value) { return value == interrupt_value; }); 149 [value](u32 interrupt_value) { return value == interrupt_value; });
@@ -182,34 +200,6 @@ void GPU::SyncGuestHost() {
182 renderer->Rasterizer().SyncGuestHost(); 200 renderer->Rasterizer().SyncGuestHost();
183} 201}
184 202
// Old revision only (this definition is removed by the diff; a new one is added at the end of
// the file). Calls ReleaseFences() on the renderer's rasterizer.
185void GPU::OnCommandListEnd() {
186 renderer->Rasterizer().ReleaseFences();
187}
// Old revision only: this enum is removed from gpu.cpp by the diff. It lists the puller
// (command buffer) method numbers; values at or above NonPullerMethods are not puller methods.
188// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
189// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4,
190// so the values you see in docs might be these values multiplied by 4.
191enum class BufferMethods {
192 BindObject = 0x0,
193 Nop = 0x2,
194 SemaphoreAddressHigh = 0x4,
195 SemaphoreAddressLow = 0x5,
196 SemaphoreSequence = 0x6,
197 SemaphoreTrigger = 0x7,
198 NotifyIntr = 0x8,
199 WrcacheFlush = 0x9,
200 Unk28 = 0xA,
201 UnkCacheFlush = 0xB,
202 RefCnt = 0x14,
203 SemaphoreAcquire = 0x1A,
204 SemaphoreRelease = 0x1B,
205 FenceValue = 0x1C,
206 FenceAction = 0x1D,
207 Unk78 = 0x1E,
208 Unk7c = 0x1F,
209 Yield = 0x20,
210 NonPullerMethods = 0x40,
211};
212
213enum class GpuSemaphoreOperation { 203enum class GpuSemaphoreOperation {
214 AcquireEqual = 0x1, 204 AcquireEqual = 0x1,
215 WriteLong = 0x2, 205 WriteLong = 0x2,
@@ -240,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32
240 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); 230 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
241 } else { 231 } else {
242 for (std::size_t i = 0; i < amount; i++) { 232 for (std::size_t i = 0; i < amount; i++) {
243 CallPullerMethod( 233 CallPullerMethod(MethodCall{
244 {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); 234 method,
235 base_start[i],
236 subchannel,
237 methods_pending - static_cast<u32>(i),
238 });
245 } 239 }
246 } 240 }
247} 241}
@@ -268,7 +262,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
268 case BufferMethods::UnkCacheFlush: 262 case BufferMethods::UnkCacheFlush:
269 case BufferMethods::WrcacheFlush: 263 case BufferMethods::WrcacheFlush:
270 case BufferMethods::FenceValue: 264 case BufferMethods::FenceValue:
265 break;
271 case BufferMethods::FenceAction: 266 case BufferMethods::FenceAction:
267 ProcessFenceActionMethod();
268 break;
269 case BufferMethods::WaitForInterrupt:
270 ProcessWaitForInterruptMethod();
272 break; 271 break;
273 case BufferMethods::SemaphoreTrigger: { 272 case BufferMethods::SemaphoreTrigger: {
274 ProcessSemaphoreTriggerMethod(); 273 ProcessSemaphoreTriggerMethod();
@@ -298,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
298 break; 297 break;
299 } 298 }
300 default: 299 default:
301 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", 300 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
302 static_cast<u32>(method));
303 break; 301 break;
304 } 302 }
305} 303}
@@ -378,10 +376,28 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
378 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); 376 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
379 break; 377 break;
380 default: 378 default:
381 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); 379 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
380 }
381}
382
// Handles the FenceAction puller method by dispatching on regs.fence_action.op:
//  - Acquire:   waits (WaitFence) until the syncpoint reaches regs.fence_value.
//  - Increment: increments the syncpoint (IncrementSyncPoint).
//  - otherwise: reports the operation as unimplemented.
// Added by the new revision; the last two lines are shared diff rows whose left column
// belongs to the old revision's preceding function.
383void GPU::ProcessFenceActionMethod() {
384 switch (regs.fence_action.op) {
385 case FenceOperation::Acquire:
386 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
387 break;
388 case FenceOperation::Increment:
389 IncrementSyncPoint(regs.fence_action.syncpoint_id);
390 break;
391 default:
392 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
382 } 393 }
383} 394}
384 395
396void GPU::ProcessWaitForInterruptMethod() {
397 // TODO(bunnei) ImplementMe
398 LOG_WARNING(HW_GPU, "(STUBBED) called");
399}
400
385void GPU::ProcessSemaphoreTriggerMethod() { 401void GPU::ProcessSemaphoreTriggerMethod() {
386 const auto semaphoreOperationMask = 0xF; 402 const auto semaphoreOperationMask = 0xF;
387 const auto op = 403 const auto op =
@@ -443,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
443 } 459 }
444} 460}
445 461
462void GPU::Start() {
463 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
464 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
465 cpu_context->MakeCurrent();
466}
467
468void GPU::ObtainContext() {
469 cpu_context->MakeCurrent();
470}
471
472void GPU::ReleaseContext() {
473 cpu_context->DoneCurrent();
474}
475
476void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
477 gpu_thread.SubmitList(std::move(entries));
478}
479
480void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
481 if (!use_nvdec) {
482 return;
483 }
484 // This condition fires when a video stream ends, clear all intermediary data
485 if (entries[0].raw == 0xDEADB33F) {
486 cdma_pusher.reset();
487 return;
488 }
489 if (!cdma_pusher) {
490 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
491 }
492
493 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
494 // TODO(ameerj): RE proper async nvdec operation
495 // gpu_thread.SubmitCommandBuffer(std::move(entries));
496
497 cdma_pusher->Push(std::move(entries));
498 cdma_pusher->DispatchCalls();
499}
500
501void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
502 gpu_thread.SwapBuffers(framebuffer);
503}
504
505void GPU::FlushRegion(VAddr addr, u64 size) {
506 gpu_thread.FlushRegion(addr, size);
507}
508
509void GPU::InvalidateRegion(VAddr addr, u64 size) {
510 gpu_thread.InvalidateRegion(addr, size);
511}
512
513void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
514 gpu_thread.FlushAndInvalidateRegion(addr, size);
515}
516
517void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
518 auto& interrupt_manager = system.InterruptManager();
519 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
520}
521
522void GPU::WaitIdle() const {
523 gpu_thread.WaitIdle();
524}
525
526void GPU::OnCommandListEnd() {
527 if (is_async) {
528 // This command only applies to asynchronous GPU mode
529 gpu_thread.OnCommandListEnd();
530 }
531}
532
446} // namespace Tegra 533} // namespace Tegra