path: root/src/video_core/gpu.cpp
author    Fernando S  2022-10-06 21:29:53 +0200
committer GitHub      2022-10-06 21:29:53 +0200
commit    1effa578f12f79d7816e3543291f302f126cc1d2 (patch)
tree      14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/gpu.cpp
parent    Merge pull request #9025 from FernandoS27/slava-ukrayini (diff)
parent    vulkan_blitter: Fix pool allocation double free. (diff)
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/gpu.cpp')
-rw-r--r--  src/video_core/gpu.cpp | 706
1 file changed, 173 insertions(+), 533 deletions(-)
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 33431f2a0..28b38273e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -14,10 +14,11 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/frontend/emu_window.h"
-#include "core/hardware_interrupt_manager.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/perf_stats.h"
 #include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -26,75 +27,64 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
 
 namespace Tegra {
 
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
-                                          system)},
-          dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
-          maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
-          fermi_2d{std::make_unique<Engines::Fermi2D>()},
-          kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
-          maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
-          kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+        : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-          gpu_thread{system_, is_async_} {}
+          gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
     ~Impl() = default;
 
-    /// Binds a renderer to the GPU.
-    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
-        renderer = std::move(renderer_);
-        rasterizer = renderer->ReadRasterizer();
-
-        memory_manager->BindRasterizer(rasterizer);
-        maxwell_3d->BindRasterizer(rasterizer);
-        fermi_2d->BindRasterizer(rasterizer);
-        kepler_compute->BindRasterizer(rasterizer);
-        kepler_memory->BindRasterizer(rasterizer);
-        maxwell_dma->BindRasterizer(rasterizer);
+    std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+        auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+        channels.emplace(channel_id, channel_state);
+        scheduler->DeclareChannel(channel_state);
+        return channel_state;
     }
 
-    /// Calls a GPU method.
-    void CallMethod(const GPU::MethodCall& method_call) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-                  method_call.subchannel);
-
-        ASSERT(method_call.subchannel < bound_engines.size());
-
-        if (ExecuteMethodOnEngine(method_call.method)) {
-            CallEngineMethod(method_call);
-        } else {
-            CallPullerMethod(method_call);
-        }
+    void BindChannel(s32 channel_id) {
+        if (bound_channel == channel_id) {
+            return;
+        }
+        auto it = channels.find(channel_id);
+        ASSERT(it != channels.end());
+        bound_channel = channel_id;
+        current_channel = it->second.get();
+
+        rasterizer->BindChannel(*current_channel);
+    }
+
+    std::shared_ptr<Control::ChannelState> AllocateChannel() {
+        return CreateChannel(new_channel_id++);
     }
 
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
-
-        ASSERT(subchannel < bound_engines.size());
-
-        if (ExecuteMethodOnEngine(method)) {
-            CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-        } else {
-            for (std::size_t i = 0; i < amount; i++) {
-                CallPullerMethod(GPU::MethodCall{
-                    method,
-                    base_start[i],
-                    subchannel,
-                    methods_pending - static_cast<u32>(i),
-                });
-            }
-        }
+    void InitChannel(Control::ChannelState& to_init) {
+        to_init.Init(system, gpu);
+        to_init.BindRasterizer(rasterizer);
+        rasterizer->InitializeChannel(to_init);
+    }
+
+    void InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+        memory_manager.BindRasterizer(rasterizer);
+    }
+
+    void ReleaseChannel(Control::ChannelState& to_release) {
+        UNIMPLEMENTED();
+    }
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+        renderer = std::move(renderer_);
+        rasterizer = renderer->ReadRasterizer();
+        host1x.MemoryManager().BindRasterizer(rasterizer);
     }
 
     /// Flush all current written commands into the host GPU for execution.
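The hunk above moves the per-GPU engine instances into per-channel state and adds a small channel-management API (CreateChannel, BindChannel, AllocateChannel, InitChannel, InitAddressSpace). A minimal caller-side sketch of how that API is meant to fit together, assuming a ChannelState that exposes the id it was created with (called bind_id here; the field name is not shown in this diff) and a MemoryManager owned by the caller:

// Hypothetical usage sketch, not part of this change.
void SubmitOnFreshChannel(Tegra::GPU& gpu, Tegra::MemoryManager& address_space,
                          Tegra::CommandList&& commands) {
    // Ask the GPU for a new channel; ids are handed out from new_channel_id.
    const std::shared_ptr<Tegra::Control::ChannelState> channel = gpu.AllocateChannel();
    // Create the channel's engines and register them with the rasterizer.
    gpu.InitChannel(*channel);
    // Hook the channel's address space up to the rasterizer.
    gpu.InitAddressSpace(address_space);
    // Command lists are now routed through an explicit channel id (assumed field).
    gpu.PushGPUEntries(channel->bind_id, std::move(commands));
}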
@@ -103,85 +93,82 @@ struct GPU::Impl {
     }
 
     /// Synchronizes CPU writes with Host GPU memory.
-    void SyncGuestHost() {
-        rasterizer->SyncGuestHost();
+    void InvalidateGPUCache() {
+        rasterizer->InvalidateGPUCache();
     }
 
     /// Signal the ending of command list.
     void OnCommandListEnd() {
-        if (is_async) {
-            // This command only applies to asynchronous GPU mode
-            gpu_thread.OnCommandListEnd();
-        }
+        gpu_thread.OnCommandListEnd();
     }
 
     /// Request a host GPU memory flush from the CPU.
-    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
-        std::unique_lock lck{flush_request_mutex};
-        const u64 fence = ++last_flush_fence;
-        flush_requests.emplace_back(fence, addr, size);
+    template <typename Func>
+    [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+        std::unique_lock lck{sync_request_mutex};
+        const u64 fence = ++last_sync_fence;
+        sync_requests.emplace_back(action);
         return fence;
     }
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
+    [[nodiscard]] u64 CurrentSyncRequestFence() const {
+        return current_sync_fence.load(std::memory_order_relaxed);
+    }
+
+    void WaitForSyncOperation(const u64 fence) {
+        std::unique_lock lck{sync_request_mutex};
+        sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
     }
 
     /// Tick pending requests within the GPU.
     void TickWork() {
-        std::unique_lock lck{flush_request_mutex};
-        while (!flush_requests.empty()) {
-            auto& request = flush_requests.front();
-            const u64 fence = request.fence;
-            const VAddr addr = request.addr;
-            const std::size_t size = request.size;
-            flush_requests.pop_front();
-            flush_request_mutex.unlock();
-            rasterizer->FlushRegion(addr, size);
-            current_flush_fence.store(fence);
-            flush_request_mutex.lock();
+        std::unique_lock lck{sync_request_mutex};
+        while (!sync_requests.empty()) {
+            auto request = std::move(sync_requests.front());
+            sync_requests.pop_front();
+            sync_request_mutex.unlock();
+            request();
+            current_sync_fence.fetch_add(1, std::memory_order_release);
+            sync_request_mutex.lock();
+            sync_request_cv.notify_all();
         }
     }
 
     /// Returns a reference to the Maxwell3D GPU engine.
     [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
     }
 
     /// Returns a const reference to the Maxwell3D GPU engine.
     [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
    }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
-        return *kepler_compute;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
-        return *kepler_compute;
-    }
-
-    /// Returns a reference to the GPU memory manager.
-    [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
-        return *memory_manager;
-    }
-
-    /// Returns a const reference to the GPU memory manager.
-    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
-        return *memory_manager;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the GPU DMA pusher.
     [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
     }
 
     /// Returns a const reference to the GPU DMA pusher.
     [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
     }
 
     /// Returns a reference to the underlying renderer.
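The flush-request queue above becomes a generic sync-request queue: the CPU side enqueues an arbitrary callback through RequestSyncOperation and receives a fence, TickWork runs the callbacks on the GPU side and bumps current_sync_fence, and WaitForSyncOperation blocks until a given fence has been reached. A short sketch of the intended calling pattern, using only the public wrappers this diff adds further down (GPU::RequestFlush and GPU::WaitForSyncOperation):

// Sketch only; the real call sites live in the nvdrv/rasterizer code outside this file.
void FlushAndWait(Tegra::GPU& gpu, VAddr addr, std::size_t size) {
    // Enqueue the flush as a sync operation and remember its fence...
    const u64 fence = gpu.RequestFlush(addr, size);
    // ...then block until the GPU thread has ticked past that fence.
    gpu.WaitForSyncOperation(fence);
}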
@@ -204,77 +191,6 @@ struct GPU::Impl {
         return *shader_notify;
     }
 
-    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) {
-        // Synced GPU, is always in sync
-        if (!is_async) {
-            return;
-        }
-        if (syncpoint_id == UINT32_MAX) {
-            // TODO: Research what this does.
-            LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
-            return;
-        }
-        MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{sync_mutex};
-        sync_cv.wait(lock, [=, this] {
-            if (shutting_down.load(std::memory_order_relaxed)) {
-                // We're shutting down, ensure no threads continue to wait for the next syncpoint
-                return true;
-            }
-            return syncpoints.at(syncpoint_id).load() >= value;
-        });
-    }
-
-    void IncrementSyncPoint(u32 syncpoint_id) {
-        auto& syncpoint = syncpoints.at(syncpoint_id);
-        syncpoint++;
-        std::scoped_lock lock{sync_mutex};
-        sync_cv.notify_all();
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        if (!interrupt.empty()) {
-            u32 value = syncpoint.load();
-            auto it = interrupt.begin();
-            while (it != interrupt.end()) {
-                if (value >= *it) {
-                    TriggerCpuInterrupt(syncpoint_id, *it);
-                    it = interrupt.erase(it);
-                    continue;
-                }
-                it++;
-            }
-        }
-    }
-
-    [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).load();
-    }
-
-    void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        bool contains = std::any_of(interrupt.begin(), interrupt.end(),
-                                    [value](u32 in_value) { return in_value == value; });
-        if (contains) {
-            return;
-        }
-        interrupt.emplace_back(value);
-    }
-
-    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        const auto iter =
-            std::find_if(interrupt.begin(), interrupt.end(),
-                         [value](u32 interrupt_value) { return value == interrupt_value; });
-
-        if (iter == interrupt.end()) {
-            return false;
-        }
-        interrupt.erase(iter);
-        return true;
-    }
-
     [[nodiscard]] u64 GetTicks() const {
         // This values were reversed engineered by fincs from NVN
         // The gpu clock is reported in units of 385/625 nanoseconds
@@ -306,7 +222,7 @@ struct GPU::Impl {
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
     void Start() {
-        gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+        gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
         cpu_context = renderer->GetRenderWindow().CreateSharedContext();
         cpu_context->MakeCurrent();
     }
@@ -328,8 +244,8 @@ struct GPU::Impl {
     }
 
     /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(std::move(entries));
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+        gpu_thread.SubmitList(channel, std::move(entries));
     }
 
     /// Push GPU command buffer entries to be processed
@@ -339,7 +255,7 @@ struct GPU::Impl {
         }
 
         if (!cdma_pushers.contains(id)) {
-            cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(gpu));
+            cdma_pushers.insert_or_assign(id, std::make_unique<Tegra::CDmaPusher>(host1x));
         }
 
         // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
@@ -376,308 +292,55 @@ struct GPU::Impl {
         gpu_thread.FlushAndInvalidateRegion(addr, size);
     }
 
-    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
-        auto& interrupt_manager = system.InterruptManager();
-        interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
-    }
-
-    void ProcessBindMethod(const GPU::MethodCall& method_call) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        const auto engine_id = static_cast<EngineID>(method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
-        switch (engine_id) {
-        case EngineID::FERMI_TWOD_A:
-            dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_B:
-            dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
-        }
-    }
-
-    void ProcessFenceActionMethod() {
-        switch (regs.fence_action.op) {
-        case GPU::FenceOperation::Acquire:
-            WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-            break;
-        case GPU::FenceOperation::Increment:
-            IncrementSyncPoint(regs.fence_action.syncpoint_id);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
-        }
-    }
-
-    void ProcessWaitForInterruptMethod() {
-        // TODO(bunnei) ImplementMe
-        LOG_WARNING(HW_GPU, "(STUBBED) called");
-    }
-
-    void ProcessSemaphoreTriggerMethod() {
-        const auto semaphoreOperationMask = 0xF;
-        const auto op =
-            static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-        if (op == GpuSemaphoreOperation::WriteLong) {
-            struct Block {
-                u32 sequence;
-                u32 zeros = 0;
-                u64 timestamp;
-            };
-
-            Block block{};
-            block.sequence = regs.semaphore_sequence;
-            // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
-            // CoreTiming
-            block.timestamp = GetTicks();
-            memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
-                                       sizeof(block));
-        } else {
-            const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-            if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-                (op == GpuSemaphoreOperation::AcquireGequal &&
-                 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-                (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-                // Nothing to do in this case
-            } else {
-                regs.acquire_source = true;
-                regs.acquire_value = regs.semaphore_sequence;
-                if (op == GpuSemaphoreOperation::AcquireEqual) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = false;
-                } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = true;
-                } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                    // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                    // semaphore_sequence, gives a non-0 result
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
-                } else {
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-                }
-            }
-        }
-    }
-
-    void ProcessSemaphoreRelease() {
-        memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
-                                   regs.semaphore_release);
-    }
-
-    void ProcessSemaphoreAcquire() {
-        const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
-        const auto value = regs.semaphore_acquire;
-        if (word != value) {
-            regs.acquire_active = true;
-            regs.acquire_value = value;
-            // TODO(kemathe73) figure out how to do the acquire_timeout
-            regs.acquire_mode = false;
-            regs.acquire_source = false;
-        }
-    }
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const GPU::MethodCall& method_call) {
-        regs.reg_array[method_call.method] = method_call.argument;
-        const auto method = static_cast<BufferMethods>(method_call.method);
-
-        switch (method) {
-        case BufferMethods::BindObject: {
-            ProcessBindMethod(method_call);
-            break;
-        }
-        case BufferMethods::Nop:
-        case BufferMethods::SemaphoreAddressHigh:
-        case BufferMethods::SemaphoreAddressLow:
-        case BufferMethods::SemaphoreSequence:
-            break;
-        case BufferMethods::UnkCacheFlush:
-            rasterizer->SyncGuestHost();
-            break;
-        case BufferMethods::WrcacheFlush:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceValue:
-            break;
-        case BufferMethods::RefCnt:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceAction:
-            ProcessFenceActionMethod();
-            break;
-        case BufferMethods::WaitForInterrupt:
-            rasterizer->WaitForIdle();
-            break;
-        case BufferMethods::SemaphoreTrigger: {
-            ProcessSemaphoreTriggerMethod();
-            break;
-        }
-        case BufferMethods::NotifyIntr: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
-            break;
-        }
-        case BufferMethods::Unk28: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
-            break;
-        }
-        case BufferMethods::SemaphoreAcquire: {
-            ProcessSemaphoreAcquire();
-            break;
-        }
-        case BufferMethods::SemaphoreRelease: {
-            ProcessSemaphoreRelease();
-            break;
-        }
-        case BufferMethods::Yield: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-            break;
-        }
-        default:
-            LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
-            break;
-        }
-    }
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const GPU::MethodCall& method_call) {
-        const EngineID engine = bound_engines[method_call.subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMethod(method_call.method, method_call.argument,
-                                 method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMethod(method_call.method, method_call.argument,
-                                   method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMethod(method_call.method, method_call.argument,
-                                       method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMethod(method_call.method, method_call.argument,
-                                    method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMethod(method_call.method, method_call.argument,
-                                      method_call.IsLastCall());
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending) {
-        const EngineID engine = bound_engines[subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
-        const auto buffer_method = static_cast<BufferMethods>(method);
-        return buffer_method >= BufferMethods::NonPullerMethods;
-    }
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                GPU::FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
+    void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                            std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
+        size_t current_request_counter{};
+        {
+            std::unique_lock<std::mutex> lk(request_swap_mutex);
+            if (free_swap_counters.empty()) {
+                current_request_counter = request_swap_counters.size();
+                request_swap_counters.emplace_back(num_fences);
+            } else {
+                current_request_counter = free_swap_counters.front();
+                request_swap_counters[current_request_counter] = num_fences;
+                free_swap_counters.pop_front();
+            }
+        }
+        const auto wait_fence =
+            RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
+                auto& syncpoint_manager = host1x.GetSyncpointManager();
+                if (num_fences == 0) {
+                    renderer->SwapBuffers(framebuffer);
+                }
+                const auto executer = [this, current_request_counter,
+                                       framebuffer_copy = *framebuffer]() {
+                    {
+                        std::unique_lock<std::mutex> lk(request_swap_mutex);
+                        if (--request_swap_counters[current_request_counter] != 0) {
+                            return;
+                        }
+                        free_swap_counters.push_back(current_request_counter);
+                    }
+                    renderer->SwapBuffers(&framebuffer_copy);
+                };
+                for (size_t i = 0; i < num_fences; i++) {
+                    syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
+                }
+            });
+        gpu_thread.TickGPU();
+        WaitForSyncOperation(wait_fence);
+    }
 
     GPU& gpu;
     Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    Host1x::Host1x& host1x;
+
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
     VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
 
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines{};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    s32 new_channel_id{1};
     /// Shader build notifier
     std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
     /// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -692,51 +355,25 @@ struct GPU::Impl {
 
     std::condition_variable sync_cv;
 
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
+    std::list<std::function<void()>> sync_requests;
+    std::atomic<u64> current_sync_fence{};
+    u64 last_sync_fence{};
+    std::mutex sync_request_mutex;
+    std::condition_variable sync_request_cv;
 
     const bool is_async;
 
     VideoCommon::GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 
-#define ASSERT_REG_POSITION(field_name, position)                                                 \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                     \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(semaphore_address, 0x4);
-    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-    ASSERT_REG_POSITION(reference_count, 0x14);
-    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-    ASSERT_REG_POSITION(semaphore_release, 0x1B);
-    ASSERT_REG_POSITION(fence_value, 0x1C);
-    ASSERT_REG_POSITION(fence_action, 0x1D);
-
-    ASSERT_REG_POSITION(acquire_mode, 0x100);
-    ASSERT_REG_POSITION(acquire_source, 0x101);
-    ASSERT_REG_POSITION(acquire_active, 0x102);
-    ASSERT_REG_POSITION(acquire_timeout, 0x103);
-    ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
-    enum class GpuSemaphoreOperation {
-        AcquireEqual = 0x1,
-        WriteLong = 0x2,
-        AcquireGequal = 0x4,
-        AcquireMask = 0x8,
-    };
+    std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+    std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+    Tegra::Control::ChannelState* current_channel;
+    s32 bound_channel{-1};
+
+    std::deque<size_t> free_swap_counters;
+    std::deque<size_t> request_swap_counters;
+    std::mutex request_swap_mutex;
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,25 +381,36 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
 
 GPU::~GPU() = default;
 
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
-    impl->BindRenderer(std::move(renderer));
+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+    return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+    impl->InitChannel(to_init);
+}
+
+void GPU::BindChannel(s32 channel_id) {
+    impl->BindChannel(channel_id);
 }
 
-void GPU::CallMethod(const MethodCall& method_call) {
-    impl->CallMethod(method_call);
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+    impl->ReleaseChannel(to_release);
 }
 
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                          u32 methods_pending) {
-    impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
+void GPU::InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+    impl->InitAddressSpace(memory_manager);
+}
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+    impl->BindRenderer(std::move(renderer));
 }
 
 void GPU::FlushCommands() {
     impl->FlushCommands();
 }
 
-void GPU::SyncGuestHost() {
-    impl->SyncGuestHost();
+void GPU::InvalidateGPUCache() {
+    impl->InvalidateGPUCache();
 }
 
 void GPU::OnCommandListEnd() {
@@ -770,17 +418,32 @@ void GPU::OnCommandListEnd() {
 }
 
 u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
-    return impl->RequestFlush(addr, size);
+    return impl->RequestSyncOperation(
+        [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
 }
 
-u64 GPU::CurrentFlushRequestFence() const {
-    return impl->CurrentFlushRequestFence();
+u64 GPU::CurrentSyncRequestFence() const {
+    return impl->CurrentSyncRequestFence();
+}
+
+void GPU::WaitForSyncOperation(u64 fence) {
+    return impl->WaitForSyncOperation(fence);
 }
 
 void GPU::TickWork() {
     impl->TickWork();
 }
 
+/// Gets a mutable reference to the Host1x interface
+Host1x::Host1x& GPU::Host1x() {
+    return impl->host1x;
+}
+
+/// Gets an immutable reference to the Host1x interface.
+const Host1x::Host1x& GPU::Host1x() const {
+    return impl->host1x;
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return impl->Maxwell3D();
 }
@@ -797,14 +460,6 @@ const Engines::KeplerCompute& GPU::KeplerCompute() const {
     return impl->KeplerCompute();
 }
 
-Tegra::MemoryManager& GPU::MemoryManager() {
-    return impl->MemoryManager();
-}
-
-const Tegra::MemoryManager& GPU::MemoryManager() const {
-    return impl->MemoryManager();
-}
-
 Tegra::DmaPusher& GPU::DmaPusher() {
     return impl->DmaPusher();
 }
@@ -829,24 +484,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
     return impl->ShaderNotify();
 }
 
-void GPU::WaitFence(u32 syncpoint_id, u32 value) {
-    impl->WaitFence(syncpoint_id, value);
-}
-
-void GPU::IncrementSyncPoint(u32 syncpoint_id) {
-    impl->IncrementSyncPoint(syncpoint_id);
-}
-
-u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
-    return impl->GetSyncpointValue(syncpoint_id);
-}
-
-void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
-    impl->RegisterSyncptInterrupt(syncpoint_id, value);
-}
-
-bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-    return impl->CancelSyncptInterrupt(syncpoint_id, value);
+void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                             std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
+    impl->RequestSwapBuffers(framebuffer, fences, num_fences);
 }
 
 u64 GPU::GetTicks() const {
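The per-syncpoint wait and interrupt entry points removed above are replaced by a single RequestSwapBuffers call that defers the swap until every supplied fence has signalled on the Host1x side. A hedged caller-side sketch, assuming the framebuffer config and fences come from the guest's present request; the helper name and parameters are illustrative, not part of this change:

// Hypothetical presentation-side sketch, not part of this change.
void PresentFrame(Tegra::GPU& gpu, const Tegra::FramebufferConfig& framebuffer,
                  std::span<const Service::Nvidia::NvFence> acquire_fences) {
    std::array<Service::Nvidia::NvFence, 4> fences{};
    const size_t num_fences = std::min(acquire_fences.size(), fences.size());
    std::copy_n(acquire_fences.begin(), num_fences, fences.begin());
    // The swap happens only once every listed syncpoint fence has been reached.
    gpu.RequestSwapBuffers(&framebuffer, fences, num_fences);
}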
@@ -881,8 +521,8 @@ void GPU::ReleaseContext() {
     impl->ReleaseContext();
 }
 
-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+    impl->PushGPUEntries(channel, std::move(entries));
 }
 
 void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {