path: root/src/video_core/gpu.h
author     ameerj  2021-10-01 00:57:02 -0400
committer  ameerj  2021-10-03 00:35:57 -0400
commit  427bf76e621cf0833bc1bbec7d8be891297223e7 (patch)
tree    5cc146d21972e1a7c424219482ef3393787afe6a /src/video_core/gpu.h
parent  Merge pull request #7061 from ameerj/dma-buffer-misc (diff)
gpu: Migrate implementation to the cpp file
Diffstat (limited to 'src/video_core/gpu.h')
-rw-r--r--  src/video_core/gpu.h  217
1 file changed, 27 insertions(+), 190 deletions(-)
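The change below is a textbook pimpl migration: the header stops defining inline accessors and private state, forward-declares a private `struct Impl`, and keeps only a `std::unique_ptr<Impl> impl;` member, so the public accessors become one-line forwarders defined in gpu.cpp. The following is a minimal sketch of that pattern, not the actual gpu.cpp from this commit; the class and member names inside `Impl` are illustrative stand-ins. A second sketch after the diff covers the relocated FenceAction union.

```cpp
// Illustrative pimpl sketch; the real definitions live in src/video_core/gpu.cpp.
#include <memory>

class Renderer {};     // stand-in for VideoCore::RendererBase
class ShaderNotify {}; // stand-in for VideoCore::ShaderNotify

class GPU {
public:
    GPU();
    ~GPU(); // declared here, defined out of line where Impl is a complete type
    Renderer& GetRenderer();

private:
    struct Impl; // forward declaration only; the layout stays out of the header
    std::unique_ptr<Impl> impl;
};

// --- definitions that would normally live in the .cpp file ---
struct GPU::Impl {
    Renderer renderer;          // hypothetical members; the real Impl owns the
    ShaderNotify shader_notify; // renderer, engines, sync state, and so on
};

GPU::GPU() : impl{std::make_unique<Impl>()} {}
GPU::~GPU() = default; // out of line so unique_ptr can delete the complete Impl

Renderer& GPU::GetRenderer() {
    return impl->renderer; // public accessors forward into the hidden implementation
}

int main() {
    GPU gpu;
    Renderer& r = gpu.GetRenderer();
    (void)r;
    return 0;
}
```

The payoff is that headers which include gpu.h no longer need `<array>`, `<atomic>`, `<condition_variable>`, the engine headers, or gpu_thread.h, which is exactly what the include changes at the top of the diff show.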
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e6a02a71b..39b304823 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -4,19 +4,13 @@
 
 #pragma once
 
-#include <array>
-#include <atomic>
-#include <condition_variable>
-#include <list>
 #include <memory>
 #include <mutex>
+
+#include "common/bit_field.h"
 #include "common/common_types.h"
-#include "core/hle/service/nvdrv/nvdata.h"
-#include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/cdma_pusher.h"
-#include "video_core/dma_pusher.h"
 #include "video_core/framebuffer_config.h"
-#include "video_core/gpu_thread.h"
 
 using CacheAddr = std::uintptr_t;
 [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -40,6 +34,9 @@ class ShaderNotify;
 } // namespace VideoCore
 
 namespace Tegra {
+class DmaPusher;
+class CDmaPusher;
+struct CommandList;
 
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
@@ -138,7 +135,18 @@ public:
         }
     };
 
-    explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+    };
+
+    explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
     ~GPU();
 
     /// Binds a renderer to the GPU.
@@ -162,9 +170,7 @@ public:
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
-    }
+    [[nodiscard]] u64 CurrentFlushRequestFence() const;
 
     /// Tick pending requests within the GPU.
     void TickWork();
@@ -200,24 +206,16 @@ public:
     [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
 
     /// Returns a reference to the underlying renderer.
-    [[nodiscard]] VideoCore::RendererBase& Renderer() {
-        return *renderer;
-    }
+    [[nodiscard]] VideoCore::RendererBase& Renderer();
 
     /// Returns a const reference to the underlying renderer.
-    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
-        return *renderer;
-    }
+    [[nodiscard]] const VideoCore::RendererBase& Renderer() const;
 
     /// Returns a reference to the shader notifier.
-    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
-        return *shader_notify;
-    }
+    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify();
 
     /// Returns a const reference to the shader notifier.
-    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
-        return *shader_notify;
-    }
+    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value);
@@ -232,80 +230,14 @@ public:
 
     [[nodiscard]] u64 GetTicks() const;
 
-    [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
-        return std::unique_lock{sync_mutex};
-    }
+    [[nodiscard]] std::unique_lock<std::mutex> LockSync();
 
-    [[nodiscard]] bool IsAsync() const {
-        return is_async;
-    }
+    [[nodiscard]] bool IsAsync() const;
 
-    [[nodiscard]] bool UseNvdec() const {
-        return use_nvdec;
-    }
+    [[nodiscard]] bool UseNvdec() const;
 
     void RendererFrameEndNotify();
 
-    enum class FenceOperation : u32 {
-        Acquire = 0,
-        Increment = 1,
-    };
-
-    union FenceAction {
-        u32 raw;
-        BitField<0, 1, FenceOperation> op;
-        BitField<8, 24, u32> syncpoint_id;
-
-        [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
-            FenceAction result{};
-            result.op.Assign(op);
-            result.syncpoint_id.Assign(syncpoint_id);
-            return {result.raw};
-        }
-    };
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
     /// Performs any additional setup necessary in order to begin GPU emulation.
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
@@ -338,104 +270,9 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
-protected:
-    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
-
-private:
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessFenceActionMethod();
-    void ProcessWaitForInterruptMethod();
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending);
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
-
-protected:
-    Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
-    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
-    std::unique_ptr<VideoCore::RendererBase> renderer;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-    const bool use_nvdec;
-
 private:
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines = {};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-    /// Shader build notifier
-    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
-    /// When true, we are about to shut down emulation session, so terminate outstanding tasks
-    std::atomic_bool shutting_down{};
-
-    std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
-
-    std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
-
-    std::mutex sync_mutex;
-    std::mutex device_mutex;
-
-    std::condition_variable sync_cv;
-
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
-
-    const bool is_async;
-
-    VideoCommon::GPUThread::ThreadManager gpu_thread;
-    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
+    struct Impl;
+    std::unique_ptr<Impl> impl;
 };
 
-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \
-                  "Field " #field_name " has invalid position")
-
-ASSERT_REG_POSITION(semaphore_address, 0x4);
-ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-ASSERT_REG_POSITION(reference_count, 0x14);
-ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-ASSERT_REG_POSITION(semaphore_release, 0x1B);
-ASSERT_REG_POSITION(fence_value, 0x1C);
-ASSERT_REG_POSITION(fence_action, 0x1D);
-
-ASSERT_REG_POSITION(acquire_mode, 0x100);
-ASSERT_REG_POSITION(acquire_source, 0x101);
-ASSERT_REG_POSITION(acquire_active, 0x102);
-ASSERT_REG_POSITION(acquire_timeout, 0x103);
-ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
 } // namespace Tegra
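The FenceAction union kept in the header packs a fence operation and a syncpoint id into a single 32-bit puller argument: `BitField<0, 1, FenceOperation>` places the operation in bit 0 and `BitField<8, 24, u32>` places the syncpoint id in bits 8-31. A rough sketch of the same packing with plain shifts and masks (illustrative only, not yuzu's BitField implementation; `BuildFenceAction` is a hypothetical helper):

```cpp
#include <cassert>
#include <cstdint>

enum class FenceOperation : std::uint32_t { Acquire = 0, Increment = 1 };

// Pack op into bit 0 and syncpoint_id into bits 8..31, mirroring
// BitField<0, 1, FenceOperation> and BitField<8, 24, u32> in FenceAction.
constexpr std::uint32_t BuildFenceAction(FenceOperation op, std::uint32_t syncpoint_id) {
    return (static_cast<std::uint32_t>(op) & 0x1u) | ((syncpoint_id & 0x00FFFFFFu) << 8);
}

int main() {
    constexpr std::uint32_t raw = BuildFenceAction(FenceOperation::Increment, 5);
    static_assert(raw == ((5u << 8) | 1u), "fields land in the expected bits");
    assert((raw & 0x1u) == 1u); // operation field
    assert((raw >> 8) == 5u);   // syncpoint id field
    return 0;
}
```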