diff options
| author | 2021-10-01 00:57:02 -0400 | |
|---|---|---|
| committer | 2021-10-03 00:35:57 -0400 | |
| commit | 427bf76e621cf0833bc1bbec7d8be891297223e7 (patch) | |
| tree | 5cc146d21972e1a7c424219482ef3393787afe6a /src/video_core/gpu.h | |
| parent | Merge pull request #7061 from ameerj/dma-buffer-misc (diff) | |
| download | yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.tar.gz yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.tar.xz yuzu-427bf76e621cf0833bc1bbec7d8be891297223e7.zip | |
gpu: Migrate implementation to the cpp file
Diffstat (limited to 'src/video_core/gpu.h')
| -rw-r--r-- | src/video_core/gpu.h | 217 |
1 file changed, 27 insertions, 190 deletions
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e6a02a71b..39b304823 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -4,19 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <condition_variable> | ||
| 10 | #include <list> | ||
| 11 | #include <memory> | 7 | #include <memory> |
| 12 | #include <mutex> | 8 | #include <mutex> |
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | #include "core/hle/service/nvflinger/buffer_queue.h" | ||
| 16 | #include "video_core/cdma_pusher.h" | 12 | #include "video_core/cdma_pusher.h" |
| 17 | #include "video_core/dma_pusher.h" | ||
| 18 | #include "video_core/framebuffer_config.h" | 13 | #include "video_core/framebuffer_config.h" |
| 19 | #include "video_core/gpu_thread.h" | ||
| 20 | 14 | ||
| 21 | using CacheAddr = std::uintptr_t; | 15 | using CacheAddr = std::uintptr_t; |
| 22 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { | 16 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { |
| @@ -40,6 +34,9 @@ class ShaderNotify; | |||
| 40 | } // namespace VideoCore | 34 | } // namespace VideoCore |
| 41 | 35 | ||
| 42 | namespace Tegra { | 36 | namespace Tegra { |
| 37 | class DmaPusher; | ||
| 38 | class CDmaPusher; | ||
| 39 | struct CommandList; | ||
| 43 | 40 | ||
| 44 | enum class RenderTargetFormat : u32 { | 41 | enum class RenderTargetFormat : u32 { |
| 45 | NONE = 0x0, | 42 | NONE = 0x0, |
| @@ -138,7 +135,18 @@ public: | |||
| 138 | } | 135 | } |
| 139 | }; | 136 | }; |
| 140 | 137 | ||
| 141 | explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); | 138 | enum class FenceOperation : u32 { |
| 139 | Acquire = 0, | ||
| 140 | Increment = 1, | ||
| 141 | }; | ||
| 142 | |||
| 143 | union FenceAction { | ||
| 144 | u32 raw; | ||
| 145 | BitField<0, 1, FenceOperation> op; | ||
| 146 | BitField<8, 24, u32> syncpoint_id; | ||
| 147 | }; | ||
| 148 | |||
| 149 | explicit GPU(Core::System& system, bool is_async, bool use_nvdec); | ||
| 142 | ~GPU(); | 150 | ~GPU(); |
| 143 | 151 | ||
| 144 | /// Binds a renderer to the GPU. | 152 | /// Binds a renderer to the GPU. |
| @@ -162,9 +170,7 @@ public: | |||
| 162 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 170 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| 163 | 171 | ||
| 164 | /// Obtains current flush request fence id. | 172 | /// Obtains current flush request fence id. |
| 165 | [[nodiscard]] u64 CurrentFlushRequestFence() const { | 173 | [[nodiscard]] u64 CurrentFlushRequestFence() const; |
| 166 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 167 | } | ||
| 168 | 174 | ||
| 169 | /// Tick pending requests within the GPU. | 175 | /// Tick pending requests within the GPU. |
| 170 | void TickWork(); | 176 | void TickWork(); |
| @@ -200,24 +206,16 @@ public: | |||
| 200 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; | 206 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; |
| 201 | 207 | ||
| 202 | /// Returns a reference to the underlying renderer. | 208 | /// Returns a reference to the underlying renderer. |
| 203 | [[nodiscard]] VideoCore::RendererBase& Renderer() { | 209 | [[nodiscard]] VideoCore::RendererBase& Renderer(); |
| 204 | return *renderer; | ||
| 205 | } | ||
| 206 | 210 | ||
| 207 | /// Returns a const reference to the underlying renderer. | 211 | /// Returns a const reference to the underlying renderer. |
| 208 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const { | 212 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const; |
| 209 | return *renderer; | ||
| 210 | } | ||
| 211 | 213 | ||
| 212 | /// Returns a reference to the shader notifier. | 214 | /// Returns a reference to the shader notifier. |
| 213 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { | 215 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify(); |
| 214 | return *shader_notify; | ||
| 215 | } | ||
| 216 | 216 | ||
| 217 | /// Returns a const reference to the shader notifier. | 217 | /// Returns a const reference to the shader notifier. |
| 218 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { | 218 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; |
| 219 | return *shader_notify; | ||
| 220 | } | ||
| 221 | 219 | ||
| 222 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 220 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 223 | void WaitFence(u32 syncpoint_id, u32 value); | 221 | void WaitFence(u32 syncpoint_id, u32 value); |
| @@ -232,80 +230,14 @@ public: | |||
| 232 | 230 | ||
| 233 | [[nodiscard]] u64 GetTicks() const; | 231 | [[nodiscard]] u64 GetTicks() const; |
| 234 | 232 | ||
| 235 | [[nodiscard]] std::unique_lock<std::mutex> LockSync() { | 233 | [[nodiscard]] std::unique_lock<std::mutex> LockSync(); |
| 236 | return std::unique_lock{sync_mutex}; | ||
| 237 | } | ||
| 238 | 234 | ||
| 239 | [[nodiscard]] bool IsAsync() const { | 235 | [[nodiscard]] bool IsAsync() const; |
| 240 | return is_async; | ||
| 241 | } | ||
| 242 | 236 | ||
| 243 | [[nodiscard]] bool UseNvdec() const { | 237 | [[nodiscard]] bool UseNvdec() const; |
| 244 | return use_nvdec; | ||
| 245 | } | ||
| 246 | 238 | ||
| 247 | void RendererFrameEndNotify(); | 239 | void RendererFrameEndNotify(); |
| 248 | 240 | ||
| 249 | enum class FenceOperation : u32 { | ||
| 250 | Acquire = 0, | ||
| 251 | Increment = 1, | ||
| 252 | }; | ||
| 253 | |||
| 254 | union FenceAction { | ||
| 255 | u32 raw; | ||
| 256 | BitField<0, 1, FenceOperation> op; | ||
| 257 | BitField<8, 24, u32> syncpoint_id; | ||
| 258 | |||
| 259 | [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||
| 260 | FenceAction result{}; | ||
| 261 | result.op.Assign(op); | ||
| 262 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 263 | return {result.raw}; | ||
| 264 | } | ||
| 265 | }; | ||
| 266 | |||
| 267 | struct Regs { | ||
| 268 | static constexpr size_t NUM_REGS = 0x40; | ||
| 269 | |||
| 270 | union { | ||
| 271 | struct { | ||
| 272 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 273 | struct { | ||
| 274 | u32 address_high; | ||
| 275 | u32 address_low; | ||
| 276 | |||
| 277 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 278 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 279 | address_low); | ||
| 280 | } | ||
| 281 | } semaphore_address; | ||
| 282 | |||
| 283 | u32 semaphore_sequence; | ||
| 284 | u32 semaphore_trigger; | ||
| 285 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 286 | |||
| 287 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 288 | // access | ||
| 289 | u32 reference_count; | ||
| 290 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 291 | |||
| 292 | u32 semaphore_acquire; | ||
| 293 | u32 semaphore_release; | ||
| 294 | u32 fence_value; | ||
| 295 | FenceAction fence_action; | ||
| 296 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 297 | |||
| 298 | // Puller state | ||
| 299 | u32 acquire_mode; | ||
| 300 | u32 acquire_source; | ||
| 301 | u32 acquire_active; | ||
| 302 | u32 acquire_timeout; | ||
| 303 | u32 acquire_value; | ||
| 304 | }; | ||
| 305 | std::array<u32, NUM_REGS> reg_array; | ||
| 306 | }; | ||
| 307 | } regs{}; | ||
| 308 | |||
| 309 | /// Performs any additional setup necessary in order to begin GPU emulation. | 241 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 310 | /// This can be used to launch any necessary threads and register any necessary | 242 | /// This can be used to launch any necessary threads and register any necessary |
| 311 | /// core timing events. | 243 | /// core timing events. |
| @@ -338,104 +270,9 @@ public: | |||
| 338 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 270 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 339 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 271 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 340 | 272 | ||
| 341 | protected: | ||
| 342 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; | ||
| 343 | |||
| 344 | private: | ||
| 345 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 346 | void ProcessFenceActionMethod(); | ||
| 347 | void ProcessWaitForInterruptMethod(); | ||
| 348 | void ProcessSemaphoreTriggerMethod(); | ||
| 349 | void ProcessSemaphoreRelease(); | ||
| 350 | void ProcessSemaphoreAcquire(); | ||
| 351 | |||
| 352 | /// Calls a GPU puller method. | ||
| 353 | void CallPullerMethod(const MethodCall& method_call); | ||
| 354 | |||
| 355 | /// Calls a GPU engine method. | ||
| 356 | void CallEngineMethod(const MethodCall& method_call); | ||
| 357 | |||
| 358 | /// Calls a GPU engine multivalue method. | ||
| 359 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 360 | u32 methods_pending); | ||
| 361 | |||
| 362 | /// Determines where the method should be executed. | ||
| 363 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | ||
| 364 | |||
| 365 | protected: | ||
| 366 | Core::System& system; | ||
| 367 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | ||
| 368 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||
| 369 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||
| 370 | std::unique_ptr<VideoCore::RendererBase> renderer; | ||
| 371 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 372 | const bool use_nvdec; | ||
| 373 | |||
| 374 | private: | 273 | private: |
| 375 | /// Mapping of command subchannels to their bound engine ids | 274 | struct Impl; |
| 376 | std::array<EngineID, 8> bound_engines = {}; | 275 | std::unique_ptr<Impl> impl; |
| 377 | /// 3D engine | ||
| 378 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||
| 379 | /// 2D engine | ||
| 380 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||
| 381 | /// Compute engine | ||
| 382 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||
| 383 | /// DMA engine | ||
| 384 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||
| 385 | /// Inline memory engine | ||
| 386 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||
| 387 | /// Shader build notifier | ||
| 388 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||
| 389 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks | ||
| 390 | std::atomic_bool shutting_down{}; | ||
| 391 | |||
| 392 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 393 | |||
| 394 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 395 | |||
| 396 | std::mutex sync_mutex; | ||
| 397 | std::mutex device_mutex; | ||
| 398 | |||
| 399 | std::condition_variable sync_cv; | ||
| 400 | |||
| 401 | struct FlushRequest { | ||
| 402 | explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | ||
| 403 | : fence{fence_}, addr{addr_}, size{size_} {} | ||
| 404 | u64 fence; | ||
| 405 | VAddr addr; | ||
| 406 | std::size_t size; | ||
| 407 | }; | ||
| 408 | |||
| 409 | std::list<FlushRequest> flush_requests; | ||
| 410 | std::atomic<u64> current_flush_fence{}; | ||
| 411 | u64 last_flush_fence{}; | ||
| 412 | std::mutex flush_request_mutex; | ||
| 413 | |||
| 414 | const bool is_async; | ||
| 415 | |||
| 416 | VideoCommon::GPUThread::ThreadManager gpu_thread; | ||
| 417 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 418 | }; | 276 | }; |
| 419 | 277 | ||
| 420 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 421 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 422 | "Field " #field_name " has invalid position") | ||
| 423 | |||
| 424 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 425 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 426 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 427 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 428 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 429 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 430 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 431 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 432 | |||
| 433 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 434 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 435 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 436 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 437 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 438 | |||
| 439 | #undef ASSERT_REG_POSITION | ||
| 440 | |||
| 441 | } // namespace Tegra | 278 | } // namespace Tegra |