| author | 2021-01-10 22:09:56 -0700 |
|---|---|
| committer | 2021-01-10 22:09:56 -0700 |
| commit | 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch) |
| tree | 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/gpu.h |
| parent | More forgetting... duh (diff) |
| parent | Merge pull request #5229 from Morph1984/fullscreen-opt (diff) |
| download | yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip |
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/gpu.h')
| -rw-r--r-- | src/video_core/gpu.h | 163 |
1 file changed, 92 insertions(+), 71 deletions(-)
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2d15d1c6f..d81e38680 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,14 +13,17 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/cdma_pusher.h"
 #include "video_core/dma_pusher.h"
+#include "video_core/framebuffer_config.h"
+#include "video_core/gpu_thread.h"

 using CacheAddr = std::uintptr_t;
-inline CacheAddr ToCacheAddr(const void* host_ptr) {
+[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
     return reinterpret_cast<CacheAddr>(host_ptr);
 }

-inline u8* FromCacheAddr(CacheAddr cache_addr) {
+[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
     return reinterpret_cast<u8*>(cache_addr);
 }

@@ -100,28 +103,6 @@ enum class DepthFormat : u32 {
 struct CommandListHeader;
 class DebugContext;

-/**
- * Struct describing framebuffer configuration
- */
-struct FramebufferConfig {
-    enum class PixelFormat : u32 {
-        A8B8G8R8_UNORM = 1,
-        RGB565_UNORM = 4,
-        B8G8R8A8_UNORM = 5,
-    };
-
-    VAddr address;
-    u32 offset;
-    u32 width;
-    u32 height;
-    u32 stride;
-    PixelFormat pixel_format;
-
-    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
-    TransformFlags transform_flags;
-    Common::Rectangle<int> crop_rect;
-};
-
 namespace Engines {
 class Fermi2D;
 class Maxwell3D;
@@ -140,7 +121,7 @@ enum class EngineID {

 class MemoryManager;

-class GPU {
+class GPU final {
 public:
     struct MethodCall {
         u32 method{};
@@ -148,17 +129,17 @@
         u32 subchannel{};
         u32 method_count{};

-        bool IsLastCall() const {
+        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+            : method(method_), argument(argument_), subchannel(subchannel_),
+              method_count(method_count_) {}
+
+        [[nodiscard]] bool IsLastCall() const {
             return method_count <= 1;
         }
-
-        MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
-            : method(method), argument(argument), subchannel(subchannel),
-              method_count(method_count) {}
     };

-    explicit GPU(Core::System& system, bool is_async);
-    virtual ~GPU();
+    explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
+    ~GPU();

     /// Binds a renderer to the GPU.
     void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@@ -175,13 +156,13 @@
     /// Synchronizes CPU writes with Host GPU memory.
     void SyncGuestHost();
     /// Signal the ending of command list.
-    virtual void OnCommandListEnd();
+    void OnCommandListEnd();

     /// Request a host GPU memory flush from the CPU.
-    u64 RequestFlush(VAddr addr, std::size_t size);
+    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);

     /// Obtains current flush request fence id.
-    u64 CurrentFlushRequestFence() const {
+    [[nodiscard]] u64 CurrentFlushRequestFence() const {
         return current_flush_fence.load(std::memory_order_relaxed);
     }

@@ -189,68 +170,100 @@
     void TickWork();

     /// Returns a reference to the Maxwell3D GPU engine.
-    Engines::Maxwell3D& Maxwell3D();
+    [[nodiscard]] Engines::Maxwell3D& Maxwell3D();

     /// Returns a const reference to the Maxwell3D GPU engine.
-    const Engines::Maxwell3D& Maxwell3D() const;
+    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const;

     /// Returns a reference to the KeplerCompute GPU engine.
-    Engines::KeplerCompute& KeplerCompute();
+    [[nodiscard]] Engines::KeplerCompute& KeplerCompute();

     /// Returns a reference to the KeplerCompute GPU engine.
-    const Engines::KeplerCompute& KeplerCompute() const;
+    [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;

     /// Returns a reference to the GPU memory manager.
-    Tegra::MemoryManager& MemoryManager();
+    [[nodiscard]] Tegra::MemoryManager& MemoryManager();

     /// Returns a const reference to the GPU memory manager.
-    const Tegra::MemoryManager& MemoryManager() const;
+    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;

     /// Returns a reference to the GPU DMA pusher.
-    Tegra::DmaPusher& DmaPusher();
+    [[nodiscard]] Tegra::DmaPusher& DmaPusher();
+
+    /// Returns a const reference to the GPU DMA pusher.
+    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;
+
+    /// Returns a reference to the GPU CDMA pusher.
+    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();
+
+    /// Returns a const reference to the GPU CDMA pusher.
+    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;

-    VideoCore::RendererBase& Renderer() {
+    /// Returns a reference to the underlying renderer.
+    [[nodiscard]] VideoCore::RendererBase& Renderer() {
         return *renderer;
     }

-    const VideoCore::RendererBase& Renderer() const {
+    /// Returns a const reference to the underlying renderer.
+    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
         return *renderer;
     }

-    VideoCore::ShaderNotify& ShaderNotify() {
+    /// Returns a reference to the shader notifier.
+    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
         return *shader_notify;
     }

-    const VideoCore::ShaderNotify& ShaderNotify() const {
+    /// Returns a const reference to the shader notifier.
+    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
         return *shader_notify;
     }

     // Waits for the GPU to finish working
-    virtual void WaitIdle() const = 0;
+    void WaitIdle() const;

     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value);

     void IncrementSyncPoint(u32 syncpoint_id);

-    u32 GetSyncpointValue(u32 syncpoint_id) const;
+    [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;

     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);

-    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);

-    u64 GetTicks() const;
+    [[nodiscard]] u64 GetTicks() const;

-    std::unique_lock<std::mutex> LockSync() {
+    [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
         return std::unique_lock{sync_mutex};
     }

-    bool IsAsync() const {
+    [[nodiscard]] bool IsAsync() const {
         return is_async;
     }

-    /// Returns a const reference to the GPU DMA pusher.
-    const Tegra::DmaPusher& DmaPusher() const;
+    [[nodiscard]] bool UseNvdec() const {
+        return use_nvdec;
+    }
+
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };

     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
@@ -262,7 +275,7 @@
                 u32 address_high;
                 u32 address_low;

-                GPUVAddr SemaphoreAddress() const {
+                [[nodiscard]] GPUVAddr SemaphoreAddress() const {
                     return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                  address_low);
                 }
@@ -280,10 +293,7 @@
             u32 semaphore_acquire;
             u32 semaphore_release;
             u32 fence_value;
-            union {
-                BitField<4, 4, u32> operation;
-                BitField<8, 8, u32> id;
-            } fence_action;
+            FenceAction fence_action;
             INSERT_UNION_PADDING_WORDS(0xE2);

             // Puller state
@@ -300,34 +310,39 @@
     /// Performs any additional setup necessary in order to begin GPU emulation.
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
-    virtual void Start() = 0;
+    void Start();

     /// Obtain the CPU Context
-    virtual void ObtainContext() = 0;
+    void ObtainContext();

     /// Release the CPU Context
-    virtual void ReleaseContext() = 0;
+    void ReleaseContext();

     /// Push GPU command entries to be processed
-    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+    void PushGPUEntries(Tegra::CommandList&& entries);
+
+    /// Push GPU command buffer entries to be processed
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);

     /// Swap buffers (render frame)
-    virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
+    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    void FlushRegion(VAddr addr, u64 size);

     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    void InvalidateRegion(VAddr addr, u64 size);

     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);

 protected:
-    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
+    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;

 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
@@ -343,13 +358,15 @@
                                u32 methods_pending);

     /// Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(u32 method);
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);

 protected:
     Core::System& system;
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
     std::unique_ptr<VideoCore::RendererBase> renderer;
+    const bool use_nvdec;

 private:
     /// Mapping of command subchannels to their bound engine ids
@@ -372,12 +389,13 @@
     std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;

     std::mutex sync_mutex;
+    std::mutex device_mutex;

     std::condition_variable sync_cv;

     struct FlushRequest {
-        FlushRequest(u64 fence, VAddr addr, std::size_t size)
-            : fence{fence}, addr{addr}, size{size} {}
+        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
+            : fence{fence_}, addr{addr_}, size{size_} {}
         u64 fence;
         VAddr addr;
         std::size_t size;
@@ -389,6 +407,9 @@
     std::mutex flush_request_mutex;

     const bool is_async;
+
+    VideoCommon::GPUThread::ThreadManager gpu_thread;
+    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 };

 #define ASSERT_REG_POSITION(field_name, position) \
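
Note (not part of the commit): the newly added `FenceAction` union encodes its fields with `Common::BitField`, with bit 0 selecting the operation and bits 8-31 carrying the syncpoint id; `FenceAction::Build` packs those into a `CommandHeader`. The standalone sketch below shows the same packing with plain shifts and masks; the `BuildFenceAction` helper and the values used are illustrative assumptions, not yuzu code.

```cpp
// Illustrative sketch of the FenceAction bit layout declared in the diff above.
#include <cassert>
#include <cstdint>

enum class FenceOperation : std::uint32_t { Acquire = 0, Increment = 1 };

// Bit 0: operation; bits 8..31: 24-bit syncpoint id.
constexpr std::uint32_t BuildFenceAction(FenceOperation op, std::uint32_t syncpoint_id) {
    return (static_cast<std::uint32_t>(op) & 0x1U) | ((syncpoint_id & 0xFFFFFFU) << 8);
}

int main() {
    // Increment syncpoint 5.
    constexpr std::uint32_t raw = BuildFenceAction(FenceOperation::Increment, 5);
    static_assert(raw == ((5U << 8) | 1U), "unexpected bit layout");
    assert((raw & 0x1U) == 1U); // operation = Increment
    assert((raw >> 8) == 5U);   // syncpoint_id = 5
    return 0;
}
```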