summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp5
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp13
-rw-r--r--src/video_core/cdma_pusher.cpp1
-rw-r--r--src/video_core/cdma_pusher.h2
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/framebuffer_config.h20
-rw-r--r--src/video_core/gpu.cpp1220
-rw-r--r--src/video_core/gpu.h217
-rw-r--r--src/video_core/gpu_thread.h3
-rw-r--r--src/video_core/query_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/shader_environment.cpp1
-rw-r--r--src/video_core/shader_environment.h4
-rw-r--r--src/video_core/texture_cache/image_view_info.cpp1
-rw-r--r--src/video_core/texture_cache/texture_cache.h5
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h8
19 files changed, 875 insertions, 632 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 789000294..4ee8c5733 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -48,8 +48,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
48 addr, offset, width, height, stride, format); 48 addr, offset, width, height, stride, format);
49 49
50 const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); 50 const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
51 const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, 51 const auto transform_flags = static_cast<Tegra::FramebufferConfig::TransformFlags>(transform);
52 stride, pixel_format, transform, crop_rect}; 52 const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
53 stride, pixel_format, transform_flags, crop_rect};
53 54
54 system.GetPerfStats().EndSystemFrame(); 55 system.GetPerfStats().EndSystemFrame();
55 system.GPU().SwapBuffers(&framebuffer); 56 system.GPU().SwapBuffers(&framebuffer);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index c0a380088..54ac105d5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,6 +13,14 @@
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
16namespace {
17Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
18 Tegra::GPU::FenceAction result{};
19 result.op.Assign(op);
20 result.syncpoint_id.Assign(syncpoint_id);
21 return {result.raw};
22}
23} // namespace
16 24
17nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_, 25nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
18 SyncpointManager& syncpoint_manager_) 26 SyncpointManager& syncpoint_manager_)
@@ -187,7 +195,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
187 {fence.value}, 195 {fence.value},
188 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 196 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
189 Tegra::SubmissionMode::Increasing), 197 Tegra::SubmissionMode::Increasing),
190 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), 198 BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
191 }; 199 };
192} 200}
193 201
@@ -200,8 +208,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence,
200 for (u32 count = 0; count < add_increment; ++count) { 208 for (u32 count = 0; count < add_increment; ++count) {
201 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 209 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
202 Tegra::SubmissionMode::Increasing)); 210 Tegra::SubmissionMode::Increasing));
203 result.emplace_back( 211 result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
204 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
205 } 212 }
206 213
207 return result; 214 return result;
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 8b86ad050..a8c4b4415 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -24,6 +24,7 @@
24#include "command_classes/vic.h" 24#include "command_classes/vic.h"
25#include "video_core/cdma_pusher.h" 25#include "video_core/cdma_pusher.h"
26#include "video_core/command_classes/nvdec_common.h" 26#include "video_core/command_classes/nvdec_common.h"
27#include "video_core/command_classes/sync_manager.h"
27#include "video_core/engines/maxwell_3d.h" 28#include "video_core/engines/maxwell_3d.h"
28#include "video_core/gpu.h" 29#include "video_core/gpu.h"
29#include "video_core/memory_manager.h" 30#include "video_core/memory_manager.h"
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 1bada44dd..87b49d6ea 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -9,13 +9,13 @@
9 9
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/command_classes/sync_manager.h"
13 12
14namespace Tegra { 13namespace Tegra {
15 14
16class GPU; 15class GPU;
17class Host1x; 16class Host1x;
18class Nvdec; 17class Nvdec;
18class SyncptIncrManager;
19class Vic; 19class Vic;
20 20
21enum class ChSubmissionMode : u32 { 21enum class ChSubmissionMode : u32 {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7f4ca6282..f22342dfb 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <cmath>
9#include <limits> 10#include <limits>
10#include <optional> 11#include <optional>
11#include <type_traits> 12#include <type_traits>
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index b86c3a757..b1d455e30 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Tegra { 7#include "common/common_types.h"
8#include "common/math_util.h"
8 9
10namespace Tegra {
9/** 11/**
10 * Struct describing framebuffer configuration 12 * Struct describing framebuffer configuration
11 */ 13 */
@@ -16,6 +18,21 @@ struct FramebufferConfig {
16 B8G8R8A8_UNORM = 5, 18 B8G8R8A8_UNORM = 5,
17 }; 19 };
18 20
21 enum class TransformFlags : u32 {
22 /// No transform flags are set
23 Unset = 0x00,
24 /// Flip source image horizontally (around the vertical axis)
25 FlipH = 0x01,
26 /// Flip source image vertically (around the horizontal axis)
27 FlipV = 0x02,
28 /// Rotate source image 90 degrees clockwise
29 Rotate90 = 0x04,
30 /// Rotate source image 180 degrees
31 Rotate180 = 0x03,
32 /// Rotate source image 270 degrees clockwise
33 Rotate270 = 0x07,
34 };
35
19 VAddr address{}; 36 VAddr address{};
20 u32 offset{}; 37 u32 offset{};
21 u32 width{}; 38 u32 width{};
@@ -23,7 +40,6 @@ struct FramebufferConfig {
23 u32 stride{}; 40 u32 stride{};
24 PixelFormat pixel_format{}; 41 PixelFormat pixel_format{};
25 42
26 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
27 TransformFlags transform_flags{}; 43 TransformFlags transform_flags{};
28 Common::Rectangle<int> crop_rect; 44 Common::Rectangle<int> crop_rect;
29}; 45};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2ae3639b5..520675873 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,540 +2,920 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <atomic>
5#include <chrono> 7#include <chrono>
8#include <condition_variable>
9#include <list>
10#include <memory>
6 11
7#include "common/assert.h" 12#include "common/assert.h"
8#include "common/microprofile.h" 13#include "common/microprofile.h"
9#include "common/settings.h" 14#include "common/settings.h"
10#include "core/core.h" 15#include "core/core.h"
11#include "core/core_timing.h" 16#include "core/core_timing.h"
12#include "core/core_timing_util.h"
13#include "core/frontend/emu_window.h" 17#include "core/frontend/emu_window.h"
14#include "core/hardware_interrupt_manager.h" 18#include "core/hardware_interrupt_manager.h"
15#include "core/memory.h" 19#include "core/hle/service/nvdrv/nvdata.h"
20#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "core/perf_stats.h" 21#include "core/perf_stats.h"
22#include "video_core/cdma_pusher.h"
23#include "video_core/dma_pusher.h"
17#include "video_core/engines/fermi_2d.h" 24#include "video_core/engines/fermi_2d.h"
18#include "video_core/engines/kepler_compute.h" 25#include "video_core/engines/kepler_compute.h"
19#include "video_core/engines/kepler_memory.h" 26#include "video_core/engines/kepler_memory.h"
20#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/maxwell_dma.h" 28#include "video_core/engines/maxwell_dma.h"
22#include "video_core/gpu.h" 29#include "video_core/gpu.h"
30#include "video_core/gpu_thread.h"
23#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
24#include "video_core/renderer_base.h" 32#include "video_core/renderer_base.h"
25#include "video_core/shader_notify.h" 33#include "video_core/shader_notify.h"
26#include "video_core/video_core.h"
27 34
28namespace Tegra { 35namespace Tegra {
29 36
30MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 37MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
31 38
32GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) 39struct GPU::Impl {
33 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
34 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, 41 : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 42 system)},
36 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 43 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 44 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 45 fermi_2d{std::make_unique<Engines::Fermi2D>()},
39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 46 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 47 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
41 gpu_thread{system_, is_async_} {} 48 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
49 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
50 gpu_thread{system_, is_async_} {}
51
52 ~Impl() = default;
53
54 /// Binds a renderer to the GPU.
55 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
56 renderer = std::move(renderer_);
57 rasterizer = renderer->ReadRasterizer();
58
59 memory_manager->BindRasterizer(rasterizer);
60 maxwell_3d->BindRasterizer(rasterizer);
61 fermi_2d->BindRasterizer(rasterizer);
62 kepler_compute->BindRasterizer(rasterizer);
63 maxwell_dma->BindRasterizer(rasterizer);
64 }
65
66 /// Calls a GPU method.
67 void CallMethod(const GPU::MethodCall& method_call) {
68 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
69 method_call.subchannel);
70
71 ASSERT(method_call.subchannel < bound_engines.size());
72
73 if (ExecuteMethodOnEngine(method_call.method)) {
74 CallEngineMethod(method_call);
75 } else {
76 CallPullerMethod(method_call);
77 }
78 }
79
80 /// Calls a GPU multivalue method.
81 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
82 u32 methods_pending) {
83 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
84
85 ASSERT(subchannel < bound_engines.size());
86
87 if (ExecuteMethodOnEngine(method)) {
88 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
89 } else {
90 for (std::size_t i = 0; i < amount; i++) {
91 CallPullerMethod(GPU::MethodCall{
92 method,
93 base_start[i],
94 subchannel,
95 methods_pending - static_cast<u32>(i),
96 });
97 }
98 }
99 }
100
101 /// Flush all current written commands into the host GPU for execution.
102 void FlushCommands() {
103 rasterizer->FlushCommands();
104 }
105
106 /// Synchronizes CPU writes with Host GPU memory.
107 void SyncGuestHost() {
108 rasterizer->SyncGuestHost();
109 }
110
111 /// Signal the ending of command list.
112 void OnCommandListEnd() {
113 if (is_async) {
114 // This command only applies to asynchronous GPU mode
115 gpu_thread.OnCommandListEnd();
116 }
117 }
118
119 /// Request a host GPU memory flush from the CPU.
120 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
121 std::unique_lock lck{flush_request_mutex};
122 const u64 fence = ++last_flush_fence;
123 flush_requests.emplace_back(fence, addr, size);
124 return fence;
125 }
126
127 /// Obtains current flush request fence id.
128 [[nodiscard]] u64 CurrentFlushRequestFence() const {
129 return current_flush_fence.load(std::memory_order_relaxed);
130 }
131
132 /// Tick pending requests within the GPU.
133 void TickWork() {
134 std::unique_lock lck{flush_request_mutex};
135 while (!flush_requests.empty()) {
136 auto& request = flush_requests.front();
137 const u64 fence = request.fence;
138 const VAddr addr = request.addr;
139 const std::size_t size = request.size;
140 flush_requests.pop_front();
141 flush_request_mutex.unlock();
142 rasterizer->FlushRegion(addr, size);
143 current_flush_fence.store(fence);
144 flush_request_mutex.lock();
145 }
146 }
147
148 /// Returns a reference to the Maxwell3D GPU engine.
149 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
150 return *maxwell_3d;
151 }
152
153 /// Returns a const reference to the Maxwell3D GPU engine.
154 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
155 return *maxwell_3d;
156 }
157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
160 return *kepler_compute;
161 }
162
163 /// Returns a reference to the KeplerCompute GPU engine.
164 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
165 return *kepler_compute;
166 }
167
168 /// Returns a reference to the GPU memory manager.
169 [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
170 return *memory_manager;
171 }
172
173 /// Returns a const reference to the GPU memory manager.
174 [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
175 return *memory_manager;
176 }
177
178 /// Returns a reference to the GPU DMA pusher.
179 [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
180 return *dma_pusher;
181 }
182
183 /// Returns a const reference to the GPU DMA pusher.
184 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
185 return *dma_pusher;
186 }
187
188 /// Returns a reference to the GPU CDMA pusher.
189 [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
190 return *cdma_pusher;
191 }
192
193 /// Returns a const reference to the GPU CDMA pusher.
194 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
195 return *cdma_pusher;
196 }
197
198 /// Returns a reference to the underlying renderer.
199 [[nodiscard]] VideoCore::RendererBase& Renderer() {
200 return *renderer;
201 }
202
203 /// Returns a const reference to the underlying renderer.
204 [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
205 return *renderer;
206 }
207
208 /// Returns a reference to the shader notifier.
209 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
210 return *shader_notify;
211 }
212
213 /// Returns a const reference to the shader notifier.
214 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
215 return *shader_notify;
216 }
217
218 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
219 void WaitFence(u32 syncpoint_id, u32 value) {
220 // Synced GPU, is always in sync
221 if (!is_async) {
222 return;
223 }
224 if (syncpoint_id == UINT32_MAX) {
225 // TODO: Research what this does.
226 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
227 return;
228 }
229 MICROPROFILE_SCOPE(GPU_wait);
230 std::unique_lock lock{sync_mutex};
231 sync_cv.wait(lock, [=, this] {
232 if (shutting_down.load(std::memory_order_relaxed)) {
233 // We're shutting down, ensure no threads continue to wait for the next syncpoint
234 return true;
235 }
236 return syncpoints.at(syncpoint_id).load() >= value;
237 });
238 }
239
240 void IncrementSyncPoint(u32 syncpoint_id) {
241 auto& syncpoint = syncpoints.at(syncpoint_id);
242 syncpoint++;
243 std::lock_guard lock{sync_mutex};
244 sync_cv.notify_all();
245 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
246 if (!interrupt.empty()) {
247 u32 value = syncpoint.load();
248 auto it = interrupt.begin();
249 while (it != interrupt.end()) {
250 if (value >= *it) {
251 TriggerCpuInterrupt(syncpoint_id, *it);
252 it = interrupt.erase(it);
253 continue;
254 }
255 it++;
256 }
257 }
258 }
259
260 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
261 return syncpoints.at(syncpoint_id).load();
262 }
263
264 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
265 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
266 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
267 [value](u32 in_value) { return in_value == value; });
268 if (contains) {
269 return;
270 }
271 interrupt.emplace_back(value);
272 }
273
274 [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
275 std::lock_guard lock{sync_mutex};
276 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
277 const auto iter =
278 std::find_if(interrupt.begin(), interrupt.end(),
279 [value](u32 interrupt_value) { return value == interrupt_value; });
280
281 if (iter == interrupt.end()) {
282 return false;
283 }
284 interrupt.erase(iter);
285 return true;
286 }
287
288 [[nodiscard]] u64 GetTicks() const {
289 // This values were reversed engineered by fincs from NVN
290 // The gpu clock is reported in units of 385/625 nanoseconds
291 constexpr u64 gpu_ticks_num = 384;
292 constexpr u64 gpu_ticks_den = 625;
293
294 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
295 if (Settings::values.use_fast_gpu_time.GetValue()) {
296 nanoseconds /= 256;
297 }
298 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
299 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
300 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
301 }
302
303 [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
304 return std::unique_lock{sync_mutex};
305 }
306
307 [[nodiscard]] bool IsAsync() const {
308 return is_async;
309 }
310
311 [[nodiscard]] bool UseNvdec() const {
312 return use_nvdec;
313 }
314
315 void RendererFrameEndNotify() {
316 system.GetPerfStats().EndGameFrame();
317 }
318
319 /// Performs any additional setup necessary in order to begin GPU emulation.
320 /// This can be used to launch any necessary threads and register any necessary
321 /// core timing events.
322 void Start() {
323 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
324 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
325 cpu_context->MakeCurrent();
326 }
327
328 /// Obtain the CPU Context
329 void ObtainContext() {
330 cpu_context->MakeCurrent();
331 }
332
333 /// Release the CPU Context
334 void ReleaseContext() {
335 cpu_context->DoneCurrent();
336 }
337
338 /// Push GPU command entries to be processed
339 void PushGPUEntries(Tegra::CommandList&& entries) {
340 gpu_thread.SubmitList(std::move(entries));
341 }
342
343 /// Push GPU command buffer entries to be processed
344 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
345 if (!use_nvdec) {
346 return;
347 }
348
349 if (!cdma_pusher) {
350 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
351 }
352
353 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
354 // TODO(ameerj): RE proper async nvdec operation
355 // gpu_thread.SubmitCommandBuffer(std::move(entries));
356
357 cdma_pusher->ProcessEntries(std::move(entries));
358 }
359
360 /// Frees the CDMAPusher instance to free up resources
361 void ClearCdmaInstance() {
362 cdma_pusher.reset();
363 }
364
365 /// Swap buffers (render frame)
366 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
367 gpu_thread.SwapBuffers(framebuffer);
368 }
369
370 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
371 void FlushRegion(VAddr addr, u64 size) {
372 gpu_thread.FlushRegion(addr, size);
373 }
374
375 /// Notify rasterizer that any caches of the specified region should be invalidated
376 void InvalidateRegion(VAddr addr, u64 size) {
377 gpu_thread.InvalidateRegion(addr, size);
378 }
379
380 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
381 void FlushAndInvalidateRegion(VAddr addr, u64 size) {
382 gpu_thread.FlushAndInvalidateRegion(addr, size);
383 }
384
385 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
386 auto& interrupt_manager = system.InterruptManager();
387 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
388 }
389
390 void ProcessBindMethod(const GPU::MethodCall& method_call) {
391 // Bind the current subchannel to the desired engine id.
392 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
393 method_call.argument);
394 const auto engine_id = static_cast<EngineID>(method_call.argument);
395 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
396 switch (engine_id) {
397 case EngineID::FERMI_TWOD_A:
398 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
399 break;
400 case EngineID::MAXWELL_B:
401 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
402 break;
403 case EngineID::KEPLER_COMPUTE_B:
404 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
405 break;
406 case EngineID::MAXWELL_DMA_COPY_A:
407 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
408 break;
409 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
410 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
411 break;
412 default:
413 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
414 }
415 }
416
417 void ProcessFenceActionMethod() {
418 switch (regs.fence_action.op) {
419 case GPU::FenceOperation::Acquire:
420 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
421 break;
422 case GPU::FenceOperation::Increment:
423 IncrementSyncPoint(regs.fence_action.syncpoint_id);
424 break;
425 default:
426 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
427 }
428 }
429
430 void ProcessWaitForInterruptMethod() {
431 // TODO(bunnei) ImplementMe
432 LOG_WARNING(HW_GPU, "(STUBBED) called");
433 }
434
435 void ProcessSemaphoreTriggerMethod() {
436 const auto semaphoreOperationMask = 0xF;
437 const auto op =
438 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
439 if (op == GpuSemaphoreOperation::WriteLong) {
440 struct Block {
441 u32 sequence;
442 u32 zeros = 0;
443 u64 timestamp;
444 };
445
446 Block block{};
447 block.sequence = regs.semaphore_sequence;
448 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
449 // CoreTiming
450 block.timestamp = GetTicks();
451 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
452 sizeof(block));
453 } else {
454 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
455 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
456 (op == GpuSemaphoreOperation::AcquireGequal &&
457 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
458 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
459 // Nothing to do in this case
460 } else {
461 regs.acquire_source = true;
462 regs.acquire_value = regs.semaphore_sequence;
463 if (op == GpuSemaphoreOperation::AcquireEqual) {
464 regs.acquire_active = true;
465 regs.acquire_mode = false;
466 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
467 regs.acquire_active = true;
468 regs.acquire_mode = true;
469 } else if (op == GpuSemaphoreOperation::AcquireMask) {
470 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
471 // semaphore_sequence, gives a non-0 result
472 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
473 } else {
474 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
475 }
476 }
477 }
478 }
479
480 void ProcessSemaphoreRelease() {
481 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
482 regs.semaphore_release);
483 }
484
485 void ProcessSemaphoreAcquire() {
486 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
487 const auto value = regs.semaphore_acquire;
488 if (word != value) {
489 regs.acquire_active = true;
490 regs.acquire_value = value;
491 // TODO(kemathe73) figure out how to do the acquire_timeout
492 regs.acquire_mode = false;
493 regs.acquire_source = false;
494 }
495 }
496
497 /// Calls a GPU puller method.
498 void CallPullerMethod(const GPU::MethodCall& method_call) {
499 regs.reg_array[method_call.method] = method_call.argument;
500 const auto method = static_cast<BufferMethods>(method_call.method);
501
502 switch (method) {
503 case BufferMethods::BindObject: {
504 ProcessBindMethod(method_call);
505 break;
506 }
507 case BufferMethods::Nop:
508 case BufferMethods::SemaphoreAddressHigh:
509 case BufferMethods::SemaphoreAddressLow:
510 case BufferMethods::SemaphoreSequence:
511 case BufferMethods::UnkCacheFlush:
512 case BufferMethods::WrcacheFlush:
513 case BufferMethods::FenceValue:
514 break;
515 case BufferMethods::RefCnt:
516 rasterizer->SignalReference();
517 break;
518 case BufferMethods::FenceAction:
519 ProcessFenceActionMethod();
520 break;
521 case BufferMethods::WaitForInterrupt:
522 ProcessWaitForInterruptMethod();
523 break;
524 case BufferMethods::SemaphoreTrigger: {
525 ProcessSemaphoreTriggerMethod();
526 break;
527 }
528 case BufferMethods::NotifyIntr: {
529 // TODO(Kmather73): Research and implement this method.
530 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
531 break;
532 }
533 case BufferMethods::Unk28: {
534 // TODO(Kmather73): Research and implement this method.
535 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
536 break;
537 }
538 case BufferMethods::SemaphoreAcquire: {
539 ProcessSemaphoreAcquire();
540 break;
541 }
542 case BufferMethods::SemaphoreRelease: {
543 ProcessSemaphoreRelease();
544 break;
545 }
546 case BufferMethods::Yield: {
547 // TODO(Kmather73): Research and implement this method.
548 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
549 break;
550 }
551 default:
552 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
553 break;
554 }
555 }
556
557 /// Calls a GPU engine method.
558 void CallEngineMethod(const GPU::MethodCall& method_call) {
559 const EngineID engine = bound_engines[method_call.subchannel];
560
561 switch (engine) {
562 case EngineID::FERMI_TWOD_A:
563 fermi_2d->CallMethod(method_call.method, method_call.argument,
564 method_call.IsLastCall());
565 break;
566 case EngineID::MAXWELL_B:
567 maxwell_3d->CallMethod(method_call.method, method_call.argument,
568 method_call.IsLastCall());
569 break;
570 case EngineID::KEPLER_COMPUTE_B:
571 kepler_compute->CallMethod(method_call.method, method_call.argument,
572 method_call.IsLastCall());
573 break;
574 case EngineID::MAXWELL_DMA_COPY_A:
575 maxwell_dma->CallMethod(method_call.method, method_call.argument,
576 method_call.IsLastCall());
577 break;
578 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
579 kepler_memory->CallMethod(method_call.method, method_call.argument,
580 method_call.IsLastCall());
581 break;
582 default:
583 UNIMPLEMENTED_MSG("Unimplemented engine");
584 }
585 }
586
587 /// Calls a GPU engine multivalue method.
588 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
589 u32 methods_pending) {
590 const EngineID engine = bound_engines[subchannel];
591
592 switch (engine) {
593 case EngineID::FERMI_TWOD_A:
594 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
595 break;
596 case EngineID::MAXWELL_B:
597 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
598 break;
599 case EngineID::KEPLER_COMPUTE_B:
600 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
601 break;
602 case EngineID::MAXWELL_DMA_COPY_A:
603 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
604 break;
605 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
606 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
607 break;
608 default:
609 UNIMPLEMENTED_MSG("Unimplemented engine");
610 }
611 }
612
613 /// Determines where the method should be executed.
614 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
615 const auto buffer_method = static_cast<BufferMethods>(method);
616 return buffer_method >= BufferMethods::NonPullerMethods;
617 }
618
/// Puller register file. The anonymous union overlays named fields on a flat array of
/// 32-bit words; the padding macros place each named field at its word index, which the
/// ASSERT_REG_POSITION checks later in this struct verify.
struct Regs {
    // Size of reg_array in words.
    // NOTE(review): the named fields extend beyond this (acquire_mode sits at word 0x100),
    // so reg_array only aliases the first 0x40 words - confirm this is intended.
    static constexpr size_t NUM_REGS = 0x40;

    union {
        struct {
            INSERT_PADDING_WORDS_NOINIT(0x4);
            // Words 0x4-0x5: 64-bit semaphore address split into two 32-bit halves
            struct {
                u32 address_high;
                u32 address_low;

                /// Joins the high and low halves into one GPU virtual address.
                [[nodiscard]] GPUVAddr SemaphoreAddress() const {
                    return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                 address_low);
                }
            } semaphore_address;

            u32 semaphore_sequence; // word 0x6
            u32 semaphore_trigger;  // word 0x7; low 4 bits select the semaphore operation
            INSERT_PADDING_WORDS_NOINIT(0xC);

            // The pusher and the puller share the reference counter, the pusher only has read
            // access
            u32 reference_count; // word 0x14
            INSERT_PADDING_WORDS_NOINIT(0x5);

            u32 semaphore_acquire;         // word 0x1A
            u32 semaphore_release;         // word 0x1B
            u32 fence_value;               // word 0x1C
            GPU::FenceAction fence_action; // word 0x1D
            INSERT_PADDING_WORDS_NOINIT(0xE2);

            // Puller state (starts at word 0x100)
            u32 acquire_mode;
            u32 acquire_source;
            u32 acquire_active;
            u32 acquire_timeout;
            u32 acquire_value;
        };
        // Flat view indexed by method number (see CallPullerMethod)
        std::array<u32, NUM_REGS> reg_array;
    };
} regs{};
660
/// Reference to a GPU object. NOTE(review): GPU's constructor passes *this when it creates
/// Impl, so this is presumably the owning GPU - confirm against Impl's constructor.
661 GPU& gpu;
/// Reference to the core system object.
662 Core::System& system;
/// Owned subsystems, held through unique_ptr.
663 std::unique_ptr<Tegra::MemoryManager> memory_manager;
664 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
665 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
666 std::unique_ptr<VideoCore::RendererBase> renderer;
/// Raw rasterizer pointer, null until assigned. NOTE(review): presumably a non-owning
/// pointer obtained from the bound renderer - confirm in BindRenderer.
667 VideoCore::RasterizerInterface* rasterizer = nullptr;
/// Fixed at construction (const member).
668 const bool use_nvdec;
669
670 /// Mapping of command subchannels to their bound engine ids
671 std::array<EngineID, 8> bound_engines{};
672 /// 3D engine
673 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
674 /// 2D engine
675 std::unique_ptr<Engines::Fermi2D> fermi_2d;
676 /// Compute engine
677 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
678 /// DMA engine
679 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
680 /// Inline memory engine
681 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
682 /// Shader build notifier
683 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
684 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
685 std::atomic_bool shutting_down{};
686
/// One atomic u32 counter per syncpoint (Service::Nvidia::MaxSyncPoints entries).
687 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
688
/// One list of u32 values per syncpoint. NOTE(review): presumably the thresholds at which a
/// CPU interrupt should be raised - confirm against RegisterSyncptInterrupt.
689 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
690
/// NOTE(review): presumably guards the syncpoint interrupt lists and pairs with sync_cv -
/// confirm against WaitFence / IncrementSyncPoint.
691 std::mutex sync_mutex;
692 std::mutex device_mutex;
693
/// NOTE(review): presumably signalled when a syncpoint is incremented - confirm.
694 std::condition_variable sync_cv;
695
696 struct FlushRequest {
697 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
698 : fence{fence_}, addr{addr_}, size{size_} {}
699 u64 fence;
700 VAddr addr;
701 std::size_t size;
702 };
703
/// Queue of flush requests. NOTE(review): presumably protected by flush_request_mutex - confirm.
704 std::list<FlushRequest> flush_requests;
/// Atomic fence value, value-initialized. NOTE(review): presumably the fence of the most
/// recently completed flush - confirm against TickWork.
705 std::atomic<u64> current_flush_fence{};
/// Plain (non-atomic) fence counter, value-initialized. NOTE(review): presumably the last
/// fence handed out by RequestFlush - confirm.
706 u64 last_flush_fence{};
707 std::mutex flush_request_mutex;
708
/// Fixed at construction (const member).
709 const bool is_async;
710
711 VideoCommon::GPUThread::ThreadManager gpu_thread;
/// Owned graphics context. NOTE(review): presumably created in Start() - confirm.
712 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
713
// Compile-time layout checks for Regs. `position` is a word index; it is multiplied by 4
// and compared against the field's byte offset within Regs.
714#define ASSERT_REG_POSITION(field_name, position) \
715 static_assert(offsetof(Regs, field_name) == position * 4, \
716 "Field " #field_name " has invalid position")
717
718 ASSERT_REG_POSITION(semaphore_address, 0x4);
719 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
720 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
721 ASSERT_REG_POSITION(reference_count, 0x14);
722 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
723 ASSERT_REG_POSITION(semaphore_release, 0x1B);
724 ASSERT_REG_POSITION(fence_value, 0x1C);
725 ASSERT_REG_POSITION(fence_action, 0x1D);
726
// Puller state fields; these word indices are above Regs::NUM_REGS (0x40).
727 ASSERT_REG_POSITION(acquire_mode, 0x100);
728 ASSERT_REG_POSITION(acquire_source, 0x101);
729 ASSERT_REG_POSITION(acquire_active, 0x102);
730 ASSERT_REG_POSITION(acquire_timeout, 0x103);
731 ASSERT_REG_POSITION(acquire_value, 0x104);
732
// The macro is only needed for the checks above, so it is removed again here.
733#undef ASSERT_REG_POSITION
734
/// Semaphore operation codes; each value is a distinct single bit (0x1, 0x2, 0x4, 0x8).
/// NOTE(review): presumably decoded from the low four bits of regs.semaphore_trigger -
/// confirm against ProcessSemaphoreTriggerMethod.
735 enum class GpuSemaphoreOperation {
736 AcquireEqual = 0x1,
737 WriteLong = 0x2,
738 AcquireGequal = 0x4,
739 AcquireMask = 0x8,
740 };
741};
742
743GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
744 : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
42 745
43GPU::~GPU() = default; 746GPU::~GPU() = default;
44 747
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 748void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
46 renderer = std::move(renderer_); 749 impl->BindRenderer(std::move(renderer));
47 rasterizer = renderer->ReadRasterizer(); 750}
48 751
49 memory_manager->BindRasterizer(rasterizer); 752void GPU::CallMethod(const MethodCall& method_call) {
50 maxwell_3d->BindRasterizer(rasterizer); 753 impl->CallMethod(method_call);
51 fermi_2d->BindRasterizer(rasterizer);
52 kepler_compute->BindRasterizer(rasterizer);
53 maxwell_dma->BindRasterizer(rasterizer);
54} 754}
55 755
56Engines::Maxwell3D& GPU::Maxwell3D() { 756void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
57 return *maxwell_3d; 757 u32 methods_pending) {
758 impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
58} 759}
59 760
60const Engines::Maxwell3D& GPU::Maxwell3D() const { 761void GPU::FlushCommands() {
61 return *maxwell_3d; 762 impl->FlushCommands();
62} 763}
63 764
64Engines::KeplerCompute& GPU::KeplerCompute() { 765void GPU::SyncGuestHost() {
65 return *kepler_compute; 766 impl->SyncGuestHost();
66} 767}
67 768
68const Engines::KeplerCompute& GPU::KeplerCompute() const { 769void GPU::OnCommandListEnd() {
69 return *kepler_compute; 770 impl->OnCommandListEnd();
70} 771}
71 772
72MemoryManager& GPU::MemoryManager() { 773u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
73 return *memory_manager; 774 return impl->RequestFlush(addr, size);
74} 775}
75 776
76const MemoryManager& GPU::MemoryManager() const { 777u64 GPU::CurrentFlushRequestFence() const {
77 return *memory_manager; 778 return impl->CurrentFlushRequestFence();
78} 779}
79 780
80DmaPusher& GPU::DmaPusher() { 781void GPU::TickWork() {
81 return *dma_pusher; 782 impl->TickWork();
82} 783}
83 784
84Tegra::CDmaPusher& GPU::CDmaPusher() { 785Engines::Maxwell3D& GPU::Maxwell3D() {
85 return *cdma_pusher; 786 return impl->Maxwell3D();
86} 787}
87 788
88const DmaPusher& GPU::DmaPusher() const { 789const Engines::Maxwell3D& GPU::Maxwell3D() const {
89 return *dma_pusher; 790 return impl->Maxwell3D();
90} 791}
91 792
92const Tegra::CDmaPusher& GPU::CDmaPusher() const { 793Engines::KeplerCompute& GPU::KeplerCompute() {
93 return *cdma_pusher; 794 return impl->KeplerCompute();
94} 795}
95 796
96void GPU::WaitFence(u32 syncpoint_id, u32 value) { 797const Engines::KeplerCompute& GPU::KeplerCompute() const {
97 // Synced GPU, is always in sync 798 return impl->KeplerCompute();
98 if (!is_async) {
99 return;
100 }
101 if (syncpoint_id == UINT32_MAX) {
102 // TODO: Research what this does.
103 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
104 return;
105 }
106 MICROPROFILE_SCOPE(GPU_wait);
107 std::unique_lock lock{sync_mutex};
108 sync_cv.wait(lock, [=, this] {
109 if (shutting_down.load(std::memory_order_relaxed)) {
110 // We're shutting down, ensure no threads continue to wait for the next syncpoint
111 return true;
112 }
113 return syncpoints.at(syncpoint_id).load() >= value;
114 });
115}
116
117void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
118 auto& syncpoint = syncpoints.at(syncpoint_id);
119 syncpoint++;
120 std::lock_guard lock{sync_mutex};
121 sync_cv.notify_all();
122 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
123 if (!interrupt.empty()) {
124 u32 value = syncpoint.load();
125 auto it = interrupt.begin();
126 while (it != interrupt.end()) {
127 if (value >= *it) {
128 TriggerCpuInterrupt(syncpoint_id, *it);
129 it = interrupt.erase(it);
130 continue;
131 }
132 it++;
133 }
134 }
135} 799}
136 800
137u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { 801Tegra::MemoryManager& GPU::MemoryManager() {
138 return syncpoints.at(syncpoint_id).load(); 802 return impl->MemoryManager();
139} 803}
140 804
141void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 805const Tegra::MemoryManager& GPU::MemoryManager() const {
142 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 806 return impl->MemoryManager();
143 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
144 [value](u32 in_value) { return in_value == value; });
145 if (contains) {
146 return;
147 }
148 interrupt.emplace_back(value);
149} 807}
150 808
151bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 809Tegra::DmaPusher& GPU::DmaPusher() {
152 std::lock_guard lock{sync_mutex}; 810 return impl->DmaPusher();
153 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 811}
154 const auto iter =
155 std::find_if(interrupt.begin(), interrupt.end(),
156 [value](u32 interrupt_value) { return value == interrupt_value; });
157 812
158 if (iter == interrupt.end()) { 813const Tegra::DmaPusher& GPU::DmaPusher() const {
159 return false; 814 return impl->DmaPusher();
160 }
161 interrupt.erase(iter);
162 return true;
163} 815}
164 816
165u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 817Tegra::CDmaPusher& GPU::CDmaPusher() {
166 std::unique_lock lck{flush_request_mutex}; 818 return impl->CDmaPusher();
167 const u64 fence = ++last_flush_fence;
168 flush_requests.emplace_back(fence, addr, size);
169 return fence;
170} 819}
171 820
172void GPU::TickWork() { 821const Tegra::CDmaPusher& GPU::CDmaPusher() const {
173 std::unique_lock lck{flush_request_mutex}; 822 return impl->CDmaPusher();
174 while (!flush_requests.empty()) {
175 auto& request = flush_requests.front();
176 const u64 fence = request.fence;
177 const VAddr addr = request.addr;
178 const std::size_t size = request.size;
179 flush_requests.pop_front();
180 flush_request_mutex.unlock();
181 rasterizer->FlushRegion(addr, size);
182 current_flush_fence.store(fence);
183 flush_request_mutex.lock();
184 }
185} 823}
186 824
187u64 GPU::GetTicks() const { 825VideoCore::RendererBase& GPU::Renderer() {
188 // This values were reversed engineered by fincs from NVN 826 return impl->Renderer();
189 // The gpu clock is reported in units of 385/625 nanoseconds 827}
190 constexpr u64 gpu_ticks_num = 384;
191 constexpr u64 gpu_ticks_den = 625;
192 828
193 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); 829const VideoCore::RendererBase& GPU::Renderer() const {
194 if (Settings::values.use_fast_gpu_time.GetValue()) { 830 return impl->Renderer();
195 nanoseconds /= 256;
196 }
197 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
198 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
199 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
200} 831}
201 832
202void GPU::RendererFrameEndNotify() { 833VideoCore::ShaderNotify& GPU::ShaderNotify() {
203 system.GetPerfStats().EndGameFrame(); 834 return impl->ShaderNotify();
204} 835}
205 836
206void GPU::FlushCommands() { 837const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
207 rasterizer->FlushCommands(); 838 return impl->ShaderNotify();
208} 839}
209 840
210void GPU::SyncGuestHost() { 841void GPU::WaitFence(u32 syncpoint_id, u32 value) {
211 rasterizer->SyncGuestHost(); 842 impl->WaitFence(syncpoint_id, value);
212} 843}
213 844
214enum class GpuSemaphoreOperation { 845void GPU::IncrementSyncPoint(u32 syncpoint_id) {
215 AcquireEqual = 0x1, 846 impl->IncrementSyncPoint(syncpoint_id);
216 WriteLong = 0x2, 847}
217 AcquireGequal = 0x4,
218 AcquireMask = 0x8,
219};
220 848
221void GPU::CallMethod(const MethodCall& method_call) { 849u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
222 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, 850 return impl->GetSyncpointValue(syncpoint_id);
223 method_call.subchannel); 851}
224 852
225 ASSERT(method_call.subchannel < bound_engines.size()); 853void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
854 impl->RegisterSyncptInterrupt(syncpoint_id, value);
855}
226 856
227 if (ExecuteMethodOnEngine(method_call.method)) { 857bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
228 CallEngineMethod(method_call); 858 return impl->CancelSyncptInterrupt(syncpoint_id, value);
229 } else {
230 CallPullerMethod(method_call);
231 }
232} 859}
233 860
234void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 861u64 GPU::GetTicks() const {
235 u32 methods_pending) { 862 return impl->GetTicks();
236 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
237
238 ASSERT(subchannel < bound_engines.size());
239
240 if (ExecuteMethodOnEngine(method)) {
241 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
242 } else {
243 for (std::size_t i = 0; i < amount; i++) {
244 CallPullerMethod(MethodCall{
245 method,
246 base_start[i],
247 subchannel,
248 methods_pending - static_cast<u32>(i),
249 });
250 }
251 }
252} 863}
253 864
254bool GPU::ExecuteMethodOnEngine(u32 method) { 865std::unique_lock<std::mutex> GPU::LockSync() {
255 const auto buffer_method = static_cast<BufferMethods>(method); 866 return impl->LockSync();
256 return buffer_method >= BufferMethods::NonPullerMethods; 867}
257}
258
259void GPU::CallPullerMethod(const MethodCall& method_call) {
260 regs.reg_array[method_call.method] = method_call.argument;
261 const auto method = static_cast<BufferMethods>(method_call.method);
262
263 switch (method) {
264 case BufferMethods::BindObject: {
265 ProcessBindMethod(method_call);
266 break;
267 }
268 case BufferMethods::Nop:
269 case BufferMethods::SemaphoreAddressHigh:
270 case BufferMethods::SemaphoreAddressLow:
271 case BufferMethods::SemaphoreSequence:
272 case BufferMethods::UnkCacheFlush:
273 case BufferMethods::WrcacheFlush:
274 case BufferMethods::FenceValue:
275 break;
276 case BufferMethods::RefCnt:
277 rasterizer->SignalReference();
278 break;
279 case BufferMethods::FenceAction:
280 ProcessFenceActionMethod();
281 break;
282 case BufferMethods::WaitForInterrupt:
283 ProcessWaitForInterruptMethod();
284 break;
285 case BufferMethods::SemaphoreTrigger: {
286 ProcessSemaphoreTriggerMethod();
287 break;
288 }
289 case BufferMethods::NotifyIntr: {
290 // TODO(Kmather73): Research and implement this method.
291 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
292 break;
293 }
294 case BufferMethods::Unk28: {
295 // TODO(Kmather73): Research and implement this method.
296 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
297 break;
298 }
299 case BufferMethods::SemaphoreAcquire: {
300 ProcessSemaphoreAcquire();
301 break;
302 }
303 case BufferMethods::SemaphoreRelease: {
304 ProcessSemaphoreRelease();
305 break;
306 }
307 case BufferMethods::Yield: {
308 // TODO(Kmather73): Research and implement this method.
309 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
310 break;
311 }
312 default:
313 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
314 break;
315 }
316}
317
318void GPU::CallEngineMethod(const MethodCall& method_call) {
319 const EngineID engine = bound_engines[method_call.subchannel];
320
321 switch (engine) {
322 case EngineID::FERMI_TWOD_A:
323 fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
324 break;
325 case EngineID::MAXWELL_B:
326 maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
327 break;
328 case EngineID::KEPLER_COMPUTE_B:
329 kepler_compute->CallMethod(method_call.method, method_call.argument,
330 method_call.IsLastCall());
331 break;
332 case EngineID::MAXWELL_DMA_COPY_A:
333 maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
334 break;
335 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
336 kepler_memory->CallMethod(method_call.method, method_call.argument,
337 method_call.IsLastCall());
338 break;
339 default:
340 UNIMPLEMENTED_MSG("Unimplemented engine");
341 }
342}
343
344void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
345 u32 methods_pending) {
346 const EngineID engine = bound_engines[subchannel];
347
348 switch (engine) {
349 case EngineID::FERMI_TWOD_A:
350 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
351 break;
352 case EngineID::MAXWELL_B:
353 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
354 break;
355 case EngineID::KEPLER_COMPUTE_B:
356 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
357 break;
358 case EngineID::MAXWELL_DMA_COPY_A:
359 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
360 break;
361 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
362 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
363 break;
364 default:
365 UNIMPLEMENTED_MSG("Unimplemented engine");
366 }
367}
368
369void GPU::ProcessBindMethod(const MethodCall& method_call) {
370 // Bind the current subchannel to the desired engine id.
371 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
372 method_call.argument);
373 const auto engine_id = static_cast<EngineID>(method_call.argument);
374 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
375 switch (engine_id) {
376 case EngineID::FERMI_TWOD_A:
377 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
378 break;
379 case EngineID::MAXWELL_B:
380 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
381 break;
382 case EngineID::KEPLER_COMPUTE_B:
383 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
384 break;
385 case EngineID::MAXWELL_DMA_COPY_A:
386 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
387 break;
388 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
389 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
390 break;
391 default:
392 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
393 }
394}
395
396void GPU::ProcessFenceActionMethod() {
397 switch (regs.fence_action.op) {
398 case FenceOperation::Acquire:
399 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
400 break;
401 case FenceOperation::Increment:
402 IncrementSyncPoint(regs.fence_action.syncpoint_id);
403 break;
404 default:
405 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
406 }
407}
408
409void GPU::ProcessWaitForInterruptMethod() {
410 // TODO(bunnei) ImplementMe
411 LOG_WARNING(HW_GPU, "(STUBBED) called");
412}
413
414void GPU::ProcessSemaphoreTriggerMethod() {
415 const auto semaphoreOperationMask = 0xF;
416 const auto op =
417 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
418 if (op == GpuSemaphoreOperation::WriteLong) {
419 struct Block {
420 u32 sequence;
421 u32 zeros = 0;
422 u64 timestamp;
423 };
424 868
425 Block block{}; 869bool GPU::IsAsync() const {
426 block.sequence = regs.semaphore_sequence; 870 return impl->IsAsync();
427 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
428 // CoreTiming
429 block.timestamp = GetTicks();
430 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
431 sizeof(block));
432 } else {
433 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
434 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
435 (op == GpuSemaphoreOperation::AcquireGequal &&
436 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
437 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
438 // Nothing to do in this case
439 } else {
440 regs.acquire_source = true;
441 regs.acquire_value = regs.semaphore_sequence;
442 if (op == GpuSemaphoreOperation::AcquireEqual) {
443 regs.acquire_active = true;
444 regs.acquire_mode = false;
445 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
446 regs.acquire_active = true;
447 regs.acquire_mode = true;
448 } else if (op == GpuSemaphoreOperation::AcquireMask) {
449 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
450 // semaphore_sequence, gives a non-0 result
451 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
452 } else {
453 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
454 }
455 }
456 }
457} 871}
458 872
459void GPU::ProcessSemaphoreRelease() { 873bool GPU::UseNvdec() const {
460 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); 874 return impl->UseNvdec();
461} 875}
462 876
463void GPU::ProcessSemaphoreAcquire() { 877void GPU::RendererFrameEndNotify() {
464 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); 878 impl->RendererFrameEndNotify();
465 const auto value = regs.semaphore_acquire;
466 if (word != value) {
467 regs.acquire_active = true;
468 regs.acquire_value = value;
469 // TODO(kemathe73) figure out how to do the acquire_timeout
470 regs.acquire_mode = false;
471 regs.acquire_source = false;
472 }
473} 879}
474 880
475void GPU::Start() { 881void GPU::Start() {
476 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 882 impl->Start();
477 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
478 cpu_context->MakeCurrent();
479} 883}
480 884
481void GPU::ObtainContext() { 885void GPU::ObtainContext() {
482 cpu_context->MakeCurrent(); 886 impl->ObtainContext();
483} 887}
484 888
485void GPU::ReleaseContext() { 889void GPU::ReleaseContext() {
486 cpu_context->DoneCurrent(); 890 impl->ReleaseContext();
487} 891}
488 892
489void GPU::PushGPUEntries(Tegra::CommandList&& entries) { 893void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
490 gpu_thread.SubmitList(std::move(entries)); 894 impl->PushGPUEntries(std::move(entries));
491} 895}
492 896
493void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 897void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
494 if (!use_nvdec) { 898 impl->PushCommandBuffer(entries);
495 return;
496 }
497
498 if (!cdma_pusher) {
499 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
500 }
501
502 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
503 // TODO(ameerj): RE proper async nvdec operation
504 // gpu_thread.SubmitCommandBuffer(std::move(entries));
505
506 cdma_pusher->ProcessEntries(std::move(entries));
507} 899}
508 900
509void GPU::ClearCdmaInstance() { 901void GPU::ClearCdmaInstance() {
510 cdma_pusher.reset(); 902 impl->ClearCdmaInstance();
511} 903}
512 904
513void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 905void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
514 gpu_thread.SwapBuffers(framebuffer); 906 impl->SwapBuffers(framebuffer);
515} 907}
516 908
517void GPU::FlushRegion(VAddr addr, u64 size) { 909void GPU::FlushRegion(VAddr addr, u64 size) {
518 gpu_thread.FlushRegion(addr, size); 910 impl->FlushRegion(addr, size);
519} 911}
520 912
521void GPU::InvalidateRegion(VAddr addr, u64 size) { 913void GPU::InvalidateRegion(VAddr addr, u64 size) {
522 gpu_thread.InvalidateRegion(addr, size); 914 impl->InvalidateRegion(addr, size);
523} 915}
524 916
525void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 917void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
526 gpu_thread.FlushAndInvalidateRegion(addr, size); 918 impl->FlushAndInvalidateRegion(addr, size);
527}
528
529void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
530 auto& interrupt_manager = system.InterruptManager();
531 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
532}
533
534void GPU::OnCommandListEnd() {
535 if (is_async) {
536 // This command only applies to asynchronous GPU mode
537 gpu_thread.OnCommandListEnd();
538 }
539} 919}
540 920
541} // namespace Tegra 921} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e6a02a71b..39b304823 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -4,19 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <list>
11#include <memory> 7#include <memory>
12#include <mutex> 8#include <mutex>
9
10#include "common/bit_field.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "core/hle/service/nvdrv/nvdata.h"
15#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "video_core/cdma_pusher.h" 12#include "video_core/cdma_pusher.h"
17#include "video_core/dma_pusher.h"
18#include "video_core/framebuffer_config.h" 13#include "video_core/framebuffer_config.h"
19#include "video_core/gpu_thread.h"
20 14
21using CacheAddr = std::uintptr_t; 15using CacheAddr = std::uintptr_t;
22[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { 16[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -40,6 +34,9 @@ class ShaderNotify;
40} // namespace VideoCore 34} // namespace VideoCore
41 35
42namespace Tegra { 36namespace Tegra {
37class DmaPusher;
38class CDmaPusher;
39struct CommandList;
43 40
44enum class RenderTargetFormat : u32 { 41enum class RenderTargetFormat : u32 {
45 NONE = 0x0, 42 NONE = 0x0,
@@ -138,7 +135,18 @@ public:
138 } 135 }
139 }; 136 };
140 137
141 explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); 138 enum class FenceOperation : u32 {
139 Acquire = 0,
140 Increment = 1,
141 };
142
143 union FenceAction {
144 u32 raw;
145 BitField<0, 1, FenceOperation> op;
146 BitField<8, 24, u32> syncpoint_id;
147 };
148
149 explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
142 ~GPU(); 150 ~GPU();
143 151
144 /// Binds a renderer to the GPU. 152 /// Binds a renderer to the GPU.
@@ -162,9 +170,7 @@ public:
162 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 170 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
163 171
164 /// Obtains current flush request fence id. 172 /// Obtains current flush request fence id.
165 [[nodiscard]] u64 CurrentFlushRequestFence() const { 173 [[nodiscard]] u64 CurrentFlushRequestFence() const;
166 return current_flush_fence.load(std::memory_order_relaxed);
167 }
168 174
169 /// Tick pending requests within the GPU. 175 /// Tick pending requests within the GPU.
170 void TickWork(); 176 void TickWork();
@@ -200,24 +206,16 @@ public:
200 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; 206 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
201 207
202 /// Returns a reference to the underlying renderer. 208 /// Returns a reference to the underlying renderer.
203 [[nodiscard]] VideoCore::RendererBase& Renderer() { 209 [[nodiscard]] VideoCore::RendererBase& Renderer();
204 return *renderer;
205 }
206 210
207 /// Returns a const reference to the underlying renderer. 211 /// Returns a const reference to the underlying renderer.
208 [[nodiscard]] const VideoCore::RendererBase& Renderer() const { 212 [[nodiscard]] const VideoCore::RendererBase& Renderer() const;
209 return *renderer;
210 }
211 213
212 /// Returns a reference to the shader notifier. 214 /// Returns a reference to the shader notifier.
213 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { 215 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify();
214 return *shader_notify;
215 }
216 216
217 /// Returns a const reference to the shader notifier. 217 /// Returns a const reference to the shader notifier.
218 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { 218 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
219 return *shader_notify;
220 }
221 219
222 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 220 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
223 void WaitFence(u32 syncpoint_id, u32 value); 221 void WaitFence(u32 syncpoint_id, u32 value);
@@ -232,80 +230,14 @@ public:
232 230
233 [[nodiscard]] u64 GetTicks() const; 231 [[nodiscard]] u64 GetTicks() const;
234 232
235 [[nodiscard]] std::unique_lock<std::mutex> LockSync() { 233 [[nodiscard]] std::unique_lock<std::mutex> LockSync();
236 return std::unique_lock{sync_mutex};
237 }
238 234
239 [[nodiscard]] bool IsAsync() const { 235 [[nodiscard]] bool IsAsync() const;
240 return is_async;
241 }
242 236
243 [[nodiscard]] bool UseNvdec() const { 237 [[nodiscard]] bool UseNvdec() const;
244 return use_nvdec;
245 }
246 238
247 void RendererFrameEndNotify(); 239 void RendererFrameEndNotify();
248 240
249 enum class FenceOperation : u32 {
250 Acquire = 0,
251 Increment = 1,
252 };
253
254 union FenceAction {
255 u32 raw;
256 BitField<0, 1, FenceOperation> op;
257 BitField<8, 24, u32> syncpoint_id;
258
259 [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
260 FenceAction result{};
261 result.op.Assign(op);
262 result.syncpoint_id.Assign(syncpoint_id);
263 return {result.raw};
264 }
265 };
266
267 struct Regs {
268 static constexpr size_t NUM_REGS = 0x40;
269
270 union {
271 struct {
272 INSERT_PADDING_WORDS_NOINIT(0x4);
273 struct {
274 u32 address_high;
275 u32 address_low;
276
277 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
278 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
279 address_low);
280 }
281 } semaphore_address;
282
283 u32 semaphore_sequence;
284 u32 semaphore_trigger;
285 INSERT_PADDING_WORDS_NOINIT(0xC);
286
287 // The pusher and the puller share the reference counter, the pusher only has read
288 // access
289 u32 reference_count;
290 INSERT_PADDING_WORDS_NOINIT(0x5);
291
292 u32 semaphore_acquire;
293 u32 semaphore_release;
294 u32 fence_value;
295 FenceAction fence_action;
296 INSERT_PADDING_WORDS_NOINIT(0xE2);
297
298 // Puller state
299 u32 acquire_mode;
300 u32 acquire_source;
301 u32 acquire_active;
302 u32 acquire_timeout;
303 u32 acquire_value;
304 };
305 std::array<u32, NUM_REGS> reg_array;
306 };
307 } regs{};
308
309 /// Performs any additional setup necessary in order to begin GPU emulation. 241 /// Performs any additional setup necessary in order to begin GPU emulation.
310 /// This can be used to launch any necessary threads and register any necessary 242 /// This can be used to launch any necessary threads and register any necessary
311 /// core timing events. 243 /// core timing events.
@@ -338,104 +270,9 @@ public:
338 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 270 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
339 void FlushAndInvalidateRegion(VAddr addr, u64 size); 271 void FlushAndInvalidateRegion(VAddr addr, u64 size);
340 272
341protected:
342 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
343
344private:
345 void ProcessBindMethod(const MethodCall& method_call);
346 void ProcessFenceActionMethod();
347 void ProcessWaitForInterruptMethod();
348 void ProcessSemaphoreTriggerMethod();
349 void ProcessSemaphoreRelease();
350 void ProcessSemaphoreAcquire();
351
352 /// Calls a GPU puller method.
353 void CallPullerMethod(const MethodCall& method_call);
354
355 /// Calls a GPU engine method.
356 void CallEngineMethod(const MethodCall& method_call);
357
358 /// Calls a GPU engine multivalue method.
359 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
360 u32 methods_pending);
361
362 /// Determines where the method should be executed.
363 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
364
365protected:
366 Core::System& system;
367 std::unique_ptr<Tegra::MemoryManager> memory_manager;
368 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
369 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
370 std::unique_ptr<VideoCore::RendererBase> renderer;
371 VideoCore::RasterizerInterface* rasterizer = nullptr;
372 const bool use_nvdec;
373
374private: 273private:
375 /// Mapping of command subchannels to their bound engine ids 274 struct Impl;
376 std::array<EngineID, 8> bound_engines = {}; 275 std::unique_ptr<Impl> impl;
377 /// 3D engine
378 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
379 /// 2D engine
380 std::unique_ptr<Engines::Fermi2D> fermi_2d;
381 /// Compute engine
382 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
383 /// DMA engine
384 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
385 /// Inline memory engine
386 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
387 /// Shader build notifier
388 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
389 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
390 std::atomic_bool shutting_down{};
391
392 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
393
394 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
395
396 std::mutex sync_mutex;
397 std::mutex device_mutex;
398
399 std::condition_variable sync_cv;
400
401 struct FlushRequest {
402 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
403 : fence{fence_}, addr{addr_}, size{size_} {}
404 u64 fence;
405 VAddr addr;
406 std::size_t size;
407 };
408
409 std::list<FlushRequest> flush_requests;
410 std::atomic<u64> current_flush_fence{};
411 u64 last_flush_fence{};
412 std::mutex flush_request_mutex;
413
414 const bool is_async;
415
416 VideoCommon::GPUThread::ThreadManager gpu_thread;
417 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
418}; 276};
419 277
420#define ASSERT_REG_POSITION(field_name, position) \
421 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
422 "Field " #field_name " has invalid position")
423
424ASSERT_REG_POSITION(semaphore_address, 0x4);
425ASSERT_REG_POSITION(semaphore_sequence, 0x6);
426ASSERT_REG_POSITION(semaphore_trigger, 0x7);
427ASSERT_REG_POSITION(reference_count, 0x14);
428ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
429ASSERT_REG_POSITION(semaphore_release, 0x1B);
430ASSERT_REG_POSITION(fence_value, 0x1C);
431ASSERT_REG_POSITION(fence_action, 0x1D);
432
433ASSERT_REG_POSITION(acquire_mode, 0x100);
434ASSERT_REG_POSITION(acquire_source, 0x101);
435ASSERT_REG_POSITION(acquire_active, 0x102);
436ASSERT_REG_POSITION(acquire_timeout, 0x103);
437ASSERT_REG_POSITION(acquire_value, 0x104);
438
439#undef ASSERT_REG_POSITION
440
441} // namespace Tegra 278} // namespace Tegra
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 91bada925..00984188e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -130,9 +130,6 @@ public:
130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
131 void FlushAndInvalidateRegion(VAddr addr, u64 size); 131 void FlushAndInvalidateRegion(VAddr addr, u64 size);
132 132
133 // Stops the GPU execution and waits for the GPU to finish working
134 void ShutDown();
135
136 void OnCommandListEnd(); 133 void OnCommandListEnd();
137 134
138private: 135private:
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index aac851253..73231061a 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -8,6 +8,7 @@
8#include <array> 8#include <array>
9#include <cstring> 9#include <cstring>
10#include <iterator> 10#include <iterator>
11#include <list>
11#include <memory> 12#include <memory>
12#include <mutex> 13#include <mutex>
13#include <optional> 14#include <optional>
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 54dae2c41..8c3ca3d82 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/surface.h" 20#include "video_core/surface.h"
21#include "video_core/texture_cache/formatter.h" 21#include "video_core/texture_cache/formatter.h"
22#include "video_core/texture_cache/samples_helper.h" 22#include "video_core/texture_cache/samples_helper.h"
23#include "video_core/texture_cache/util.h"
23 24
24namespace OpenGL { 25namespace OpenGL {
25namespace { 26namespace {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index c498a8a8f..1ca2c90be 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -12,6 +12,7 @@
12#include "shader_recompiler/shader_info.h" 12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/util_shaders.h" 14#include "video_core/renderer_opengl/util_shaders.h"
15#include "video_core/texture_cache/image_view_base.h"
15#include "video_core/texture_cache/texture_cache_base.h" 16#include "video_core/texture_cache/texture_cache_base.h"
16 17
17namespace OpenGL { 18namespace OpenGL {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 3b87640b5..06c5fb867 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -21,6 +21,7 @@
21#include "video_core/renderer_vulkan/vk_texture_cache.h" 21#include "video_core/renderer_vulkan/vk_texture_cache.h"
22#include "video_core/texture_cache/formatter.h" 22#include "video_core/texture_cache/formatter.h"
23#include "video_core/texture_cache/samples_helper.h" 23#include "video_core/texture_cache/samples_helper.h"
24#include "video_core/texture_cache/util.h"
24#include "video_core/vulkan_common/vulkan_device.h" 25#include "video_core/vulkan_common/vulkan_device.h"
25#include "video_core/vulkan_common/vulkan_memory_allocator.h" 26#include "video_core/vulkan_common/vulkan_memory_allocator.h"
26#include "video_core/vulkan_common/vulkan_wrapper.h" 27#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 6d5a68bfe..b09c468e4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,11 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <compare>
8#include <span> 7#include <span>
9 8
10#include "shader_recompiler/shader_info.h" 9#include "shader_recompiler/shader_info.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/texture_cache_base.h" 12#include "video_core/texture_cache/texture_cache_base.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 81a878bb2..05850afd0 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -16,6 +16,7 @@
16#include "common/fs/fs.h" 16#include "common/fs/fs.h"
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18#include "shader_recompiler/environment.h" 18#include "shader_recompiler/environment.h"
19#include "video_core/engines/kepler_compute.h"
19#include "video_core/memory_manager.h" 20#include "video_core/memory_manager.h"
20#include "video_core/shader_environment.h" 21#include "video_core/shader_environment.h"
21#include "video_core/textures/texture.h" 22#include "video_core/textures/texture.h"
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 2079979db..6640e53d0 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -5,13 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <filesystem> 8#include <filesystem>
10#include <iosfwd> 9#include <iosfwd>
11#include <limits> 10#include <limits>
12#include <memory> 11#include <memory>
13#include <optional> 12#include <optional>
14#include <span> 13#include <span>
14#include <stop_token>
15#include <type_traits> 15#include <type_traits>
16#include <unordered_map> 16#include <unordered_map>
17#include <vector> 17#include <vector>
@@ -19,9 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/unique_function.h" 20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h" 21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25 23
26namespace Tegra { 24namespace Tegra {
27class Memorymanager; 25class Memorymanager;
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
index 6527e14c8..e751f26c7 100644
--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -8,6 +8,7 @@
8#include "video_core/texture_cache/image_view_info.h" 8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache_base.h" 9#include "video_core/texture_cache/texture_cache_base.h"
10#include "video_core/texture_cache/types.h" 10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
11#include "video_core/textures/texture.h" 12#include "video_core/textures/texture.h"
12 13
13namespace VideoCommon { 14namespace VideoCommon {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 24b809242..329df2e49 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -4,10 +4,15 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_set>
8
7#include "common/alignment.h" 9#include "common/alignment.h"
8#include "video_core/dirty_flags.h" 10#include "video_core/dirty_flags.h"
11#include "video_core/engines/kepler_compute.h"
12#include "video_core/texture_cache/image_view_base.h"
9#include "video_core/texture_cache/samples_helper.h" 13#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/texture_cache_base.h" 14#include "video_core/texture_cache/texture_cache_base.h"
15#include "video_core/texture_cache/util.h"
11 16
12namespace VideoCommon { 17namespace VideoCommon {
13 18
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index d7528ed24..2d1893c1c 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -4,13 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <mutex> 7#include <mutex>
9#include <span> 8#include <span>
10#include <type_traits> 9#include <type_traits>
11#include <unordered_map> 10#include <unordered_map>
12#include <unordered_set>
13#include <vector> 11#include <vector>
12#include <queue>
14 13
15#include "common/common_types.h" 14#include "common/common_types.h"
16#include "common/literals.h" 15#include "common/literals.h"
@@ -18,10 +17,6 @@
18#include "video_core/compatible_formats.h" 17#include "video_core/compatible_formats.h"
19#include "video_core/delayed_destruction_ring.h" 18#include "video_core/delayed_destruction_ring.h"
20#include "video_core/engines/fermi_2d.h" 19#include "video_core/engines/fermi_2d.h"
21#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h"
24#include "video_core/rasterizer_interface.h"
25#include "video_core/surface.h" 20#include "video_core/surface.h"
26#include "video_core/texture_cache/descriptor_table.h" 21#include "video_core/texture_cache/descriptor_table.h"
27#include "video_core/texture_cache/image_base.h" 22#include "video_core/texture_cache/image_base.h"
@@ -30,7 +25,6 @@
30#include "video_core/texture_cache/render_targets.h" 25#include "video_core/texture_cache/render_targets.h"
31#include "video_core/texture_cache/slot_vector.h" 26#include "video_core/texture_cache/slot_vector.h"
32#include "video_core/texture_cache/types.h" 27#include "video_core/texture_cache/types.h"
33#include "video_core/texture_cache/util.h"
34#include "video_core/textures/texture.h" 28#include "video_core/textures/texture.h"
35 29
36namespace VideoCommon { 30namespace VideoCommon {