Diffstat (limited to 'src')
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp |    5
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp  |   30
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp   |   13
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp        |   29
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h          |   11
-rw-r--r--  src/video_core/cdma_pusher.cpp                      |    1
-rw-r--r--  src/video_core/cdma_pusher.h                        |    2
-rw-r--r--  src/video_core/framebuffer_config.h                 |   20
-rw-r--r--  src/video_core/gpu.cpp                              | 1215
-rw-r--r--  src/video_core/gpu.h                                |  227
-rw-r--r--  src/video_core/gpu_thread.h                         |    3
-rw-r--r--  src/video_core/query_cache.h                        |    1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp |    1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h   |    1
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp |    1
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h   |    2
-rw-r--r--  src/video_core/shader_environment.cpp               |    1
-rw-r--r--  src/video_core/shader_environment.h                 |    4
-rw-r--r--  src/video_core/texture_cache/image_view_info.cpp    |    1
-rw-r--r--  src/video_core/texture_cache/texture_cache.h        |    5
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h   |    8
21 files changed, 890 insertions(+), 691 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 789000294..4ee8c5733 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -48,8 +48,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
48 addr, offset, width, height, stride, format); 48 addr, offset, width, height, stride, format);
49 49
50 const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); 50 const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
51 const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, 51 const auto transform_flags = static_cast<Tegra::FramebufferConfig::TransformFlags>(transform);
52 stride, pixel_format, transform, crop_rect}; 52 const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
53 stride, pixel_format, transform_flags, crop_rect};
53 54
54 system.GetPerfStats().EndSystemFrame(); 55 system.GetPerfStats().EndSystemFrame();
55 system.GPU().SwapBuffers(&framebuffer); 56 system.GPU().SwapBuffers(&framebuffer);
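The change above stops passing the raw u32 transform straight into video_core and instead casts it once, at the service boundary, to the new Tegra::FramebufferConfig::TransformFlags enum introduced in the framebuffer_config.h hunk below. A minimal self-contained sketch of the pattern; the struct is reduced to two fields here (the real config also carries address, size, stride and crop), and MakeConfig is a hypothetical helper:

    #include <cstdint>

    // Reduced stand-ins for the types used in the diff.
    enum class PixelFormat : std::uint32_t { B8G8R8A8_UNORM = 5 };
    enum class TransformFlags : std::uint32_t { Unset = 0x00, FlipH = 0x01, FlipV = 0x02 };

    struct FramebufferConfig {
        PixelFormat pixel_format{};
        TransformFlags transform_flags{};
    };

    // The ioctl delivers plain u32 values; one cast at the boundary keeps
    // everything downstream strongly typed.
    FramebufferConfig MakeConfig(std::uint32_t format, std::uint32_t transform) {
        return {static_cast<PixelFormat>(format), static_cast<TransformFlags>(transform)};
    }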
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 775e76330..8b4867ca7 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -111,7 +111,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
111 event.event->GetWritableEvent().Signal(); 111 event.event->GetWritableEvent().Signal();
112 return NvResult::Success; 112 return NvResult::Success;
113 } 113 }
114 auto lock = gpu.LockSync();
115 const u32 current_syncpoint_value = event.fence.value; 114 const u32 current_syncpoint_value = event.fence.value;
116 const s32 diff = current_syncpoint_value - params.threshold; 115 const s32 diff = current_syncpoint_value - params.threshold;
117 if (diff >= 0) { 116 if (diff >= 0) {
@@ -132,23 +131,24 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
132 } 131 }
133 132
134 EventState status = events_interface.status[event_id]; 133 EventState status = events_interface.status[event_id];
135 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { 134 const bool bad_parameter = status != EventState::Free && status != EventState::Registered;
136 events_interface.SetEventStatus(event_id, EventState::Waiting); 135 if (bad_parameter) {
137 events_interface.assigned_syncpt[event_id] = params.syncpt_id;
138 events_interface.assigned_value[event_id] = target_value;
139 if (is_async) {
140 params.value = params.syncpt_id << 4;
141 } else {
142 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
143 }
144 params.value |= event_id;
145 event.event->GetWritableEvent().Clear();
146 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
147 std::memcpy(output.data(), &params, sizeof(params)); 136 std::memcpy(output.data(), &params, sizeof(params));
148 return NvResult::Timeout; 137 return NvResult::BadParameter;
149 } 138 }
139 events_interface.SetEventStatus(event_id, EventState::Waiting);
140 events_interface.assigned_syncpt[event_id] = params.syncpt_id;
141 events_interface.assigned_value[event_id] = target_value;
142 if (is_async) {
143 params.value = params.syncpt_id << 4;
144 } else {
145 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
146 }
147 params.value |= event_id;
148 event.event->GetWritableEvent().Clear();
149 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
150 std::memcpy(output.data(), &params, sizeof(params)); 150 std::memcpy(output.data(), &params, sizeof(params));
151 return NvResult::BadParameter; 151 return NvResult::Timeout;
152} 152}
153 153
154NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { 154NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
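The rewrite above is a guard-clause inversion: the original nested the whole wait setup under if (event_id < MaxNvEvents || status == Free || status == Registered) and left the BadParameter branch at the bottom. Since event_id has already been bounds-checked by this point, that || condition appears to have been always true, leaving the BadParameter path effectively unreachable; the new code derives bad_parameter from the event status alone and rejects it up front. It also drops the now-unneeded gpu.LockSync(). A sketch of the restructured control flow, with the types reduced to stand-ins:

    enum class EventState { Free, Registered, Waiting, Busy };
    enum class NvResult { Timeout, BadParameter };

    NvResult WaitOnEvent(EventState status) {
        // Reject events that are already armed before touching any state.
        const bool bad_parameter =
            status != EventState::Free && status != EventState::Registered;
        if (bad_parameter) {
            return NvResult::BadParameter;
        }
        // ... mark the event Waiting, record the syncpoint and target value,
        // register the syncpoint interrupt ...
        return NvResult::Timeout; // the caller now waits on the event handle
    }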
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index c0a380088..54ac105d5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,6 +13,14 @@
13#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
14 14
15namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
16namespace {
17Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
18 Tegra::GPU::FenceAction result{};
19 result.op.Assign(op);
20 result.syncpoint_id.Assign(syncpoint_id);
21 return {result.raw};
22}
23} // namespace
16 24
17nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_, 25nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
18 SyncpointManager& syncpoint_manager_) 26 SyncpointManager& syncpoint_manager_)
@@ -187,7 +195,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
187 {fence.value}, 195 {fence.value},
188 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 196 Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
189 Tegra::SubmissionMode::Increasing), 197 Tegra::SubmissionMode::Increasing),
190 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), 198 BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
191 }; 199 };
192} 200}
193 201
@@ -200,8 +208,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence,
200 for (u32 count = 0; count < add_increment; ++count) { 208 for (u32 count = 0; count < add_increment; ++count) {
201 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, 209 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
202 Tegra::SubmissionMode::Increasing)); 210 Tegra::SubmissionMode::Increasing));
203 result.emplace_back( 211 result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
204 Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
205 } 212 }
206 213
207 return result; 214 return result;
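Tegra::GPU::FenceAction::Build disappears here because FenceAction is moving out of GPU's public surface (see the gpu.cpp rewrite below); its replacement is a file-local BuildFenceAction in an anonymous namespace that performs the same BitField packing and feeds the raw word into a Tegra::CommandHeader. A plain-shift sketch of the packing, with an illustrative bit layout (operation in the low bits, syncpoint id at bit 8, mirroring the FenceAction BitField members):

    #include <cstdint>

    enum class FenceOperation : std::uint32_t { Acquire = 0, Increment = 1 };

    // Equivalent of BuildFenceAction without the BitField helper: pack the
    // operation and the syncpoint id into the single u32 pushed after the
    // FenceAction command header.
    constexpr std::uint32_t BuildFenceActionRaw(FenceOperation op, std::uint32_t syncpoint_id) {
        return static_cast<std::uint32_t>(op) | (syncpoint_id << 8);
    }

    static_assert(BuildFenceActionRaw(FenceOperation::Increment, 5) == 0x501);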
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3ead813b0..a22811ec1 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -13,28 +13,20 @@
13#include "common/thread.h" 13#include "common/thread.h"
14#include "core/core.h" 14#include "core/core.h"
15#include "core/core_timing.h" 15#include "core/core_timing.h"
16#include "core/core_timing_util.h"
17#include "core/hardware_properties.h"
18#include "core/hle/kernel/k_readable_event.h" 16#include "core/hle/kernel/k_readable_event.h"
19#include "core/hle/kernel/kernel.h"
20#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 17#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
21#include "core/hle/service/nvdrv/nvdrv.h" 18#include "core/hle/service/nvdrv/nvdrv.h"
22#include "core/hle/service/nvflinger/buffer_queue.h" 19#include "core/hle/service/nvflinger/buffer_queue.h"
23#include "core/hle/service/nvflinger/nvflinger.h" 20#include "core/hle/service/nvflinger/nvflinger.h"
24#include "core/hle/service/vi/display/vi_display.h" 21#include "core/hle/service/vi/display/vi_display.h"
25#include "core/hle/service/vi/layer/vi_layer.h" 22#include "core/hle/service/vi/layer/vi_layer.h"
26#include "core/perf_stats.h" 23#include "video_core/gpu.h"
27#include "video_core/renderer_base.h"
28 24
29namespace Service::NVFlinger { 25namespace Service::NVFlinger {
30 26
31constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60}; 27constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60};
32 28
33void NVFlinger::VSyncThread(NVFlinger& nv_flinger) { 29void NVFlinger::SplitVSync(std::stop_token stop_token) {
34 nv_flinger.SplitVSync();
35}
36
37void NVFlinger::SplitVSync() {
38 system.RegisterHostThread(); 30 system.RegisterHostThread();
39 std::string name = "yuzu:VSyncThread"; 31 std::string name = "yuzu:VSyncThread";
40 MicroProfileOnThreadCreate(name.c_str()); 32 MicroProfileOnThreadCreate(name.c_str());
@@ -45,7 +37,7 @@ void NVFlinger::SplitVSync() {
45 Common::SetCurrentThreadName(name.c_str()); 37 Common::SetCurrentThreadName(name.c_str());
46 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 38 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
47 s64 delay = 0; 39 s64 delay = 0;
48 while (is_running) { 40 while (!stop_token.stop_requested()) {
49 guard->lock(); 41 guard->lock();
50 const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count(); 42 const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
51 Compose(); 43 Compose();
@@ -55,7 +47,7 @@ void NVFlinger::SplitVSync() {
55 const s64 next_time = std::max<s64>(0, ticks - time_passed - delay); 47 const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
56 guard->unlock(); 48 guard->unlock();
57 if (next_time > 0) { 49 if (next_time > 0) {
58 wait_event->WaitFor(std::chrono::nanoseconds{next_time}); 50 std::this_thread::sleep_for(std::chrono::nanoseconds{next_time});
59 } 51 }
60 delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time; 52 delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time;
61 } 53 }
@@ -84,9 +76,7 @@ NVFlinger::NVFlinger(Core::System& system_)
84 }); 76 });
85 77
86 if (system.IsMulticore()) { 78 if (system.IsMulticore()) {
87 is_running = true; 79 vsync_thread = std::jthread([this](std::stop_token token) { SplitVSync(token); });
88 wait_event = std::make_unique<Common::Event>();
89 vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
90 } else { 80 } else {
91 system.CoreTiming().ScheduleEvent(frame_ns, composition_event); 81 system.CoreTiming().ScheduleEvent(frame_ns, composition_event);
92 } 82 }
@@ -96,14 +86,7 @@ NVFlinger::~NVFlinger() {
96 for (auto& buffer_queue : buffer_queues) { 86 for (auto& buffer_queue : buffer_queues) {
97 buffer_queue->Disconnect(); 87 buffer_queue->Disconnect();
98 } 88 }
99 89 if (!system.IsMulticore()) {
100 if (system.IsMulticore()) {
101 is_running = false;
102 wait_event->Set();
103 vsync_thread->join();
104 vsync_thread.reset();
105 wait_event.reset();
106 } else {
107 system.CoreTiming().UnscheduleEvent(composition_event, 0); 90 system.CoreTiming().UnscheduleEvent(composition_event, 0);
108 } 91 }
109} 92}
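The threading change above swaps a hand-rolled shutdown handshake (std::thread plus an atomic is_running flag plus a Common::Event to interrupt the sleep) for C++20 std::jthread: the worker receives a std::stop_token, the loop polls stop_requested(), and the jthread destructor implicitly calls request_stop() and joins, which is why the multicore branch of ~NVFlinger() could be deleted outright. A minimal sketch of the pattern, assuming only the standard library:

    #include <chrono>
    #include <thread>

    class VSyncWorker {
    public:
        // std::jthread passes its stop_token as the first argument to the
        // thread function; no separate "running" flag is needed.
        VSyncWorker() : thread{[](std::stop_token token) { Run(token); }} {}
        // ~VSyncWorker() implicitly does thread.request_stop(); thread.join();

    private:
        static void Run(std::stop_token token) {
            using namespace std::chrono_literals;
            while (!token.stop_requested()) {
                // Compose() and the frame-pacing math from SplitVSync go here.
                std::this_thread::sleep_for(16'666'667ns); // ~1/60 s
            }
        }

        std::jthread thread;
    };

One trade-off visible in the diff: std::this_thread::sleep_for is not interruptible by a stop request the way the old wait_event->Set() was, so shutdown can lag by up to one frame interval.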
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 6d84cafb4..7935cf773 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,13 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
8#include <list> 7#include <list>
9#include <memory> 8#include <memory>
10#include <mutex> 9#include <mutex>
11#include <optional> 10#include <optional>
12#include <string>
13#include <string_view>
14#include <thread> 11#include <thread>
15#include <vector> 12#include <vector>
16 13
@@ -109,9 +106,7 @@ private:
109 /// Creates a layer with the specified layer ID in the desired display. 106 /// Creates a layer with the specified layer ID in the desired display.
110 void CreateLayerAtId(VI::Display& display, u64 layer_id); 107 void CreateLayerAtId(VI::Display& display, u64 layer_id);
111 108
112 static void VSyncThread(NVFlinger& nv_flinger); 109 void SplitVSync(std::stop_token stop_token);
113
114 void SplitVSync();
115 110
116 std::shared_ptr<Nvidia::Module> nvdrv; 111 std::shared_ptr<Nvidia::Module> nvdrv;
117 112
@@ -133,9 +128,7 @@ private:
133 128
134 Core::System& system; 129 Core::System& system;
135 130
136 std::unique_ptr<std::thread> vsync_thread; 131 std::jthread vsync_thread;
137 std::unique_ptr<Common::Event> wait_event;
138 std::atomic<bool> is_running{};
139 132
140 KernelHelpers::ServiceContext service_context; 133 KernelHelpers::ServiceContext service_context;
141}; 134};
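On the header side the same change drops <atomic>, <string>, and <string_view> and collapses three members into a single std::jthread. Because members are destroyed in reverse declaration order, everything declared before vsync_thread outlives the implicit join in its destructor, so the worker can safely touch that state until it observes the stop request. A compressed sketch of the ordering rule:

    #include <thread>

    struct FlingerSketch {
        // State the VSync worker reads (displays, buffer queues, guard mutex, ...).
        int frame_state{};

        // Declared after that state: destroyed (and therefore joined) first,
        // before anything it references is torn down.
        std::jthread vsync_thread;
    };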
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 8b86ad050..a8c4b4415 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -24,6 +24,7 @@
24#include "command_classes/vic.h" 24#include "command_classes/vic.h"
25#include "video_core/cdma_pusher.h" 25#include "video_core/cdma_pusher.h"
26#include "video_core/command_classes/nvdec_common.h" 26#include "video_core/command_classes/nvdec_common.h"
27#include "video_core/command_classes/sync_manager.h"
27#include "video_core/engines/maxwell_3d.h" 28#include "video_core/engines/maxwell_3d.h"
28#include "video_core/gpu.h" 29#include "video_core/gpu.h"
29#include "video_core/memory_manager.h" 30#include "video_core/memory_manager.h"
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 1bada44dd..87b49d6ea 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -9,13 +9,13 @@
9 9
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/command_classes/sync_manager.h"
13 12
14namespace Tegra { 13namespace Tegra {
15 14
16class GPU; 15class GPU;
17class Host1x; 16class Host1x;
18class Nvdec; 17class Nvdec;
18class SyncptIncrManager;
19class Vic; 19class Vic;
20 20
21enum class ChSubmissionMode : u32 { 21enum class ChSubmissionMode : u32 {
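cdma_pusher.h previously included sync_manager.h just to name the type; the pair of hunks above replaces that with a forward declaration and moves the real include into cdma_pusher.cpp, trimming the transitive include graph. The pattern, sketched under the assumption that the header only stores the type behind a pointer:

    // In the header: a forward declaration suffices for pointers, references,
    // and smart-pointer members whose special functions are defined out of line.
    namespace Tegra {
    class SyncptIncrManager;
    }

    // In the source file: include the full definition where the object is
    // actually constructed and used.
    //     #include "video_core/command_classes/sync_manager.h"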
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index b86c3a757..b1d455e30 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Tegra { 7#include "common/common_types.h"
8#include "common/math_util.h"
8 9
10namespace Tegra {
9/** 11/**
10 * Struct describing framebuffer configuration 12 * Struct describing framebuffer configuration
11 */ 13 */
@@ -16,6 +18,21 @@ struct FramebufferConfig {
16 B8G8R8A8_UNORM = 5, 18 B8G8R8A8_UNORM = 5,
17 }; 19 };
18 20
21 enum class TransformFlags : u32 {
22 /// No transform flags are set
23 Unset = 0x00,
24 /// Flip source image horizontally (around the vertical axis)
25 FlipH = 0x01,
26 /// Flip source image vertically (around the horizontal axis)
27 FlipV = 0x02,
28 /// Rotate source image 90 degrees clockwise
29 Rotate90 = 0x04,
30 /// Rotate source image 180 degrees
31 Rotate180 = 0x03,
32 /// Rotate source image 270 degrees clockwise
33 Rotate270 = 0x07,
34 };
35
19 VAddr address{}; 36 VAddr address{};
20 u32 offset{}; 37 u32 offset{};
21 u32 width{}; 38 u32 width{};
@@ -23,7 +40,6 @@ struct FramebufferConfig {
23 u32 stride{}; 40 u32 stride{};
24 PixelFormat pixel_format{}; 41 PixelFormat pixel_format{};
25 42
26 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
27 TransformFlags transform_flags{}; 43 TransformFlags transform_flags{};
28 Common::Rectangle<int> crop_rect; 44 Common::Rectangle<int> crop_rect;
29}; 45};
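framebuffer_config.h previously reached into the service layer for its transform type (Service::NVFlinger::BufferQueue::BufferTransformFlags); defining TransformFlags locally removes that video_core -> service dependency, which is also why nvdisp_disp0.cpp now has to cast in the first hunk of this diff. The enumerators look out of numeric order because they follow the Android buffer-transform convention, in which the rotations are compositions of the flip bits; the relationships can be checked at compile time:

    #include <cstdint>

    enum class TransformFlags : std::uint32_t {
        Unset = 0x00,
        FlipH = 0x01,
        FlipV = 0x02,
        Rotate90 = 0x04,
        Rotate180 = 0x03, // == FlipH | FlipV
        Rotate270 = 0x07, // == Rotate180 | Rotate90
    };

    constexpr auto raw = [](TransformFlags f) { return static_cast<std::uint32_t>(f); };

    static_assert(raw(TransformFlags::Rotate180) ==
                  (raw(TransformFlags::FlipH) | raw(TransformFlags::FlipV)));
    static_assert(raw(TransformFlags::Rotate270) ==
                  (raw(TransformFlags::Rotate180) | raw(TransformFlags::Rotate90)));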
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2ae3639b5..ab7c21a49 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,540 +2,913 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <atomic>
5#include <chrono> 7#include <chrono>
8#include <condition_variable>
9#include <list>
10#include <memory>
6 11
7#include "common/assert.h" 12#include "common/assert.h"
8#include "common/microprofile.h" 13#include "common/microprofile.h"
9#include "common/settings.h" 14#include "common/settings.h"
10#include "core/core.h" 15#include "core/core.h"
11#include "core/core_timing.h" 16#include "core/core_timing.h"
12#include "core/core_timing_util.h"
13#include "core/frontend/emu_window.h" 17#include "core/frontend/emu_window.h"
14#include "core/hardware_interrupt_manager.h" 18#include "core/hardware_interrupt_manager.h"
15#include "core/memory.h" 19#include "core/hle/service/nvdrv/nvdata.h"
20#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "core/perf_stats.h" 21#include "core/perf_stats.h"
22#include "video_core/cdma_pusher.h"
23#include "video_core/dma_pusher.h"
17#include "video_core/engines/fermi_2d.h" 24#include "video_core/engines/fermi_2d.h"
18#include "video_core/engines/kepler_compute.h" 25#include "video_core/engines/kepler_compute.h"
19#include "video_core/engines/kepler_memory.h" 26#include "video_core/engines/kepler_memory.h"
20#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/maxwell_dma.h" 28#include "video_core/engines/maxwell_dma.h"
22#include "video_core/gpu.h" 29#include "video_core/gpu.h"
30#include "video_core/gpu_thread.h"
23#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
24#include "video_core/renderer_base.h" 32#include "video_core/renderer_base.h"
25#include "video_core/shader_notify.h" 33#include "video_core/shader_notify.h"
26#include "video_core/video_core.h"
27 34
28namespace Tegra { 35namespace Tegra {
29 36
30MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 37MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
31 38
32GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) 39struct GPU::Impl {
33 : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, 40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
34 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_}, 41 : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
35 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, 42 system)},
36 fermi_2d{std::make_unique<Engines::Fermi2D>()}, 43 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 44 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 45 fermi_2d{std::make_unique<Engines::Fermi2D>()},
39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 46 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 47 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
41 gpu_thread{system_, is_async_} {} 48 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
49 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
50 gpu_thread{system_, is_async_} {}
51
52 ~Impl() = default;
53
54 /// Binds a renderer to the GPU.
55 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
56 renderer = std::move(renderer_);
57 rasterizer = renderer->ReadRasterizer();
58
59 memory_manager->BindRasterizer(rasterizer);
60 maxwell_3d->BindRasterizer(rasterizer);
61 fermi_2d->BindRasterizer(rasterizer);
62 kepler_compute->BindRasterizer(rasterizer);
63 maxwell_dma->BindRasterizer(rasterizer);
64 }
42 65
43GPU::~GPU() = default; 66 /// Calls a GPU method.
67 void CallMethod(const GPU::MethodCall& method_call) {
68 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
69 method_call.subchannel);
70
71 ASSERT(method_call.subchannel < bound_engines.size());
72
73 if (ExecuteMethodOnEngine(method_call.method)) {
74 CallEngineMethod(method_call);
75 } else {
76 CallPullerMethod(method_call);
77 }
78 }
79
80 /// Calls a GPU multivalue method.
81 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
82 u32 methods_pending) {
83 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
44 84
45void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 85 ASSERT(subchannel < bound_engines.size());
46 renderer = std::move(renderer_); 86
47 rasterizer = renderer->ReadRasterizer(); 87 if (ExecuteMethodOnEngine(method)) {
88 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
89 } else {
90 for (std::size_t i = 0; i < amount; i++) {
91 CallPullerMethod(GPU::MethodCall{
92 method,
93 base_start[i],
94 subchannel,
95 methods_pending - static_cast<u32>(i),
96 });
97 }
98 }
99 }
100
101 /// Flush all current written commands into the host GPU for execution.
102 void FlushCommands() {
103 rasterizer->FlushCommands();
104 }
105
106 /// Synchronizes CPU writes with Host GPU memory.
107 void SyncGuestHost() {
108 rasterizer->SyncGuestHost();
109 }
110
111 /// Signal the ending of command list.
112 void OnCommandListEnd() {
113 if (is_async) {
114 // This command only applies to asynchronous GPU mode
115 gpu_thread.OnCommandListEnd();
116 }
117 }
118
119 /// Request a host GPU memory flush from the CPU.
120 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
121 std::unique_lock lck{flush_request_mutex};
122 const u64 fence = ++last_flush_fence;
123 flush_requests.emplace_back(fence, addr, size);
124 return fence;
125 }
126
127 /// Obtains current flush request fence id.
128 [[nodiscard]] u64 CurrentFlushRequestFence() const {
129 return current_flush_fence.load(std::memory_order_relaxed);
130 }
131
132 /// Tick pending requests within the GPU.
133 void TickWork() {
134 std::unique_lock lck{flush_request_mutex};
135 while (!flush_requests.empty()) {
136 auto& request = flush_requests.front();
137 const u64 fence = request.fence;
138 const VAddr addr = request.addr;
139 const std::size_t size = request.size;
140 flush_requests.pop_front();
141 flush_request_mutex.unlock();
142 rasterizer->FlushRegion(addr, size);
143 current_flush_fence.store(fence);
144 flush_request_mutex.lock();
145 }
146 }
147
148 /// Returns a reference to the Maxwell3D GPU engine.
149 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
150 return *maxwell_3d;
151 }
152
153 /// Returns a const reference to the Maxwell3D GPU engine.
154 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
155 return *maxwell_3d;
156 }
157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
160 return *kepler_compute;
161 }
162
163 /// Returns a reference to the KeplerCompute GPU engine.
164 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
165 return *kepler_compute;
166 }
167
168 /// Returns a reference to the GPU memory manager.
169 [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
170 return *memory_manager;
171 }
172
173 /// Returns a const reference to the GPU memory manager.
174 [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
175 return *memory_manager;
176 }
177
178 /// Returns a reference to the GPU DMA pusher.
179 [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
180 return *dma_pusher;
181 }
182
183 /// Returns a const reference to the GPU DMA pusher.
184 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
185 return *dma_pusher;
186 }
187
188 /// Returns a reference to the GPU CDMA pusher.
189 [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
190 return *cdma_pusher;
191 }
192
193 /// Returns a const reference to the GPU CDMA pusher.
194 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
195 return *cdma_pusher;
196 }
197
198 /// Returns a reference to the underlying renderer.
199 [[nodiscard]] VideoCore::RendererBase& Renderer() {
200 return *renderer;
201 }
202
203 /// Returns a const reference to the underlying renderer.
204 [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
205 return *renderer;
206 }
207
208 /// Returns a reference to the shader notifier.
209 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
210 return *shader_notify;
211 }
212
213 /// Returns a const reference to the shader notifier.
214 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
215 return *shader_notify;
216 }
217
218 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
219 void WaitFence(u32 syncpoint_id, u32 value) {
220 // Synced GPU, is always in sync
221 if (!is_async) {
222 return;
223 }
224 if (syncpoint_id == UINT32_MAX) {
225 // TODO: Research what this does.
226 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
227 return;
228 }
229 MICROPROFILE_SCOPE(GPU_wait);
230 std::unique_lock lock{sync_mutex};
231 sync_cv.wait(lock, [=, this] {
232 if (shutting_down.load(std::memory_order_relaxed)) {
233 // We're shutting down, ensure no threads continue to wait for the next syncpoint
234 return true;
235 }
236 return syncpoints.at(syncpoint_id).load() >= value;
237 });
238 }
239
240 void IncrementSyncPoint(u32 syncpoint_id) {
241 auto& syncpoint = syncpoints.at(syncpoint_id);
242 syncpoint++;
243 std::lock_guard lock{sync_mutex};
244 sync_cv.notify_all();
245 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
246 if (!interrupt.empty()) {
247 u32 value = syncpoint.load();
248 auto it = interrupt.begin();
249 while (it != interrupt.end()) {
250 if (value >= *it) {
251 TriggerCpuInterrupt(syncpoint_id, *it);
252 it = interrupt.erase(it);
253 continue;
254 }
255 it++;
256 }
257 }
258 }
259
260 [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
261 return syncpoints.at(syncpoint_id).load();
262 }
263
264 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
265 std::lock_guard lock{sync_mutex};
266 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
267 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
268 [value](u32 in_value) { return in_value == value; });
269 if (contains) {
270 return;
271 }
272 interrupt.emplace_back(value);
273 }
274
275 [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
276 std::lock_guard lock{sync_mutex};
277 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
278 const auto iter =
279 std::find_if(interrupt.begin(), interrupt.end(),
280 [value](u32 interrupt_value) { return value == interrupt_value; });
281
282 if (iter == interrupt.end()) {
283 return false;
284 }
285 interrupt.erase(iter);
286 return true;
287 }
288
289 [[nodiscard]] u64 GetTicks() const {
 290        // These values were reverse engineered by fincs from NVN
 291        // The GPU clock is reported in units of 384/625 nanoseconds
292 constexpr u64 gpu_ticks_num = 384;
293 constexpr u64 gpu_ticks_den = 625;
294
295 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
296 if (Settings::values.use_fast_gpu_time.GetValue()) {
297 nanoseconds /= 256;
298 }
299 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
300 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
301 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
302 }
303
304 [[nodiscard]] bool IsAsync() const {
305 return is_async;
306 }
307
308 [[nodiscard]] bool UseNvdec() const {
309 return use_nvdec;
310 }
311
312 void RendererFrameEndNotify() {
313 system.GetPerfStats().EndGameFrame();
314 }
315
316 /// Performs any additional setup necessary in order to begin GPU emulation.
317 /// This can be used to launch any necessary threads and register any necessary
318 /// core timing events.
319 void Start() {
320 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
321 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
322 cpu_context->MakeCurrent();
323 }
324
325 /// Obtain the CPU Context
326 void ObtainContext() {
327 cpu_context->MakeCurrent();
328 }
329
330 /// Release the CPU Context
331 void ReleaseContext() {
332 cpu_context->DoneCurrent();
333 }
334
335 /// Push GPU command entries to be processed
336 void PushGPUEntries(Tegra::CommandList&& entries) {
337 gpu_thread.SubmitList(std::move(entries));
338 }
339
340 /// Push GPU command buffer entries to be processed
341 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
342 if (!use_nvdec) {
343 return;
344 }
345
346 if (!cdma_pusher) {
347 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
348 }
349
 350        // SubmitCommandBuffer would make the nvdec operations async; this is not currently working
351 // TODO(ameerj): RE proper async nvdec operation
352 // gpu_thread.SubmitCommandBuffer(std::move(entries));
353
354 cdma_pusher->ProcessEntries(std::move(entries));
355 }
356
357 /// Frees the CDMAPusher instance to free up resources
358 void ClearCdmaInstance() {
359 cdma_pusher.reset();
360 }
361
362 /// Swap buffers (render frame)
363 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
364 gpu_thread.SwapBuffers(framebuffer);
365 }
366
367 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
368 void FlushRegion(VAddr addr, u64 size) {
369 gpu_thread.FlushRegion(addr, size);
370 }
371
372 /// Notify rasterizer that any caches of the specified region should be invalidated
373 void InvalidateRegion(VAddr addr, u64 size) {
374 gpu_thread.InvalidateRegion(addr, size);
375 }
376
377 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
378 void FlushAndInvalidateRegion(VAddr addr, u64 size) {
379 gpu_thread.FlushAndInvalidateRegion(addr, size);
380 }
381
382 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
383 auto& interrupt_manager = system.InterruptManager();
384 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
385 }
386
387 void ProcessBindMethod(const GPU::MethodCall& method_call) {
388 // Bind the current subchannel to the desired engine id.
389 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
390 method_call.argument);
391 const auto engine_id = static_cast<EngineID>(method_call.argument);
392 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
393 switch (engine_id) {
394 case EngineID::FERMI_TWOD_A:
395 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
396 break;
397 case EngineID::MAXWELL_B:
398 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
399 break;
400 case EngineID::KEPLER_COMPUTE_B:
401 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
402 break;
403 case EngineID::MAXWELL_DMA_COPY_A:
404 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
405 break;
406 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
407 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
408 break;
409 default:
410 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
411 }
412 }
48 413
49 memory_manager->BindRasterizer(rasterizer); 414 void ProcessFenceActionMethod() {
50 maxwell_3d->BindRasterizer(rasterizer); 415 switch (regs.fence_action.op) {
51 fermi_2d->BindRasterizer(rasterizer); 416 case GPU::FenceOperation::Acquire:
52 kepler_compute->BindRasterizer(rasterizer); 417 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
53 maxwell_dma->BindRasterizer(rasterizer); 418 break;
419 case GPU::FenceOperation::Increment:
420 IncrementSyncPoint(regs.fence_action.syncpoint_id);
421 break;
422 default:
423 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
424 }
425 }
426
427 void ProcessWaitForInterruptMethod() {
428 // TODO(bunnei) ImplementMe
429 LOG_WARNING(HW_GPU, "(STUBBED) called");
430 }
431
432 void ProcessSemaphoreTriggerMethod() {
433 const auto semaphoreOperationMask = 0xF;
434 const auto op =
435 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
436 if (op == GpuSemaphoreOperation::WriteLong) {
437 struct Block {
438 u32 sequence;
439 u32 zeros = 0;
440 u64 timestamp;
441 };
442
443 Block block{};
444 block.sequence = regs.semaphore_sequence;
445 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
446 // CoreTiming
447 block.timestamp = GetTicks();
448 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
449 sizeof(block));
450 } else {
451 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
452 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
453 (op == GpuSemaphoreOperation::AcquireGequal &&
454 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
455 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
456 // Nothing to do in this case
457 } else {
458 regs.acquire_source = true;
459 regs.acquire_value = regs.semaphore_sequence;
460 if (op == GpuSemaphoreOperation::AcquireEqual) {
461 regs.acquire_active = true;
462 regs.acquire_mode = false;
463 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
464 regs.acquire_active = true;
465 regs.acquire_mode = true;
466 } else if (op == GpuSemaphoreOperation::AcquireMask) {
467 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
468 // semaphore_sequence, gives a non-0 result
469 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
470 } else {
471 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
472 }
473 }
474 }
475 }
476
477 void ProcessSemaphoreRelease() {
478 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
479 regs.semaphore_release);
480 }
481
482 void ProcessSemaphoreAcquire() {
483 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
484 const auto value = regs.semaphore_acquire;
485 if (word != value) {
486 regs.acquire_active = true;
487 regs.acquire_value = value;
488 // TODO(kemathe73) figure out how to do the acquire_timeout
489 regs.acquire_mode = false;
490 regs.acquire_source = false;
491 }
492 }
493
494 /// Calls a GPU puller method.
495 void CallPullerMethod(const GPU::MethodCall& method_call) {
496 regs.reg_array[method_call.method] = method_call.argument;
497 const auto method = static_cast<BufferMethods>(method_call.method);
498
499 switch (method) {
500 case BufferMethods::BindObject: {
501 ProcessBindMethod(method_call);
502 break;
503 }
504 case BufferMethods::Nop:
505 case BufferMethods::SemaphoreAddressHigh:
506 case BufferMethods::SemaphoreAddressLow:
507 case BufferMethods::SemaphoreSequence:
508 case BufferMethods::UnkCacheFlush:
509 case BufferMethods::WrcacheFlush:
510 case BufferMethods::FenceValue:
511 break;
512 case BufferMethods::RefCnt:
513 rasterizer->SignalReference();
514 break;
515 case BufferMethods::FenceAction:
516 ProcessFenceActionMethod();
517 break;
518 case BufferMethods::WaitForInterrupt:
519 ProcessWaitForInterruptMethod();
520 break;
521 case BufferMethods::SemaphoreTrigger: {
522 ProcessSemaphoreTriggerMethod();
523 break;
524 }
525 case BufferMethods::NotifyIntr: {
526 // TODO(Kmather73): Research and implement this method.
527 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
528 break;
529 }
530 case BufferMethods::Unk28: {
531 // TODO(Kmather73): Research and implement this method.
532 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
533 break;
534 }
535 case BufferMethods::SemaphoreAcquire: {
536 ProcessSemaphoreAcquire();
537 break;
538 }
539 case BufferMethods::SemaphoreRelease: {
540 ProcessSemaphoreRelease();
541 break;
542 }
543 case BufferMethods::Yield: {
544 // TODO(Kmather73): Research and implement this method.
545 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
546 break;
547 }
548 default:
549 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
550 break;
551 }
552 }
553
554 /// Calls a GPU engine method.
555 void CallEngineMethod(const GPU::MethodCall& method_call) {
556 const EngineID engine = bound_engines[method_call.subchannel];
557
558 switch (engine) {
559 case EngineID::FERMI_TWOD_A:
560 fermi_2d->CallMethod(method_call.method, method_call.argument,
561 method_call.IsLastCall());
562 break;
563 case EngineID::MAXWELL_B:
564 maxwell_3d->CallMethod(method_call.method, method_call.argument,
565 method_call.IsLastCall());
566 break;
567 case EngineID::KEPLER_COMPUTE_B:
568 kepler_compute->CallMethod(method_call.method, method_call.argument,
569 method_call.IsLastCall());
570 break;
571 case EngineID::MAXWELL_DMA_COPY_A:
572 maxwell_dma->CallMethod(method_call.method, method_call.argument,
573 method_call.IsLastCall());
574 break;
575 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
576 kepler_memory->CallMethod(method_call.method, method_call.argument,
577 method_call.IsLastCall());
578 break;
579 default:
580 UNIMPLEMENTED_MSG("Unimplemented engine");
581 }
582 }
583
584 /// Calls a GPU engine multivalue method.
585 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
586 u32 methods_pending) {
587 const EngineID engine = bound_engines[subchannel];
588
589 switch (engine) {
590 case EngineID::FERMI_TWOD_A:
591 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
592 break;
593 case EngineID::MAXWELL_B:
594 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
595 break;
596 case EngineID::KEPLER_COMPUTE_B:
597 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
598 break;
599 case EngineID::MAXWELL_DMA_COPY_A:
600 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
601 break;
602 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
603 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
604 break;
605 default:
606 UNIMPLEMENTED_MSG("Unimplemented engine");
607 }
608 }
609
610 /// Determines where the method should be executed.
611 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
612 const auto buffer_method = static_cast<BufferMethods>(method);
613 return buffer_method >= BufferMethods::NonPullerMethods;
614 }
615
616 struct Regs {
617 static constexpr size_t NUM_REGS = 0x40;
618
619 union {
620 struct {
621 INSERT_PADDING_WORDS_NOINIT(0x4);
622 struct {
623 u32 address_high;
624 u32 address_low;
625
626 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
627 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
628 address_low);
629 }
630 } semaphore_address;
631
632 u32 semaphore_sequence;
633 u32 semaphore_trigger;
634 INSERT_PADDING_WORDS_NOINIT(0xC);
635
 636                // The pusher and the puller share the reference counter; the pusher only has read
637 // access
638 u32 reference_count;
639 INSERT_PADDING_WORDS_NOINIT(0x5);
640
641 u32 semaphore_acquire;
642 u32 semaphore_release;
643 u32 fence_value;
644 GPU::FenceAction fence_action;
645 INSERT_PADDING_WORDS_NOINIT(0xE2);
646
647 // Puller state
648 u32 acquire_mode;
649 u32 acquire_source;
650 u32 acquire_active;
651 u32 acquire_timeout;
652 u32 acquire_value;
653 };
654 std::array<u32, NUM_REGS> reg_array;
655 };
656 } regs{};
657
658 GPU& gpu;
659 Core::System& system;
660 std::unique_ptr<Tegra::MemoryManager> memory_manager;
661 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
662 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
663 std::unique_ptr<VideoCore::RendererBase> renderer;
664 VideoCore::RasterizerInterface* rasterizer = nullptr;
665 const bool use_nvdec;
666
667 /// Mapping of command subchannels to their bound engine ids
668 std::array<EngineID, 8> bound_engines{};
669 /// 3D engine
670 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
671 /// 2D engine
672 std::unique_ptr<Engines::Fermi2D> fermi_2d;
673 /// Compute engine
674 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
675 /// DMA engine
676 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
677 /// Inline memory engine
678 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
679 /// Shader build notifier
680 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
681 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
682 std::atomic_bool shutting_down{};
683
684 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
685
686 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
687
688 std::mutex sync_mutex;
689 std::mutex device_mutex;
690
691 std::condition_variable sync_cv;
692
693 struct FlushRequest {
694 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
695 : fence{fence_}, addr{addr_}, size{size_} {}
696 u64 fence;
697 VAddr addr;
698 std::size_t size;
699 };
700
701 std::list<FlushRequest> flush_requests;
702 std::atomic<u64> current_flush_fence{};
703 u64 last_flush_fence{};
704 std::mutex flush_request_mutex;
705
706 const bool is_async;
707
708 VideoCommon::GPUThread::ThreadManager gpu_thread;
709 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
710
711#define ASSERT_REG_POSITION(field_name, position) \
712 static_assert(offsetof(Regs, field_name) == position * 4, \
713 "Field " #field_name " has invalid position")
714
715 ASSERT_REG_POSITION(semaphore_address, 0x4);
716 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
717 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
718 ASSERT_REG_POSITION(reference_count, 0x14);
719 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
720 ASSERT_REG_POSITION(semaphore_release, 0x1B);
721 ASSERT_REG_POSITION(fence_value, 0x1C);
722 ASSERT_REG_POSITION(fence_action, 0x1D);
723
724 ASSERT_REG_POSITION(acquire_mode, 0x100);
725 ASSERT_REG_POSITION(acquire_source, 0x101);
726 ASSERT_REG_POSITION(acquire_active, 0x102);
727 ASSERT_REG_POSITION(acquire_timeout, 0x103);
728 ASSERT_REG_POSITION(acquire_value, 0x104);
729
730#undef ASSERT_REG_POSITION
731
732 enum class GpuSemaphoreOperation {
733 AcquireEqual = 0x1,
734 WriteLong = 0x2,
735 AcquireGequal = 0x4,
736 AcquireMask = 0x8,
737 };
738};
739
740GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
741 : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
742
743GPU::~GPU() = default;
744
745void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
746 impl->BindRenderer(std::move(renderer));
54} 747}
55 748
56Engines::Maxwell3D& GPU::Maxwell3D() { 749void GPU::CallMethod(const MethodCall& method_call) {
57 return *maxwell_3d; 750 impl->CallMethod(method_call);
58} 751}
59 752
60const Engines::Maxwell3D& GPU::Maxwell3D() const { 753void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
61 return *maxwell_3d; 754 u32 methods_pending) {
755 impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
62} 756}
63 757
64Engines::KeplerCompute& GPU::KeplerCompute() { 758void GPU::FlushCommands() {
65 return *kepler_compute; 759 impl->FlushCommands();
66} 760}
67 761
68const Engines::KeplerCompute& GPU::KeplerCompute() const { 762void GPU::SyncGuestHost() {
69 return *kepler_compute; 763 impl->SyncGuestHost();
70} 764}
71 765
72MemoryManager& GPU::MemoryManager() { 766void GPU::OnCommandListEnd() {
73 return *memory_manager; 767 impl->OnCommandListEnd();
74} 768}
75 769
76const MemoryManager& GPU::MemoryManager() const { 770u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
77 return *memory_manager; 771 return impl->RequestFlush(addr, size);
78} 772}
79 773
80DmaPusher& GPU::DmaPusher() { 774u64 GPU::CurrentFlushRequestFence() const {
81 return *dma_pusher; 775 return impl->CurrentFlushRequestFence();
82} 776}
83 777
84Tegra::CDmaPusher& GPU::CDmaPusher() { 778void GPU::TickWork() {
85 return *cdma_pusher; 779 impl->TickWork();
86} 780}
87 781
88const DmaPusher& GPU::DmaPusher() const { 782Engines::Maxwell3D& GPU::Maxwell3D() {
89 return *dma_pusher; 783 return impl->Maxwell3D();
90} 784}
91 785
92const Tegra::CDmaPusher& GPU::CDmaPusher() const { 786const Engines::Maxwell3D& GPU::Maxwell3D() const {
93 return *cdma_pusher; 787 return impl->Maxwell3D();
94} 788}
95 789
96void GPU::WaitFence(u32 syncpoint_id, u32 value) { 790Engines::KeplerCompute& GPU::KeplerCompute() {
97 // Synced GPU, is always in sync 791 return impl->KeplerCompute();
98 if (!is_async) {
99 return;
100 }
101 if (syncpoint_id == UINT32_MAX) {
102 // TODO: Research what this does.
103 LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
104 return;
105 }
106 MICROPROFILE_SCOPE(GPU_wait);
107 std::unique_lock lock{sync_mutex};
108 sync_cv.wait(lock, [=, this] {
109 if (shutting_down.load(std::memory_order_relaxed)) {
110 // We're shutting down, ensure no threads continue to wait for the next syncpoint
111 return true;
112 }
113 return syncpoints.at(syncpoint_id).load() >= value;
114 });
115}
116
117void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
118 auto& syncpoint = syncpoints.at(syncpoint_id);
119 syncpoint++;
120 std::lock_guard lock{sync_mutex};
121 sync_cv.notify_all();
122 auto& interrupt = syncpt_interrupts.at(syncpoint_id);
123 if (!interrupt.empty()) {
124 u32 value = syncpoint.load();
125 auto it = interrupt.begin();
126 while (it != interrupt.end()) {
127 if (value >= *it) {
128 TriggerCpuInterrupt(syncpoint_id, *it);
129 it = interrupt.erase(it);
130 continue;
131 }
132 it++;
133 }
134 }
135} 792}
136 793
137u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { 794const Engines::KeplerCompute& GPU::KeplerCompute() const {
138 return syncpoints.at(syncpoint_id).load(); 795 return impl->KeplerCompute();
139} 796}
140 797
141void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 798Tegra::MemoryManager& GPU::MemoryManager() {
142 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 799 return impl->MemoryManager();
143 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
144 [value](u32 in_value) { return in_value == value; });
145 if (contains) {
146 return;
147 }
148 interrupt.emplace_back(value);
149} 800}
150 801
151bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { 802const Tegra::MemoryManager& GPU::MemoryManager() const {
152 std::lock_guard lock{sync_mutex}; 803 return impl->MemoryManager();
153 auto& interrupt = syncpt_interrupts.at(syncpoint_id); 804}
154 const auto iter =
155 std::find_if(interrupt.begin(), interrupt.end(),
156 [value](u32 interrupt_value) { return value == interrupt_value; });
157 805
158 if (iter == interrupt.end()) { 806Tegra::DmaPusher& GPU::DmaPusher() {
159 return false; 807 return impl->DmaPusher();
160 }
161 interrupt.erase(iter);
162 return true;
163} 808}
164 809
165u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 810const Tegra::DmaPusher& GPU::DmaPusher() const {
166 std::unique_lock lck{flush_request_mutex}; 811 return impl->DmaPusher();
167 const u64 fence = ++last_flush_fence;
168 flush_requests.emplace_back(fence, addr, size);
169 return fence;
170} 812}
171 813
172void GPU::TickWork() { 814Tegra::CDmaPusher& GPU::CDmaPusher() {
173 std::unique_lock lck{flush_request_mutex}; 815 return impl->CDmaPusher();
174 while (!flush_requests.empty()) {
175 auto& request = flush_requests.front();
176 const u64 fence = request.fence;
177 const VAddr addr = request.addr;
178 const std::size_t size = request.size;
179 flush_requests.pop_front();
180 flush_request_mutex.unlock();
181 rasterizer->FlushRegion(addr, size);
182 current_flush_fence.store(fence);
183 flush_request_mutex.lock();
184 }
185} 816}
186 817
187u64 GPU::GetTicks() const { 818const Tegra::CDmaPusher& GPU::CDmaPusher() const {
188 // This values were reversed engineered by fincs from NVN 819 return impl->CDmaPusher();
189 // The gpu clock is reported in units of 385/625 nanoseconds 820}
190 constexpr u64 gpu_ticks_num = 384;
191 constexpr u64 gpu_ticks_den = 625;
192 821
193 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); 822VideoCore::RendererBase& GPU::Renderer() {
194 if (Settings::values.use_fast_gpu_time.GetValue()) { 823 return impl->Renderer();
195 nanoseconds /= 256;
196 }
197 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
198 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
199 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
200} 824}
201 825
202void GPU::RendererFrameEndNotify() { 826const VideoCore::RendererBase& GPU::Renderer() const {
203 system.GetPerfStats().EndGameFrame(); 827 return impl->Renderer();
204} 828}
205 829
206void GPU::FlushCommands() { 830VideoCore::ShaderNotify& GPU::ShaderNotify() {
207 rasterizer->FlushCommands(); 831 return impl->ShaderNotify();
208} 832}
209 833
210void GPU::SyncGuestHost() { 834const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
211 rasterizer->SyncGuestHost(); 835 return impl->ShaderNotify();
212} 836}
213 837
214enum class GpuSemaphoreOperation { 838void GPU::WaitFence(u32 syncpoint_id, u32 value) {
215 AcquireEqual = 0x1, 839 impl->WaitFence(syncpoint_id, value);
216 WriteLong = 0x2, 840}
217 AcquireGequal = 0x4,
218 AcquireMask = 0x8,
219};
220 841
221void GPU::CallMethod(const MethodCall& method_call) { 842void GPU::IncrementSyncPoint(u32 syncpoint_id) {
222 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, 843 impl->IncrementSyncPoint(syncpoint_id);
223 method_call.subchannel); 844}
224 845
225 ASSERT(method_call.subchannel < bound_engines.size()); 846u32 GPU::GetSyncpointValue(u32 syncpoint_id) const {
847 return impl->GetSyncpointValue(syncpoint_id);
848}
226 849
227 if (ExecuteMethodOnEngine(method_call.method)) { 850void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
228 CallEngineMethod(method_call); 851 impl->RegisterSyncptInterrupt(syncpoint_id, value);
229 } else {
230 CallPullerMethod(method_call);
231 }
232} 852}
233 853
234void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 854bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
235 u32 methods_pending) { 855 return impl->CancelSyncptInterrupt(syncpoint_id, value);
236 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
237
238 ASSERT(subchannel < bound_engines.size());
239
240 if (ExecuteMethodOnEngine(method)) {
241 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
242 } else {
243 for (std::size_t i = 0; i < amount; i++) {
244 CallPullerMethod(MethodCall{
245 method,
246 base_start[i],
247 subchannel,
248 methods_pending - static_cast<u32>(i),
249 });
250 }
251 }
252} 856}
253 857
254bool GPU::ExecuteMethodOnEngine(u32 method) { 858u64 GPU::GetTicks() const {
255 const auto buffer_method = static_cast<BufferMethods>(method); 859 return impl->GetTicks();
256 return buffer_method >= BufferMethods::NonPullerMethods; 860}
257}
258
259void GPU::CallPullerMethod(const MethodCall& method_call) {
260 regs.reg_array[method_call.method] = method_call.argument;
261 const auto method = static_cast<BufferMethods>(method_call.method);
262
263 switch (method) {
264 case BufferMethods::BindObject: {
265 ProcessBindMethod(method_call);
266 break;
267 }
268 case BufferMethods::Nop:
269 case BufferMethods::SemaphoreAddressHigh:
270 case BufferMethods::SemaphoreAddressLow:
271 case BufferMethods::SemaphoreSequence:
272 case BufferMethods::UnkCacheFlush:
273 case BufferMethods::WrcacheFlush:
274 case BufferMethods::FenceValue:
275 break;
276 case BufferMethods::RefCnt:
277 rasterizer->SignalReference();
278 break;
279 case BufferMethods::FenceAction:
280 ProcessFenceActionMethod();
281 break;
282 case BufferMethods::WaitForInterrupt:
283 ProcessWaitForInterruptMethod();
284 break;
285 case BufferMethods::SemaphoreTrigger: {
286 ProcessSemaphoreTriggerMethod();
287 break;
288 }
289 case BufferMethods::NotifyIntr: {
290 // TODO(Kmather73): Research and implement this method.
291 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
292 break;
293 }
294 case BufferMethods::Unk28: {
295 // TODO(Kmather73): Research and implement this method.
296 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
297 break;
298 }
299 case BufferMethods::SemaphoreAcquire: {
300 ProcessSemaphoreAcquire();
301 break;
302 }
303 case BufferMethods::SemaphoreRelease: {
304 ProcessSemaphoreRelease();
305 break;
306 }
307 case BufferMethods::Yield: {
308 // TODO(Kmather73): Research and implement this method.
309 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
310 break;
311 }
312 default:
313 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
314 break;
315 }
316}
317
318void GPU::CallEngineMethod(const MethodCall& method_call) {
319 const EngineID engine = bound_engines[method_call.subchannel];
320
321 switch (engine) {
322 case EngineID::FERMI_TWOD_A:
323 fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
324 break;
325 case EngineID::MAXWELL_B:
326 maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
327 break;
328 case EngineID::KEPLER_COMPUTE_B:
329 kepler_compute->CallMethod(method_call.method, method_call.argument,
330 method_call.IsLastCall());
331 break;
332 case EngineID::MAXWELL_DMA_COPY_A:
333 maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
334 break;
335 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
336 kepler_memory->CallMethod(method_call.method, method_call.argument,
337 method_call.IsLastCall());
338 break;
339 default:
340 UNIMPLEMENTED_MSG("Unimplemented engine");
341 }
342}
343
344void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
345 u32 methods_pending) {
346 const EngineID engine = bound_engines[subchannel];
347
348 switch (engine) {
349 case EngineID::FERMI_TWOD_A:
350 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
351 break;
352 case EngineID::MAXWELL_B:
353 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
354 break;
355 case EngineID::KEPLER_COMPUTE_B:
356 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
357 break;
358 case EngineID::MAXWELL_DMA_COPY_A:
359 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
360 break;
361 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
362 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
363 break;
364 default:
365 UNIMPLEMENTED_MSG("Unimplemented engine");
366 }
367}
368
369void GPU::ProcessBindMethod(const MethodCall& method_call) {
370 // Bind the current subchannel to the desired engine id.
371 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
372 method_call.argument);
373 const auto engine_id = static_cast<EngineID>(method_call.argument);
374 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
375 switch (engine_id) {
376 case EngineID::FERMI_TWOD_A:
377 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
378 break;
379 case EngineID::MAXWELL_B:
380 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
381 break;
382 case EngineID::KEPLER_COMPUTE_B:
383 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
384 break;
385 case EngineID::MAXWELL_DMA_COPY_A:
386 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
387 break;
388 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
389 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
390 break;
391 default:
392 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
393 }
394}
395
396void GPU::ProcessFenceActionMethod() {
397 switch (regs.fence_action.op) {
398 case FenceOperation::Acquire:
399 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
400 break;
401 case FenceOperation::Increment:
402 IncrementSyncPoint(regs.fence_action.syncpoint_id);
403 break;
404 default:
405 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
406 }
407}
408
409void GPU::ProcessWaitForInterruptMethod() {
410 // TODO(bunnei): Implement this method.
411 LOG_WARNING(HW_GPU, "(STUBBED) called");
412}
413
414void GPU::ProcessSemaphoreTriggerMethod() {
415 const u32 semaphore_operation_mask = 0xF;
416 const auto op =
417 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphore_operation_mask);
418 if (op == GpuSemaphoreOperation::WriteLong) {
419 struct Block {
420 u32 sequence;
421 u32 zeros = 0;
422 u64 timestamp;
423 };
424 861
425 Block block{}; 862bool GPU::IsAsync() const {
426 block.sequence = regs.semaphore_sequence; 863 return impl->IsAsync();
427 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
428 // CoreTiming
429 block.timestamp = GetTicks();
430 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
431 sizeof(block));
432 } else {
433 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
434 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
435 (op == GpuSemaphoreOperation::AcquireGequal &&
436 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
437 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
438 // Nothing to do in this case
439 } else {
440 regs.acquire_source = true;
441 regs.acquire_value = regs.semaphore_sequence;
442 if (op == GpuSemaphoreOperation::AcquireEqual) {
443 regs.acquire_active = true;
444 regs.acquire_mode = false;
445 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
446 regs.acquire_active = true;
447 regs.acquire_mode = true;
448 } else if (op == GpuSemaphoreOperation::AcquireMask) {
449 // TODO(kemathe): The acquire mask operation waits for a value that, ANDed with
450 // semaphore_sequence, yields a non-zero result
451 LOG_ERROR(HW_GPU, "Semaphore operation AcquireMask is not implemented");
452 } else {
453 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
454 }
455 }
456 }
457} 864}
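
One subtlety in the branch above: AcquireGequal compares through a signed cast of the unsigned difference, which keeps working when the 32-bit sequence counter wraps around. A self-contained sketch of that trick (PassedThreshold is an illustrative name):

#include <cstdint>

using u32 = std::uint32_t; // aliases as in common/common_types.h
using s32 = std::int32_t;

// The u32 subtraction wraps mod 2^32, so casting the difference to s32
// orders the two counters correctly even across a wraparound.
constexpr bool PassedThreshold(u32 word, u32 sequence) {
    return static_cast<s32>(word - sequence) > 0; // mirrors the test above
}

static_assert(PassedThreshold(5u, 3u));          // plainly past the threshold
static_assert(PassedThreshold(2u, 0xFFFFFFFEu)); // still past it after a wrap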
458 865
459void GPU::ProcessSemaphoreRelease() { 866bool GPU::UseNvdec() const {
460 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); 867 return impl->UseNvdec();
461} 868}
462 869
463void GPU::ProcessSemaphoreAcquire() { 870void GPU::RendererFrameEndNotify() {
464 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); 871 impl->RendererFrameEndNotify();
465 const auto value = regs.semaphore_acquire;
466 if (word != value) {
467 regs.acquire_active = true;
468 regs.acquire_value = value;
469 // TODO(kemathe73) figure out how to do the acquire_timeout
470 regs.acquire_mode = false;
471 regs.acquire_source = false;
472 }
473} 872}
474 873
475void GPU::Start() { 874void GPU::Start() {
476 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 875 impl->Start();
477 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
478 cpu_context->MakeCurrent();
479} 876}
480 877
481void GPU::ObtainContext() { 878void GPU::ObtainContext() {
482 cpu_context->MakeCurrent(); 879 impl->ObtainContext();
483} 880}
484 881
485void GPU::ReleaseContext() { 882void GPU::ReleaseContext() {
486 cpu_context->DoneCurrent(); 883 impl->ReleaseContext();
487} 884}
488 885
489void GPU::PushGPUEntries(Tegra::CommandList&& entries) { 886void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
490 gpu_thread.SubmitList(std::move(entries)); 887 impl->PushGPUEntries(std::move(entries));
491} 888}
492 889
493void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { 890void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
494 if (!use_nvdec) { 891 impl->PushCommandBuffer(entries);
495 return;
496 }
497
498 if (!cdma_pusher) {
499 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
500 }
501
502 // SubmitCommandBuffer would make the nvdec operations asynchronous, but this is not currently working
503 // TODO(ameerj): RE proper async nvdec operation
504 // gpu_thread.SubmitCommandBuffer(std::move(entries));
505
506 cdma_pusher->ProcessEntries(std::move(entries));
507} 892}
508 893
509void GPU::ClearCdmaInstance() { 894void GPU::ClearCdmaInstance() {
510 cdma_pusher.reset(); 895 impl->ClearCdmaInstance();
511} 896}
512 897
513void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 898void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
514 gpu_thread.SwapBuffers(framebuffer); 899 impl->SwapBuffers(framebuffer);
515} 900}
516 901
517void GPU::FlushRegion(VAddr addr, u64 size) { 902void GPU::FlushRegion(VAddr addr, u64 size) {
518 gpu_thread.FlushRegion(addr, size); 903 impl->FlushRegion(addr, size);
519} 904}
520 905
521void GPU::InvalidateRegion(VAddr addr, u64 size) { 906void GPU::InvalidateRegion(VAddr addr, u64 size) {
522 gpu_thread.InvalidateRegion(addr, size); 907 impl->InvalidateRegion(addr, size);
523} 908}
524 909
525void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 910void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
526 gpu_thread.FlushAndInvalidateRegion(addr, size); 911 impl->FlushAndInvalidateRegion(addr, size);
527}
528
529void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
530 auto& interrupt_manager = system.InterruptManager();
531 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
532}
533
534void GPU::OnCommandListEnd() {
535 if (is_async) {
536 // This command only applies to asynchronous GPU mode
537 gpu_thread.OnCommandListEnd();
538 }
539} 912}
540 913
541} // namespace Tegra 914} // namespace Tegra
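
The rewritten gpu.cpp above is now a set of thin forwarders: every public Tegra::GPU method delegates to a hidden Impl, which lets gpu.h (below) drop almost all of its includes and members. For reference, a minimal self-contained sketch of the pimpl idiom; Widget is an illustrative stand-in, not part of the codebase:

#include <memory>

// widget.h -- the header exposes only an opaque pointer, so implementation
// headers stop leaking into every includer.
class Widget {
public:
    Widget();
    ~Widget(); // defined out of line, where Impl is a complete type
    void DoWork();

private:
    struct Impl;
    std::unique_ptr<Impl> impl;
};

// widget.cpp -- all state and logic live out of line.
struct Widget::Impl {
    void DoWork() { /* real work happens here */ }
};

Widget::Widget() : impl{std::make_unique<Impl>()} {}
Widget::~Widget() = default;
void Widget::DoWork() { impl->DoWork(); }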
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e6a02a71b..05e5c94f3 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -4,28 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <list>
11#include <memory> 7#include <memory>
12#include <mutex> 8
9#include "common/bit_field.h"
13#include "common/common_types.h" 10#include "common/common_types.h"
14#include "core/hle/service/nvdrv/nvdata.h"
15#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "video_core/cdma_pusher.h" 11#include "video_core/cdma_pusher.h"
17#include "video_core/dma_pusher.h"
18#include "video_core/framebuffer_config.h" 12#include "video_core/framebuffer_config.h"
19#include "video_core/gpu_thread.h"
20
21using CacheAddr = std::uintptr_t;
22[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
23 return reinterpret_cast<CacheAddr>(host_ptr);
24}
25
26[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
27 return reinterpret_cast<u8*>(cache_addr);
28}
29 13
30namespace Core { 14namespace Core {
31namespace Frontend { 15namespace Frontend {
@@ -40,6 +24,9 @@ class ShaderNotify;
40} // namespace VideoCore 24} // namespace VideoCore
41 25
42namespace Tegra { 26namespace Tegra {
27class DmaPusher;
28class CDmaPusher;
29struct CommandList;
43 30
44enum class RenderTargetFormat : u32 { 31enum class RenderTargetFormat : u32 {
45 NONE = 0x0, 32 NONE = 0x0,
@@ -138,7 +125,18 @@ public:
138 } 125 }
139 }; 126 };
140 127
141 explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); 128 enum class FenceOperation : u32 {
129 Acquire = 0,
130 Increment = 1,
131 };
132
133 union FenceAction {
134 u32 raw;
135 BitField<0, 1, FenceOperation> op;
136 BitField<8, 24, u32> syncpoint_id;
137 };
138
139 explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
142 ~GPU(); 140 ~GPU();
143 141
144 /// Binds a renderer to the GPU. 142 /// Binds a renderer to the GPU.
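
Given the BitField layout in the FenceAction union above (op in bit 0, syncpoint_id in bits 8..31), a fence word packs as follows. This standalone helper mirrors the FenceAction::Build method of the old header (removed further down):

#include <cstdint>

// op occupies bit 0; syncpoint_id occupies bits 8..31 of the raw word.
constexpr std::uint32_t BuildFenceAction(std::uint32_t op, std::uint32_t syncpoint_id) {
    return ((syncpoint_id & 0xFFFFFFu) << 8) | (op & 1u);
}

// FenceOperation::Increment (1) on syncpoint 42:
static_assert(BuildFenceAction(1u, 42u) == ((42u << 8) | 1u));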
@@ -162,9 +160,7 @@ public:
162 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 160 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
163 161
164 /// Obtains current flush request fence id. 162 /// Obtains current flush request fence id.
165 [[nodiscard]] u64 CurrentFlushRequestFence() const { 163 [[nodiscard]] u64 CurrentFlushRequestFence() const;
166 return current_flush_fence.load(std::memory_order_relaxed);
167 }
168 164
169 /// Tick pending requests within the GPU. 165 /// Tick pending requests within the GPU.
170 void TickWork(); 166 void TickWork();
@@ -200,24 +196,16 @@ public:
200 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; 196 [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
201 197
202 /// Returns a reference to the underlying renderer. 198 /// Returns a reference to the underlying renderer.
203 [[nodiscard]] VideoCore::RendererBase& Renderer() { 199 [[nodiscard]] VideoCore::RendererBase& Renderer();
204 return *renderer;
205 }
206 200
207 /// Returns a const reference to the underlying renderer. 201 /// Returns a const reference to the underlying renderer.
208 [[nodiscard]] const VideoCore::RendererBase& Renderer() const { 202 [[nodiscard]] const VideoCore::RendererBase& Renderer() const;
209 return *renderer;
210 }
211 203
212 /// Returns a reference to the shader notifier. 204 /// Returns a reference to the shader notifier.
213 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { 205 [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify();
214 return *shader_notify;
215 }
216 206
217 /// Returns a const reference to the shader notifier. 207 /// Returns a const reference to the shader notifier.
218 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { 208 [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
219 return *shader_notify;
220 }
221 209
222 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 210 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
223 void WaitFence(u32 syncpoint_id, u32 value); 211 void WaitFence(u32 syncpoint_id, u32 value);
@@ -232,80 +220,12 @@ public:
232 220
233 [[nodiscard]] u64 GetTicks() const; 221 [[nodiscard]] u64 GetTicks() const;
234 222
235 [[nodiscard]] std::unique_lock<std::mutex> LockSync() { 223 [[nodiscard]] bool IsAsync() const;
236 return std::unique_lock{sync_mutex};
237 }
238
239 [[nodiscard]] bool IsAsync() const {
240 return is_async;
241 }
242 224
243 [[nodiscard]] bool UseNvdec() const { 225 [[nodiscard]] bool UseNvdec() const;
244 return use_nvdec;
245 }
246 226
247 void RendererFrameEndNotify(); 227 void RendererFrameEndNotify();
248 228
249 enum class FenceOperation : u32 {
250 Acquire = 0,
251 Increment = 1,
252 };
253
254 union FenceAction {
255 u32 raw;
256 BitField<0, 1, FenceOperation> op;
257 BitField<8, 24, u32> syncpoint_id;
258
259 [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
260 FenceAction result{};
261 result.op.Assign(op);
262 result.syncpoint_id.Assign(syncpoint_id);
263 return {result.raw};
264 }
265 };
266
267 struct Regs {
268 static constexpr size_t NUM_REGS = 0x40;
269
270 union {
271 struct {
272 INSERT_PADDING_WORDS_NOINIT(0x4);
273 struct {
274 u32 address_high;
275 u32 address_low;
276
277 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
278 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
279 address_low);
280 }
281 } semaphore_address;
282
283 u32 semaphore_sequence;
284 u32 semaphore_trigger;
285 INSERT_PADDING_WORDS_NOINIT(0xC);
286
287 // The pusher and the puller share the reference counter; the pusher only has
288 // read access
289 u32 reference_count;
290 INSERT_PADDING_WORDS_NOINIT(0x5);
291
292 u32 semaphore_acquire;
293 u32 semaphore_release;
294 u32 fence_value;
295 FenceAction fence_action;
296 INSERT_PADDING_WORDS_NOINIT(0xE2);
297
298 // Puller state
299 u32 acquire_mode;
300 u32 acquire_source;
301 u32 acquire_active;
302 u32 acquire_timeout;
303 u32 acquire_value;
304 };
305 std::array<u32, NUM_REGS> reg_array;
306 };
307 } regs{};
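
The removed Regs block relies on a classic layout trick: a union of named registers with a flat word array, so the puller can write registers by method index while the rest of the code reads named fields. A condensed sketch (anonymous structs in unions, and reading a member other than the last one written, are compiler extensions the original also leans on):

#include <array>
#include <cstdint>

union Regs {
    struct {
        std::uint32_t scratch;            // word 0
        std::uint32_t semaphore_sequence; // word 1
    };
    std::array<std::uint32_t, 2> reg_array;
};

// A puller write lands by index, e.g. regs.reg_array[1] = 7, and is then
// visible through the named field regs.semaphore_sequence.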
308
309 /// Performs any additional setup necessary in order to begin GPU emulation. 229 /// Performs any additional setup necessary in order to begin GPU emulation.
310 /// This can be used to launch any necessary threads and register any necessary 230 /// This can be used to launch any necessary threads and register any necessary
311 /// core timing events. 231 /// core timing events.
@@ -338,104 +258,9 @@ public:
338 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 258 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
339 void FlushAndInvalidateRegion(VAddr addr, u64 size); 259 void FlushAndInvalidateRegion(VAddr addr, u64 size);
340 260
341protected:
342 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
343
344private: 261private:
345 void ProcessBindMethod(const MethodCall& method_call); 262 struct Impl;
346 void ProcessFenceActionMethod(); 263 std::unique_ptr<Impl> impl;
347 void ProcessWaitForInterruptMethod();
348 void ProcessSemaphoreTriggerMethod();
349 void ProcessSemaphoreRelease();
350 void ProcessSemaphoreAcquire();
351
352 /// Calls a GPU puller method.
353 void CallPullerMethod(const MethodCall& method_call);
354
355 /// Calls a GPU engine method.
356 void CallEngineMethod(const MethodCall& method_call);
357
358 /// Calls a GPU engine multivalue method.
359 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
360 u32 methods_pending);
361
362 /// Determines where the method should be executed.
363 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
364
365protected:
366 Core::System& system;
367 std::unique_ptr<Tegra::MemoryManager> memory_manager;
368 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
369 std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
370 std::unique_ptr<VideoCore::RendererBase> renderer;
371 VideoCore::RasterizerInterface* rasterizer = nullptr;
372 const bool use_nvdec;
373
374private:
375 /// Mapping of command subchannels to their bound engine ids
376 std::array<EngineID, 8> bound_engines = {};
377 /// 3D engine
378 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
379 /// 2D engine
380 std::unique_ptr<Engines::Fermi2D> fermi_2d;
381 /// Compute engine
382 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
383 /// DMA engine
384 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
385 /// Inline memory engine
386 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
387 /// Shader build notifier
388 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
389 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
390 std::atomic_bool shutting_down{};
391
392 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
393
394 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
395
396 std::mutex sync_mutex;
397 std::mutex device_mutex;
398
399 std::condition_variable sync_cv;
400
401 struct FlushRequest {
402 explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
403 : fence{fence_}, addr{addr_}, size{size_} {}
404 u64 fence;
405 VAddr addr;
406 std::size_t size;
407 };
408
409 std::list<FlushRequest> flush_requests;
410 std::atomic<u64> current_flush_fence{};
411 u64 last_flush_fence{};
412 std::mutex flush_request_mutex;
413
414 const bool is_async;
415
416 VideoCommon::GPUThread::ThreadManager gpu_thread;
417 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
418}; 264};
419 265
420#define ASSERT_REG_POSITION(field_name, position) \
421 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
422 "Field " #field_name " has invalid position")
423
424ASSERT_REG_POSITION(semaphore_address, 0x4);
425ASSERT_REG_POSITION(semaphore_sequence, 0x6);
426ASSERT_REG_POSITION(semaphore_trigger, 0x7);
427ASSERT_REG_POSITION(reference_count, 0x14);
428ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
429ASSERT_REG_POSITION(semaphore_release, 0x1B);
430ASSERT_REG_POSITION(fence_value, 0x1C);
431ASSERT_REG_POSITION(fence_action, 0x1D);
432
433ASSERT_REG_POSITION(acquire_mode, 0x100);
434ASSERT_REG_POSITION(acquire_source, 0x101);
435ASSERT_REG_POSITION(acquire_active, 0x102);
436ASSERT_REG_POSITION(acquire_timeout, 0x103);
437ASSERT_REG_POSITION(acquire_value, 0x104);
438
439#undef ASSERT_REG_POSITION
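
The ASSERT_REG_POSITION checks removed here pinned each named field to its hardware word offset at compile time. The same offsetof technique in a self-contained form:

#include <cstddef>
#include <cstdint>

struct Regs {
    std::uint32_t pad[4];
    std::uint32_t semaphore_address_high; // expected at word offset 0x4
};

static_assert(offsetof(Regs, semaphore_address_high) == 0x4 * 4,
              "Field semaphore_address_high has invalid position");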
440
441} // namespace Tegra 266} // namespace Tegra
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 91bada925..00984188e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -130,9 +130,6 @@ public:
130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
131 void FlushAndInvalidateRegion(VAddr addr, u64 size); 131 void FlushAndInvalidateRegion(VAddr addr, u64 size);
132 132
133 // Stops the GPU execution and waits for the GPU to finish working
134 void ShutDown();
135
136 void OnCommandListEnd(); 133 void OnCommandListEnd();
137 134
138private: 135private:
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index aac851253..73231061a 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -8,6 +8,7 @@
8#include <array> 8#include <array>
9#include <cstring> 9#include <cstring>
10#include <iterator> 10#include <iterator>
11#include <list>
11#include <memory> 12#include <memory>
12#include <mutex> 13#include <mutex>
13#include <optional> 14#include <optional>
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 54dae2c41..8c3ca3d82 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/surface.h" 20#include "video_core/surface.h"
21#include "video_core/texture_cache/formatter.h" 21#include "video_core/texture_cache/formatter.h"
22#include "video_core/texture_cache/samples_helper.h" 22#include "video_core/texture_cache/samples_helper.h"
23#include "video_core/texture_cache/util.h"
23 24
24namespace OpenGL { 25namespace OpenGL {
25namespace { 26namespace {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index c498a8a8f..1ca2c90be 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -12,6 +12,7 @@
12#include "shader_recompiler/shader_info.h" 12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/util_shaders.h" 14#include "video_core/renderer_opengl/util_shaders.h"
15#include "video_core/texture_cache/image_view_base.h"
15#include "video_core/texture_cache/texture_cache_base.h" 16#include "video_core/texture_cache/texture_cache_base.h"
16 17
17namespace OpenGL { 18namespace OpenGL {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 3b87640b5..06c5fb867 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -21,6 +21,7 @@
21#include "video_core/renderer_vulkan/vk_texture_cache.h" 21#include "video_core/renderer_vulkan/vk_texture_cache.h"
22#include "video_core/texture_cache/formatter.h" 22#include "video_core/texture_cache/formatter.h"
23#include "video_core/texture_cache/samples_helper.h" 23#include "video_core/texture_cache/samples_helper.h"
24#include "video_core/texture_cache/util.h"
24#include "video_core/vulkan_common/vulkan_device.h" 25#include "video_core/vulkan_common/vulkan_device.h"
25#include "video_core/vulkan_common/vulkan_memory_allocator.h" 26#include "video_core/vulkan_common/vulkan_memory_allocator.h"
26#include "video_core/vulkan_common/vulkan_wrapper.h" 27#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 6d5a68bfe..b09c468e4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,11 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <compare>
8#include <span> 7#include <span>
9 8
10#include "shader_recompiler/shader_info.h" 9#include "shader_recompiler/shader_info.h"
11#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/texture_cache_base.h" 12#include "video_core/texture_cache/texture_cache_base.h"
13#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 81a878bb2..05850afd0 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -16,6 +16,7 @@
16#include "common/fs/fs.h" 16#include "common/fs/fs.h"
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18#include "shader_recompiler/environment.h" 18#include "shader_recompiler/environment.h"
19#include "video_core/engines/kepler_compute.h"
19#include "video_core/memory_manager.h" 20#include "video_core/memory_manager.h"
20#include "video_core/shader_environment.h" 21#include "video_core/shader_environment.h"
21#include "video_core/textures/texture.h" 22#include "video_core/textures/texture.h"
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 2079979db..6640e53d0 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -5,13 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <filesystem> 8#include <filesystem>
10#include <iosfwd> 9#include <iosfwd>
11#include <limits> 10#include <limits>
12#include <memory> 11#include <memory>
13#include <optional> 12#include <optional>
14#include <span> 13#include <span>
14#include <stop_token>
15#include <type_traits> 15#include <type_traits>
16#include <unordered_map> 16#include <unordered_map>
17#include <vector> 17#include <vector>
@@ -19,9 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/unique_function.h" 20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h" 21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25 23
26namespace Tegra { 24namespace Tegra {
27class MemoryManager; 25class MemoryManager;
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
index 6527e14c8..e751f26c7 100644
--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -8,6 +8,7 @@
8#include "video_core/texture_cache/image_view_info.h" 8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache_base.h" 9#include "video_core/texture_cache/texture_cache_base.h"
10#include "video_core/texture_cache/types.h" 10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
11#include "video_core/textures/texture.h" 12#include "video_core/textures/texture.h"
12 13
13namespace VideoCommon { 14namespace VideoCommon {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 24b809242..329df2e49 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -4,10 +4,15 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_set>
8
7#include "common/alignment.h" 9#include "common/alignment.h"
8#include "video_core/dirty_flags.h" 10#include "video_core/dirty_flags.h"
11#include "video_core/engines/kepler_compute.h"
12#include "video_core/texture_cache/image_view_base.h"
9#include "video_core/texture_cache/samples_helper.h" 13#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/texture_cache_base.h" 14#include "video_core/texture_cache/texture_cache_base.h"
15#include "video_core/texture_cache/util.h"
11 16
12namespace VideoCommon { 17namespace VideoCommon {
13 18
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index d7528ed24..2d1893c1c 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -4,13 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <mutex> 7#include <mutex>
9#include <span> 8#include <span>
10#include <type_traits> 9#include <type_traits>
11#include <unordered_map> 10#include <unordered_map>
12#include <unordered_set>
13#include <vector> 11#include <vector>
12#include <queue>
14 13
15#include "common/common_types.h" 14#include "common/common_types.h"
16#include "common/literals.h" 15#include "common/literals.h"
@@ -18,10 +17,6 @@
18#include "video_core/compatible_formats.h" 17#include "video_core/compatible_formats.h"
19#include "video_core/delayed_destruction_ring.h" 18#include "video_core/delayed_destruction_ring.h"
20#include "video_core/engines/fermi_2d.h" 19#include "video_core/engines/fermi_2d.h"
21#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h"
24#include "video_core/rasterizer_interface.h"
25#include "video_core/surface.h" 20#include "video_core/surface.h"
26#include "video_core/texture_cache/descriptor_table.h" 21#include "video_core/texture_cache/descriptor_table.h"
27#include "video_core/texture_cache/image_base.h" 22#include "video_core/texture_cache/image_base.h"
@@ -30,7 +25,6 @@
30#include "video_core/texture_cache/render_targets.h" 25#include "video_core/texture_cache/render_targets.h"
31#include "video_core/texture_cache/slot_vector.h" 26#include "video_core/texture_cache/slot_vector.h"
32#include "video_core/texture_cache/types.h" 27#include "video_core/texture_cache/types.h"
33#include "video_core/texture_cache/util.h"
34#include "video_core/textures/texture.h" 28#include "video_core/textures/texture.h"
35 29
36namespace VideoCommon { 30namespace VideoCommon {