-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 34
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 14
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 34
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp | 2
-rw-r--r--  src/video_core/CMakeLists.txt | 8
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 103
-rw-r--r--  src/video_core/control/channel_state.cpp | 44
-rw-r--r--  src/video_core/control/channel_state.h | 69
-rw-r--r--  src/video_core/control/channel_state_cache.cpp | 5
-rw-r--r--  src/video_core/control/channel_state_cache.h | 68
-rw-r--r--  src/video_core/control/channel_state_cache.inc | 64
-rw-r--r--  src/video_core/control/scheduler.cpp | 31
-rw-r--r--  src/video_core/control/scheduler.h | 38
-rw-r--r--  src/video_core/dma_pusher.cpp | 23
-rw-r--r--  src/video_core/dma_pusher.h | 13
-rw-r--r--  src/video_core/engines/puller.cpp | 297
-rw-r--r--  src/video_core/engines/puller.h | 179
-rw-r--r--  src/video_core/fence_manager.h | 28
-rw-r--r--  src/video_core/gpu.cpp | 468
-rw-r--r--  src/video_core/gpu.h | 55
-rw-r--r--  src/video_core/gpu_thread.cpp | 14
-rw-r--r--  src/video_core/gpu_thread.h | 12
-rw-r--r--  src/video_core/memory_manager.cpp | 5
-rw-r--r--  src/video_core/query_cache.h | 18
-rw-r--r--  src/video_core/rasterizer_interface.h | 9
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.cpp | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 14
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 39
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 9
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 17
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 28
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h | 5
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 87
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 20
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 22
-rw-r--r--  src/video_core/shader_cache.cpp | 33
-rw-r--r--  src/video_core/shader_cache.h | 15
-rw-r--r--  src/video_core/texture_cache/image_base.h | 3
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 209
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h | 73
50 files changed, 1461 insertions(+), 809 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b1c683511..9946ce624 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,13 +10,17 @@
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
-    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
+      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
 
     const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
     if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
+        params.offset = *(gmmu->AllocateFixed(params.offset, size));
     } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+        params.offset = gmmu->Allocate(size, params.align);
     }
 
     auto result = NvResult::Success;
@@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
     LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
               params.pages, params.page_size);
 
-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             // If nvmap handle is null, we should unmap instead.
             const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
             const auto size{static_cast<u64>(entry.pages) << 0x10};
-            system.GPU().MemoryManager().Unmap(offset, size);
+            gmmu->Unmap(offset, size);
             continue;
         }
 
@@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
         const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
         const auto size{static_cast<u64>(entry.pages) << 0x10};
         const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{
-            system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
+        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
 
         if (!addr) {
             LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
               params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
               params.offset);
 
-    auto& gpu = system.GPU();
     if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
         if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
             const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
             const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
 
-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
+            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
                 LOG_CRITICAL(Service_NVDRV,
                              "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
                              "mapping_size = {}, offset={}",
@@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
 
     const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
     if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
+        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
     } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
+        params.offset = gmmu->Map(physical_address, params.offset, size);
     }
 
     auto result = NvResult::Success;
@@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
     if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
+        gmmu->Unmap(params.offset, *size);
     } else {
         LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
     }
@@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
     IoctlBindChannel params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
 
-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 67d2f1e87..4ecae3caf 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -13,6 +13,14 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
 namespace Service::Nvidia::NvCore {
 class Container;
 class NvMap;
@@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
 
 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
     ~nvhost_as_gpu() override;
 
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -187,9 +195,13 @@ private:
     void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
     std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
 
+    Module& module;
+
     NvCore::Container& container;
     NvCore::NvMap& nvmap;
 
+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
     // This is expected to be ordered, therefore we must use a map, not unordered_map
     std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index cb54ee5a4..38d45cb79 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -11,12 +11,14 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 
 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
     result.op.Assign(op);
     result.syncpoint_id.Assign(syncpoint_id);
     return {result.raw};
@@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
 nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
                        NvCore::Container& core_)
     : nvdevice{system_}, events_interface{events_interface_}, core{core_},
-      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
     channel_fence.id = syncpoint_manager.AllocateSyncpoint();
     channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
     sm_exception_breakpoint_int_report_event =
@@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                 params.unk3);
 
+    if (channel_state->initiated) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }
+
+    system.GPU().InitChannel(*channel_state);
     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
 
     params.fence_out = channel_fence;
@@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
         {fence.value},
         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                   Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
     };
 }
 
@@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
     for (u32 count = 0; count < add_increment; ++count) {
         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                       Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
     }
 
     return result;
@@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
 
     auto& gpu = system.GPU();
 
+    const auto bind_id = channel_state->bind_id;
+
     params.fence_out.id = channel_fence.id;
 
     if (params.flags.add_wait.Value() &&
         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+        gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
     }
 
     if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
     }
 
-    gpu.PushGPUEntries(std::move(entries));
+    gpu.PushGPUEntries(bind_id, std::move(entries));
 
     if (params.flags.add_increment.Value()) {
         if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         }
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 440c0c42d..3a65ed06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,6 +13,12 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"
 
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Service::Nvidia {
 
 namespace NvCore {
@@ -26,6 +32,7 @@ class EventInterface;
 
 namespace Service::Nvidia::Devices {
 
+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
@@ -46,6 +53,7 @@ public:
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
 private:
+    friend class nvhost_as_gpu;
     enum class CtxObjects : u32_le {
         Ctx2D = 0x902D,
         Ctx3D = 0xB197,
@@ -204,6 +212,7 @@ private:
     NvCore::Container& core;
     NvCore::SyncpointManager& syncpoint_manager;
     NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
     NvFence channel_fence;
 
     // Events
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 57f58055d..279997e81 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
     IocFromIdParams params;
     std::memcpy(&params, input.data(), sizeof(params));
 
-    LOG_DEBUG(Service_NVDRV, "called, id:{}");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
 
     // Handles and IDs are always the same value in nvmap however IDs can be used globally given the
     // right permissions.
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 208de0b75..b39a4c6db 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -74,7 +74,7 @@ Module::Module(Core::System& system)
     : service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
     builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
         std::shared_ptr<Devices::nvdevice> device =
-            std::make_shared<Devices::nvhost_as_gpu>(system, container);
+            std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
         return open_files.emplace(fd, device).first;
     };
     builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5b3808351..e216c51a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -35,6 +35,12 @@ add_library(video_core STATIC
     command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
+    control/channel_state.cpp
+    control/channel_state.h
+    control/channel_state_cache.cpp
+    control/channel_state_cache.h
+    control/scheduler.cpp
+    control/scheduler.h
     delayed_destruction_ring.h
     dirty_flags.cpp
     dirty_flags.h
@@ -54,6 +60,8 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
+    engines/puller.cpp
+    engines/puller.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..6b6764d72 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
 
 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 
     // Page size for caching purposes.
     // This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
     static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
 
     void TickFrame();
 
@@ -367,9 +364,6 @@ private:
     void ClearDownload(IntervalType subtract_interval);
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
 
     SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
 
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
     if (is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
             runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
@@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
     enabled_storage_buffers[stage] |= 1U << ssbo_index;
     written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
     storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
 }
@@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
     enabled_compute_storage_buffers |= 1U << ssbo_index;
     written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
@@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
     }
 }
 
 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
         const Binding& binding = vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         }
         flags[Dirty::VertexBuffer0 + index] = false;
 
-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
         runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
     }
@@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1262,8 +1252,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
     // We have to check for the dirty flags and index count
     // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
         return;
     }
@@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_array.StartAddress();
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
         return;
     }
     flags[Dirty::VertexBuffers] = false;
@@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
         return;
     }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     if (array.enable == 0 || size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;
@@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
     const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
         transform_feedback_buffers[index] = NULL_BINDING;
         return;
@@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
             if (cpu_addr) {
                 binding.cpu_addr = *cpu_addr;
                 binding.size = cbuf.size;
@@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
         dirty_uniform_buffers.fill(~u32{0});
         uniform_buffer_binding_sizes.fill({});
     }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     flags[Dirty::IndexBuffer] = true;
     flags[Dirty::VertexBuffers] = true;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
 
 template <class P>
 typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
@@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
     GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     TextureBufferBinding binding;
     if (!cpu_addr || size == 0) {
         binding.cpu_addr = 0;
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
new file mode 100644
index 000000000..67803fe94
--- /dev/null
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+ChannelState::ChannelState(s32 bind_id_) {
+    bind_id = bind_id_;
+    initiated = false;
+}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+    ASSERT(memory_manager);
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    initiated = true;
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    dma_pusher->BindRasterizer(rasterizer);
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_memory->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+    maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
new file mode 100644
index 000000000..82808a6b8
--- /dev/null
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+struct ChannelState {
+    ChannelState(s32 bind_id);
+    ChannelState(const ChannelState& state) = delete;
+    ChannelState& operator=(const ChannelState&) = delete;
+    ChannelState(ChannelState&& other) noexcept = default;
+    ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+    void Init(Core::System& system, GPU& gpu);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    s32 bind_id = -1;
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    std::shared_ptr<MemoryManager> memory_manager;
+
+    std::unique_ptr<DmaPusher> dma_pusher;
+
+    bool initiated{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
new file mode 100644
index 000000000..f72a97b2f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,5 @@
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+}
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
new file mode 100644
index 000000000..c8298c003
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <unordered_map>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+class ChannelInfo {
+public:
+    ChannelInfo() = delete;
+    ChannelInfo(Tegra::Control::ChannelState& state);
+    ChannelInfo(const ChannelInfo& state) = delete;
+    ChannelInfo& operator=(const ChannelInfo&) = delete;
+    ChannelInfo(ChannelInfo&& other) = default;
+    ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+};
+
+template <class P>
+class ChannelSetupCaches {
+public:
+    /// Operations for setting the channel of execution.
+
+    /// Create channel state.
+    void CreateChannel(Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
+protected:
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+    std::deque<P> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    P* channel_state;
+    size_t current_channel_id{UNSET_CHANNEL};
+    Tegra::Engines::Maxwell3D* maxwell3d;
+    Tegra::Engines::KeplerCompute* kepler_compute;
+    Tegra::MemoryManager* gpu_memory;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
new file mode 100644
index 000000000..3eb73af9f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,64 @@
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+      gpu_memory{*channel_state.memory_manager} {}
+
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            new (&channel_storage[id]) ChannelInfo(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    channel_state = &channel_storage[current_channel_id];
+    maxwell3d = &channel_state->maxwell3d;
+    kepler_compute = &channel_state->kepler_compute;
+    gpu_memory = &channel_state->gpu_memory;
+}
+
+/// Erase channel's state.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id);
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL;
+        channel_state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+
+} // namespace VideoCommon
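
ChannelSetupCaches above keeps one ChannelInfo per channel in a deque, recycles slot indices through free_channel_ids, and resolves bind ids through channel_map; BindToChannel then repoints the maxwell3d/kepler_compute/gpu_memory pointers that the derived caches read. Below is a minimal standalone sketch of that slot-reuse pattern; the names and the simplified info struct are illustrative only, not yuzu API.

#include <cassert>
#include <cstddef>
#include <deque>
#include <limits>
#include <unordered_map>

// Stand-in for ChannelInfo: just remembers which channel it describes.
struct DummyChannelInfo {
    int bind_id = -1;
};

class ChannelSlotCache {
public:
    static constexpr std::size_t UNSET = std::numeric_limits<std::size_t>::max();

    void Create(int bind_id) {
        assert(slots_by_id.find(bind_id) == slots_by_id.end());
        std::size_t slot;
        if (!free_slots.empty()) {
            // Reuse a previously erased slot instead of growing the deque.
            slot = free_slots.front();
            free_slots.pop_front();
            storage[slot] = DummyChannelInfo{bind_id};
        } else {
            slot = storage.size();
            storage.push_back(DummyChannelInfo{bind_id});
        }
        slots_by_id.emplace(bind_id, slot);
    }

    void Bind(int bind_id) {
        current = slots_by_id.at(bind_id);
        active = &storage[current];
    }

    void Erase(int bind_id) {
        const auto it = slots_by_id.find(bind_id);
        assert(it != slots_by_id.end());
        const std::size_t slot = it->second;
        free_slots.push_back(slot); // slot becomes available for the next Create
        slots_by_id.erase(it);
        if (slot == current) {
            current = UNSET;
            active = nullptr;
        }
    }

    DummyChannelInfo* active = nullptr;

private:
    std::deque<DummyChannelInfo> storage;
    std::deque<std::size_t> free_slots;
    std::unordered_map<int, std::size_t> slots_by_id;
    std::size_t current = UNSET;
};

int main() {
    ChannelSlotCache cache;
    cache.Create(7);
    cache.Bind(7);
    cache.Erase(7);  // slot 0 goes back to the free list
    cache.Create(9); // reuses slot 0
    cache.Bind(9);
    return cache.active != nullptr ? 0 : 1;
}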
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
new file mode 100644
index 000000000..e1abcb188
--- /dev/null
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+Scheduler::~Scheduler() = default;
+
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    auto it = channels.find(channel);
+    auto channel_state = it->second;
+    gpu.BindChannel(channel_state->bind_id);
+    channel_state->dma_pusher->Push(std::move(entries));
+    channel_state->dma_pusher->DispatchCalls();
+}
+
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+    s32 channel = new_channel->bind_id;
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    channels.emplace(channel, new_channel);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
new file mode 100644
index 000000000..802e9caff
--- /dev/null
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+class Scheduler {
+public:
+    Scheduler(GPU& gpu_);
+    ~Scheduler();
+
+    void Push(s32 channel, CommandList&& entries);
+
+    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
+
+private:
+    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+    std::mutex scheduling_guard;
+    GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
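
Scheduler::Push above serializes all submissions: it takes scheduling_guard, looks up the target channel, has the GPU bind it, then pushes and dispatches the command list on that channel's DmaPusher. Below is a minimal standalone sketch of the same lock-bind-dispatch pattern; the placeholder types stand in for ChannelState and CommandList and are not yuzu API.

#include <cstdio>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for a channel: an id plus a queue the "pusher" drains.
struct FakeChannel {
    int bind_id;
    std::vector<int> pending;
};

class MiniScheduler {
public:
    void DeclareChannel(std::shared_ptr<FakeChannel> channel) {
        std::unique_lock lk(guard);
        channels.emplace(channel->bind_id, std::move(channel));
    }

    // Bind the target channel, queue the work, then dispatch it, all under
    // one lock, mirroring the Push flow sketched above.
    void Push(int channel_id, std::vector<int>&& entries) {
        std::unique_lock lk(guard);
        auto& channel = channels.at(channel_id);
        bound_channel = channel->bind_id;
        channel->pending.insert(channel->pending.end(), entries.begin(), entries.end());
        Dispatch(*channel);
    }

private:
    void Dispatch(FakeChannel& channel) {
        for (const int entry : channel.pending) {
            std::printf("channel %d: entry %d\n", channel.bind_id, entry);
        }
        channel.pending.clear();
    }

    std::unordered_map<int, std::shared_ptr<FakeChannel>> channels;
    std::mutex guard;
    int bound_channel = -1;
};

int main() {
    MiniScheduler sched;
    auto channel = std::make_shared<FakeChannel>();
    channel->bind_id = 3;
    sched.DeclareChannel(channel);
    sched.Push(3, {10, 11, 12});
    return 0;
}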
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 29b8582ab..b01f04d0c 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,10 @@
 
 namespace Tegra {
 
-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+                     Control::ChannelState& channel_state_)
+    : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
+                                                                          *this, channel_state_} {}
 
 DmaPusher::~DmaPusher() = default;
 
@@ -76,11 +79,11 @@ bool DmaPusher::Step() {
             // Push buffer non-empty, read a word
             command_headers.resize(command_list_header.size);
             if (Settings::IsGPULevelHigh()) {
-                gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
-                                              command_list_header.size * sizeof(u32));
+                memory_manager.ReadBlock(dma_get, command_headers.data(),
+                                         command_list_header.size * sizeof(u32));
             } else {
-                gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                                    command_list_header.size * sizeof(u32));
+                memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
+                                               command_list_header.size * sizeof(u32));
             }
         }
         for (std::size_t index = 0; index < command_headers.size();) {
@@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
154 157
155void DmaPusher::CallMethod(u32 argument) const { 158void DmaPusher::CallMethod(u32 argument) const {
156 if (dma_state.method < non_puller_methods) { 159 if (dma_state.method < non_puller_methods) {
157 gpu.CallMethod(GPU::MethodCall{ 160 puller.CallPullerMethod(Engines::Puller::MethodCall{
158 dma_state.method, 161 dma_state.method,
159 argument, 162 argument,
160 dma_state.subchannel, 163 dma_state.subchannel,
@@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {
168 171
169void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { 172void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
170 if (dma_state.method < non_puller_methods) { 173 if (dma_state.method < non_puller_methods) {
171 gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, 174 puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
172 dma_state.method_count); 175 dma_state.method_count);
173 } else { 176 } else {
174 subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, 177 subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
175 num_methods, dma_state.method_count); 178 num_methods, dma_state.method_count);
176 } 179 }
177} 180}
178 181
182void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
183 puller.BindRasterizer(rasterizer);
184}
185
179} // namespace Tegra 186} // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 872fd146a..fd7c936c4 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/engine_interface.h" 12#include "video_core/engines/engine_interface.h"
13#include "video_core/engines/puller.h"
13 14
14namespace Core { 15namespace Core {
15class System; 16class System;
@@ -17,7 +18,12 @@ class System;
17 18
18namespace Tegra { 19namespace Tegra {
19 20
21namespace Control {
22struct ChannelState;
23}
24
20class GPU; 25class GPU;
26class MemoryManager;
21 27
22enum class SubmissionMode : u32 { 28enum class SubmissionMode : u32 {
23 IncreasingOld = 0, 29 IncreasingOld = 0,
@@ -102,7 +108,8 @@ struct CommandList final {
102 */ 108 */
103class DmaPusher final { 109class DmaPusher final {
104public: 110public:
105 explicit DmaPusher(Core::System& system_, GPU& gpu_); 111 explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
112 Control::ChannelState& channel_state_);
106 ~DmaPusher(); 113 ~DmaPusher();
107 114
108 void Push(CommandList&& entries) { 115 void Push(CommandList&& entries) {
@@ -115,6 +122,8 @@ public:
115 subchannels[subchannel_id] = engine; 122 subchannels[subchannel_id] = engine;
116 } 123 }
117 124
125 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
126
118private: 127private:
119 static constexpr u32 non_puller_methods = 0x40; 128 static constexpr u32 non_puller_methods = 0x40;
120 static constexpr u32 max_subchannels = 8; 129 static constexpr u32 max_subchannels = 8;
@@ -148,6 +157,8 @@ private:
148 157
149 GPU& gpu; 158 GPU& gpu;
150 Core::System& system; 159 Core::System& system;
160 MemoryManager& memory_manager;
161 mutable Engines::Puller puller;
151}; 162};
152 163
153} // namespace Tegra 164} // namespace Tegra
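Since the pusher now needs its channel's memory manager and state up front, construction presumably moves into the channel setup path; a one-line sketch of what that call would look like from inside a Control::ChannelState method (assumed location, not shown in this diff):

    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);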
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
new file mode 100644
index 000000000..37f2ced18
--- /dev/null
+++ b/src/video_core/engines/puller.cpp
@@ -0,0 +1,297 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "common/settings.h"
8#include "core/core.h"
9#include "video_core/control/channel_state.h"
10#include "video_core/dma_pusher.h"
11#include "video_core/engines/fermi_2d.h"
12#include "video_core/engines/kepler_compute.h"
13#include "video_core/engines/kepler_memory.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/engines/maxwell_dma.h"
16#include "video_core/engines/puller.h"
17#include "video_core/gpu.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_interface.h"
20
21namespace Tegra::Engines {
22
23Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
24 Control::ChannelState& channel_state_)
25 : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
26 channel_state_} {}
27
28Puller::~Puller() = default;
29
30void Puller::ProcessBindMethod(const MethodCall& method_call) {
31 // Bind the current subchannel to the desired engine id.
32 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
33 method_call.argument);
34 const auto engine_id = static_cast<EngineID>(method_call.argument);
 35 bound_engines[method_call.subchannel] = engine_id;
36 switch (engine_id) {
37 case EngineID::FERMI_TWOD_A:
38 dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
39 break;
40 case EngineID::MAXWELL_B:
41 dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
42 break;
43 case EngineID::KEPLER_COMPUTE_B:
44 dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
45 break;
46 case EngineID::MAXWELL_DMA_COPY_A:
47 dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
48 break;
49 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
50 dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
51 break;
52 default:
53 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
54 }
55}
56
57void Puller::ProcessFenceActionMethod() {
58 switch (regs.fence_action.op) {
59 case Puller::FenceOperation::Acquire:
60 // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
61 // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
62 break;
63 case Puller::FenceOperation::Increment:
64 rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
65 break;
66 default:
67 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
68 }
69}
70
71void Puller::ProcessWaitForInterruptMethod() {
72 // TODO(bunnei) ImplementMe
73 LOG_WARNING(HW_GPU, "(STUBBED) called");
74}
75
76void Puller::ProcessSemaphoreTriggerMethod() {
77 const auto semaphoreOperationMask = 0xF;
78 const auto op =
79 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
80 if (op == GpuSemaphoreOperation::WriteLong) {
81 struct Block {
82 u32 sequence;
83 u32 zeros = 0;
84 u64 timestamp;
85 };
86
87 Block block{};
88 block.sequence = regs.semaphore_sequence;
89 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
90 // CoreTiming
91 block.timestamp = gpu.GetTicks();
92 memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
93 } else {
94 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
95 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
96 (op == GpuSemaphoreOperation::AcquireGequal &&
97 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
98 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
99 // Nothing to do in this case
100 } else {
101 regs.acquire_source = true;
102 regs.acquire_value = regs.semaphore_sequence;
103 if (op == GpuSemaphoreOperation::AcquireEqual) {
104 regs.acquire_active = true;
105 regs.acquire_mode = false;
106 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
107 regs.acquire_active = true;
108 regs.acquire_mode = true;
109 } else if (op == GpuSemaphoreOperation::AcquireMask) {
110 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
111 // semaphore_sequence, gives a non-0 result
112 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
113 } else {
114 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
115 }
116 }
117 }
118}
119
120void Puller::ProcessSemaphoreRelease() {
121 memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
122}
123
124void Puller::ProcessSemaphoreAcquire() {
125 const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
126 const auto value = regs.semaphore_acquire;
127 if (word != value) {
128 regs.acquire_active = true;
129 regs.acquire_value = value;
130 // TODO(kemathe73) figure out how to do the acquire_timeout
131 regs.acquire_mode = false;
132 regs.acquire_source = false;
133 }
134}
135
136/// Calls a GPU puller method.
137void Puller::CallPullerMethod(const MethodCall& method_call) {
138 regs.reg_array[method_call.method] = method_call.argument;
139 const auto method = static_cast<BufferMethods>(method_call.method);
140
141 switch (method) {
142 case BufferMethods::BindObject: {
143 ProcessBindMethod(method_call);
144 break;
145 }
146 case BufferMethods::Nop:
147 case BufferMethods::SemaphoreAddressHigh:
148 case BufferMethods::SemaphoreAddressLow:
149 case BufferMethods::SemaphoreSequence:
150 case BufferMethods::UnkCacheFlush:
151 case BufferMethods::WrcacheFlush:
152 case BufferMethods::FenceValue:
153 break;
154 case BufferMethods::RefCnt:
155 rasterizer->SignalReference();
156 break;
157 case BufferMethods::FenceAction:
158 ProcessFenceActionMethod();
159 break;
160 case BufferMethods::WaitForInterrupt:
161 ProcessWaitForInterruptMethod();
162 break;
163 case BufferMethods::SemaphoreTrigger: {
164 ProcessSemaphoreTriggerMethod();
165 break;
166 }
167 case BufferMethods::NotifyIntr: {
168 // TODO(Kmather73): Research and implement this method.
169 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
170 break;
171 }
172 case BufferMethods::Unk28: {
173 // TODO(Kmather73): Research and implement this method.
174 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
175 break;
176 }
177 case BufferMethods::SemaphoreAcquire: {
178 ProcessSemaphoreAcquire();
179 break;
180 }
181 case BufferMethods::SemaphoreRelease: {
182 ProcessSemaphoreRelease();
183 break;
184 }
185 case BufferMethods::Yield: {
186 // TODO(Kmather73): Research and implement this method.
187 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
188 break;
189 }
190 default:
191 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
192 break;
193 }
194}
195
196/// Calls a GPU engine method.
197void Puller::CallEngineMethod(const MethodCall& method_call) {
198 const EngineID engine = bound_engines[method_call.subchannel];
199
200 switch (engine) {
201 case EngineID::FERMI_TWOD_A:
202 channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
203 method_call.IsLastCall());
204 break;
205 case EngineID::MAXWELL_B:
206 channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
207 method_call.IsLastCall());
208 break;
209 case EngineID::KEPLER_COMPUTE_B:
210 channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
211 method_call.IsLastCall());
212 break;
213 case EngineID::MAXWELL_DMA_COPY_A:
214 channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
215 method_call.IsLastCall());
216 break;
217 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
218 channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
219 method_call.IsLastCall());
220 break;
221 default:
222 UNIMPLEMENTED_MSG("Unimplemented engine");
223 }
224}
225
226/// Calls a GPU engine multivalue method.
227void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
228 u32 methods_pending) {
229 const EngineID engine = bound_engines[subchannel];
230
231 switch (engine) {
232 case EngineID::FERMI_TWOD_A:
233 channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
234 break;
235 case EngineID::MAXWELL_B:
236 channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
237 break;
238 case EngineID::KEPLER_COMPUTE_B:
239 channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
240 break;
241 case EngineID::MAXWELL_DMA_COPY_A:
242 channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
243 break;
244 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
245 channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
246 break;
247 default:
248 UNIMPLEMENTED_MSG("Unimplemented engine");
249 }
250}
251
252/// Calls a GPU method.
253void Puller::CallMethod(const MethodCall& method_call) {
254 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
255 method_call.subchannel);
256
257 ASSERT(method_call.subchannel < bound_engines.size());
258
259 if (ExecuteMethodOnEngine(method_call.method)) {
260 CallEngineMethod(method_call);
261 } else {
262 CallPullerMethod(method_call);
263 }
264}
265
266/// Calls a GPU multivalue method.
267void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
268 u32 methods_pending) {
269 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
270
271 ASSERT(subchannel < bound_engines.size());
272
273 if (ExecuteMethodOnEngine(method)) {
274 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
275 } else {
276 for (std::size_t i = 0; i < amount; i++) {
277 CallPullerMethod(MethodCall{
278 method,
279 base_start[i],
280 subchannel,
281 methods_pending - static_cast<u32>(i),
282 });
283 }
284 }
285}
286
287void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
288 rasterizer = rasterizer_;
289}
290
291/// Determines where the method should be executed.
292[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
293 const auto buffer_method = static_cast<BufferMethods>(method);
294 return buffer_method >= BufferMethods::NonPullerMethods;
295}
296
297} // namespace Tegra::Engines
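For reference, the operation selection at the top of ProcessSemaphoreTriggerMethod only looks at the low nibble of the trigger register; a worked decode with a made-up value:

    const u32 semaphore_trigger = 0x2;            // hypothetical register write
    const u32 op_bits = semaphore_trigger & 0xF;  // 0x1 = AcquireEqual, 0x2 = WriteLong (this case),
                                                  // 0x4 = AcquireGequal, 0x8 = AcquireMask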
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
new file mode 100644
index 000000000..d948ec790
--- /dev/null
+++ b/src/video_core/engines/puller.h
@@ -0,0 +1,179 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <vector>
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
14
15namespace Core {
16class System;
17}
18
19namespace Tegra {
20class MemoryManager;
21class DmaPusher;
22
23enum class EngineID {
24 FERMI_TWOD_A = 0x902D, // 2D Engine
25 MAXWELL_B = 0xB197, // 3D Engine
26 KEPLER_COMPUTE_B = 0xB1C0,
27 KEPLER_INLINE_TO_MEMORY_B = 0xA140,
28 MAXWELL_DMA_COPY_A = 0xB0B5,
29};
30
31namespace Control {
32struct ChannelState;
33}
34} // namespace Tegra
35
36namespace VideoCore {
37class RasterizerInterface;
38}
39
40namespace Tegra::Engines {
41
42class Puller final {
43public:
44 struct MethodCall {
45 u32 method{};
46 u32 argument{};
47 u32 subchannel{};
48 u32 method_count{};
49
50 explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
51 : method(method_), argument(argument_), subchannel(subchannel_),
52 method_count(method_count_) {}
53
54 [[nodiscard]] bool IsLastCall() const {
55 return method_count <= 1;
56 }
57 };
58
59 enum class FenceOperation : u32 {
60 Acquire = 0,
61 Increment = 1,
62 };
63
64 union FenceAction {
65 u32 raw;
66 BitField<0, 1, FenceOperation> op;
67 BitField<8, 24, u32> syncpoint_id;
68 };
69
70 explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
71 Control::ChannelState& channel_state);
72 ~Puller();
73
74 void CallMethod(const MethodCall& method_call);
75
76 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
77 u32 methods_pending);
78
79 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
80
81 void CallPullerMethod(const MethodCall& method_call);
82
83 void CallEngineMethod(const MethodCall& method_call);
84
85 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
86 u32 methods_pending);
87
88private:
89 Tegra::GPU& gpu;
90
91 MemoryManager& memory_manager;
92 DmaPusher& dma_pusher;
93 Control::ChannelState& channel_state;
94 VideoCore::RasterizerInterface* rasterizer = nullptr;
95
96 static constexpr std::size_t NUM_REGS = 0x800;
97 struct Regs {
98 static constexpr size_t NUM_REGS = 0x40;
99
100 union {
101 struct {
102 INSERT_PADDING_WORDS_NOINIT(0x4);
103 struct {
104 u32 address_high;
105 u32 address_low;
106
107 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
108 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
109 address_low);
110 }
111 } semaphore_address;
112
113 u32 semaphore_sequence;
114 u32 semaphore_trigger;
115 INSERT_PADDING_WORDS_NOINIT(0xC);
116
117 // The pusher and the puller share the reference counter; the pusher only has read
118 // access
119 u32 reference_count;
120 INSERT_PADDING_WORDS_NOINIT(0x5);
121
122 u32 semaphore_acquire;
123 u32 semaphore_release;
124 u32 fence_value;
125 FenceAction fence_action;
126 INSERT_PADDING_WORDS_NOINIT(0xE2);
127
128 // Puller state
129 u32 acquire_mode;
130 u32 acquire_source;
131 u32 acquire_active;
132 u32 acquire_timeout;
133 u32 acquire_value;
134 };
135 std::array<u32, NUM_REGS> reg_array;
136 };
137 } regs{};
138
139 void ProcessBindMethod(const MethodCall& method_call);
140 void ProcessFenceActionMethod();
141 void ProcessSemaphoreAcquire();
142 void ProcessSemaphoreRelease();
143 void ProcessSemaphoreTriggerMethod();
144 void ProcessWaitForInterruptMethod();
145 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
146
147 /// Mapping of command subchannels to their bound engine ids
148 std::array<EngineID, 8> bound_engines{};
149
150 enum class GpuSemaphoreOperation {
151 AcquireEqual = 0x1,
152 WriteLong = 0x2,
153 AcquireGequal = 0x4,
154 AcquireMask = 0x8,
155 };
156
157#define ASSERT_REG_POSITION(field_name, position) \
158 static_assert(offsetof(Regs, field_name) == position * 4, \
159 "Field " #field_name " has invalid position")
160
161 ASSERT_REG_POSITION(semaphore_address, 0x4);
162 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
163 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
164 ASSERT_REG_POSITION(reference_count, 0x14);
165 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
166 ASSERT_REG_POSITION(semaphore_release, 0x1B);
167 ASSERT_REG_POSITION(fence_value, 0x1C);
168 ASSERT_REG_POSITION(fence_action, 0x1D);
169
170 ASSERT_REG_POSITION(acquire_mode, 0x100);
171 ASSERT_REG_POSITION(acquire_source, 0x101);
172 ASSERT_REG_POSITION(acquire_active, 0x102);
173 ASSERT_REG_POSITION(acquire_timeout, 0x103);
174 ASSERT_REG_POSITION(acquire_value, 0x104);
175
176#undef ASSERT_REG_POSITION
177};
178
179} // namespace Tegra::Engines
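The FenceAction union packs the operation and the syncpoint id into a single 32-bit register write; a small decode example with a made-up value:

    Tegra::Engines::Puller::FenceAction action{};
    action.raw = 0x00001201;                            // hypothetical write
    const auto op = action.op.Value();                  // FenceOperation::Increment (bit 0)
    const u32 syncpoint = action.syncpoint_id.Value();  // 0x12 (bits 8..31)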
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 1e9832ddd..d658e038d 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,12 +4,13 @@
4#pragma once 4#pragma once
5 5
6#include <algorithm> 6#include <algorithm>
7#include <cstring>
8#include <memory>
7#include <queue> 9#include <queue>
8 10
9#include "common/common_types.h" 11#include "common/common_types.h"
10#include "video_core/delayed_destruction_ring.h" 12#include "video_core/delayed_destruction_ring.h"
11#include "video_core/gpu.h" 13#include "video_core/gpu.h"
12#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
14 15
15namespace VideoCommon { 16namespace VideoCommon {
@@ -19,10 +20,10 @@ public:
19 explicit FenceBase(u32 payload_, bool is_stubbed_) 20 explicit FenceBase(u32 payload_, bool is_stubbed_)
20 : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} 21 : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
21 22
22 explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_) 23 explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
23 : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} 24 : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
24 25
25 GPUVAddr GetAddress() const { 26 u8* GetAddress() const {
26 return address; 27 return address;
27 } 28 }
28 29
@@ -35,7 +36,7 @@ public:
35 } 36 }
36 37
37private: 38private:
38 GPUVAddr address; 39 u8* address;
39 u32 payload; 40 u32 payload;
40 bool is_semaphore; 41 bool is_semaphore;
41 42
@@ -57,7 +58,7 @@ public:
57 buffer_cache.AccumulateFlushes(); 58 buffer_cache.AccumulateFlushes();
58 } 59 }
59 60
60 void SignalSemaphore(GPUVAddr addr, u32 value) { 61 void SignalSemaphore(u8* addr, u32 value) {
61 TryReleasePendingFences(); 62 TryReleasePendingFences();
62 const bool should_flush = ShouldFlush(); 63 const bool should_flush = ShouldFlush();
63 CommitAsyncFlushes(); 64 CommitAsyncFlushes();
@@ -91,8 +92,9 @@ public:
91 } 92 }
92 PopAsyncFlushes(); 93 PopAsyncFlushes();
93 if (current_fence->IsSemaphore()) { 94 if (current_fence->IsSemaphore()) {
94 gpu_memory.template Write<u32>(current_fence->GetAddress(), 95 char* address = reinterpret_cast<char*>(current_fence->GetAddress());
95 current_fence->GetPayload()); 96 auto payload = current_fence->GetPayload();
97 std::memcpy(address, &payload, sizeof(payload));
96 } else { 98 } else {
97 gpu.IncrementSyncPoint(current_fence->GetPayload()); 99 gpu.IncrementSyncPoint(current_fence->GetPayload());
98 } 100 }
@@ -104,8 +106,8 @@ protected:
104 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 106 explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
105 TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, 107 TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
106 TQueryCache& query_cache_) 108 TQueryCache& query_cache_)
107 : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()}, 109 : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
108 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} 110 buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
109 111
110 virtual ~FenceManager() = default; 112 virtual ~FenceManager() = default;
111 113
@@ -113,7 +115,7 @@ protected:
113 /// true 115 /// true
114 virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; 116 virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
115 /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true 117 /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
116 virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0; 118 virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
117 /// Queues a fence into the backend if the fence isn't stubbed. 119 /// Queues a fence into the backend if the fence isn't stubbed.
118 virtual void QueueFence(TFence& fence) = 0; 120 virtual void QueueFence(TFence& fence) = 0;
119 /// Notifies that the backend fence has been signaled/reached in host GPU. 121 /// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -123,7 +125,6 @@ protected:
123 125
124 VideoCore::RasterizerInterface& rasterizer; 126 VideoCore::RasterizerInterface& rasterizer;
125 Tegra::GPU& gpu; 127 Tegra::GPU& gpu;
126 Tegra::MemoryManager& gpu_memory;
127 TTextureCache& texture_cache; 128 TTextureCache& texture_cache;
128 TTBufferCache& buffer_cache; 129 TTBufferCache& buffer_cache;
129 TQueryCache& query_cache; 130 TQueryCache& query_cache;
@@ -137,8 +138,9 @@ private:
137 } 138 }
138 PopAsyncFlushes(); 139 PopAsyncFlushes();
139 if (current_fence->IsSemaphore()) { 140 if (current_fence->IsSemaphore()) {
140 gpu_memory.template Write<u32>(current_fence->GetAddress(), 141 char* address = reinterpret_cast<char*>(current_fence->GetAddress());
141 current_fence->GetPayload()); 142 const auto payload = current_fence->GetPayload();
143 std::memcpy(address, &payload, sizeof(payload));
142 } else { 144 } else {
143 gpu.IncrementSyncPoint(current_fence->GetPayload()); 145 gpu.IncrementSyncPoint(current_fence->GetPayload());
144 } 146 }
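With the fence now carrying a host pointer instead of a GPU virtual address, releasing a semaphore fence reduces to a plain 4-byte copy into already-mapped guest memory; a sketch of what both hunks above boil down to:

    u8* const address = current_fence->GetAddress();  // host pointer captured when the fence was created
    const u32 payload = current_fence->GetPayload();
    std::memcpy(address, &payload, sizeof(payload));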
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 33431f2a0..80a1c69e0 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,6 +18,8 @@
18#include "core/hle/service/nvdrv/nvdata.h" 18#include "core/hle/service/nvdrv/nvdata.h"
19#include "core/perf_stats.h" 19#include "core/perf_stats.h"
20#include "video_core/cdma_pusher.h" 20#include "video_core/cdma_pusher.h"
21#include "video_core/control/channel_state.h"
22#include "video_core/control/scheduler.h"
21#include "video_core/dma_pusher.h" 23#include "video_core/dma_pusher.h"
22#include "video_core/engines/fermi_2d.h" 24#include "video_core/engines/fermi_2d.h"
23#include "video_core/engines/kepler_compute.h" 25#include "video_core/engines/kepler_compute.h"
@@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
36 38
37struct GPU::Impl { 39struct GPU::Impl {
38 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) 40 explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
39 : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>( 41 : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
40 system)},
41 dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
42 maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
43 fermi_2d{std::make_unique<Engines::Fermi2D>()},
44 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
45 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
46 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
47 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, 42 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
48 gpu_thread{system_, is_async_} {} 43 gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
49 44
50 ~Impl() = default; 45 ~Impl() = default;
51 46
52 /// Binds a renderer to the GPU. 47 std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
53 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 48 auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
54 renderer = std::move(renderer_); 49 channels.emplace(channel_id, channel_state);
55 rasterizer = renderer->ReadRasterizer(); 50 scheduler->DeclareChannel(channel_state);
56 51 return channel_state;
57 memory_manager->BindRasterizer(rasterizer);
58 maxwell_3d->BindRasterizer(rasterizer);
59 fermi_2d->BindRasterizer(rasterizer);
60 kepler_compute->BindRasterizer(rasterizer);
61 kepler_memory->BindRasterizer(rasterizer);
62 maxwell_dma->BindRasterizer(rasterizer);
63 } 52 }
64 53
65 /// Calls a GPU method. 54 void BindChannel(s32 channel_id) {
66 void CallMethod(const GPU::MethodCall& method_call) { 55 if (bound_channel == channel_id) {
67 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, 56 return;
68 method_call.subchannel); 57 }
58 auto it = channels.find(channel_id);
59 ASSERT(it != channels.end());
60 bound_channel = channel_id;
61 current_channel = it->second.get();
69 62
70 ASSERT(method_call.subchannel < bound_engines.size()); 63 rasterizer->BindChannel(*current_channel);
64 }
71 65
72 if (ExecuteMethodOnEngine(method_call.method)) { 66 std::shared_ptr<Control::ChannelState> AllocateChannel() {
73 CallEngineMethod(method_call); 67 return CreateChannel(new_channel_id++);
74 } else {
75 CallPullerMethod(method_call);
76 }
77 } 68 }
78 69
79 /// Calls a GPU multivalue method. 70 void InitChannel(Control::ChannelState& to_init) {
80 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 71 to_init.Init(system, gpu);
81 u32 methods_pending) { 72 to_init.BindRasterizer(rasterizer);
82 LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); 73 rasterizer->InitializeChannel(to_init);
74 }
83 75
84 ASSERT(subchannel < bound_engines.size()); 76 void ReleaseChannel(Control::ChannelState& to_release) {
77 UNIMPLEMENTED();
78 }
85 79
86 if (ExecuteMethodOnEngine(method)) { 80 void CreateHost1xChannel() {
87 CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); 81 if (host1x_channel) {
88 } else { 82 return;
89 for (std::size_t i = 0; i < amount; i++) {
90 CallPullerMethod(GPU::MethodCall{
91 method,
92 base_start[i],
93 subchannel,
94 methods_pending - static_cast<u32>(i),
95 });
96 }
97 } 83 }
84 host1x_channel = CreateChannel(0);
85 host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
86 InitChannel(*host1x_channel);
87 }
88
89 /// Binds a renderer to the GPU.
90 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
91 renderer = std::move(renderer_);
92 rasterizer = renderer->ReadRasterizer();
98 } 93 }
99 94
100 /// Flush all current written commands into the host GPU for execution. 95 /// Flush all current written commands into the host GPU for execution.
@@ -146,42 +141,44 @@ struct GPU::Impl {
146 141
147 /// Returns a reference to the Maxwell3D GPU engine. 142 /// Returns a reference to the Maxwell3D GPU engine.
148 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { 143 [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
149 return *maxwell_3d; 144 ASSERT(current_channel);
145 return *current_channel->maxwell_3d;
150 } 146 }
151 147
152 /// Returns a const reference to the Maxwell3D GPU engine. 148 /// Returns a const reference to the Maxwell3D GPU engine.
153 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { 149 [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
154 return *maxwell_3d; 150 ASSERT(current_channel);
151 return *current_channel->maxwell_3d;
155 } 152 }
156 153
157 /// Returns a reference to the KeplerCompute GPU engine. 154 /// Returns a reference to the KeplerCompute GPU engine.
158 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { 155 [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
159 return *kepler_compute; 156 ASSERT(current_channel);
157 return *current_channel->kepler_compute;
160 } 158 }
161 159
162 /// Returns a reference to the KeplerCompute GPU engine. 160 /// Returns a reference to the KeplerCompute GPU engine.
163 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { 161 [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
164 return *kepler_compute; 162 ASSERT(current_channel);
163 return *current_channel->kepler_compute;
165 } 164 }
166 165
167 /// Returns a reference to the GPU memory manager. 166 /// Returns a reference to the GPU memory manager.
168 [[nodiscard]] Tegra::MemoryManager& MemoryManager() { 167 [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
169 return *memory_manager; 168 CreateHost1xChannel();
170 } 169 return *host1x_channel->memory_manager;
171
172 /// Returns a const reference to the GPU memory manager.
173 [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
174 return *memory_manager;
175 } 170 }
176 171
177 /// Returns a reference to the GPU DMA pusher. 172 /// Returns a reference to the GPU DMA pusher.
178 [[nodiscard]] Tegra::DmaPusher& DmaPusher() { 173 [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
179 return *dma_pusher; 174 ASSERT(current_channel);
175 return *current_channel->dma_pusher;
180 } 176 }
181 177
182 /// Returns a const reference to the GPU DMA pusher. 178 /// Returns a const reference to the GPU DMA pusher.
183 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { 179 [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
184 return *dma_pusher; 180 ASSERT(current_channel);
181 return *current_channel->dma_pusher;
185 } 182 }
186 183
187 /// Returns a reference to the underlying renderer. 184 /// Returns a reference to the underlying renderer.
@@ -306,7 +303,7 @@ struct GPU::Impl {
306 /// This can be used to launch any necessary threads and register any necessary 303 /// This can be used to launch any necessary threads and register any necessary
307 /// core timing events. 304 /// core timing events.
308 void Start() { 305 void Start() {
309 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); 306 gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
310 cpu_context = renderer->GetRenderWindow().CreateSharedContext(); 307 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
311 cpu_context->MakeCurrent(); 308 cpu_context->MakeCurrent();
312 } 309 }
@@ -328,8 +325,8 @@ struct GPU::Impl {
328 } 325 }
329 326
330 /// Push GPU command entries to be processed 327 /// Push GPU command entries to be processed
331 void PushGPUEntries(Tegra::CommandList&& entries) { 328 void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
332 gpu_thread.SubmitList(std::move(entries)); 329 gpu_thread.SubmitList(channel, std::move(entries));
333 } 330 }
334 331
335 /// Push GPU command buffer entries to be processed 332 /// Push GPU command buffer entries to be processed
@@ -381,303 +378,16 @@ struct GPU::Impl {
381 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); 378 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
382 } 379 }
383 380
384 void ProcessBindMethod(const GPU::MethodCall& method_call) {
385 // Bind the current subchannel to the desired engine id.
386 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
387 method_call.argument);
388 const auto engine_id = static_cast<EngineID>(method_call.argument);
389 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
390 switch (engine_id) {
391 case EngineID::FERMI_TWOD_A:
392 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
393 break;
394 case EngineID::MAXWELL_B:
395 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
396 break;
397 case EngineID::KEPLER_COMPUTE_B:
398 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
399 break;
400 case EngineID::MAXWELL_DMA_COPY_A:
401 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
402 break;
403 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
404 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
405 break;
406 default:
407 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
408 }
409 }
410
411 void ProcessFenceActionMethod() {
412 switch (regs.fence_action.op) {
413 case GPU::FenceOperation::Acquire:
414 WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
415 break;
416 case GPU::FenceOperation::Increment:
417 IncrementSyncPoint(regs.fence_action.syncpoint_id);
418 break;
419 default:
420 UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
421 }
422 }
423
424 void ProcessWaitForInterruptMethod() {
425 // TODO(bunnei) ImplementMe
426 LOG_WARNING(HW_GPU, "(STUBBED) called");
427 }
428
429 void ProcessSemaphoreTriggerMethod() {
430 const auto semaphoreOperationMask = 0xF;
431 const auto op =
432 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
433 if (op == GpuSemaphoreOperation::WriteLong) {
434 struct Block {
435 u32 sequence;
436 u32 zeros = 0;
437 u64 timestamp;
438 };
439
440 Block block{};
441 block.sequence = regs.semaphore_sequence;
442 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
443 // CoreTiming
444 block.timestamp = GetTicks();
445 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
446 sizeof(block));
447 } else {
448 const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
449 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
450 (op == GpuSemaphoreOperation::AcquireGequal &&
451 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
452 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
453 // Nothing to do in this case
454 } else {
455 regs.acquire_source = true;
456 regs.acquire_value = regs.semaphore_sequence;
457 if (op == GpuSemaphoreOperation::AcquireEqual) {
458 regs.acquire_active = true;
459 regs.acquire_mode = false;
460 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
461 regs.acquire_active = true;
462 regs.acquire_mode = true;
463 } else if (op == GpuSemaphoreOperation::AcquireMask) {
464 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
465 // semaphore_sequence, gives a non-0 result
466 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
467 } else {
468 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
469 }
470 }
471 }
472 }
473
474 void ProcessSemaphoreRelease() {
475 memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
476 regs.semaphore_release);
477 }
478
479 void ProcessSemaphoreAcquire() {
480 const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
481 const auto value = regs.semaphore_acquire;
482 if (word != value) {
483 regs.acquire_active = true;
484 regs.acquire_value = value;
485 // TODO(kemathe73) figure out how to do the acquire_timeout
486 regs.acquire_mode = false;
487 regs.acquire_source = false;
488 }
489 }
490
491 /// Calls a GPU puller method.
492 void CallPullerMethod(const GPU::MethodCall& method_call) {
493 regs.reg_array[method_call.method] = method_call.argument;
494 const auto method = static_cast<BufferMethods>(method_call.method);
495
496 switch (method) {
497 case BufferMethods::BindObject: {
498 ProcessBindMethod(method_call);
499 break;
500 }
501 case BufferMethods::Nop:
502 case BufferMethods::SemaphoreAddressHigh:
503 case BufferMethods::SemaphoreAddressLow:
504 case BufferMethods::SemaphoreSequence:
505 break;
506 case BufferMethods::UnkCacheFlush:
507 rasterizer->SyncGuestHost();
508 break;
509 case BufferMethods::WrcacheFlush:
510 rasterizer->SignalReference();
511 break;
512 case BufferMethods::FenceValue:
513 break;
514 case BufferMethods::RefCnt:
515 rasterizer->SignalReference();
516 break;
517 case BufferMethods::FenceAction:
518 ProcessFenceActionMethod();
519 break;
520 case BufferMethods::WaitForInterrupt:
521 rasterizer->WaitForIdle();
522 break;
523 case BufferMethods::SemaphoreTrigger: {
524 ProcessSemaphoreTriggerMethod();
525 break;
526 }
527 case BufferMethods::NotifyIntr: {
528 // TODO(Kmather73): Research and implement this method.
529 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
530 break;
531 }
532 case BufferMethods::Unk28: {
533 // TODO(Kmather73): Research and implement this method.
534 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
535 break;
536 }
537 case BufferMethods::SemaphoreAcquire: {
538 ProcessSemaphoreAcquire();
539 break;
540 }
541 case BufferMethods::SemaphoreRelease: {
542 ProcessSemaphoreRelease();
543 break;
544 }
545 case BufferMethods::Yield: {
546 // TODO(Kmather73): Research and implement this method.
547 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
548 break;
549 }
550 default:
551 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
552 break;
553 }
554 }
555
556 /// Calls a GPU engine method.
557 void CallEngineMethod(const GPU::MethodCall& method_call) {
558 const EngineID engine = bound_engines[method_call.subchannel];
559
560 switch (engine) {
561 case EngineID::FERMI_TWOD_A:
562 fermi_2d->CallMethod(method_call.method, method_call.argument,
563 method_call.IsLastCall());
564 break;
565 case EngineID::MAXWELL_B:
566 maxwell_3d->CallMethod(method_call.method, method_call.argument,
567 method_call.IsLastCall());
568 break;
569 case EngineID::KEPLER_COMPUTE_B:
570 kepler_compute->CallMethod(method_call.method, method_call.argument,
571 method_call.IsLastCall());
572 break;
573 case EngineID::MAXWELL_DMA_COPY_A:
574 maxwell_dma->CallMethod(method_call.method, method_call.argument,
575 method_call.IsLastCall());
576 break;
577 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
578 kepler_memory->CallMethod(method_call.method, method_call.argument,
579 method_call.IsLastCall());
580 break;
581 default:
582 UNIMPLEMENTED_MSG("Unimplemented engine");
583 }
584 }
585
586 /// Calls a GPU engine multivalue method.
587 void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
588 u32 methods_pending) {
589 const EngineID engine = bound_engines[subchannel];
590
591 switch (engine) {
592 case EngineID::FERMI_TWOD_A:
593 fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
594 break;
595 case EngineID::MAXWELL_B:
596 maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
597 break;
598 case EngineID::KEPLER_COMPUTE_B:
599 kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
600 break;
601 case EngineID::MAXWELL_DMA_COPY_A:
602 maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
603 break;
604 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
605 kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
606 break;
607 default:
608 UNIMPLEMENTED_MSG("Unimplemented engine");
609 }
610 }
611
612 /// Determines where the method should be executed.
613 [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
614 const auto buffer_method = static_cast<BufferMethods>(method);
615 return buffer_method >= BufferMethods::NonPullerMethods;
616 }
617
618 struct Regs {
619 static constexpr size_t NUM_REGS = 0x40;
620
621 union {
622 struct {
623 INSERT_PADDING_WORDS_NOINIT(0x4);
624 struct {
625 u32 address_high;
626 u32 address_low;
627
628 [[nodiscard]] GPUVAddr SemaphoreAddress() const {
629 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
630 address_low);
631 }
632 } semaphore_address;
633
634 u32 semaphore_sequence;
635 u32 semaphore_trigger;
636 INSERT_PADDING_WORDS_NOINIT(0xC);
637
638 // The pusher and the puller share the reference counter, the pusher only has read
639 // access
640 u32 reference_count;
641 INSERT_PADDING_WORDS_NOINIT(0x5);
642
643 u32 semaphore_acquire;
644 u32 semaphore_release;
645 u32 fence_value;
646 GPU::FenceAction fence_action;
647 INSERT_PADDING_WORDS_NOINIT(0xE2);
648
649 // Puller state
650 u32 acquire_mode;
651 u32 acquire_source;
652 u32 acquire_active;
653 u32 acquire_timeout;
654 u32 acquire_value;
655 };
656 std::array<u32, NUM_REGS> reg_array;
657 };
658 } regs{};
659
660 GPU& gpu; 381 GPU& gpu;
661 Core::System& system; 382 Core::System& system;
662 std::unique_ptr<Tegra::MemoryManager> memory_manager; 383
663 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
664 std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; 384 std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
665 std::unique_ptr<VideoCore::RendererBase> renderer; 385 std::unique_ptr<VideoCore::RendererBase> renderer;
666 VideoCore::RasterizerInterface* rasterizer = nullptr; 386 VideoCore::RasterizerInterface* rasterizer = nullptr;
667 const bool use_nvdec; 387 const bool use_nvdec;
668 388
669 /// Mapping of command subchannels to their bound engine ids 389 std::shared_ptr<Control::ChannelState> host1x_channel;
670 std::array<EngineID, 8> bound_engines{}; 390 s32 new_channel_id{1};
671 /// 3D engine
672 std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
673 /// 2D engine
674 std::unique_ptr<Engines::Fermi2D> fermi_2d;
675 /// Compute engine
676 std::unique_ptr<Engines::KeplerCompute> kepler_compute;
677 /// DMA engine
678 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
679 /// Inline memory engine
680 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
681 /// Shader build notifier 391 /// Shader build notifier
682 std::unique_ptr<VideoCore::ShaderNotify> shader_notify; 392 std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
683 /// When true, we are about to shut down emulation session, so terminate outstanding tasks 393 /// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -710,33 +420,10 @@ struct GPU::Impl {
710 VideoCommon::GPUThread::ThreadManager gpu_thread; 420 VideoCommon::GPUThread::ThreadManager gpu_thread;
711 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; 421 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
712 422
713#define ASSERT_REG_POSITION(field_name, position) \ 423 std::unique_ptr<Tegra::Control::Scheduler> scheduler;
714 static_assert(offsetof(Regs, field_name) == position * 4, \ 424 std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
715 "Field " #field_name " has invalid position") 425 Tegra::Control::ChannelState* current_channel;
716 426 s32 bound_channel{-1};
717 ASSERT_REG_POSITION(semaphore_address, 0x4);
718 ASSERT_REG_POSITION(semaphore_sequence, 0x6);
719 ASSERT_REG_POSITION(semaphore_trigger, 0x7);
720 ASSERT_REG_POSITION(reference_count, 0x14);
721 ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
722 ASSERT_REG_POSITION(semaphore_release, 0x1B);
723 ASSERT_REG_POSITION(fence_value, 0x1C);
724 ASSERT_REG_POSITION(fence_action, 0x1D);
725
726 ASSERT_REG_POSITION(acquire_mode, 0x100);
727 ASSERT_REG_POSITION(acquire_source, 0x101);
728 ASSERT_REG_POSITION(acquire_active, 0x102);
729 ASSERT_REG_POSITION(acquire_timeout, 0x103);
730 ASSERT_REG_POSITION(acquire_value, 0x104);
731
732#undef ASSERT_REG_POSITION
733
734 enum class GpuSemaphoreOperation {
735 AcquireEqual = 0x1,
736 WriteLong = 0x2,
737 AcquireGequal = 0x4,
738 AcquireMask = 0x8,
739 };
740}; 427};
741 428
742GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) 429GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
744 431
745GPU::~GPU() = default; 432GPU::~GPU() = default;
746 433
747void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) { 434std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
748 impl->BindRenderer(std::move(renderer)); 435 return impl->AllocateChannel();
436}
437
438void GPU::InitChannel(Control::ChannelState& to_init) {
439 impl->InitChannel(to_init);
749} 440}
750 441
751void GPU::CallMethod(const MethodCall& method_call) { 442void GPU::BindChannel(s32 channel_id) {
752 impl->CallMethod(method_call); 443 impl->BindChannel(channel_id);
753} 444}
754 445
755void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, 446void GPU::ReleaseChannel(Control::ChannelState& to_release) {
756 u32 methods_pending) { 447 impl->ReleaseChannel(to_release);
757 impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending); 448}
449
450void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
451 impl->BindRenderer(std::move(renderer));
758} 452}
759 453
760void GPU::FlushCommands() { 454void GPU::FlushCommands() {
@@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
881 impl->ReleaseContext(); 575 impl->ReleaseContext();
882} 576}
883 577
884void GPU::PushGPUEntries(Tegra::CommandList&& entries) { 578void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
885 impl->PushGPUEntries(std::move(entries)); 579 impl->PushGPUEntries(channel, std::move(entries));
886} 580}
887 581
888void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { 582void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 42c91954f..74d55e074 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -89,57 +89,20 @@ class Maxwell3D;
89class KeplerCompute; 89class KeplerCompute;
90} // namespace Engines 90} // namespace Engines
91 91
92enum class EngineID { 92namespace Control {
93 FERMI_TWOD_A = 0x902D, // 2D Engine 93struct ChannelState;
94 MAXWELL_B = 0xB197, // 3D Engine 94}
95 KEPLER_COMPUTE_B = 0xB1C0,
96 KEPLER_INLINE_TO_MEMORY_B = 0xA140,
97 MAXWELL_DMA_COPY_A = 0xB0B5,
98};
99 95
100class MemoryManager; 96class MemoryManager;
101 97
102class GPU final { 98class GPU final {
103public: 99public:
104 struct MethodCall {
105 u32 method{};
106 u32 argument{};
107 u32 subchannel{};
108 u32 method_count{};
109
110 explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
111 : method(method_), argument(argument_), subchannel(subchannel_),
112 method_count(method_count_) {}
113
114 [[nodiscard]] bool IsLastCall() const {
115 return method_count <= 1;
116 }
117 };
118
119 enum class FenceOperation : u32 {
120 Acquire = 0,
121 Increment = 1,
122 };
123
124 union FenceAction {
125 u32 raw;
126 BitField<0, 1, FenceOperation> op;
127 BitField<8, 24, u32> syncpoint_id;
128 };
129
130 explicit GPU(Core::System& system, bool is_async, bool use_nvdec); 100 explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
131 ~GPU(); 101 ~GPU();
132 102
133 /// Binds a renderer to the GPU. 103 /// Binds a renderer to the GPU.
134 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); 104 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
135 105
136 /// Calls a GPU method.
137 void CallMethod(const MethodCall& method_call);
138
139 /// Calls a GPU multivalue method.
140 void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
141 u32 methods_pending);
142
143 /// Flush all current written commands into the host GPU for execution. 106 /// Flush all current written commands into the host GPU for execution.
144 void FlushCommands(); 107 void FlushCommands();
145 /// Synchronizes CPU writes with Host GPU memory. 108 /// Synchronizes CPU writes with Host GPU memory.
@@ -147,6 +110,14 @@ public:
147 /// Signal the ending of command list. 110 /// Signal the ending of command list.
148 void OnCommandListEnd(); 111 void OnCommandListEnd();
149 112
113 std::shared_ptr<Control::ChannelState> AllocateChannel();
114
115 void InitChannel(Control::ChannelState& to_init);
116
117 void BindChannel(s32 channel_id);
118
119 void ReleaseChannel(Control::ChannelState& to_release);
120
150 /// Request a host GPU memory flush from the CPU. 121 /// Request a host GPU memory flush from the CPU.
151 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 122 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
152 123
@@ -226,7 +197,7 @@ public:
226 void ReleaseContext(); 197 void ReleaseContext();
227 198
228 /// Push GPU command entries to be processed 199 /// Push GPU command entries to be processed
229 void PushGPUEntries(Tegra::CommandList&& entries); 200 void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
230 201
231 /// Push GPU command buffer entries to be processed 202 /// Push GPU command buffer entries to be processed
232 void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); 203 void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@@ -248,7 +219,7 @@ public:
248 219
249private: 220private:
250 struct Impl; 221 struct Impl;
251 std::unique_ptr<Impl> impl; 222 mutable std::unique_ptr<Impl> impl;
252}; 223};
253 224
254} // namespace Tegra 225} // namespace Tegra
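Putting the new public API together, a channel's lifetime on the service side presumably looks like the following sketch (error handling omitted; command_list stands in for a prepared Tegra::CommandList, the memory_manager assignment mirrors the host1x path above, and a renderer must already be bound so InitChannel can reach the rasterizer):

    auto channel = gpu.AllocateChannel();                  // reserves a bind_id and declares it to the scheduler
    channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
    gpu.InitChannel(*channel);                             // creates the channel's engines and binds the rasterizer
    gpu.PushGPUEntries(channel->bind_id, std::move(command_list));  // submission is now per-channel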
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index f0e48cfbd..9844cde43 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -8,6 +8,7 @@
8#include "common/thread.h" 8#include "common/thread.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/frontend/emu_window.h" 10#include "core/frontend/emu_window.h"
11#include "video_core/control/scheduler.h"
11#include "video_core/dma_pusher.h" 12#include "video_core/dma_pusher.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
13#include "video_core/gpu_thread.h" 14#include "video_core/gpu_thread.h"
@@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
18/// Runs the GPU thread 19/// Runs the GPU thread
19static void RunThread(std::stop_token stop_token, Core::System& system, 20static void RunThread(std::stop_token stop_token, Core::System& system,
20 VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, 21 VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
21 Tegra::DmaPusher& dma_pusher, SynchState& state) { 22 Tegra::Control::Scheduler& scheduler, SynchState& state) {
22 std::string name = "GPU"; 23 std::string name = "GPU";
23 MicroProfileOnThreadCreate(name.c_str()); 24 MicroProfileOnThreadCreate(name.c_str());
24 SCOPE_EXIT({ MicroProfileOnThreadExit(); }); 25 SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
36 break; 37 break;
37 } 38 }
38 if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { 39 if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
39 dma_pusher.Push(std::move(submit_list->entries)); 40 scheduler.Push(submit_list->channel, std::move(submit_list->entries));
40 dma_pusher.DispatchCalls();
41 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { 41 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
42 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 42 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
43 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 43 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
68 68
69void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 69void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
70 Core::Frontend::GraphicsContext& context, 70 Core::Frontend::GraphicsContext& context,
71 Tegra::DmaPusher& dma_pusher) { 71 Tegra::Control::Scheduler& scheduler) {
72 rasterizer = renderer.ReadRasterizer(); 72 rasterizer = renderer.ReadRasterizer();
73 thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), 73 thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
74 std::ref(dma_pusher), std::ref(state)); 74 std::ref(scheduler), std::ref(state));
75} 75}
76 76
77void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 77void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
78 PushCommand(SubmitListCommand(std::move(entries))); 78 PushCommand(SubmitListCommand(channel, std::move(entries)));
79} 79}
80 80
81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2f8210cb9..c5078a2b3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -15,7 +15,9 @@
15 15
16namespace Tegra { 16namespace Tegra {
17struct FramebufferConfig; 17struct FramebufferConfig;
18class DmaPusher; 18namespace Control {
19class Scheduler;
20}
19} // namespace Tegra 21} // namespace Tegra
20 22
21namespace Core { 23namespace Core {
@@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {
34 36
35/// Command to signal to the GPU thread that a command list is ready for processing 37/// Command to signal to the GPU thread that a command list is ready for processing
36struct SubmitListCommand final { 38struct SubmitListCommand final {
37 explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} 39 explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
40 : channel{channel_}, entries{std::move(entries_)} {}
38 41
42 s32 channel;
39 Tegra::CommandList entries; 43 Tegra::CommandList entries;
40}; 44};
41 45
@@ -112,10 +116,10 @@ public:
112 116
113 /// Creates and starts the GPU thread. 117 /// Creates and starts the GPU thread.
114 void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, 118 void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
115 Tegra::DmaPusher& dma_pusher); 119 Tegra::Control::Scheduler& scheduler);
116 120
117 /// Push GPU command entries to be processed 121 /// Push GPU command entries to be processed
118 void SubmitList(Tegra::CommandList&& entries); 122 void SubmitList(s32 channel, Tegra::CommandList&& entries);
119 123
120 /// Swap buffers (render frame) 124 /// Swap buffers (render frame)
121 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 125 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
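Illustrative sketch, not part of the diff above: with this change SubmitListCommand carries the submitting channel's id, and the GPU thread routes the entries to that channel through the scheduler instead of pushing them into a single DmaPusher. A minimal standalone analogue of that routing; FakeCommandList and ChannelScheduler are names invented for this sketch, not yuzu types:

#include <cstdint>
#include <deque>
#include <unordered_map>
#include <utility>
#include <vector>

using FakeCommandList = std::vector<std::uint64_t>;   // stand-in for Tegra::CommandList

// Minimal stand-in for Tegra::Control::Scheduler: one pending queue per channel id,
// so Push(channel, entries) can deliver work to the right channel's pusher.
class ChannelScheduler {
public:
    void Push(std::int32_t channel, FakeCommandList&& entries) {
        queues[channel].push_back(std::move(entries));
    }
    std::size_t Pending(std::int32_t channel) const {
        const auto it = queues.find(channel);
        return it == queues.end() ? 0 : it->second.size();
    }
private:
    std::unordered_map<std::int32_t, std::deque<FakeCommandList>> queues;
};

int main() {
    ChannelScheduler scheduler;
    scheduler.Push(1, FakeCommandList{0xdead, 0xbeef});
    scheduler.Push(2, FakeCommandList{0xcafe});
    return scheduler.Pending(1) == 1 && scheduler.Pending(2) == 1 ? 0 : 1;
}
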
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index bf9eb735d..a3efd365e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
133 // TryLockPage(page_entry, size); 133 // TryLockPage(page_entry, size);
134 auto& current_page = page_table[PageEntryIndex(gpu_addr)]; 134 auto& current_page = page_table[PageEntryIndex(gpu_addr)];
135 135
136 if ((!current_page.IsValid() && page_entry.IsValid()) ||
137 current_page.ToAddress() != page_entry.ToAddress()) {
138 rasterizer->ModifyGPUMemory(gpu_addr, size);
139 }
140
141 current_page = page_entry; 136 current_page = page_entry;
142} 137}
143 138
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 889b606b3..eb68ea638 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -17,6 +17,7 @@
17 17
18#include "common/assert.h" 18#include "common/assert.h"
19#include "common/settings.h" 19#include "common/settings.h"
20#include "video_core/control/channel_state_cache.h"
20#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
21#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
22#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
@@ -90,13 +91,10 @@ private:
90}; 91};
91 92
92template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> 93template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
93class QueryCacheBase { 94class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
94public: 95public:
95 explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, 96 explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
96 Tegra::Engines::Maxwell3D& maxwell3d_, 97 : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
97 Tegra::MemoryManager& gpu_memory_)
98 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
99 gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
100 VideoCore::QueryType::SamplesPassed}}} {} 98 VideoCore::QueryType::SamplesPassed}}} {}
101 99
102 void InvalidateRegion(VAddr addr, std::size_t size) { 100 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -117,13 +115,13 @@ public:
117 */ 115 */
118 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { 116 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
119 std::unique_lock lock{mutex}; 117 std::unique_lock lock{mutex};
120 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 118 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
121 ASSERT(cpu_addr); 119 ASSERT(cpu_addr);
122 120
123 CachedQuery* query = TryGet(*cpu_addr); 121 CachedQuery* query = TryGet(*cpu_addr);
124 if (!query) { 122 if (!query) {
125 ASSERT_OR_EXECUTE(cpu_addr, return;); 123 ASSERT_OR_EXECUTE(cpu_addr, return;);
126 u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); 124 u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
127 125
128 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); 126 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
129 } 127 }
@@ -137,7 +135,7 @@ public:
137 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 135 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
138 void UpdateCounters() { 136 void UpdateCounters() {
139 std::unique_lock lock{mutex}; 137 std::unique_lock lock{mutex};
140 const auto& regs = maxwell3d.regs; 138 const auto& regs = maxwell3d->regs;
141 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); 139 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
142 } 140 }
143 141
@@ -264,8 +262,6 @@ private:
264 static constexpr unsigned YUZU_PAGEBITS = 12; 262 static constexpr unsigned YUZU_PAGEBITS = 12;
265 263
266 VideoCore::RasterizerInterface& rasterizer; 264 VideoCore::RasterizerInterface& rasterizer;
267 Tegra::Engines::Maxwell3D& maxwell3d;
268 Tegra::MemoryManager& gpu_memory;
269 265
270 std::recursive_mutex mutex; 266 std::recursive_mutex mutex;
271 267
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a04a76481..8dacb2626 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -16,6 +16,9 @@ class MemoryManager;
16namespace Engines { 16namespace Engines {
17class AccelerateDMAInterface; 17class AccelerateDMAInterface;
18} 18}
19namespace Control {
20struct ChannelState;
21}
19} // namespace Tegra 22} // namespace Tegra
20 23
21namespace VideoCore { 24namespace VideoCore {
@@ -137,5 +140,11 @@ public:
137 /// Initialize disk cached resources for the game being emulated 140 /// Initialize disk cached resources for the game being emulated
138 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 141 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
139 const DiskResourceLoadCallback& callback) {} 142 const DiskResourceLoadCallback& callback) {}
143
144 virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
145
146 virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
147
148 virtual void ReleaseChannel(s32 channel_id) {}
140}; 149};
141} // namespace VideoCore 150} // namespace VideoCore
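Illustrative sketch of how the new virtual hooks are meant to be used: the GPU notifies the rasterizer when a channel is created, becomes active, or is released, and backends that do not care keep the empty defaults. ChannelState and LoggingRasterizer below are invented for the sketch; only the hook names mirror the interface additions above:

#include <cstdint>
#include <iostream>

struct ChannelState { std::int32_t bind_id = -1; };   // stand-in for Tegra::Control::ChannelState

class RasterizerInterface {
public:
    virtual ~RasterizerInterface() = default;
    // Defaults are no-ops, as in the interface change above.
    virtual void InitializeChannel(ChannelState&) {}
    virtual void BindChannel(ChannelState&) {}
    virtual void ReleaseChannel(std::int32_t) {}
};

class LoggingRasterizer final : public RasterizerInterface {
public:
    void InitializeChannel(ChannelState& ch) override { std::cout << "init " << ch.bind_id << '\n'; }
    void BindChannel(ChannelState& ch) override { std::cout << "bind " << ch.bind_id << '\n'; }
    void ReleaseChannel(std::int32_t id) override { std::cout << "release " << id << '\n'; }
};

int main() {
    ChannelState ch{7};
    LoggingRasterizer rast;
    rast.InitializeChannel(ch);
    rast.BindChannel(ch);
    rast.ReleaseChannel(ch.bind_id);
}
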
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6e82c2e28..c76446b60 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -12,7 +12,7 @@ namespace OpenGL {
12 12
13GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} 13GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
14 14
15GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_) 15GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
16 : FenceBase{address_, payload_, is_stubbed_} {} 16 : FenceBase{address_, payload_, is_stubbed_} {}
17 17
18GLInnerFence::~GLInnerFence() = default; 18GLInnerFence::~GLInnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
52 return std::make_shared<GLInnerFence>(value, is_stubbed); 52 return std::make_shared<GLInnerFence>(value, is_stubbed);
53} 53}
54 54
55Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { 55Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
56 return std::make_shared<GLInnerFence>(addr, value, is_stubbed); 56 return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
57} 57}
58 58
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 14ff00db2..fced8d002 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,7 +17,7 @@ namespace OpenGL {
17class GLInnerFence : public VideoCommon::FenceBase { 17class GLInnerFence : public VideoCommon::FenceBase {
18public: 18public:
19 explicit GLInnerFence(u32 payload_, bool is_stubbed_); 19 explicit GLInnerFence(u32 payload_, bool is_stubbed_);
20 explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_); 20 explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
21 ~GLInnerFence(); 21 ~GLInnerFence();
22 22
23 void Queue(); 23 void Queue();
@@ -41,7 +41,7 @@ public:
41 41
42protected: 42protected:
43 Fence CreateFence(u32 value, bool is_stubbed) override; 43 Fence CreateFence(u32 value, bool is_stubbed) override;
44 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; 44 Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
45 void QueueFence(Fence& fence) override; 45 void QueueFence(Fence& fence) override;
46 bool IsFenceSignaled(Fence& fence) const override; 46 bool IsFenceSignaled(Fence& fence) const override;
47 void WaitFence(Fence& fence) override; 47 void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ed40f5791..5070db441 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
26 26
27} // Anonymous namespace 27} // Anonymous namespace
28 28
29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, 29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
30 Tegra::MemoryManager& gpu_memory_) 30 : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
31 : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
32 31
33QueryCache::~QueryCache() = default; 32QueryCache::~QueryCache() = default;
34 33
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 8a49f1ef0..14ce59990 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
28class QueryCache final 28class QueryCache final
29 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 29 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
30public: 30public:
31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, 31 explicit QueryCache(RasterizerOpenGL& rasterizer_);
32 Tegra::MemoryManager& gpu_memory_);
33 ~QueryCache(); 32 ~QueryCache();
34 33
35 OGLQuery AllocateQuery(VideoCore::QueryType type); 34 OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a0d048b0b..e8d61bd41 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
60 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 60 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
61 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), 61 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
62 texture_cache_runtime(device, program_manager, state_tracker), 62 texture_cache_runtime(device, program_manager, state_tracker),
63 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 63 texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
64 buffer_cache_runtime(device), 64 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
65 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 65 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
66 shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, 66 state_tracker, gpu.ShaderNotify()),
67 buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), 67 query_cache(*this), accelerate_dma(buffer_cache),
68 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
69 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} 68 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
70 69
71RasterizerOpenGL::~RasterizerOpenGL() = default; 70RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
392 gpu_memory.Write<u32>(addr, value); 391 gpu_memory.Write<u32>(addr, value);
393 return; 392 return;
394 } 393 }
395 fence_manager.SignalSemaphore(addr, value); 394 auto paddr = gpu_memory.GetPointer(addr);
395 fence_manager.SignalSemaphore(paddr, value);
396} 396}
397 397
398void RasterizerOpenGL::SignalSyncPoint(u32 value) { 398void RasterizerOpenGL::SignalSyncPoint(u32 value) {
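Illustrative sketch of the fence-address change visible above: semaphores are now signalled against a host pointer rather than a GPU virtual address, so the rasterizer resolves the address once via gpu_memory.GetPointer and the fence later stores the payload through that pointer. HostFence is an invented standalone analogue, not the yuzu FenceBase:

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in for the u8*-based fence: it keeps a raw host pointer and a payload.
struct HostFence {
    std::uint8_t* address = nullptr;
    std::uint32_t payload = 0;

    void Signal() const {
        // Equivalent to writing the semaphore value once the GPU work completes.
        std::memcpy(address, &payload, sizeof(payload));
    }
};

int main() {
    std::vector<std::uint8_t> guest_memory(16, 0);
    // In the real code this pointer would come from gpu_memory.GetPointer(gpu_addr).
    HostFence fence{guest_memory.data() + 4, 0xCAFEBABE};
    fence.Signal();
    std::uint32_t value = 0;
    std::memcpy(&value, guest_memory.data() + 4, sizeof(value));
    return value == 0xCAFEBABE ? 0 : 1;
}
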
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 0b8d8ec92..494581d0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
151} // Anonymous namespace 151} // Anonymous namespace
152 152
153ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 153ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
154 Tegra::Engines::Maxwell3D& maxwell3d_, 154 const Device& device_, TextureCache& texture_cache_,
155 Tegra::Engines::KeplerCompute& kepler_compute_, 155 BufferCache& buffer_cache_, ProgramManager& program_manager_,
156 Tegra::MemoryManager& gpu_memory_, const Device& device_, 156 StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
157 TextureCache& texture_cache_, BufferCache& buffer_cache_, 157 : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
158 ProgramManager& program_manager_, StateTracker& state_tracker_, 158 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
159 VideoCore::ShaderNotify& shader_notify_) 159 state_tracker{state_tracker_}, shader_notify{shader_notify_},
160 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, 160 use_asynchronous_shaders{device.UseAsynchronousShaders()},
161 emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
162 buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
163 shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
164 profile{ 161 profile{
165 .supported_spirv = 0x00010000, 162 .supported_spirv = 0x00010000,
166 163
@@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
310 current_pipeline = nullptr; 307 current_pipeline = nullptr;
311 return nullptr; 308 return nullptr;
312 } 309 }
313 const auto& regs{maxwell3d.regs}; 310 const auto& regs{maxwell3d->regs};
314 graphics_key.raw = 0; 311 graphics_key.raw = 0;
315 graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); 312 graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
316 graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 313 graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
351 } 348 }
352 // If something is using depth, we can assume that games are not rendering anything which 349 // If something is using depth, we can assume that games are not rendering anything which
353 // will be used one time. 350 // will be used one time.
354 if (maxwell3d.regs.zeta_enable) { 351 if (maxwell3d->regs.zeta_enable) {
355 return nullptr; 352 return nullptr;
356 } 353 }
357 // If games are using a small index count, we can assume these are full screen quads. 354 // If games are using a small index count, we can assume these are full screen quads.
358 // Usually these shaders are only used once for building textures so we can assume they 355 // Usually these shaders are only used once for building textures so we can assume they
359 // can't be built async 356 // can't be built async
360 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { 357 if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
361 return pipeline; 358 return pipeline;
362 } 359 }
363 return nullptr; 360 return nullptr;
@@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
368 if (!shader) { 365 if (!shader) {
369 return nullptr; 366 return nullptr;
370 } 367 }
371 const auto& qmd{kepler_compute.launch_description}; 368 const auto& qmd{kepler_compute->launch_description};
372 const ComputePipelineKey key{ 369 const ComputePipelineKey key{
373 .unique_hash = shader->unique_hash, 370 .unique_hash = shader->unique_hash,
374 .shared_memory_size = qmd.shared_alloc, 371 .shared_memory_size = qmd.shared_alloc,
@@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
481 } 478 }
482 auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; 479 auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
483 return std::make_unique<GraphicsPipeline>( 480 return std::make_unique<GraphicsPipeline>(
484 device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, 481 device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
485 thread_worker, &shader_notify, sources, sources_spirv, infos, key); 482 state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);
486 483
487} catch (Shader::Exception& exception) { 484} catch (Shader::Exception& exception) {
488 LOG_ERROR(Render_OpenGL, "{}", exception.what()); 485 LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
491 488
492std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( 489std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
493 const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { 490 const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
494 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; 491 const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
495 const auto& qmd{kepler_compute.launch_description}; 492 const auto& qmd{kepler_compute->launch_description};
496 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; 493 ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
497 env.SetCachedSize(shader->size_bytes); 494 env.SetCachedSize(shader->size_bytes);
498 495
499 main_pools.ReleaseContents(); 496 main_pools.ReleaseContents();
@@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
536 break; 533 break;
537 } 534 }
538 535
539 return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, 536 return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
540 kepler_compute, program_manager, program.info, code, 537 *kepler_compute, program_manager, program.info, code,
541 code_spirv); 538 code_spirv);
542} catch (Shader::Exception& exception) { 539} catch (Shader::Exception& exception) {
543 LOG_ERROR(Render_OpenGL, "{}", exception.what()); 540 LOG_ERROR(Render_OpenGL, "{}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a14269dea..89f181fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
30class ShaderCache : public VideoCommon::ShaderCache { 30class ShaderCache : public VideoCommon::ShaderCache {
31public: 31public:
32 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 32 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
33 Tegra::Engines::Maxwell3D& maxwell3d_, 33 const Device& device_, TextureCache& texture_cache_,
34 Tegra::Engines::KeplerCompute& kepler_compute_, 34 BufferCache& buffer_cache_, ProgramManager& program_manager_,
35 Tegra::MemoryManager& gpu_memory_, const Device& device_, 35 StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
36 TextureCache& texture_cache_, BufferCache& buffer_cache_,
37 ProgramManager& program_manager_, StateTracker& state_tracker_,
38 VideoCore::ShaderNotify& shader_notify_);
39 ~ShaderCache(); 36 ~ShaderCache();
40 37
41 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 38 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7c78d0299..68c2bc34c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
95 Core::Frontend::EmuWindow& emu_window, 95 Core::Frontend::EmuWindow& emu_window,
96 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 96 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
97 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try 97 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
98 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), 98 : RendererBase(emu_window, std::move(context_)),
99 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), 99 telemetry_session(telemetry_session_),
100 cpu_memory(cpu_memory_),
101 gpu(gpu_),
102 library(OpenLibrary()),
100 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, 103 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
101 true, Settings::values.renderer_debug.GetValue())), 104 true, Settings::values.renderer_debug.GetValue())),
102 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), 105 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
103 surface(CreateSurface(instance, render_window)), 106 surface(CreateSurface(instance, render_window)),
104 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), 107 device(CreateDevice(instance, dld, *surface)),
105 state_tracker(gpu), scheduler(device, state_tracker), 108 memory_allocator(device, false),
109 state_tracker(gpu),
110 scheduler(device, state_tracker),
106 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, 111 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
107 render_window.GetFramebufferLayout().height, false), 112 render_window.GetFramebufferLayout().height, false),
108 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, 113 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
109 screen_info), 114 screen_info),
110 rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, 115 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
111 memory_allocator, state_tracker, scheduler) { 116 state_tracker, scheduler) {
112 Report(); 117 Report();
113} catch (const vk::Exception& exception) { 118} catch (const vk::Exception& exception) {
114 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); 119 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index c249b34d4..301cbbabe 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -14,7 +14,7 @@ namespace Vulkan {
14InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) 14InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
15 : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} 15 : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
16 16
17InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_) 17InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
18 : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} 18 : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
19 19
20InnerFence::~InnerFence() = default; 20InnerFence::~InnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
52 return std::make_shared<InnerFence>(scheduler, value, is_stubbed); 52 return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
53} 53}
54 54
55Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { 55Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
56 return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); 56 return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
57} 57}
58 58
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7c0bbd80a..ea9e88052 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -26,7 +26,7 @@ class Scheduler;
26class InnerFence : public VideoCommon::FenceBase { 26class InnerFence : public VideoCommon::FenceBase {
27public: 27public:
28 explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); 28 explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
29 explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_); 29 explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
30 ~InnerFence(); 30 ~InnerFence();
31 31
32 void Queue(); 32 void Queue();
@@ -51,7 +51,7 @@ public:
51 51
52protected: 52protected:
53 Fence CreateFence(u32 value, bool is_stubbed) override; 53 Fence CreateFence(u32 value, bool is_stubbed) override;
54 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; 54 Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
55 void QueueFence(Fence& fence) override; 55 void QueueFence(Fence& fence) override;
56 bool IsFenceSignaled(Fence& fence) const override; 56 bool IsFenceSignaled(Fence& fence) const override;
57 void WaitFence(Fence& fence) override; 57 void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index accbfc8e1..b1e0b96c4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
259 return std::memcmp(&rhs, this, Size()) == 0; 259 return std::memcmp(&rhs, this, Size()) == 0;
260} 260}
261 261
262PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, 262PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
263 Tegra::Engines::KeplerCompute& kepler_compute_,
264 Tegra::MemoryManager& gpu_memory_, const Device& device_,
265 Scheduler& scheduler_, DescriptorPool& descriptor_pool_, 263 Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
266 UpdateDescriptorQueue& update_descriptor_queue_, 264 UpdateDescriptorQueue& update_descriptor_queue_,
267 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, 265 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
268 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) 266 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
269 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, 267 : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
270 device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, 268 descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
271 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, 269 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
272 buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, 270 texture_cache{texture_cache_}, shader_notify{shader_notify_},
273 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, 271 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
274 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), 272 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
275 serialization_thread(1, "VkPipelineSerialization") { 273 serialization_thread(1, "VkPipelineSerialization") {
@@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
337 current_pipeline = nullptr; 335 current_pipeline = nullptr;
338 return nullptr; 336 return nullptr;
339 } 337 }
340 graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), 338 graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
341 device.IsExtVertexInputDynamicStateSupported()); 339 device.IsExtVertexInputDynamicStateSupported());
342 340
343 if (current_pipeline) { 341 if (current_pipeline) {
@@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
357 if (!shader) { 355 if (!shader) {
358 return nullptr; 356 return nullptr;
359 } 357 }
360 const auto& qmd{kepler_compute.launch_description}; 358 const auto& qmd{kepler_compute->launch_description};
361 const ComputePipelineCacheKey key{ 359 const ComputePipelineCacheKey key{
362 .unique_hash = shader->unique_hash, 360 .unique_hash = shader->unique_hash,
363 .shared_memory_size = qmd.shared_alloc, 361 .shared_memory_size = qmd.shared_alloc,
@@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
486 } 484 }
487 // If something is using depth, we can assume that games are not rendering anything which 485 // If something is using depth, we can assume that games are not rendering anything which
488 // will be used one time. 486 // will be used one time.
489 if (maxwell3d.regs.zeta_enable) { 487 if (maxwell3d->regs.zeta_enable) {
490 return nullptr; 488 return nullptr;
491 } 489 }
492 // If games are using a small index count, we can assume these are full screen quads. 490 // If games are using a small index count, we can assume these are full screen quads.
493 // Usually these shaders are only used once for building textures so we can assume they 491 // Usually these shaders are only used once for building textures so we can assume they
494 // can't be built async 492 // can't be built async
495 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { 493 if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
496 return pipeline; 494 return pipeline;
497 } 495 }
498 return nullptr; 496 return nullptr;
@@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
558 } 556 }
559 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; 557 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
560 return std::make_unique<GraphicsPipeline>( 558 return std::make_unique<GraphicsPipeline>(
561 maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, 559 *maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
562 descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, 560 descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
563 std::move(modules), infos); 561 std::move(modules), infos);
564 562
@@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
592 590
593std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( 591std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
594 const ComputePipelineCacheKey& key, const ShaderInfo* shader) { 592 const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
595 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; 593 const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
596 const auto& qmd{kepler_compute.launch_description}; 594 const auto& qmd{kepler_compute->launch_description};
597 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; 595 ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
598 env.SetCachedSize(shader->size_bytes); 596 env.SetCachedSize(shader->size_bytes);
599 597
600 main_pools.ReleaseContents(); 598 main_pools.ReleaseContents();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 127957dbf..61f9e9366 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -100,10 +100,8 @@ struct ShaderPools {
100 100
101class PipelineCache : public VideoCommon::ShaderCache { 101class PipelineCache : public VideoCommon::ShaderCache {
102public: 102public:
103 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, 103 explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
104 Tegra::Engines::KeplerCompute& kepler_compute, 104 DescriptorPool& descriptor_pool,
105 Tegra::MemoryManager& gpu_memory, const Device& device,
106 Scheduler& scheduler, DescriptorPool& descriptor_pool,
107 UpdateDescriptorQueue& update_descriptor_queue, 105 UpdateDescriptorQueue& update_descriptor_queue,
108 RenderPassCache& render_pass_cache, BufferCache& buffer_cache, 106 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
109 TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); 107 TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2b859c6b8..393bbdf37 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
65 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; 65 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
66} 66}
67 67
68QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, 68QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
69 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 69 Scheduler& scheduler_)
70 const Device& device_, Scheduler& scheduler_) 70 : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
71 : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
72 query_pools{ 71 query_pools{
73 QueryPool{device_, scheduler_, QueryType::SamplesPassed}, 72 QueryPool{device_, scheduler_, QueryType::SamplesPassed},
74 } {} 73 } {}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index b0d86c4f8..26762ee09 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,9 +52,8 @@ private:
52class QueryCache final 52class QueryCache final
53 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 53 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
54public: 54public:
55 explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, 55 explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
56 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 56 Scheduler& scheduler_);
57 const Device& device_, Scheduler& scheduler_);
58 ~QueryCache(); 57 ~QueryCache();
59 58
60 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); 59 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7e40c2df1..5d9ff0589 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -11,6 +11,7 @@
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/scope_exit.h" 12#include "common/scope_exit.h"
13#include "common/settings.h" 13#include "common/settings.h"
14#include "video_core/control/channel_state.h"
14#include "video_core/engines/kepler_compute.h" 15#include "video_core/engines/kepler_compute.h"
15#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
16#include "video_core/renderer_vulkan/blit_image.h" 17#include "video_core/renderer_vulkan/blit_image.h"
@@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
148} // Anonymous namespace 149} // Anonymous namespace
149 150
150RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 151RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
151 Tegra::MemoryManager& gpu_memory_,
152 Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, 152 Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
153 const Device& device_, MemoryAllocator& memory_allocator_, 153 const Device& device_, MemoryAllocator& memory_allocator_,
154 StateTracker& state_tracker_, Scheduler& scheduler_) 154 StateTracker& state_tracker_, Scheduler& scheduler_)
155 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, 155 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
156 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, 156 memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
157 screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
158 state_tracker{state_tracker_}, scheduler{scheduler_},
159 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), 157 staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
160 update_descriptor_queue(device, scheduler), 158 update_descriptor_queue(device, scheduler),
161 blit_image(device, scheduler, state_tracker, descriptor_pool), 159 blit_image(device, scheduler, state_tracker, descriptor_pool),
@@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
165 memory_allocator, staging_pool, 163 memory_allocator, staging_pool,
166 blit_image, astc_decoder_pass, 164 blit_image, astc_decoder_pass,
167 render_pass_cache}, 165 render_pass_cache},
168 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 166 texture_cache(texture_cache_runtime, *this),
169 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 167 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
170 update_descriptor_queue, descriptor_pool), 168 update_descriptor_queue, descriptor_pool),
171 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 169 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
172 pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 170 pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
173 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, 171 render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
174 texture_cache, gpu.ShaderNotify()), 172 query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
175 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
176 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 173 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
177 wfi_event(device.GetLogical().CreateEvent()) { 174 wfi_event(device.GetLogical().CreateEvent()) {
178 scheduler.SetQueryCache(query_cache); 175 scheduler.SetQueryCache(query_cache);
@@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
199 196
200 UpdateDynamicStates(); 197 UpdateDynamicStates();
201 198
202 const auto& regs{maxwell3d.regs}; 199 const auto& regs{maxwell3d->regs};
203 const u32 num_instances{maxwell3d.mme_draw.instance_count}; 200 const u32 num_instances{maxwell3d->mme_draw.instance_count};
204 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; 201 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
205 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { 202 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
206 if (draw_params.is_indexed) { 203 if (draw_params.is_indexed) {
@@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
218void RasterizerVulkan::Clear() { 215void RasterizerVulkan::Clear() {
219 MICROPROFILE_SCOPE(Vulkan_Clearing); 216 MICROPROFILE_SCOPE(Vulkan_Clearing);
220 217
221 if (!maxwell3d.ShouldExecute()) { 218 if (!maxwell3d->ShouldExecute()) {
222 return; 219 return;
223 } 220 }
224 FlushWork(); 221 FlushWork();
225 222
226 query_cache.UpdateCounters(); 223 query_cache.UpdateCounters();
227 224
228 auto& regs = maxwell3d.regs; 225 auto& regs = maxwell3d->regs;
229 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 226 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
230 regs.clear_buffers.A; 227 regs.clear_buffers.A;
231 const bool use_depth = regs.clear_buffers.Z; 228 const bool use_depth = regs.clear_buffers.Z;
@@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
339 return; 336 return;
340 } 337 }
341 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; 338 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
342 pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); 339 pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
343 340
344 const auto& qmd{kepler_compute.launch_description}; 341 const auto& qmd{kepler_compute->launch_description};
345 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; 342 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
346 scheduler.RequestOutsideRenderPassOperationContext(); 343 scheduler.RequestOutsideRenderPassOperationContext();
347 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); 344 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
451 448
452void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 449void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
453 if (!gpu.IsAsync()) { 450 if (!gpu.IsAsync()) {
454 gpu_memory.Write<u32>(addr, value); 451 gpu_memory->Write<u32>(addr, value);
455 return; 452 return;
456 } 453 }
457 fence_manager.SignalSemaphore(addr, value); 454 auto paddr = gpu_memory->GetPointer(addr);
455 fence_manager.SignalSemaphore(paddr, value);
458} 456}
459 457
460void RasterizerVulkan::SignalSyncPoint(u32 value) { 458void RasterizerVulkan::SignalSyncPoint(u32 value) {
@@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
553 551
554void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 552void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
555 std::span<u8> memory) { 553 std::span<u8> memory) {
556 auto cpu_addr = gpu_memory.GpuToCpuAddress(address); 554 auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
557 if (!cpu_addr) [[unlikely]] { 555 if (!cpu_addr) [[unlikely]] {
558 gpu_memory.WriteBlock(address, memory.data(), copy_size); 556 gpu_memory->WriteBlock(address, memory.data(), copy_size);
559 return; 557 return;
560 } 558 }
561 gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); 559 gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
562 { 560 {
563 std::unique_lock<std::mutex> lock{buffer_cache.mutex}; 561 std::unique_lock<std::mutex> lock{buffer_cache.mutex};
564 if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { 562 if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
627} 625}
628 626
629void RasterizerVulkan::UpdateDynamicStates() { 627void RasterizerVulkan::UpdateDynamicStates() {
630 auto& regs = maxwell3d.regs; 628 auto& regs = maxwell3d->regs;
631 UpdateViewportsState(regs); 629 UpdateViewportsState(regs);
632 UpdateScissorsState(regs); 630 UpdateScissorsState(regs);
633 UpdateDepthBias(regs); 631 UpdateDepthBias(regs);
@@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
651} 649}
652 650
653void RasterizerVulkan::BeginTransformFeedback() { 651void RasterizerVulkan::BeginTransformFeedback() {
654 const auto& regs = maxwell3d.regs; 652 const auto& regs = maxwell3d->regs;
655 if (regs.tfb_enabled == 0) { 653 if (regs.tfb_enabled == 0) {
656 return; 654 return;
657 } 655 }
@@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
667} 665}
668 666
669void RasterizerVulkan::EndTransformFeedback() { 667void RasterizerVulkan::EndTransformFeedback() {
670 const auto& regs = maxwell3d.regs; 668 const auto& regs = maxwell3d->regs;
671 if (regs.tfb_enabled == 0) { 669 if (regs.tfb_enabled == 0) {
672 return; 670 return;
673 } 671 }
@@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
917} 915}
918 916
919void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { 917void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
920 auto& dirty{maxwell3d.dirty.flags}; 918 auto& dirty{maxwell3d->dirty.flags};
921 if (!dirty[Dirty::VertexInput]) { 919 if (!dirty[Dirty::VertexInput]) {
922 return; 920 return;
923 } 921 }
@@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
974 }); 972 });
975} 973}
976 974
975void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
976 CreateChannel(channel);
977 {
978 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
979 texture_cache.CreateChannel(channel);
980 buffer_cache.CreateChannel(channel);
981 }
982 pipeline_cache.CreateChannel(channel);
983 query_cache.CreateChannel(channel);
984 state_tracker.SetupTables(channel);
985}
986
987void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
988 const s32 channel_id = channel.bind_id;
989 BindToChannel(channel_id);
990 {
991 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
992 texture_cache.BindToChannel(channel_id);
993 buffer_cache.BindToChannel(channel_id);
994 }
995 pipeline_cache.BindToChannel(channel_id);
996 query_cache.BindToChannel(channel_id);
997 state_tracker.ChangeChannel(channel);
998 scheduler.InvalidateState();
999}
1000
1001void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
1002 EraseChannel(channel_id);
1003 {
1004 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
1005 texture_cache.EraseChannel(channel_id);
1006 buffer_cache.EraseChannel(channel_id);
1007 }
1008 pipeline_cache.EraseChannel(channel_id);
1009 query_cache.EraseChannel(channel_id);
1010}
1011
977} // namespace Vulkan 1012} // namespace Vulkan
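Illustrative sketch of the lifecycle fan-out implemented just above: InitializeChannel, BindChannel, and ReleaseChannel forward the channel event to every per-channel cache the rasterizer owns. PerChannelCache and the Rasterizer wrapper below are invented, heavily simplified stand-ins (locking and state-tracker updates are omitted):

#include <cstdint>
#include <unordered_set>

// Stand-in for any cache deriving from ChannelSetupCaches.
class PerChannelCache {
public:
    void CreateChannel(std::int32_t id) { channels.insert(id); }
    void BindToChannel(std::int32_t id) { bound = id; }
    void EraseChannel(std::int32_t id) { channels.erase(id); }
    std::int32_t Bound() const { return bound; }
private:
    std::unordered_set<std::int32_t> channels;
    std::int32_t bound = -1;
};

struct Rasterizer {
    PerChannelCache texture_cache, buffer_cache, pipeline_cache, query_cache;

    void InitializeChannel(std::int32_t id) {
        texture_cache.CreateChannel(id);
        buffer_cache.CreateChannel(id);
        pipeline_cache.CreateChannel(id);
        query_cache.CreateChannel(id);
    }
    void BindChannel(std::int32_t id) {
        texture_cache.BindToChannel(id);
        buffer_cache.BindToChannel(id);
        pipeline_cache.BindToChannel(id);
        query_cache.BindToChannel(id);
    }
    void ReleaseChannel(std::int32_t id) {
        texture_cache.EraseChannel(id);
        buffer_cache.EraseChannel(id);
        pipeline_cache.EraseChannel(id);
        query_cache.EraseChannel(id);
    }
};

int main() {
    Rasterizer rast;
    rast.InitializeChannel(5);
    rast.BindChannel(5);
    const bool ok = rast.query_cache.Bound() == 5;
    rast.ReleaseChannel(5);
    return ok ? 0 : 1;
}
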
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0370ea39b..642fe6576 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -8,6 +8,7 @@
8#include <boost/container/static_vector.hpp> 8#include <boost/container/static_vector.hpp>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/control/channel_state_cache.h"
11#include "video_core/engines/maxwell_dma.h" 12#include "video_core/engines/maxwell_dma.h"
12#include "video_core/rasterizer_accelerated.h" 13#include "video_core/rasterizer_accelerated.h"
13#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
@@ -54,13 +55,13 @@ private:
54 BufferCache& buffer_cache; 55 BufferCache& buffer_cache;
55}; 56};
56 57
57class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 58class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
59 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
58public: 60public:
59 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 61 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
60 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 62 Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
61 ScreenInfo& screen_info_, const Device& device_, 63 const Device& device_, MemoryAllocator& memory_allocator_,
62 MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, 64 StateTracker& state_tracker_, Scheduler& scheduler_);
63 Scheduler& scheduler_);
64 ~RasterizerVulkan() override; 65 ~RasterizerVulkan() override;
65 66
66 void Draw(bool is_indexed, bool is_instanced) override; 67 void Draw(bool is_indexed, bool is_instanced) override;
@@ -99,6 +100,12 @@ public:
99 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 100 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
100 const VideoCore::DiskResourceLoadCallback& callback) override; 101 const VideoCore::DiskResourceLoadCallback& callback) override;
101 102
103 void InitializeChannel(Tegra::Control::ChannelState& channel) override;
104
105 void BindChannel(Tegra::Control::ChannelState& channel) override;
106
107 void ReleaseChannel(s32 channel_id) override;
108
102private: 109private:
103 static constexpr size_t MAX_TEXTURES = 192; 110 static constexpr size_t MAX_TEXTURES = 192;
104 static constexpr size_t MAX_IMAGES = 48; 111 static constexpr size_t MAX_IMAGES = 48;
@@ -134,9 +141,6 @@ private:
134 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); 141 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
135 142
136 Tegra::GPU& gpu; 143 Tegra::GPU& gpu;
137 Tegra::MemoryManager& gpu_memory;
138 Tegra::Engines::Maxwell3D& maxwell3d;
139 Tegra::Engines::KeplerCompute& kepler_compute;
140 144
141 ScreenInfo& screen_info; 145 ScreenInfo& screen_info;
142 const Device& device; 146 const Device& device;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9ad096431..a87bf8dd3 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -7,6 +7,7 @@
7 7
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "video_core/control/channel_state.h"
10#include "video_core/dirty_flags.h" 11#include "video_core/dirty_flags.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
@@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
174} 175}
175} // Anonymous namespace 176} // Anonymous namespace
176 177
177StateTracker::StateTracker(Tegra::GPU& gpu) 178void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
178 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 179 auto& tables{channel_state.maxwell_3d->dirty.tables};
179 auto& tables{gpu.Maxwell3D().dirty.tables};
180 SetupDirtyFlags(tables); 180 SetupDirtyFlags(tables);
181 SetupDirtyViewports(tables); 181 SetupDirtyViewports(tables);
182 SetupDirtyScissors(tables); 182 SetupDirtyScissors(tables);
@@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
199 SetupDirtyVertexBindings(tables); 199 SetupDirtyVertexBindings(tables);
200} 200}
201 201
202void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
203 flags = &channel_state.maxwell_3d->dirty.flags;
204}
205
206StateTracker::StateTracker(Tegra::GPU& gpu)
207 : flags{}, invalidation_flags{MakeInvalidationFlags()} {}
208
202} // namespace Vulkan 209} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index a85bc1c10..9f8a887f9 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -10,6 +10,12 @@
10#include "video_core/dirty_flags.h" 10#include "video_core/dirty_flags.h"
11#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
12 12
13namespace Tegra {
14namespace Control {
15struct ChannelState;
16}
17} // namespace Tegra
18
13namespace Vulkan { 19namespace Vulkan {
14 20
15namespace Dirty { 21namespace Dirty {
@@ -56,16 +62,16 @@ public:
56 explicit StateTracker(Tegra::GPU& gpu); 62 explicit StateTracker(Tegra::GPU& gpu);
57 63
58 void InvalidateCommandBufferState() { 64 void InvalidateCommandBufferState() {
59 flags |= invalidation_flags; 65 (*flags) |= invalidation_flags;
60 current_topology = INVALID_TOPOLOGY; 66 current_topology = INVALID_TOPOLOGY;
61 } 67 }
62 68
63 void InvalidateViewports() { 69 void InvalidateViewports() {
64 flags[Dirty::Viewports] = true; 70 (*flags)[Dirty::Viewports] = true;
65 } 71 }
66 72
67 void InvalidateScissors() { 73 void InvalidateScissors() {
68 flags[Dirty::Scissors] = true; 74 (*flags)[Dirty::Scissors] = true;
69 } 75 }
70 76
71 bool TouchViewports() { 77 bool TouchViewports() {
@@ -139,16 +145,20 @@ public:
139 return has_changed; 145 return has_changed;
140 } 146 }
141 147
148 void SetupTables(Tegra::Control::ChannelState& channel_state);
149
150 void ChangeChannel(Tegra::Control::ChannelState& channel_state);
151
142private: 152private:
143 static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); 153 static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
144 154
145 bool Exchange(std::size_t id, bool new_value) const noexcept { 155 bool Exchange(std::size_t id, bool new_value) const noexcept {
146 const bool is_dirty = flags[id]; 156 const bool is_dirty = (*flags)[id];
147 flags[id] = new_value; 157 (*flags)[id] = new_value;
148 return is_dirty; 158 return is_dirty;
149 } 159 }
150 160
151 Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; 161 Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
152 Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; 162 Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
153 Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; 163 Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
154}; 164};
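Illustrative sketch of the state-tracker change above: because each channel now owns its own Maxwell3D instance, the tracker can no longer bind a reference to one fixed dirty-flag set; it holds a pointer that ChangeChannel repoints at the active channel's flags. The names below (Channel, Flags) and the use of std::bitset are invented for this standalone analogue:

#include <bitset>
#include <cstddef>

using Flags = std::bitset<8>;

struct Channel { Flags dirty; };         // stand-in for a channel's Maxwell3D dirty state

class StateTracker {
public:
    void ChangeChannel(Channel& ch) { flags = &ch.dirty; }

    void InvalidateCommandBufferState() { (*flags) |= invalidation_flags; }
    bool Exchange(std::size_t id, bool new_value) {
        const bool is_dirty = (*flags)[id];
        (*flags)[id] = new_value;
        return is_dirty;
    }
private:
    Flags* flags = nullptr;              // repointed whenever the bound channel changes
    Flags invalidation_flags{0b1111'1111};
};

int main() {
    Channel a, b;
    StateTracker tracker;
    tracker.ChangeChannel(a);
    tracker.InvalidateCommandBufferState();    // dirties channel a only
    tracker.ChangeChannel(b);
    return tracker.Exchange(0, false) ? 1 : 0; // channel b starts clean
}
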
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 164e4ee0e..f53066579 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -8,6 +8,7 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "shader_recompiler/frontend/maxwell/control_flow.h" 9#include "shader_recompiler/frontend/maxwell/control_flow.h"
10#include "shader_recompiler/object_pool.h" 10#include "shader_recompiler/object_pool.h"
11#include "video_core/control/channel_state.h"
11#include "video_core/dirty_flags.h" 12#include "video_core/dirty_flags.h"
12#include "video_core/engines/kepler_compute.h" 13#include "video_core/engines/kepler_compute.h"
13#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
@@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
33 RemovePendingShaders(); 34 RemovePendingShaders();
34} 35}
35 36
36ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, 37ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
37 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
38 Tegra::Engines::KeplerCompute& kepler_compute_)
39 : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
40 rasterizer{rasterizer_} {}
41 38
42bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { 39bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
43 auto& dirty{maxwell3d.dirty.flags}; 40 auto& dirty{maxwell3d->dirty.flags};
44 if (!dirty[VideoCommon::Dirty::Shaders]) { 41 if (!dirty[VideoCommon::Dirty::Shaders]) {
45 return last_shaders_valid; 42 return last_shaders_valid;
46 } 43 }
47 dirty[VideoCommon::Dirty::Shaders] = false; 44 dirty[VideoCommon::Dirty::Shaders] = false;
48 45
49 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; 46 const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
50 for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { 47 for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
51 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { 48 if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
52 unique_hashes[index] = 0; 49 unique_hashes[index] = 0;
53 continue; 50 continue;
54 } 51 }
55 const auto& shader_config{maxwell3d.regs.shader_config[index]}; 52 const auto& shader_config{maxwell3d->regs.shader_config[index]};
56 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; 53 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
57 const GPUVAddr shader_addr{base_addr + shader_config.offset}; 54 const GPUVAddr shader_addr{base_addr + shader_config.offset};
58 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; 55 const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
59 if (!cpu_shader_addr) { 56 if (!cpu_shader_addr) {
60 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); 57 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
61 last_shaders_valid = false; 58 last_shaders_valid = false;
@@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
64 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; 61 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
65 if (!shader_info) { 62 if (!shader_info) {
66 const u32 start_address{shader_config.offset}; 63 const u32 start_address{shader_config.offset};
67 GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; 64 GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
68 shader_info = MakeShaderInfo(env, *cpu_shader_addr); 65 shader_info = MakeShaderInfo(env, *cpu_shader_addr);
69 } 66 }
70 shader_infos[index] = shader_info; 67 shader_infos[index] = shader_info;
@@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
75} 72}
76 73
77const ShaderInfo* ShaderCache::ComputeShader() { 74const ShaderInfo* ShaderCache::ComputeShader() {
78 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; 75 const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
79 const auto& qmd{kepler_compute.launch_description}; 76 const auto& qmd{kepler_compute->launch_description};
80 const GPUVAddr shader_addr{program_base + qmd.program_start}; 77 const GPUVAddr shader_addr{program_base + qmd.program_start};
81 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; 78 const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
82 if (!cpu_shader_addr) { 79 if (!cpu_shader_addr) {
83 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); 80 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
84 return nullptr; 81 return nullptr;
@@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
86 if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { 83 if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
87 return shader; 84 return shader;
88 } 85 }
89 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; 86 ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
90 return MakeShaderInfo(env, *cpu_shader_addr); 87 return MakeShaderInfo(env, *cpu_shader_addr);
91} 88}
92 89
93void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, 90void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
94 const std::array<u64, NUM_PROGRAMS>& unique_hashes) { 91 const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
95 size_t env_index{}; 92 size_t env_index{};
96 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; 93 const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
97 for (size_t index = 0; index < NUM_PROGRAMS; ++index) { 94 for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
98 if (unique_hashes[index] == 0) { 95 if (unique_hashes[index] == 0) {
99 continue; 96 continue;
100 } 97 }
101 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; 98 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
102 auto& env{result.envs[index]}; 99 auto& env{result.envs[index]};
103 const u32 start_address{maxwell3d.regs.shader_config[index].offset}; 100 const u32 start_address{maxwell3d->regs.shader_config[index].offset};
104 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; 101 env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
105 env.SetCachedSize(shader_infos[index]->size_bytes); 102 env.SetCachedSize(shader_infos[index]->size_bytes);
106 result.env_ptrs[env_index++] = &env; 103 result.env_ptrs[env_index++] = &env;
107 } 104 }
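
Note: with the engines and memory manager dropped from the constructor, a derived cache now forwards only the rasterizer; maxwell3d, kepler_compute and gpu_memory arrive later as per-channel pointers via the ChannelSetupCaches base. A sketch of a derived constructor under the new signature (the class name is illustrative):

    class PipelineCacheLike : public VideoCommon::ShaderCache {
    public:
        explicit PipelineCacheLike(VideoCore::RasterizerInterface& rasterizer)
            : VideoCommon::ShaderCache{rasterizer} {}
        // maxwell3d, kepler_compute and gpu_memory are no longer constructor
        // arguments; they are bound per channel through the ChannelSetupCaches base.
    };
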
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f67cea8c4..a4391202d 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -12,6 +12,7 @@
12#include <vector> 12#include <vector>
13 13
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "video_core/control/channel_state_cache.h"
15#include "video_core/rasterizer_interface.h" 16#include "video_core/rasterizer_interface.h"
16#include "video_core/shader_environment.h" 17#include "video_core/shader_environment.h"
17 18
@@ -19,6 +20,10 @@ namespace Tegra {
19class MemoryManager; 20class MemoryManager;
20} 21}
21 22
23namespace Tegra::Control {
24struct ChannelState;
25}
26
22namespace VideoCommon { 27namespace VideoCommon {
23 28
24class GenericEnvironment; 29class GenericEnvironment;
@@ -28,7 +33,7 @@ struct ShaderInfo {
28 size_t size_bytes{}; 33 size_t size_bytes{};
29}; 34};
30 35
31class ShaderCache { 36class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
32 static constexpr u64 YUZU_PAGEBITS = 14; 37 static constexpr u64 YUZU_PAGEBITS = 14;
33 static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS; 38 static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
34 39
@@ -71,9 +76,7 @@ protected:
71 } 76 }
72 }; 77 };
73 78
74 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, 79 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
75 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
76 Tegra::Engines::KeplerCompute& kepler_compute_);
77 80
78 /// @brief Update the hashes and information of shader stages 81 /// @brief Update the hashes and information of shader stages
79 /// @param unique_hashes Shader hashes to store into when a stage is enabled 82 /// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -88,10 +91,6 @@ protected:
88 void GetGraphicsEnvironments(GraphicsEnvironments& result, 91 void GetGraphicsEnvironments(GraphicsEnvironments& result,
89 const std::array<u64, NUM_PROGRAMS>& unique_hashes); 92 const std::array<u64, NUM_PROGRAMS>& unique_hashes);
90 93
91 Tegra::MemoryManager& gpu_memory;
92 Tegra::Engines::Maxwell3D& maxwell3d;
93 Tegra::Engines::KeplerCompute& kepler_compute;
94
95 std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; 94 std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
96 bool last_shaders_valid = false; 95 bool last_shaders_valid = false;
97 96
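
Note: the refresh path itself is unchanged apart from the pointer dereferences. A rough per-draw flow inside a derived pipeline cache once a channel has been bound (sketch only, assuming the new member layout):

    std::array<u64, 6> unique_hashes{};
    if (!RefreshStages(unique_hashes)) {
        // a stage pointed at an unmapped GPU address; skip this draw
        return;
    }
    GraphicsEnvironments environments;
    GetGraphicsEnvironments(environments, unique_hashes);
    // environments now reference the bound channel's Maxwell3D and MemoryManager
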
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1f85ec9da..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
88 u32 scale_rating = 0; 88 u32 scale_rating = 0;
89 u64 scale_tick = 0; 89 u64 scale_tick = 0;
90 bool has_scaled = false; 90 bool has_scaled = false;
91
92 size_t channel = 0;
93
91 ImageFlagBits flags = ImageFlagBits::CpuModified; 94 ImageFlagBits flags = ImageFlagBits::CpuModified;
92 95
93 GPUVAddr gpu_addr = 0; 96 GPUVAddr gpu_addr = 0;
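
Note: ImageBase gains a `channel` index so an image can remember which channel's state created it. An assumed use, not shown in this hunk, is tagging the image at insertion time so later invalidation can consult the right per-channel tables:

    // Assumption: set when the texture cache inserts the image for the
    // currently bound channel (current_channel_id is introduced further below).
    image.channel = current_channel_id;
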
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1dbe01bc0..2731aead0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/settings.h" 9#include "common/settings.h"
10#include "video_core/control/channel_state.h"
10#include "video_core/dirty_flags.h" 11#include "video_core/dirty_flags.h"
11#include "video_core/engines/kepler_compute.h" 12#include "video_core/engines/kepler_compute.h"
12#include "video_core/texture_cache/image_view_base.h" 13#include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
29using namespace Common::Literals; 30using namespace Common::Literals;
30 31
31template <class P> 32template <class P>
32TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, 33TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
33 Tegra::Engines::Maxwell3D& maxwell3d_, 34 : runtime{runtime_}, rasterizer{rasterizer_} {
34 Tegra::Engines::KeplerCompute& kepler_compute_,
35 Tegra::MemoryManager& gpu_memory_)
36 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
37 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
38 // Configure null sampler 35 // Configure null sampler
39 TSCEntry sampler_descriptor{}; 36 TSCEntry sampler_descriptor{};
40 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); 37 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
42 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); 39 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
43 sampler_descriptor.cubemap_anisotropy.Assign(1); 40 sampler_descriptor.cubemap_anisotropy.Assign(1);
44 41
42 // Setup channels
43 current_channel_id = UNSET_CHANNEL;
44 state = nullptr;
45 maxwell3d = nullptr;
46 kepler_compute = nullptr;
47 gpu_memory = nullptr;
48
45 // Make sure the first index is reserved for the null resources 49 // Make sure the first index is reserved for the null resources
46 // This way the null resource becomes a compile time constant 50 // This way the null resource becomes a compile time constant
47 void(slot_images.insert(NullImageParams{})); 51 void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
93 const auto copies = FullDownloadCopies(image.info); 97 const auto copies = FullDownloadCopies(image.info);
94 image.DownloadMemory(map, copies); 98 image.DownloadMemory(map, copies);
95 runtime.Finish(); 99 runtime.Finish();
96 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 100 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
97 } 101 }
98 if (True(image.flags & ImageFlagBits::Tracked)) { 102 if (True(image.flags & ImageFlagBits::Tracked)) {
99 UntrackImage(image, image_id); 103 UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
152template <class P> 156template <class P>
153template <bool has_blacklists> 157template <bool has_blacklists>
154void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { 158void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
155 FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); 159 FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
160 views);
156} 161}
157 162
158template <class P> 163template <class P>
159void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { 164void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
160 FillImageViews<true>(compute_image_table, compute_image_view_ids, views); 165 FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
161} 166}
162 167
163template <class P> 168template <class P>
164typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 169typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
165 if (index > graphics_sampler_table.Limit()) { 170 if (index > state->graphics_sampler_table.Limit()) {
166 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 171 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
167 return &slot_samplers[NULL_SAMPLER_ID]; 172 return &slot_samplers[NULL_SAMPLER_ID];
168 } 173 }
169 const auto [descriptor, is_new] = graphics_sampler_table.Read(index); 174 const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
170 SamplerId& id = graphics_sampler_ids[index]; 175 SamplerId& id = state->graphics_sampler_ids[index];
171 if (is_new) { 176 if (is_new) {
172 id = FindSampler(descriptor); 177 id = FindSampler(descriptor);
173 } 178 }
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
176 181
177template <class P> 182template <class P>
178typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 183typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
179 if (index > compute_sampler_table.Limit()) { 184 if (index > state->compute_sampler_table.Limit()) {
180 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 185 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
181 return &slot_samplers[NULL_SAMPLER_ID]; 186 return &slot_samplers[NULL_SAMPLER_ID];
182 } 187 }
183 const auto [descriptor, is_new] = compute_sampler_table.Read(index); 188 const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
184 SamplerId& id = compute_sampler_ids[index]; 189 SamplerId& id = state->compute_sampler_ids[index];
185 if (is_new) { 190 if (is_new) {
186 id = FindSampler(descriptor); 191 id = FindSampler(descriptor);
187 } 192 }
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
191template <class P> 196template <class P>
192void TextureCache<P>::SynchronizeGraphicsDescriptors() { 197void TextureCache<P>::SynchronizeGraphicsDescriptors() {
193 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; 198 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
194 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; 199 const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
195 const u32 tic_limit = maxwell3d.regs.tic.limit; 200 const u32 tic_limit = maxwell3d->regs.tic.limit;
196 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; 201 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
197 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { 202 if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
198 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); 203 state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
199 } 204 }
200 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { 205 if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
201 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); 206 state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
202 } 207 }
203} 208}
204 209
205template <class P> 210template <class P>
206void TextureCache<P>::SynchronizeComputeDescriptors() { 211void TextureCache<P>::SynchronizeComputeDescriptors() {
207 const bool linked_tsc = kepler_compute.launch_description.linked_tsc; 212 const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
208 const u32 tic_limit = kepler_compute.regs.tic.limit; 213 const u32 tic_limit = kepler_compute->regs.tic.limit;
209 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; 214 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
210 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); 215 const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
211 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { 216 if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
212 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); 217 state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
213 } 218 }
214 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { 219 if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
215 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); 220 state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
216 } 221 }
217} 222}
218 223
219template <class P> 224template <class P>
220bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { 225bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
221 auto& flags = maxwell3d.dirty.flags; 226 auto& flags = maxwell3d->dirty.flags;
222 u32 scale_rating = 0; 227 u32 scale_rating = 0;
223 bool rescaled = false; 228 bool rescaled = false;
224 std::array<ImageId, NUM_RT> tmp_color_images{}; 229 std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
315template <class P> 320template <class P>
316void TextureCache<P>::UpdateRenderTargets(bool is_clear) { 321void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
317 using namespace VideoCommon::Dirty; 322 using namespace VideoCommon::Dirty;
318 auto& flags = maxwell3d.dirty.flags; 323 auto& flags = maxwell3d->dirty.flags;
319 if (!flags[Dirty::RenderTargets]) { 324 if (!flags[Dirty::RenderTargets]) {
320 for (size_t index = 0; index < NUM_RT; ++index) { 325 for (size_t index = 0; index < NUM_RT; ++index) {
321 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; 326 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
342 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); 347 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
343 348
344 for (size_t index = 0; index < NUM_RT; ++index) { 349 for (size_t index = 0; index < NUM_RT; ++index) {
345 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); 350 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
346 } 351 }
347 u32 up_scale = 1; 352 u32 up_scale = 1;
348 u32 down_shift = 0; 353 u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
351 down_shift = Settings::values.resolution_info.down_shift; 356 down_shift = Settings::values.resolution_info.down_shift;
352 } 357 }
353 render_targets.size = Extent2D{ 358 render_targets.size = Extent2D{
354 (maxwell3d.regs.render_area.width * up_scale) >> down_shift, 359 (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
355 (maxwell3d.regs.render_area.height * up_scale) >> down_shift, 360 (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
356 }; 361 };
357 362
358 flags[Dirty::DepthBiasGlobal] = true; 363 flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
458 const auto copies = FullDownloadCopies(image.info); 463 const auto copies = FullDownloadCopies(image.info);
459 image.DownloadMemory(map, copies); 464 image.DownloadMemory(map, copies);
460 runtime.Finish(); 465 runtime.Finish();
461 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 466 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
462 } 467 }
463} 468}
464 469
@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
655 for (const ImageId image_id : download_ids) { 660 for (const ImageId image_id : download_ids) {
656 const ImageBase& image = slot_images[image_id]; 661 const ImageBase& image = slot_images[image_id];
657 const auto copies = FullDownloadCopies(image.info); 662 const auto copies = FullDownloadCopies(image.info);
658 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); 663 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
659 download_map.offset += image.unswizzled_size_bytes; 664 download_map.offset += image.unswizzled_size_bytes;
660 download_span = download_span.subspan(image.unswizzled_size_bytes); 665 download_span = download_span.subspan(image.unswizzled_size_bytes);
661 } 666 }
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
714 const GPUVAddr gpu_addr = image.gpu_addr; 719 const GPUVAddr gpu_addr = image.gpu_addr;
715 720
716 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 721 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
717 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 722 gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
718 const auto uploads = FullUploadSwizzles(image.info); 723 const auto uploads = FullUploadSwizzles(image.info);
719 runtime.AccelerateImageUpload(image, staging, uploads); 724 runtime.AccelerateImageUpload(image, staging, uploads);
720 } else if (True(image.flags & ImageFlagBits::Converted)) { 725 } else if (True(image.flags & ImageFlagBits::Converted)) {
721 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 726 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
722 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 727 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
723 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 728 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
724 image.UploadMemory(staging, copies); 729 image.UploadMemory(staging, copies);
725 } else { 730 } else {
726 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 731 const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
727 image.UploadMemory(staging, copies); 732 image.UploadMemory(staging, copies);
728 } 733 }
729} 734}
730 735
731template <class P> 736template <class P>
732ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { 737ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
733 if (!IsValidEntry(gpu_memory, config)) { 738 if (!IsValidEntry(*gpu_memory, config)) {
734 return NULL_IMAGE_VIEW_ID; 739 return NULL_IMAGE_VIEW_ID;
735 } 740 }
736 const auto [pair, is_new] = image_views.try_emplace(config); 741 const auto [pair, is_new] = state->image_views.try_emplace(config);
737 ImageViewId& image_view_id = pair->second; 742 ImageViewId& image_view_id = pair->second;
738 if (is_new) { 743 if (is_new) {
739 image_view_id = CreateImageView(config); 744 image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
777template <class P> 782template <class P>
778ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, 783ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
779 RelaxedOptions options) { 784 RelaxedOptions options) {
780 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 785 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
781 if (!cpu_addr) { 786 if (!cpu_addr) {
782 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); 787 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
783 if (!cpu_addr) { 788 if (!cpu_addr) {
784 return ImageId{}; 789 return ImageId{};
785 } 790 }
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
860 image.scale_tick = frame_tick + 1; 865 image.scale_tick = frame_tick + 1;
861 } 866 }
862 const std::span<const ImageViewId> image_view_ids = image.image_view_ids; 867 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
863 auto& dirty = maxwell3d.dirty.flags; 868 auto& dirty = maxwell3d->dirty.flags;
864 dirty[Dirty::RenderTargets] = true; 869 dirty[Dirty::RenderTargets] = true;
865 dirty[Dirty::ZetaBuffer] = true; 870 dirty[Dirty::ZetaBuffer] = true;
866 for (size_t rt = 0; rt < NUM_RT; ++rt) { 871 for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
881 image.image_view_ids.clear(); 886 image.image_view_ids.clear();
882 image.image_view_infos.clear(); 887 image.image_view_infos.clear();
883 if constexpr (ENABLE_VALIDATION) { 888 if constexpr (ENABLE_VALIDATION) {
884 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); 889 std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
885 std::ranges::fill(compute_image_view_ids, CORRUPT_ID); 890 std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
886 } 891 }
887 graphics_image_table.Invalidate(); 892 state->graphics_image_table.Invalidate();
888 compute_image_table.Invalidate(); 893 state->compute_image_table.Invalidate();
889 has_deleted_images = true; 894 has_deleted_images = true;
890} 895}
891 896
@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
929template <class P> 934template <class P>
930ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 935ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
931 RelaxedOptions options) { 936 RelaxedOptions options) {
932 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 937 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
933 if (!cpu_addr) { 938 if (!cpu_addr) {
934 const auto size = CalculateGuestSizeInBytes(info); 939 const auto size = CalculateGuestSizeInBytes(info);
935 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); 940 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
936 if (!cpu_addr) { 941 if (!cpu_addr) {
937 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; 942 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
938 virtual_invalid_space += Common::AlignUp(size, 32); 943 virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1050 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1055 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
1051 Image& new_image = slot_images[new_image_id]; 1056 Image& new_image = slot_images[new_image_id];
1052 1057
1053 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { 1058 if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
1054 new_image.flags |= ImageFlagBits::Sparse; 1059 new_image.flags |= ImageFlagBits::Sparse;
1055 } 1060 }
1056 1061
@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1192 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { 1197 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
1193 return NULL_SAMPLER_ID; 1198 return NULL_SAMPLER_ID;
1194 } 1199 }
1195 const auto [pair, is_new] = samplers.try_emplace(config); 1200 const auto [pair, is_new] = state->samplers.try_emplace(config);
1196 if (is_new) { 1201 if (is_new) {
1197 pair->second = slot_samplers.insert(runtime, config); 1202 pair->second = slot_samplers.insert(runtime, config);
1198 } 1203 }
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1201 1206
1202template <class P> 1207template <class P>
1203ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { 1208ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1204 const auto& regs = maxwell3d.regs; 1209 const auto& regs = maxwell3d->regs;
1205 if (index >= regs.rt_control.count) { 1210 if (index >= regs.rt_control.count) {
1206 return ImageViewId{}; 1211 return ImageViewId{};
1207 } 1212 }
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1219 1224
1220template <class P> 1225template <class P>
1221ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { 1226ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1222 const auto& regs = maxwell3d.regs; 1227 const auto& regs = maxwell3d->regs;
1223 if (!regs.zeta_enable) { 1228 if (!regs.zeta_enable) {
1224 return ImageViewId{}; 1229 return ImageViewId{};
1225 } 1230 }
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
1321 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; 1326 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1322 boost::container::small_vector<ImageId, 8> images; 1327 boost::container::small_vector<ImageId, 8> images;
1323 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { 1328 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1324 const auto it = gpu_page_table.find(page); 1329 const auto it = state->gpu_page_table.find(page);
1325 if (it == gpu_page_table.end()) { 1330 if (it == state->gpu_page_table.end()) {
1326 if constexpr (BOOL_BREAK) { 1331 if constexpr (BOOL_BREAK) {
1327 return false; 1332 return false;
1328 } else { 1333 } else {
@@ -1403,9 +1408,9 @@ template <typename Func>
1403void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { 1408void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1404 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; 1409 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1405 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; 1410 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1406 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); 1411 const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1407 for (const auto& [gpu_addr, size] : segments) { 1412 for (const auto& [gpu_addr, size] : segments) {
1408 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 1413 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1409 ASSERT(cpu_addr); 1414 ASSERT(cpu_addr);
1410 if constexpr (RETURNS_BOOL) { 1415 if constexpr (RETURNS_BOOL) {
1411 if (func(gpu_addr, *cpu_addr, size)) { 1416 if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1449 image.lru_index = lru_cache.Insert(image_id, frame_tick); 1454 image.lru_index = lru_cache.Insert(image_id, frame_tick);
1450 1455
1451 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1456 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1452 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); 1457 [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
1453 if (False(image.flags & ImageFlagBits::Sparse)) { 1458 if (False(image.flags & ImageFlagBits::Sparse)) {
1454 auto map_id = 1459 auto map_id =
1455 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); 1460 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1497 } 1502 }
1498 image_ids.erase(vector_it); 1503 image_ids.erase(vector_it);
1499 }; 1504 };
1500 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1505 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1501 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); 1506 clear_page_table(page, state->gpu_page_table);
1507 });
1502 if (False(image.flags & ImageFlagBits::Sparse)) { 1508 if (False(image.flags & ImageFlagBits::Sparse)) {
1503 const auto map_id = image.map_view_id; 1509 const auto map_id = image.map_view_id;
1504 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { 1510 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
1631 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); 1637 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1632 1638
1633 // Mark render targets as dirty 1639 // Mark render targets as dirty
1634 auto& dirty = maxwell3d.dirty.flags; 1640 auto& dirty = maxwell3d->dirty.flags;
1635 dirty[Dirty::RenderTargets] = true; 1641 dirty[Dirty::RenderTargets] = true;
1636 dirty[Dirty::ZetaBuffer] = true; 1642 dirty[Dirty::ZetaBuffer] = true;
1637 for (size_t rt = 0; rt < NUM_RT; ++rt) { 1643 for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
1681 if (alloc_images.empty()) { 1687 if (alloc_images.empty()) {
1682 image_allocs_table.erase(alloc_it); 1688 image_allocs_table.erase(alloc_it);
1683 } 1689 }
1684 if constexpr (ENABLE_VALIDATION) { 1690 for (auto& this_state : channel_storage) {
1685 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); 1691 if constexpr (ENABLE_VALIDATION) {
1686 std::ranges::fill(compute_image_view_ids, CORRUPT_ID); 1692 std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
1693 std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
1694 }
1695 this_state.graphics_image_table.Invalidate();
1696 this_state.compute_image_table.Invalidate();
1687 } 1697 }
1688 graphics_image_table.Invalidate();
1689 compute_image_table.Invalidate();
1690 has_deleted_images = true; 1698 has_deleted_images = true;
1691} 1699}
1692 1700
1693template <class P> 1701template <class P>
1694void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { 1702void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1695 auto it = image_views.begin(); 1703 auto it = state->image_views.begin();
1696 while (it != image_views.end()) { 1704 while (it != state->image_views.end()) {
1697 const auto found = std::ranges::find(removed_views, it->second); 1705 const auto found = std::ranges::find(removed_views, it->second);
1698 if (found != removed_views.end()) { 1706 if (found != removed_views.end()) {
1699 it = image_views.erase(it); 1707 it = state->image_views.erase(it);
1700 } else { 1708 } else {
1701 ++it; 1709 ++it;
1702 } 1710 }
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
1943 const ImageViewBase& image_view = slot_image_views[id]; 1951 const ImageViewBase& image_view = slot_image_views[id];
1944 const ImageBase& image = slot_images[image_view.image_id]; 1952 const ImageBase& image = slot_images[image_view.image_id];
1945 const Extent3D size = image_view.size; 1953 const Extent3D size = image_view.size;
1946 const auto& regs = maxwell3d.regs; 1954 const auto& regs = maxwell3d->regs;
1947 const auto& scissor = regs.scissor_test[0]; 1955 const auto& scissor = regs.scissor_test[0];
1948 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { 1956 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1949 // Images with multiple resources can't be cleared in a single call 1957 // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
1958 scissor.max_y >= size.height; 1966 scissor.max_y >= size.height;
1959} 1967}
1960 1968
1969template <class P>
1970TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
1971 : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
1972 gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
1973 graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
1974 gpu_memory} {}
1975
1976template <class P>
1977void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
1978 ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
1979 auto new_id = [this, &channel]() {
1980 if (!free_channel_ids.empty()) {
1981 auto id = free_channel_ids.front();
1982 free_channel_ids.pop_front();
1983 new (&channel_storage[id]) ChannelInfo(channel);
1984 return id;
1985 }
1986 channel_storage.emplace_back(channel);
1987 return channel_storage.size() - 1;
1988 }();
1989 channel_map.emplace(channel.bind_id, new_id);
1990 if (current_channel_id != UNSET_CHANNEL) {
1991 state = &channel_storage[current_channel_id];
1992 }
1993}
1994
1995/// Bind a channel for execution.
1996template <class P>
1997void TextureCache<P>::BindToChannel(s32 id) {
1998 auto it = channel_map.find(id);
1999 ASSERT(it != channel_map.end() && id >= 0);
2000 current_channel_id = it->second;
2001 state = &channel_storage[current_channel_id];
2002 maxwell3d = &state->maxwell3d;
2003 kepler_compute = &state->kepler_compute;
2004 gpu_memory = &state->gpu_memory;
2005}
2006
2007/// Erase channel's state.
2008template <class P>
2009void TextureCache<P>::EraseChannel(s32 id) {
2010 const auto it = channel_map.find(id);
2011 ASSERT(it != channel_map.end() && id >= 0);
2012 const auto this_id = it->second;
2013 free_channel_ids.push_back(this_id);
2014 channel_map.erase(it);
2015 if (this_id == current_channel_id) {
2016 current_channel_id = UNSET_CHANNEL;
2017 state = nullptr;
2018 maxwell3d = nullptr;
2019 kepler_compute = nullptr;
2020 gpu_memory = nullptr;
2021 } else if (current_channel_id != UNSET_CHANNEL) {
2022 state = &channel_storage[current_channel_id];
2023 }
2024}
2025
1961} // namespace VideoCommon 2026} // namespace VideoCommon
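
Note: taken together, CreateChannel, BindToChannel and EraseChannel give the texture cache a small channel lifecycle backed by channel_storage and the free_channel_ids list. The expected call order from the channel-management side looks roughly like this (the caller is illustrative; the three cache functions are the ones added above):

    texture_cache.CreateChannel(channel_state);          // allocate or reuse a ChannelInfo slot
    texture_cache.BindToChannel(channel_state.bind_id);  // point maxwell3d/kepler_compute/gpu_memory at it
    // ... submit draws for this channel ...
    texture_cache.EraseChannel(channel_state.bind_id);   // recycle the slot via free_channel_ids
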
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7e6c6cef2..69efcb718 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,8 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <deque>
7#include <limits>
6#include <mutex> 8#include <mutex>
7#include <span> 9#include <span>
8#include <type_traits> 10#include <type_traits>
@@ -26,6 +28,10 @@
26#include "video_core/texture_cache/types.h" 28#include "video_core/texture_cache/types.h"
27#include "video_core/textures/texture.h" 29#include "video_core/textures/texture.h"
28 30
31namespace Tegra::Control {
32struct ChannelState;
33}
34
29namespace VideoCommon { 35namespace VideoCommon {
30 36
31using Tegra::Texture::SwizzleSource; 37using Tegra::Texture::SwizzleSource;
@@ -58,6 +64,8 @@ class TextureCache {
58 /// True when the API can provide info about the memory of the device. 64 /// True when the API can provide info about the memory of the device.
59 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; 65 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
60 66
67 static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
68
61 static constexpr s64 TARGET_THRESHOLD = 4_GiB; 69 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
62 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; 70 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
63 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; 71 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -85,8 +93,7 @@ class TextureCache {
85 }; 93 };
86 94
87public: 95public:
88 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, 96 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
89 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
90 97
91 /// Notify the cache that a new frame has been queued 98 /// Notify the cache that a new frame has been queued
92 void TickFrame(); 99 void TickFrame();
@@ -171,6 +178,15 @@ public:
171 178
172 [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; 179 [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
173 180
181 /// Create channel state.
182 void CreateChannel(struct Tegra::Control::ChannelState& channel);
183
184 /// Bind a channel for execution.
185 void BindToChannel(s32 id);
186
187 /// Erase channel's state.
188 void EraseChannel(s32 id);
189
174 std::mutex mutex; 190 std::mutex mutex;
175 191
176private: 192private:
@@ -338,31 +354,52 @@ private:
338 u64 GetScaledImageSizeBytes(ImageBase& image); 354 u64 GetScaledImageSizeBytes(ImageBase& image);
339 355
340 Runtime& runtime; 356 Runtime& runtime;
341 VideoCore::RasterizerInterface& rasterizer;
342 Tegra::Engines::Maxwell3D& maxwell3d;
343 Tegra::Engines::KeplerCompute& kepler_compute;
344 Tegra::MemoryManager& gpu_memory;
345 357
346 DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; 358 struct ChannelInfo {
347 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; 359 ChannelInfo() = delete;
348 std::vector<SamplerId> graphics_sampler_ids; 360 ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
349 std::vector<ImageViewId> graphics_image_view_ids; 361 ChannelInfo(const ChannelInfo& state) = delete;
362 ChannelInfo& operator=(const ChannelInfo&) = delete;
363 ChannelInfo(ChannelInfo&& other) noexcept = default;
364 ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
365
366 Tegra::Engines::Maxwell3D& maxwell3d;
367 Tegra::Engines::KeplerCompute& kepler_compute;
368 Tegra::MemoryManager& gpu_memory;
369
370 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
371 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
372 std::vector<SamplerId> graphics_sampler_ids;
373 std::vector<ImageViewId> graphics_image_view_ids;
350 374
351 DescriptorTable<TICEntry> compute_image_table{gpu_memory}; 375 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
352 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; 376 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
353 std::vector<SamplerId> compute_sampler_ids; 377 std::vector<SamplerId> compute_sampler_ids;
354 std::vector<ImageViewId> compute_image_view_ids; 378 std::vector<ImageViewId> compute_image_view_ids;
379
380 std::unordered_map<TICEntry, ImageViewId> image_views;
381 std::unordered_map<TSCEntry, SamplerId> samplers;
382
383 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
384 };
385
386 std::deque<ChannelInfo> channel_storage;
387 std::deque<size_t> free_channel_ids;
388 std::unordered_map<s32, size_t> channel_map;
389
390 ChannelInfo* state;
391 size_t current_channel_id{UNSET_CHANNEL};
392 VideoCore::RasterizerInterface& rasterizer;
393 Tegra::Engines::Maxwell3D* maxwell3d;
394 Tegra::Engines::KeplerCompute* kepler_compute;
395 Tegra::MemoryManager* gpu_memory;
355 396
356 RenderTargets render_targets; 397 RenderTargets render_targets;
357 398
358 std::unordered_map<TICEntry, ImageViewId> image_views;
359 std::unordered_map<TSCEntry, SamplerId> samplers;
360 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 399 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
361 400
362 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; 401 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
363 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
364 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; 402 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
365
366 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 403 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
367 404
368 VAddr virtual_invalid_space{}; 405 VAddr virtual_invalid_space{};