50 files changed, 1461 insertions, 809 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index b1c683511..9946ce624 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,13 +10,17 @@
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
 namespace Service::Nvidia::Devices {
 
-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
-    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
+      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
 
     const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
     if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
+        params.offset = *(gmmu->AllocateFixed(params.offset, size));
     } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+        params.offset = gmmu->Allocate(size, params.align);
     }
 
     auto result = NvResult::Success;
@@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
     LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
               params.pages, params.page_size);
 
-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
 
     std::memcpy(output.data(), &params, output.size());
     return NvResult::Success;
@@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             // If nvmap handle is null, we should unmap instead.
             const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
             const auto size{static_cast<u64>(entry.pages) << 0x10};
-            system.GPU().MemoryManager().Unmap(offset, size);
+            gmmu->Unmap(offset, size);
             continue;
         }
 
@@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
         const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
         const auto size{static_cast<u64>(entry.pages) << 0x10};
         const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{
-            system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
+        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
 
         if (!addr) {
             LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
               params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
               params.offset);
 
-    auto& gpu = system.GPU();
     if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
         if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
             const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
             const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
 
-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
+            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
                 LOG_CRITICAL(Service_NVDRV,
                              "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
                              "mapping_size = {}, offset={}",
@@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
 
     const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
     if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
+        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
     } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
+        params.offset = gmmu->Map(physical_address, params.offset, size);
     }
 
     auto result = NvResult::Success;
@@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
 
     if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
+        gmmu->Unmap(params.offset, *size);
     } else {
         LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
     }
@@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
     IoctlBindChannel params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
 
-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
     return NvResult::Success;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 67d2f1e87..4ecae3caf 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -13,6 +13,14 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
 
+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
 namespace Service::Nvidia::NvCore {
 class Container;
 class NvMap;
@@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
 
 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
     ~nvhost_as_gpu() override;
 
     NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -187,9 +195,13 @@ private:
     void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
     std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
 
+    Module& module;
+
     NvCore::Container& container;
     NvCore::NvMap& nvmap;
 
+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
     // This is expected to be ordered, therefore we must use a map, not unordered_map
     std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
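For orientation, here is a minimal standalone sketch (not part of the diff, and not yuzu's real types) of the ownership change in the two nvhost-as-gpu files above: each address-space device now owns its own GPU memory manager and shares it with whichever channel it is bound to, instead of every device mutating one global system.GPU().MemoryManager(). All names below (MemoryManager, Channel, AddressSpace) are simplified stand-ins.

#include <cstdint>
#include <iostream>
#include <memory>

// Simplified stand-in for Tegra::MemoryManager: one GPU virtual address space.
struct MemoryManager {
    std::uint64_t next_va = 0x1000;
    std::uint64_t Allocate(std::uint64_t size) {
        const auto va = next_va;
        next_va += size;
        return va;
    }
};

// Simplified stand-in for a GPU channel: it only remembers which address space it uses.
struct Channel {
    std::shared_ptr<MemoryManager> memory_manager;
};

// Simplified stand-in for nvhost_as_gpu: creates its own address space up front.
class AddressSpace {
public:
    AddressSpace() : gmmu{std::make_shared<MemoryManager>()} {}

    // Rough equivalent of the new BindChannel: share this address space with the channel.
    void BindChannel(Channel& channel) {
        channel.memory_manager = gmmu;
    }

    std::uint64_t AllocateSpace(std::uint64_t size) {
        return gmmu->Allocate(size);
    }

private:
    std::shared_ptr<MemoryManager> gmmu;
};

int main() {
    AddressSpace as_a;
    AddressSpace as_b;
    Channel ch_a;
    Channel ch_b;
    as_a.BindChannel(ch_a);
    as_b.BindChannel(ch_b);
    // Allocations in one address space no longer affect the other.
    std::cout << std::hex << as_a.AllocateSpace(0x2000) << ' ' << as_b.AllocateSpace(0x4000) << '\n';
}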
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index cb54ee5a4..38d45cb79 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -11,12 +11,14 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 
 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
     result.op.Assign(op);
     result.syncpoint_id.Assign(syncpoint_id);
     return {result.raw};
@@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
 nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
                        NvCore::Container& core_)
     : nvdevice{system_}, events_interface{events_interface_}, core{core_},
-      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
     channel_fence.id = syncpoint_manager.AllocateSyncpoint();
     channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
     sm_exception_breakpoint_int_report_event =
@@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
               params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
               params.unk3);
 
+    if (channel_state->initiated) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }
+
+    system.GPU().InitChannel(*channel_state);
     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
 
     params.fence_out = channel_fence;
@@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
         {fence.value},
         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                   Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
     };
 }
 
@@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
     for (u32 count = 0; count < add_increment; ++count) {
         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                       Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
     }
 
     return result;
@@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
 
     auto& gpu = system.GPU();
 
+    const auto bind_id = channel_state->bind_id;
+
     params.fence_out.id = channel_fence.id;
 
     if (params.flags.add_wait.Value() &&
         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+        gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
     }
 
     if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
     }
 
-    gpu.PushGPUEntries(std::move(entries));
+    gpu.PushGPUEntries(bind_id, std::move(entries));
 
     if (params.flags.add_increment.Value()) {
         if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
         }
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 440c0c42d..3a65ed06d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,6 +13,12 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"
 
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Service::Nvidia {
 
 namespace NvCore {
@@ -26,6 +32,7 @@ class EventInterface;
 
 namespace Service::Nvidia::Devices {
 
+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
@@ -46,6 +53,7 @@ public:
     Kernel::KEvent* QueryEvent(u32 event_id) override;
 
 private:
+    friend class nvhost_as_gpu;
     enum class CtxObjects : u32_le {
         Ctx2D = 0x902D,
         Ctx3D = 0xB197,
@@ -204,6 +212,7 @@ private:
    NvCore::Container& core;
     NvCore::SyncpointManager& syncpoint_manager;
     NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
     NvFence channel_fence;
 
     // Events
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 57f58055d..279997e81 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
     IocFromIdParams params;
     std::memcpy(&params, input.data(), sizeof(params));
 
-    LOG_DEBUG(Service_NVDRV, "called, id:{}");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
 
     // Handles and IDs are always the same value in nvmap however IDs can be used globally given the
     // right permissions.
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 208de0b75..b39a4c6db 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -74,7 +74,7 @@ Module::Module(Core::System& system)
     : service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
     builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
         std::shared_ptr<Devices::nvdevice> device =
-            std::make_shared<Devices::nvhost_as_gpu>(system, container);
+            std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
         return open_files.emplace(fd, device).first;
     };
     builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5b3808351..e216c51a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -35,6 +35,12 @@ add_library(video_core STATIC
     command_classes/vic.h
     compatible_formats.cpp
     compatible_formats.h
+    control/channel_state.cpp
+    control/channel_state.h
+    control/channel_state_cache.cpp
+    control/channel_state_cache.h
+    control/scheduler.cpp
+    control/scheduler.h
     delayed_destruction_ring.h
     dirty_flags.cpp
     dirty_flags.h
@@ -54,6 +60,8 @@ add_library(video_core STATIC
     engines/maxwell_3d.h
     engines/maxwell_dma.cpp
     engines/maxwell_dma.h
+    engines/puller.cpp
+    engines/puller.h
     framebuffer_config.h
     macro/macro.cpp
     macro/macro.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..6b6764d72 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
 
 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 
     // Page size for caching purposes.
     // This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
     static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
 
     void TickFrame();
 
@@ -367,9 +364,6 @@ private:
     void ClearDownload(IntervalType subtract_interval);
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
 
     SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
 
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
     if (is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
             runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
@@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
     enabled_storage_buffers[stage] |= 1U << ssbo_index;
     written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
     storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
 }
@@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
     enabled_compute_storage_buffers |= 1U << ssbo_index;
     written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
@@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
     }
 }
 
 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
         const Binding& binding = vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         }
         flags[Dirty::VertexBuffer0 + index] = false;
 
-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
         runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
     }
@@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1262,8 +1252,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
     // We have to check for the dirty flags and index count
     // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
         return;
     }
@@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_array.StartAddress();
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
         return;
     }
     flags[Dirty::VertexBuffers] = false;
@@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
         return;
     }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 size = address_size; // TODO: Analyze stride and number of vertices
     if (array.enable == 0 || size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;
@@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
     const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
         transform_feedback_buffers[index] = NULL_BINDING;
         return;
@@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
             if (cpu_addr) {
                 binding.cpu_addr = *cpu_addr;
                 binding.size = cbuf.size;
@@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
         dirty_uniform_buffers.fill(~u32{0});
         uniform_buffer_binding_sizes.fill({});
     }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     flags[Dirty::IndexBuffer] = true;
     flags[Dirty::VertexBuffers] = true;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
 
 template <class P>
 typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
@@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
     GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     TextureBufferBinding binding;
     if (!cpu_addr || size == 0) {
         binding.cpu_addr = 0;
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
new file mode 100644
index 000000000..67803fe94
--- /dev/null
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+ChannelState::ChannelState(s32 bind_id_) {
+    bind_id = bind_id_;
+    initiated = false;
+}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+    ASSERT(memory_manager);
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    initiated = true;
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    dma_pusher->BindRasterizer(rasterizer);
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_memory->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+    maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
new file mode 100644
index 000000000..82808a6b8
--- /dev/null
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+struct ChannelState {
+    ChannelState(s32 bind_id);
+    ChannelState(const ChannelState& state) = delete;
+    ChannelState& operator=(const ChannelState&) = delete;
+    ChannelState(ChannelState&& other) noexcept = default;
+    ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+    void Init(Core::System& system, GPU& gpu);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    s32 bind_id = -1;
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    std::shared_ptr<MemoryManager> memory_manager;
+
+    std::unique_ptr<DmaPusher> dma_pusher;
+
+    bool initiated{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
new file mode 100644
index 000000000..f72a97b2f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,5 @@
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+}
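The layout above (template member definitions in a .inc file, with a single explicit instantiation in the .cpp) keeps the common instantiation compiled once while still letting caches that derive their own info type include the .inc themselves. A rough self-contained sketch of the same arrangement, using made-up file names and collapsed into one listing for readability:

#include <cstddef>
#include <unordered_map>

// counter_cache.h - declares the template, no member definitions.
template <class Info>
class CounterCache {
public:
    void Add(int id);
    std::size_t Count(int id) const;

protected:
    std::unordered_map<int, Info> storage;
};

// counter_cache.inc - member definitions, included only where instantiations live.
template <class Info>
void CounterCache<Info>::Add(int id) {
    storage[id] = Info{};
}

template <class Info>
std::size_t CounterCache<Info>::Count(int id) const {
    return storage.count(id);
}

// counter_cache.cpp - pulls in the definitions once and instantiates the common case.
struct DefaultInfo {};
template class CounterCache<DefaultInfo>;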
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
new file mode 100644
index 000000000..c8298c003
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <unordered_map>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+class ChannelInfo {
+public:
+    ChannelInfo() = delete;
+    ChannelInfo(Tegra::Control::ChannelState& state);
+    ChannelInfo(const ChannelInfo& state) = delete;
+    ChannelInfo& operator=(const ChannelInfo&) = delete;
+    ChannelInfo(ChannelInfo&& other) = default;
+    ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+};
+
+template <class P>
+class ChannelSetupCaches {
+public:
+    /// Operations for seting the channel of execution.
+
+    /// Create channel state.
+    void CreateChannel(Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
+protected:
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+    std::deque<P> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    P* channel_state;
+    size_t current_channel_id{UNSET_CHANNEL};
+    Tegra::Engines::Maxwell3D* maxwell3d;
+    Tegra::Engines::KeplerCompute* kepler_compute;
+    Tegra::MemoryManager* gpu_memory;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc new file mode 100644 index 000000000..3eb73af9f --- /dev/null +++ b/src/video_core/control/channel_state_cache.inc | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | #include "video_core/control/channel_state.h" | ||
| 2 | #include "video_core/control/channel_state_cache.h" | ||
| 3 | #include "video_core/engines/kepler_compute.h" | ||
| 4 | #include "video_core/engines/maxwell_3d.h" | ||
| 5 | #include "video_core/memory_manager.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | |||
| 9 | ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state) | ||
| 10 | : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute}, | ||
| 11 | gpu_memory{*channel_state.memory_manager} {} | ||
| 12 | |||
| 13 | template <class P> | ||
| 14 | void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) { | ||
| 15 | ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); | ||
| 16 | auto new_id = [this, &channel]() { | ||
| 17 | if (!free_channel_ids.empty()) { | ||
| 18 | auto id = free_channel_ids.front(); | ||
| 19 | free_channel_ids.pop_front(); | ||
| 20 | new (&channel_storage[id]) ChannelInfo(channel); | ||
| 21 | return id; | ||
| 22 | } | ||
| 23 | channel_storage.emplace_back(channel); | ||
| 24 | return channel_storage.size() - 1; | ||
| 25 | }(); | ||
| 26 | channel_map.emplace(channel.bind_id, new_id); | ||
| 27 | if (current_channel_id != UNSET_CHANNEL) { | ||
| 28 | channel_state = &channel_storage[current_channel_id]; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | /// Bind a channel for execution. | ||
| 33 | template <class P> | ||
| 34 | void ChannelSetupCaches<P>::BindToChannel(s32 id) { | ||
| 35 | auto it = channel_map.find(id); | ||
| 36 | ASSERT(it != channel_map.end() && id >= 0); | ||
| 37 | current_channel_id = it->second; | ||
| 38 | channel_state = &channel_storage[current_channel_id]; | ||
| 39 | maxwell3d = &channel_state->maxwell3d; | ||
| 40 | kepler_compute = &channel_state->kepler_compute; | ||
| 41 | gpu_memory = &channel_state->gpu_memory; | ||
| 42 | } | ||
| 43 | |||
| 44 | /// Erase a channel's state. | ||
| 45 | template <class P> | ||
| 46 | void ChannelSetupCaches<P>::EraseChannel(s32 id) { | ||
| 47 | const auto it = channel_map.find(id); | ||
| 48 | ASSERT(it != channel_map.end() && id >= 0); | ||
| 49 | const auto this_id = it->second; | ||
| 50 | free_channel_ids.push_back(this_id); | ||
| 51 | channel_map.erase(it); | ||
| 52 | if (this_id == current_channel_id) { | ||
| 53 | current_channel_id = UNSET_CHANNEL; | ||
| 54 | channel_state = nullptr; | ||
| 55 | maxwell3d = nullptr; | ||
| 56 | kepler_compute = nullptr; | ||
| 57 | gpu_memory = nullptr; | ||
| 58 | } else if (current_channel_id != UNSET_CHANNEL) { | ||
| 59 | channel_state = &channel_storage[current_channel_id]; | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | |||
| 64 | } // namespace VideoCommon | ||
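The member definitions live in an .inc rather than a .cpp so each cache can instantiate the template for its own state type. A plausible consumer translation unit, sketched under the assumption that ChannelInfo is the payload used (the file name is illustrative):

// channel_state_cache.cpp (hypothetical): pull in the definitions and
// explicitly instantiate the template for the default ChannelInfo payload.
#include "video_core/control/channel_state_cache.h"
#include "video_core/control/channel_state_cache.inc"

template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;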
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp new file mode 100644 index 000000000..e1abcb188 --- /dev/null +++ b/src/video_core/control/scheduler.cpp | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | |||
| 7 | #include "video_core/control/channel_state.h" | ||
| 8 | #include "video_core/control/scheduler.h" | ||
| 9 | #include "video_core/gpu.h" | ||
| 10 | |||
| 11 | namespace Tegra::Control { | ||
| 12 | Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {} | ||
| 13 | |||
| 14 | Scheduler::~Scheduler() = default; | ||
| 15 | |||
| 16 | void Scheduler::Push(s32 channel, CommandList&& entries) { | ||
| 17 | std::unique_lock<std::mutex> lk(scheduling_guard); | ||
| 18 | auto it = channels.find(channel); | ||
| 19 | auto channel_state = it->second; | ||
| 20 | gpu.BindChannel(channel_state->bind_id); | ||
| 21 | channel_state->dma_pusher->Push(std::move(entries)); | ||
| 22 | channel_state->dma_pusher->DispatchCalls(); | ||
| 23 | } | ||
| 24 | |||
| 25 | void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) { | ||
| 26 | s32 channel = new_channel->bind_id; | ||
| 27 | std::unique_lock<std::mutex> lk(scheduling_guard); | ||
| 28 | channels.emplace(channel, new_channel); | ||
| 29 | } | ||
| 30 | |||
| 31 | } // namespace Tegra::Control | ||
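A usage sketch of the new scheduler, assuming the caller already owns a fully initialized ChannelState (the SubmitExample helper is illustrative only): a channel is declared once, then command lists are pushed against its bind_id, and Push() binds that channel on the GPU and dispatches the entries through its DmaPusher.

#include <memory>
#include <utility>
#include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"

void SubmitExample(Tegra::Control::Scheduler& scheduler,
                   std::shared_ptr<Tegra::Control::ChannelState> channel,
                   Tegra::CommandList&& entries) {
    scheduler.DeclareChannel(channel);                     // register bind_id -> state
    scheduler.Push(channel->bind_id, std::move(entries));  // bind + dispatch on that channel
}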
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h new file mode 100644 index 000000000..802e9caff --- /dev/null +++ b/src/video_core/control/scheduler.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <mutex> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include "video_core/dma_pusher.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | |||
| 15 | class GPU; | ||
| 16 | |||
| 17 | namespace Control { | ||
| 18 | |||
| 19 | struct ChannelState; | ||
| 20 | |||
| 21 | class Scheduler { | ||
| 22 | public: | ||
| 23 | Scheduler(GPU& gpu_); | ||
| 24 | ~Scheduler(); | ||
| 25 | |||
| 26 | void Push(s32 channel, CommandList&& entries); | ||
| 27 | |||
| 28 | void DeclareChannel(std::shared_ptr<ChannelState> new_channel); | ||
| 29 | |||
| 30 | private: | ||
| 31 | std::unordered_map<s32, std::shared_ptr<ChannelState>> channels; | ||
| 32 | std::mutex scheduling_guard; | ||
| 33 | GPU& gpu; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace Control | ||
| 37 | |||
| 38 | } // namespace Tegra | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 29b8582ab..b01f04d0c 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -12,7 +12,10 @@ | |||
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | 14 | ||
| 15 | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {} | 15 | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, |
| 16 | Control::ChannelState& channel_state_) | ||
| 17 | : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, | ||
| 18 | *this, channel_state_} {} | ||
| 16 | 19 | ||
| 17 | DmaPusher::~DmaPusher() = default; | 20 | DmaPusher::~DmaPusher() = default; |
| 18 | 21 | ||
| @@ -76,11 +79,11 @@ bool DmaPusher::Step() { | |||
| 76 | // Push buffer non-empty, read a word | 79 | // Push buffer non-empty, read a word |
| 77 | command_headers.resize(command_list_header.size); | 80 | command_headers.resize(command_list_header.size); |
| 78 | if (Settings::IsGPULevelHigh()) { | 81 | if (Settings::IsGPULevelHigh()) { |
| 79 | gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), | 82 | memory_manager.ReadBlock(dma_get, command_headers.data(), |
| 80 | command_list_header.size * sizeof(u32)); | 83 | command_list_header.size * sizeof(u32)); |
| 81 | } else { | 84 | } else { |
| 82 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | 85 | memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(), |
| 83 | command_list_header.size * sizeof(u32)); | 86 | command_list_header.size * sizeof(u32)); |
| 84 | } | 87 | } |
| 85 | } | 88 | } |
| 86 | for (std::size_t index = 0; index < command_headers.size();) { | 89 | for (std::size_t index = 0; index < command_headers.size();) { |
| @@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) { | |||
| 154 | 157 | ||
| 155 | void DmaPusher::CallMethod(u32 argument) const { | 158 | void DmaPusher::CallMethod(u32 argument) const { |
| 156 | if (dma_state.method < non_puller_methods) { | 159 | if (dma_state.method < non_puller_methods) { |
| 157 | gpu.CallMethod(GPU::MethodCall{ | 160 | puller.CallPullerMethod(Engines::Puller::MethodCall{ |
| 158 | dma_state.method, | 161 | dma_state.method, |
| 159 | argument, | 162 | argument, |
| 160 | dma_state.subchannel, | 163 | dma_state.subchannel, |
| @@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const { | |||
| 168 | 171 | ||
| 169 | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | 172 | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { |
| 170 | if (dma_state.method < non_puller_methods) { | 173 | if (dma_state.method < non_puller_methods) { |
| 171 | gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, | 174 | puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, |
| 172 | dma_state.method_count); | 175 | dma_state.method_count); |
| 173 | } else { | 176 | } else { |
| 174 | subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, | 177 | subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, |
| 175 | num_methods, dma_state.method_count); | 178 | num_methods, dma_state.method_count); |
| 176 | } | 179 | } |
| 177 | } | 180 | } |
| 178 | 181 | ||
| 182 | void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) { | ||
| 183 | puller.BindRasterizer(rasterizer); | ||
| 184 | } | ||
| 185 | |||
| 179 | } // namespace Tegra | 186 | } // namespace Tegra |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 872fd146a..fd7c936c4 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/engine_interface.h" | 12 | #include "video_core/engines/engine_interface.h" |
| 13 | #include "video_core/engines/puller.h" | ||
| 13 | 14 | ||
| 14 | namespace Core { | 15 | namespace Core { |
| 15 | class System; | 16 | class System; |
| @@ -17,7 +18,12 @@ class System; | |||
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | 20 | ||
| 21 | namespace Control { | ||
| 22 | struct ChannelState; | ||
| 23 | } | ||
| 24 | |||
| 20 | class GPU; | 25 | class GPU; |
| 26 | class MemoryManager; | ||
| 21 | 27 | ||
| 22 | enum class SubmissionMode : u32 { | 28 | enum class SubmissionMode : u32 { |
| 23 | IncreasingOld = 0, | 29 | IncreasingOld = 0, |
| @@ -102,7 +108,8 @@ struct CommandList final { | |||
| 102 | */ | 108 | */ |
| 103 | class DmaPusher final { | 109 | class DmaPusher final { |
| 104 | public: | 110 | public: |
| 105 | explicit DmaPusher(Core::System& system_, GPU& gpu_); | 111 | explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, |
| 112 | Control::ChannelState& channel_state_); | ||
| 106 | ~DmaPusher(); | 113 | ~DmaPusher(); |
| 107 | 114 | ||
| 108 | void Push(CommandList&& entries) { | 115 | void Push(CommandList&& entries) { |
| @@ -115,6 +122,8 @@ public: | |||
| 115 | subchannels[subchannel_id] = engine; | 122 | subchannels[subchannel_id] = engine; |
| 116 | } | 123 | } |
| 117 | 124 | ||
| 125 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||
| 126 | |||
| 118 | private: | 127 | private: |
| 119 | static constexpr u32 non_puller_methods = 0x40; | 128 | static constexpr u32 non_puller_methods = 0x40; |
| 120 | static constexpr u32 max_subchannels = 8; | 129 | static constexpr u32 max_subchannels = 8; |
| @@ -148,6 +157,8 @@ private: | |||
| 148 | 157 | ||
| 149 | GPU& gpu; | 158 | GPU& gpu; |
| 150 | Core::System& system; | 159 | Core::System& system; |
| 160 | MemoryManager& memory_manager; | ||
| 161 | mutable Engines::Puller puller; | ||
| 151 | }; | 162 | }; |
| 152 | 163 | ||
| 153 | } // namespace Tegra | 164 | } // namespace Tegra |
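With the new signature, a DmaPusher is constructed per channel together with that channel's memory manager, and the rasterizer is forwarded to the embedded puller via BindRasterizer(). A sketch of that wiring, assuming it happens during channel initialization, which is outside this file (the MakePusherExample helper is illustrative):

#include <memory>
#include "video_core/dma_pusher.h"

// Hypothetical channel setup: build the per-channel pusher and hand it the rasterizer.
std::unique_ptr<Tegra::DmaPusher> MakePusherExample(Core::System& system, Tegra::GPU& gpu,
                                                    Tegra::MemoryManager& memory_manager,
                                                    Tegra::Control::ChannelState& channel_state,
                                                    VideoCore::RasterizerInterface* rasterizer) {
    auto pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, memory_manager, channel_state);
    pusher->BindRasterizer(rasterizer);  // forwarded to the internal Engines::Puller
    return pusher;
}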
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp new file mode 100644 index 000000000..37f2ced18 --- /dev/null +++ b/src/video_core/engines/puller.cpp | |||
| @@ -0,0 +1,297 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "common/settings.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/control/channel_state.h" | ||
| 10 | #include "video_core/dma_pusher.h" | ||
| 11 | #include "video_core/engines/fermi_2d.h" | ||
| 12 | #include "video_core/engines/kepler_compute.h" | ||
| 13 | #include "video_core/engines/kepler_memory.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/engines/maxwell_dma.h" | ||
| 16 | #include "video_core/engines/puller.h" | ||
| 17 | #include "video_core/gpu.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_interface.h" | ||
| 20 | |||
| 21 | namespace Tegra::Engines { | ||
| 22 | |||
| 23 | Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_, | ||
| 24 | Control::ChannelState& channel_state_) | ||
| 25 | : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{ | ||
| 26 | channel_state_} {} | ||
| 27 | |||
| 28 | Puller::~Puller() = default; | ||
| 29 | |||
| 30 | void Puller::ProcessBindMethod(const MethodCall& method_call) { | ||
| 31 | // Bind the current subchannel to the desired engine id. | ||
| 32 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 33 | method_call.argument); | ||
| 34 | const auto engine_id = static_cast<EngineID>(method_call.argument); | ||
| 35 | bound_engines[method_call.subchannel] = engine_id; | ||
| 36 | switch (engine_id) { | ||
| 37 | case EngineID::FERMI_TWOD_A: | ||
| 38 | dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); | ||
| 39 | break; | ||
| 40 | case EngineID::MAXWELL_B: | ||
| 41 | dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); | ||
| 42 | break; | ||
| 43 | case EngineID::KEPLER_COMPUTE_B: | ||
| 44 | dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); | ||
| 45 | break; | ||
| 46 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 47 | dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); | ||
| 48 | break; | ||
| 49 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 50 | dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | void Puller::ProcessFenceActionMethod() { | ||
| 58 | switch (regs.fence_action.op) { | ||
| 59 | case Puller::FenceOperation::Acquire: | ||
| 60 | // UNIMPLEMENTED_MSG("Channel Scheduling pending."); | ||
| 61 | // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 62 | break; | ||
| 63 | case Puller::FenceOperation::Increment: | ||
| 64 | rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); | ||
| 65 | break; | ||
| 66 | default: | ||
| 67 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void Puller::ProcessWaitForInterruptMethod() { | ||
| 72 | // TODO(bunnei) ImplementMe | ||
| 73 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 74 | } | ||
| 75 | |||
| 76 | void Puller::ProcessSemaphoreTriggerMethod() { | ||
| 77 | const auto semaphoreOperationMask = 0xF; | ||
| 78 | const auto op = | ||
| 79 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 80 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 81 | struct Block { | ||
| 82 | u32 sequence; | ||
| 83 | u32 zeros = 0; | ||
| 84 | u64 timestamp; | ||
| 85 | }; | ||
| 86 | |||
| 87 | Block block{}; | ||
| 88 | block.sequence = regs.semaphore_sequence; | ||
| 89 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 90 | // CoreTiming | ||
| 91 | block.timestamp = gpu.GetTicks(); | ||
| 92 | memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); | ||
| 93 | } else { | ||
| 94 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||
| 95 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 96 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 97 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 98 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 99 | // Nothing to do in this case | ||
| 100 | } else { | ||
| 101 | regs.acquire_source = true; | ||
| 102 | regs.acquire_value = regs.semaphore_sequence; | ||
| 103 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 104 | regs.acquire_active = true; | ||
| 105 | regs.acquire_mode = false; | ||
| 106 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 107 | regs.acquire_active = true; | ||
| 108 | regs.acquire_mode = true; | ||
| 109 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 110 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 111 | // semaphore_sequence, gives a non-0 result | ||
| 112 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 113 | } else { | ||
| 114 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 115 | } | ||
| 116 | } | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | void Puller::ProcessSemaphoreRelease() { | ||
| 121 | memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); | ||
| 122 | } | ||
| 123 | |||
| 124 | void Puller::ProcessSemaphoreAcquire() { | ||
| 125 | const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||
| 126 | const auto value = regs.semaphore_acquire; | ||
| 127 | if (word != value) { | ||
| 128 | regs.acquire_active = true; | ||
| 129 | regs.acquire_value = value; | ||
| 130 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 131 | regs.acquire_mode = false; | ||
| 132 | regs.acquire_source = false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Calls a GPU puller method. | ||
| 137 | void Puller::CallPullerMethod(const MethodCall& method_call) { | ||
| 138 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 139 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 140 | |||
| 141 | switch (method) { | ||
| 142 | case BufferMethods::BindObject: { | ||
| 143 | ProcessBindMethod(method_call); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case BufferMethods::Nop: | ||
| 147 | case BufferMethods::SemaphoreAddressHigh: | ||
| 148 | case BufferMethods::SemaphoreAddressLow: | ||
| 149 | case BufferMethods::SemaphoreSequence: | ||
| 150 | case BufferMethods::UnkCacheFlush: | ||
| 151 | case BufferMethods::WrcacheFlush: | ||
| 152 | case BufferMethods::FenceValue: | ||
| 153 | break; | ||
| 154 | case BufferMethods::RefCnt: | ||
| 155 | rasterizer->SignalReference(); | ||
| 156 | break; | ||
| 157 | case BufferMethods::FenceAction: | ||
| 158 | ProcessFenceActionMethod(); | ||
| 159 | break; | ||
| 160 | case BufferMethods::WaitForInterrupt: | ||
| 161 | ProcessWaitForInterruptMethod(); | ||
| 162 | break; | ||
| 163 | case BufferMethods::SemaphoreTrigger: { | ||
| 164 | ProcessSemaphoreTriggerMethod(); | ||
| 165 | break; | ||
| 166 | } | ||
| 167 | case BufferMethods::NotifyIntr: { | ||
| 168 | // TODO(Kmather73): Research and implement this method. | ||
| 169 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | case BufferMethods::Unk28: { | ||
| 173 | // TODO(Kmather73): Research and implement this method. | ||
| 174 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | case BufferMethods::SemaphoreAcquire: { | ||
| 178 | ProcessSemaphoreAcquire(); | ||
| 179 | break; | ||
| 180 | } | ||
| 181 | case BufferMethods::SemaphoreRelease: { | ||
| 182 | ProcessSemaphoreRelease(); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | case BufferMethods::Yield: { | ||
| 186 | // TODO(Kmather73): Research and implement this method. | ||
| 187 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | default: | ||
| 191 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | /// Calls a GPU engine method. | ||
| 197 | void Puller::CallEngineMethod(const MethodCall& method_call) { | ||
| 198 | const EngineID engine = bound_engines[method_call.subchannel]; | ||
| 199 | |||
| 200 | switch (engine) { | ||
| 201 | case EngineID::FERMI_TWOD_A: | ||
| 202 | channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, | ||
| 203 | method_call.IsLastCall()); | ||
| 204 | break; | ||
| 205 | case EngineID::MAXWELL_B: | ||
| 206 | channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, | ||
| 207 | method_call.IsLastCall()); | ||
| 208 | break; | ||
| 209 | case EngineID::KEPLER_COMPUTE_B: | ||
| 210 | channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, | ||
| 211 | method_call.IsLastCall()); | ||
| 212 | break; | ||
| 213 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 214 | channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, | ||
| 215 | method_call.IsLastCall()); | ||
| 216 | break; | ||
| 217 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 218 | channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, | ||
| 219 | method_call.IsLastCall()); | ||
| 220 | break; | ||
| 221 | default: | ||
| 222 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | /// Calls a GPU engine multivalue method. | ||
| 227 | void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 228 | u32 methods_pending) { | ||
| 229 | const EngineID engine = bound_engines[subchannel]; | ||
| 230 | |||
| 231 | switch (engine) { | ||
| 232 | case EngineID::FERMI_TWOD_A: | ||
| 233 | channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 234 | break; | ||
| 235 | case EngineID::MAXWELL_B: | ||
| 236 | channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 237 | break; | ||
| 238 | case EngineID::KEPLER_COMPUTE_B: | ||
| 239 | channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 240 | break; | ||
| 241 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 242 | channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 243 | break; | ||
| 244 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 245 | channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 246 | break; | ||
| 247 | default: | ||
| 248 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | /// Calls a GPU method. | ||
| 253 | void Puller::CallMethod(const MethodCall& method_call) { | ||
| 254 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, | ||
| 255 | method_call.subchannel); | ||
| 256 | |||
| 257 | ASSERT(method_call.subchannel < bound_engines.size()); | ||
| 258 | |||
| 259 | if (ExecuteMethodOnEngine(method_call.method)) { | ||
| 260 | CallEngineMethod(method_call); | ||
| 261 | } else { | ||
| 262 | CallPullerMethod(method_call); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | /// Calls a GPU multivalue method. | ||
| 267 | void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 268 | u32 methods_pending) { | ||
| 269 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); | ||
| 270 | |||
| 271 | ASSERT(subchannel < bound_engines.size()); | ||
| 272 | |||
| 273 | if (ExecuteMethodOnEngine(method)) { | ||
| 274 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 275 | } else { | ||
| 276 | for (std::size_t i = 0; i < amount; i++) { | ||
| 277 | CallPullerMethod(MethodCall{ | ||
| 278 | method, | ||
| 279 | base_start[i], | ||
| 280 | subchannel, | ||
| 281 | methods_pending - static_cast<u32>(i), | ||
| 282 | }); | ||
| 283 | } | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||
| 288 | rasterizer = rasterizer_; | ||
| 289 | } | ||
| 290 | |||
| 291 | /// Determines where the method should be executed. | ||
| 292 | [[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) { | ||
| 293 | const auto buffer_method = static_cast<BufferMethods>(method); | ||
| 294 | return buffer_method >= BufferMethods::NonPullerMethods; | ||
| 295 | } | ||
| 296 | |||
| 297 | } // namespace Tegra::Engines | ||
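The dispatch rule above matches what DmaPusher::CallMethod now does on its side: methods below BufferMethods::NonPullerMethods stay in the puller, everything at or above it goes to the engine bound on the subchannel. A minimal sketch of feeding the puller directly (the DispatchExample helper is illustrative):

// Route a single method word through the puller; CallMethod() decides whether it
// is a puller register write or an engine method via ExecuteMethodOnEngine().
void DispatchExample(Tegra::Engines::Puller& puller, u32 method, u32 argument, u32 subchannel) {
    puller.CallMethod(Tegra::Engines::Puller::MethodCall{method, argument, subchannel, 1});
}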
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h new file mode 100644 index 000000000..d948ec790 --- /dev/null +++ b/src/video_core/engines/puller.h | |||
| @@ -0,0 +1,179 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/engine_interface.h" | ||
| 14 | |||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace Tegra { | ||
| 20 | class MemoryManager; | ||
| 21 | class DmaPusher; | ||
| 22 | |||
| 23 | enum class EngineID { | ||
| 24 | FERMI_TWOD_A = 0x902D, // 2D Engine | ||
| 25 | MAXWELL_B = 0xB197, // 3D Engine | ||
| 26 | KEPLER_COMPUTE_B = 0xB1C0, | ||
| 27 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, | ||
| 28 | MAXWELL_DMA_COPY_A = 0xB0B5, | ||
| 29 | }; | ||
| 30 | |||
| 31 | namespace Control { | ||
| 32 | struct ChannelState; | ||
| 33 | } | ||
| 34 | } // namespace Tegra | ||
| 35 | |||
| 36 | namespace VideoCore { | ||
| 37 | class RasterizerInterface; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace Tegra::Engines { | ||
| 41 | |||
| 42 | class Puller final { | ||
| 43 | public: | ||
| 44 | struct MethodCall { | ||
| 45 | u32 method{}; | ||
| 46 | u32 argument{}; | ||
| 47 | u32 subchannel{}; | ||
| 48 | u32 method_count{}; | ||
| 49 | |||
| 50 | explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0) | ||
| 51 | : method(method_), argument(argument_), subchannel(subchannel_), | ||
| 52 | method_count(method_count_) {} | ||
| 53 | |||
| 54 | [[nodiscard]] bool IsLastCall() const { | ||
| 55 | return method_count <= 1; | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | |||
| 59 | enum class FenceOperation : u32 { | ||
| 60 | Acquire = 0, | ||
| 61 | Increment = 1, | ||
| 62 | }; | ||
| 63 | |||
| 64 | union FenceAction { | ||
| 65 | u32 raw; | ||
| 66 | BitField<0, 1, FenceOperation> op; | ||
| 67 | BitField<8, 24, u32> syncpoint_id; | ||
| 68 | }; | ||
| 69 | |||
| 70 | explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher, | ||
| 71 | Control::ChannelState& channel_state); | ||
| 72 | ~Puller(); | ||
| 73 | |||
| 74 | void CallMethod(const MethodCall& method_call); | ||
| 75 | |||
| 76 | void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 77 | u32 methods_pending); | ||
| 78 | |||
| 79 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||
| 80 | |||
| 81 | void CallPullerMethod(const MethodCall& method_call); | ||
| 82 | |||
| 83 | void CallEngineMethod(const MethodCall& method_call); | ||
| 84 | |||
| 85 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 86 | u32 methods_pending); | ||
| 87 | |||
| 88 | private: | ||
| 89 | Tegra::GPU& gpu; | ||
| 90 | |||
| 91 | MemoryManager& memory_manager; | ||
| 92 | DmaPusher& dma_pusher; | ||
| 93 | Control::ChannelState& channel_state; | ||
| 94 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 95 | |||
| 96 | static constexpr std::size_t NUM_REGS = 0x800; | ||
| 97 | struct Regs { | ||
| 98 | static constexpr size_t NUM_REGS = 0x40; | ||
| 99 | |||
| 100 | union { | ||
| 101 | struct { | ||
| 102 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 103 | struct { | ||
| 104 | u32 address_high; | ||
| 105 | u32 address_low; | ||
| 106 | |||
| 107 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 108 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 109 | address_low); | ||
| 110 | } | ||
| 111 | } semaphore_address; | ||
| 112 | |||
| 113 | u32 semaphore_sequence; | ||
| 114 | u32 semaphore_trigger; | ||
| 115 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 116 | |||
| 117 | // The pusher and the puller share the reference counter; the pusher only has read | ||
| 118 | // access | ||
| 119 | u32 reference_count; | ||
| 120 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 121 | |||
| 122 | u32 semaphore_acquire; | ||
| 123 | u32 semaphore_release; | ||
| 124 | u32 fence_value; | ||
| 125 | FenceAction fence_action; | ||
| 126 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 127 | |||
| 128 | // Puller state | ||
| 129 | u32 acquire_mode; | ||
| 130 | u32 acquire_source; | ||
| 131 | u32 acquire_active; | ||
| 132 | u32 acquire_timeout; | ||
| 133 | u32 acquire_value; | ||
| 134 | }; | ||
| 135 | std::array<u32, NUM_REGS> reg_array; | ||
| 136 | }; | ||
| 137 | } regs{}; | ||
| 138 | |||
| 139 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 140 | void ProcessFenceActionMethod(); | ||
| 141 | void ProcessSemaphoreAcquire(); | ||
| 142 | void ProcessSemaphoreRelease(); | ||
| 143 | void ProcessSemaphoreTriggerMethod(); | ||
| 144 | void ProcessWaitForInterruptMethod(); | ||
| 145 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | ||
| 146 | |||
| 147 | /// Mapping of command subchannels to their bound engine ids | ||
| 148 | std::array<EngineID, 8> bound_engines{}; | ||
| 149 | |||
| 150 | enum class GpuSemaphoreOperation { | ||
| 151 | AcquireEqual = 0x1, | ||
| 152 | WriteLong = 0x2, | ||
| 153 | AcquireGequal = 0x4, | ||
| 154 | AcquireMask = 0x8, | ||
| 155 | }; | ||
| 156 | |||
| 157 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 158 | static_assert(offsetof(Regs, field_name) == position * 4, \ | ||
| 159 | "Field " #field_name " has invalid position") | ||
| 160 | |||
| 161 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 162 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 163 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 164 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 165 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 166 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 167 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 168 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 169 | |||
| 170 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 171 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 172 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 173 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 174 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 175 | |||
| 176 | #undef ASSERT_REG_POSITION | ||
| 177 | }; | ||
| 178 | |||
| 179 | } // namespace Tegra::Engines | ||
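For reference, the fence action word consumed by ProcessFenceActionMethod() packs the operation in bit 0 and the syncpoint id in bits 8..31, matching the BitField layout above. A small sketch of producing such a word (the helper is illustrative; the class itself does not provide one):

// Pack a FenceAction value the way the puller's FenceAction union decodes it.
u32 BuildFenceActionExample(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
    Tegra::Engines::Puller::FenceAction action{};
    action.op.Assign(op);
    action.syncpoint_id.Assign(syncpoint_id);
    return action.raw;
}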
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 1e9832ddd..d658e038d 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -4,12 +4,13 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <cstring> | ||
| 8 | #include <memory> | ||
| 7 | #include <queue> | 9 | #include <queue> |
| 8 | 10 | ||
| 9 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 10 | #include "video_core/delayed_destruction_ring.h" | 12 | #include "video_core/delayed_destruction_ring.h" |
| 11 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 12 | #include "video_core/memory_manager.h" | ||
| 13 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 14 | 15 | ||
| 15 | namespace VideoCommon { | 16 | namespace VideoCommon { |
| @@ -19,10 +20,10 @@ public: | |||
| 19 | explicit FenceBase(u32 payload_, bool is_stubbed_) | 20 | explicit FenceBase(u32 payload_, bool is_stubbed_) |
| 20 | : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} | 21 | : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} |
| 21 | 22 | ||
| 22 | explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_) | 23 | explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_) |
| 23 | : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} | 24 | : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} |
| 24 | 25 | ||
| 25 | GPUVAddr GetAddress() const { | 26 | u8* GetAddress() const { |
| 26 | return address; | 27 | return address; |
| 27 | } | 28 | } |
| 28 | 29 | ||
| @@ -35,7 +36,7 @@ public: | |||
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | private: | 38 | private: |
| 38 | GPUVAddr address; | 39 | u8* address; |
| 39 | u32 payload; | 40 | u32 payload; |
| 40 | bool is_semaphore; | 41 | bool is_semaphore; |
| 41 | 42 | ||
| @@ -57,7 +58,7 @@ public: | |||
| 57 | buffer_cache.AccumulateFlushes(); | 58 | buffer_cache.AccumulateFlushes(); |
| 58 | } | 59 | } |
| 59 | 60 | ||
| 60 | void SignalSemaphore(GPUVAddr addr, u32 value) { | 61 | void SignalSemaphore(u8* addr, u32 value) { |
| 61 | TryReleasePendingFences(); | 62 | TryReleasePendingFences(); |
| 62 | const bool should_flush = ShouldFlush(); | 63 | const bool should_flush = ShouldFlush(); |
| 63 | CommitAsyncFlushes(); | 64 | CommitAsyncFlushes(); |
| @@ -91,8 +92,9 @@ public: | |||
| 91 | } | 92 | } |
| 92 | PopAsyncFlushes(); | 93 | PopAsyncFlushes(); |
| 93 | if (current_fence->IsSemaphore()) { | 94 | if (current_fence->IsSemaphore()) { |
| 94 | gpu_memory.template Write<u32>(current_fence->GetAddress(), | 95 | char* address = reinterpret_cast<char*>(current_fence->GetAddress()); |
| 95 | current_fence->GetPayload()); | 96 | auto payload = current_fence->GetPayload(); |
| 97 | std::memcpy(address, &payload, sizeof(payload)); | ||
| 96 | } else { | 98 | } else { |
| 97 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 99 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 98 | } | 100 | } |
| @@ -104,8 +106,8 @@ protected: | |||
| 104 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 106 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 105 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, | 107 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, |
| 106 | TQueryCache& query_cache_) | 108 | TQueryCache& query_cache_) |
| 107 | : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()}, | 109 | : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_}, |
| 108 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} | 110 | buffer_cache{buffer_cache_}, query_cache{query_cache_} {} |
| 109 | 111 | ||
| 110 | virtual ~FenceManager() = default; | 112 | virtual ~FenceManager() = default; |
| 111 | 113 | ||
| @@ -113,7 +115,7 @@ protected: | |||
| 113 | /// true | 115 | /// true |
| 114 | virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | 116 | virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; |
| 115 | /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true | 117 | /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true |
| 116 | virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0; | 118 | virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0; |
| 117 | /// Queues a fence into the backend if the fence isn't stubbed. | 119 | /// Queues a fence into the backend if the fence isn't stubbed. |
| 118 | virtual void QueueFence(TFence& fence) = 0; | 120 | virtual void QueueFence(TFence& fence) = 0; |
| 119 | /// Notifies that the backend fence has been signaled/reached in host GPU. | 121 | /// Notifies that the backend fence has been signaled/reached in host GPU. |
| @@ -123,7 +125,6 @@ protected: | |||
| 123 | 125 | ||
| 124 | VideoCore::RasterizerInterface& rasterizer; | 126 | VideoCore::RasterizerInterface& rasterizer; |
| 125 | Tegra::GPU& gpu; | 127 | Tegra::GPU& gpu; |
| 126 | Tegra::MemoryManager& gpu_memory; | ||
| 127 | TTextureCache& texture_cache; | 128 | TTextureCache& texture_cache; |
| 128 | TTBufferCache& buffer_cache; | 129 | TTBufferCache& buffer_cache; |
| 129 | TQueryCache& query_cache; | 130 | TQueryCache& query_cache; |
| @@ -137,8 +138,9 @@ private: | |||
| 137 | } | 138 | } |
| 138 | PopAsyncFlushes(); | 139 | PopAsyncFlushes(); |
| 139 | if (current_fence->IsSemaphore()) { | 140 | if (current_fence->IsSemaphore()) { |
| 140 | gpu_memory.template Write<u32>(current_fence->GetAddress(), | 141 | char* address = reinterpret_cast<char*>(current_fence->GetAddress()); |
| 141 | current_fence->GetPayload()); | 142 | const auto payload = current_fence->GetPayload(); |
| 143 | std::memcpy(address, &payload, sizeof(payload)); | ||
| 142 | } else { | 144 | } else { |
| 143 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | 145 | gpu.IncrementSyncPoint(current_fence->GetPayload()); |
| 144 | } | 146 | } |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 33431f2a0..80a1c69e0 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #include "core/hle/service/nvdrv/nvdata.h" | 18 | #include "core/hle/service/nvdrv/nvdata.h" |
| 19 | #include "core/perf_stats.h" | 19 | #include "core/perf_stats.h" |
| 20 | #include "video_core/cdma_pusher.h" | 20 | #include "video_core/cdma_pusher.h" |
| 21 | #include "video_core/control/channel_state.h" | ||
| 22 | #include "video_core/control/scheduler.h" | ||
| 21 | #include "video_core/dma_pusher.h" | 23 | #include "video_core/dma_pusher.h" |
| 22 | #include "video_core/engines/fermi_2d.h" | 24 | #include "video_core/engines/fermi_2d.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | 25 | #include "video_core/engines/kepler_compute.h" |
| @@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | |||
| 36 | 38 | ||
| 37 | struct GPU::Impl { | 39 | struct GPU::Impl { |
| 38 | explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) | 40 | explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) |
| 39 | : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>( | 41 | : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, |
| 40 | system)}, | ||
| 41 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_}, | ||
| 42 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, | ||
| 43 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, | ||
| 44 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, | ||
| 45 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, | ||
| 46 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, | ||
| 47 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, | 42 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, |
| 48 | gpu_thread{system_, is_async_} {} | 43 | gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {} |
| 49 | 44 | ||
| 50 | ~Impl() = default; | 45 | ~Impl() = default; |
| 51 | 46 | ||
| 52 | /// Binds a renderer to the GPU. | 47 | std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) { |
| 53 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | 48 | auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id); |
| 54 | renderer = std::move(renderer_); | 49 | channels.emplace(channel_id, channel_state); |
| 55 | rasterizer = renderer->ReadRasterizer(); | 50 | scheduler->DeclareChannel(channel_state); |
| 56 | 51 | return channel_state; | |
| 57 | memory_manager->BindRasterizer(rasterizer); | ||
| 58 | maxwell_3d->BindRasterizer(rasterizer); | ||
| 59 | fermi_2d->BindRasterizer(rasterizer); | ||
| 60 | kepler_compute->BindRasterizer(rasterizer); | ||
| 61 | kepler_memory->BindRasterizer(rasterizer); | ||
| 62 | maxwell_dma->BindRasterizer(rasterizer); | ||
| 63 | } | 52 | } |
| 64 | 53 | ||
| 65 | /// Calls a GPU method. | 54 | void BindChannel(s32 channel_id) { |
| 66 | void CallMethod(const GPU::MethodCall& method_call) { | 55 | if (bound_channel == channel_id) { |
| 67 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, | 56 | return; |
| 68 | method_call.subchannel); | 57 | } |
| 58 | auto it = channels.find(channel_id); | ||
| 59 | ASSERT(it != channels.end()); | ||
| 60 | bound_channel = channel_id; | ||
| 61 | current_channel = it->second.get(); | ||
| 69 | 62 | ||
| 70 | ASSERT(method_call.subchannel < bound_engines.size()); | 63 | rasterizer->BindChannel(*current_channel); |
| 64 | } | ||
| 71 | 65 | ||
| 72 | if (ExecuteMethodOnEngine(method_call.method)) { | 66 | std::shared_ptr<Control::ChannelState> AllocateChannel() { |
| 73 | CallEngineMethod(method_call); | 67 | return CreateChannel(new_channel_id++); |
| 74 | } else { | ||
| 75 | CallPullerMethod(method_call); | ||
| 76 | } | ||
| 77 | } | 68 | } |
| 78 | 69 | ||
| 79 | /// Calls a GPU multivalue method. | 70 | void InitChannel(Control::ChannelState& to_init) { |
| 80 | void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | 71 | to_init.Init(system, gpu); |
| 81 | u32 methods_pending) { | 72 | to_init.BindRasterizer(rasterizer); |
| 82 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); | 73 | rasterizer->InitializeChannel(to_init); |
| 74 | } | ||
| 83 | 75 | ||
| 84 | ASSERT(subchannel < bound_engines.size()); | 76 | void ReleaseChannel(Control::ChannelState& to_release) { |
| 77 | UNIMPLEMENTED(); | ||
| 78 | } | ||
| 85 | 79 | ||
| 86 | if (ExecuteMethodOnEngine(method)) { | 80 | void CreateHost1xChannel() { |
| 87 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | 81 | if (host1x_channel) { |
| 88 | } else { | 82 | return; |
| 89 | for (std::size_t i = 0; i < amount; i++) { | ||
| 90 | CallPullerMethod(GPU::MethodCall{ | ||
| 91 | method, | ||
| 92 | base_start[i], | ||
| 93 | subchannel, | ||
| 94 | methods_pending - static_cast<u32>(i), | ||
| 95 | }); | ||
| 96 | } | ||
| 97 | } | 83 | } |
| 84 | host1x_channel = CreateChannel(0); | ||
| 85 | host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system); | ||
| 86 | InitChannel(*host1x_channel); | ||
| 87 | } | ||
| 88 | |||
| 89 | /// Binds a renderer to the GPU. | ||
| 90 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | ||
| 91 | renderer = std::move(renderer_); | ||
| 92 | rasterizer = renderer->ReadRasterizer(); | ||
| 98 | } | 93 | } |
| 99 | 94 | ||
| 100 | /// Flush all current written commands into the host GPU for execution. | 95 | /// Flush all current written commands into the host GPU for execution. |
| @@ -146,42 +141,44 @@ struct GPU::Impl { | |||
| 146 | 141 | ||
| 147 | /// Returns a reference to the Maxwell3D GPU engine. | 142 | /// Returns a reference to the Maxwell3D GPU engine. |
| 148 | [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { | 143 | [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { |
| 149 | return *maxwell_3d; | 144 | ASSERT(current_channel); |
| 145 | return *current_channel->maxwell_3d; | ||
| 150 | } | 146 | } |
| 151 | 147 | ||
| 152 | /// Returns a const reference to the Maxwell3D GPU engine. | 148 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 153 | [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { | 149 | [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { |
| 154 | return *maxwell_3d; | 150 | ASSERT(current_channel); |
| 151 | return *current_channel->maxwell_3d; | ||
| 155 | } | 152 | } |
| 156 | 153 | ||
| 157 | /// Returns a reference to the KeplerCompute GPU engine. | 154 | /// Returns a reference to the KeplerCompute GPU engine. |
| 158 | [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { | 155 | [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { |
| 159 | return *kepler_compute; | 156 | ASSERT(current_channel); |
| 157 | return *current_channel->kepler_compute; | ||
| 160 | } | 158 | } |
| 161 | 159 | ||
| 162 | /// Returns a reference to the KeplerCompute GPU engine. | 160 | /// Returns a reference to the KeplerCompute GPU engine. |
| 163 | [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { | 161 | [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { |
| 164 | return *kepler_compute; | 162 | ASSERT(current_channel); |
| 163 | return *current_channel->kepler_compute; | ||
| 165 | } | 164 | } |
| 166 | 165 | ||
| 167 | /// Returns a reference to the GPU memory manager. | 166 | /// Returns a reference to the GPU memory manager. |
| 168 | [[nodiscard]] Tegra::MemoryManager& MemoryManager() { | 167 | [[nodiscard]] Tegra::MemoryManager& MemoryManager() { |
| 169 | return *memory_manager; | 168 | CreateHost1xChannel(); |
| 170 | } | 169 | return *host1x_channel->memory_manager; |
| 171 | |||
| 172 | /// Returns a const reference to the GPU memory manager. | ||
| 173 | [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const { | ||
| 174 | return *memory_manager; | ||
| 175 | } | 170 | } |
| 176 | 171 | ||
| 177 | /// Returns a reference to the GPU DMA pusher. | 172 | /// Returns a reference to the GPU DMA pusher. |
| 178 | [[nodiscard]] Tegra::DmaPusher& DmaPusher() { | 173 | [[nodiscard]] Tegra::DmaPusher& DmaPusher() { |
| 179 | return *dma_pusher; | 174 | ASSERT(current_channel); |
| 175 | return *current_channel->dma_pusher; | ||
| 180 | } | 176 | } |
| 181 | 177 | ||
| 182 | /// Returns a const reference to the GPU DMA pusher. | 178 | /// Returns a const reference to the GPU DMA pusher. |
| 183 | [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { | 179 | [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { |
| 184 | return *dma_pusher; | 180 | ASSERT(current_channel); |
| 181 | return *current_channel->dma_pusher; | ||
| 185 | } | 182 | } |
| 186 | 183 | ||
| 187 | /// Returns a reference to the underlying renderer. | 184 | /// Returns a reference to the underlying renderer. |
| @@ -306,7 +303,7 @@ struct GPU::Impl { | |||
| 306 | /// This can be used to launch any necessary threads and register any necessary | 303 | /// This can be used to launch any necessary threads and register any necessary |
| 307 | /// core timing events. | 304 | /// core timing events. |
| 308 | void Start() { | 305 | void Start() { |
| 309 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); | 306 | gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); |
| 310 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | 307 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); |
| 311 | cpu_context->MakeCurrent(); | 308 | cpu_context->MakeCurrent(); |
| 312 | } | 309 | } |
| @@ -328,8 +325,8 @@ struct GPU::Impl { | |||
| 328 | } | 325 | } |
| 329 | 326 | ||
| 330 | /// Push GPU command entries to be processed | 327 | /// Push GPU command entries to be processed |
| 331 | void PushGPUEntries(Tegra::CommandList&& entries) { | 328 | void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { |
| 332 | gpu_thread.SubmitList(std::move(entries)); | 329 | gpu_thread.SubmitList(channel, std::move(entries)); |
| 333 | } | 330 | } |
| 334 | 331 | ||
| 335 | /// Push GPU command buffer entries to be processed | 332 | /// Push GPU command buffer entries to be processed |
| @@ -381,303 +378,16 @@ struct GPU::Impl { | |||
| 381 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 378 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 382 | } | 379 | } |
| 383 | 380 | ||
| 384 | void ProcessBindMethod(const GPU::MethodCall& method_call) { | ||
| 385 | // Bind the current subchannel to the desired engine id. | ||
| 386 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 387 | method_call.argument); | ||
| 388 | const auto engine_id = static_cast<EngineID>(method_call.argument); | ||
| 389 | bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); | ||
| 390 | switch (engine_id) { | ||
| 391 | case EngineID::FERMI_TWOD_A: | ||
| 392 | dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||
| 393 | break; | ||
| 394 | case EngineID::MAXWELL_B: | ||
| 395 | dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||
| 396 | break; | ||
| 397 | case EngineID::KEPLER_COMPUTE_B: | ||
| 398 | dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||
| 399 | break; | ||
| 400 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 401 | dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||
| 402 | break; | ||
| 403 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 404 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||
| 405 | break; | ||
| 406 | default: | ||
| 407 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | void ProcessFenceActionMethod() { | ||
| 412 | switch (regs.fence_action.op) { | ||
| 413 | case GPU::FenceOperation::Acquire: | ||
| 414 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 415 | break; | ||
| 416 | case GPU::FenceOperation::Increment: | ||
| 417 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 418 | break; | ||
| 419 | default: | ||
| 420 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | void ProcessWaitForInterruptMethod() { | ||
| 425 | // TODO(bunnei) ImplementMe | ||
| 426 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 427 | } | ||
| 428 | |||
| 429 | void ProcessSemaphoreTriggerMethod() { | ||
| 430 | const auto semaphoreOperationMask = 0xF; | ||
| 431 | const auto op = | ||
| 432 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 433 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 434 | struct Block { | ||
| 435 | u32 sequence; | ||
| 436 | u32 zeros = 0; | ||
| 437 | u64 timestamp; | ||
| 438 | }; | ||
| 439 | |||
| 440 | Block block{}; | ||
| 441 | block.sequence = regs.semaphore_sequence; | ||
| 442 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 443 | // CoreTiming | ||
| 444 | block.timestamp = GetTicks(); | ||
| 445 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | ||
| 446 | sizeof(block)); | ||
| 447 | } else { | ||
| 448 | const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||
| 449 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 450 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 451 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 452 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 453 | // Nothing to do in this case | ||
| 454 | } else { | ||
| 455 | regs.acquire_source = true; | ||
| 456 | regs.acquire_value = regs.semaphore_sequence; | ||
| 457 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 458 | regs.acquire_active = true; | ||
| 459 | regs.acquire_mode = false; | ||
| 460 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 461 | regs.acquire_active = true; | ||
| 462 | regs.acquire_mode = true; | ||
| 463 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 464 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 465 | // semaphore_sequence, gives a non-0 result | ||
| 466 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 467 | } else { | ||
| 468 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 469 | } | ||
| 470 | } | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | void ProcessSemaphoreRelease() { | ||
| 475 | memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), | ||
| 476 | regs.semaphore_release); | ||
| 477 | } | ||
| 478 | |||
| 479 | void ProcessSemaphoreAcquire() { | ||
| 480 | const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||
| 481 | const auto value = regs.semaphore_acquire; | ||
| 482 | if (word != value) { | ||
| 483 | regs.acquire_active = true; | ||
| 484 | regs.acquire_value = value; | ||
| 485 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 486 | regs.acquire_mode = false; | ||
| 487 | regs.acquire_source = false; | ||
| 488 | } | ||
| 489 | } | ||
| 490 | |||
| 491 | /// Calls a GPU puller method. | ||
| 492 | void CallPullerMethod(const GPU::MethodCall& method_call) { | ||
| 493 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 494 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 495 | |||
| 496 | switch (method) { | ||
| 497 | case BufferMethods::BindObject: { | ||
| 498 | ProcessBindMethod(method_call); | ||
| 499 | break; | ||
| 500 | } | ||
| 501 | case BufferMethods::Nop: | ||
| 502 | case BufferMethods::SemaphoreAddressHigh: | ||
| 503 | case BufferMethods::SemaphoreAddressLow: | ||
| 504 | case BufferMethods::SemaphoreSequence: | ||
| 505 | break; | ||
| 506 | case BufferMethods::UnkCacheFlush: | ||
| 507 | rasterizer->SyncGuestHost(); | ||
| 508 | break; | ||
| 509 | case BufferMethods::WrcacheFlush: | ||
| 510 | rasterizer->SignalReference(); | ||
| 511 | break; | ||
| 512 | case BufferMethods::FenceValue: | ||
| 513 | break; | ||
| 514 | case BufferMethods::RefCnt: | ||
| 515 | rasterizer->SignalReference(); | ||
| 516 | break; | ||
| 517 | case BufferMethods::FenceAction: | ||
| 518 | ProcessFenceActionMethod(); | ||
| 519 | break; | ||
| 520 | case BufferMethods::WaitForInterrupt: | ||
| 521 | rasterizer->WaitForIdle(); | ||
| 522 | break; | ||
| 523 | case BufferMethods::SemaphoreTrigger: { | ||
| 524 | ProcessSemaphoreTriggerMethod(); | ||
| 525 | break; | ||
| 526 | } | ||
| 527 | case BufferMethods::NotifyIntr: { | ||
| 528 | // TODO(Kmather73): Research and implement this method. | ||
| 529 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 530 | break; | ||
| 531 | } | ||
| 532 | case BufferMethods::Unk28: { | ||
| 533 | // TODO(Kmather73): Research and implement this method. | ||
| 534 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 535 | break; | ||
| 536 | } | ||
| 537 | case BufferMethods::SemaphoreAcquire: { | ||
| 538 | ProcessSemaphoreAcquire(); | ||
| 539 | break; | ||
| 540 | } | ||
| 541 | case BufferMethods::SemaphoreRelease: { | ||
| 542 | ProcessSemaphoreRelease(); | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | case BufferMethods::Yield: { | ||
| 546 | // TODO(Kmather73): Research and implement this method. | ||
| 547 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 548 | break; | ||
| 549 | } | ||
| 550 | default: | ||
| 551 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); | ||
| 552 | break; | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 556 | /// Calls a GPU engine method. | ||
| 557 | void CallEngineMethod(const GPU::MethodCall& method_call) { | ||
| 558 | const EngineID engine = bound_engines[method_call.subchannel]; | ||
| 559 | |||
| 560 | switch (engine) { | ||
| 561 | case EngineID::FERMI_TWOD_A: | ||
| 562 | fermi_2d->CallMethod(method_call.method, method_call.argument, | ||
| 563 | method_call.IsLastCall()); | ||
| 564 | break; | ||
| 565 | case EngineID::MAXWELL_B: | ||
| 566 | maxwell_3d->CallMethod(method_call.method, method_call.argument, | ||
| 567 | method_call.IsLastCall()); | ||
| 568 | break; | ||
| 569 | case EngineID::KEPLER_COMPUTE_B: | ||
| 570 | kepler_compute->CallMethod(method_call.method, method_call.argument, | ||
| 571 | method_call.IsLastCall()); | ||
| 572 | break; | ||
| 573 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 574 | maxwell_dma->CallMethod(method_call.method, method_call.argument, | ||
| 575 | method_call.IsLastCall()); | ||
| 576 | break; | ||
| 577 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 578 | kepler_memory->CallMethod(method_call.method, method_call.argument, | ||
| 579 | method_call.IsLastCall()); | ||
| 580 | break; | ||
| 581 | default: | ||
| 582 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 583 | } | ||
| 584 | } | ||
| 585 | |||
| 586 | /// Calls a GPU engine multivalue method. | ||
| 587 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 588 | u32 methods_pending) { | ||
| 589 | const EngineID engine = bound_engines[subchannel]; | ||
| 590 | |||
| 591 | switch (engine) { | ||
| 592 | case EngineID::FERMI_TWOD_A: | ||
| 593 | fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 594 | break; | ||
| 595 | case EngineID::MAXWELL_B: | ||
| 596 | maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 597 | break; | ||
| 598 | case EngineID::KEPLER_COMPUTE_B: | ||
| 599 | kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 600 | break; | ||
| 601 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 602 | maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 603 | break; | ||
| 604 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 605 | kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 606 | break; | ||
| 607 | default: | ||
| 608 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 609 | } | ||
| 610 | } | ||
| 611 | |||
| 612 | /// Determines where the method should be executed. | ||
| 613 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) { | ||
| 614 | const auto buffer_method = static_cast<BufferMethods>(method); | ||
| 615 | return buffer_method >= BufferMethods::NonPullerMethods; | ||
| 616 | } | ||
| 617 | |||
| 618 | struct Regs { | ||
| 619 | static constexpr size_t NUM_REGS = 0x40; | ||
| 620 | |||
| 621 | union { | ||
| 622 | struct { | ||
| 623 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 624 | struct { | ||
| 625 | u32 address_high; | ||
| 626 | u32 address_low; | ||
| 627 | |||
| 628 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 629 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 630 | address_low); | ||
| 631 | } | ||
| 632 | } semaphore_address; | ||
| 633 | |||
| 634 | u32 semaphore_sequence; | ||
| 635 | u32 semaphore_trigger; | ||
| 636 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 637 | |||
| 638 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 639 | // access | ||
| 640 | u32 reference_count; | ||
| 641 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 642 | |||
| 643 | u32 semaphore_acquire; | ||
| 644 | u32 semaphore_release; | ||
| 645 | u32 fence_value; | ||
| 646 | GPU::FenceAction fence_action; | ||
| 647 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 648 | |||
| 649 | // Puller state | ||
| 650 | u32 acquire_mode; | ||
| 651 | u32 acquire_source; | ||
| 652 | u32 acquire_active; | ||
| 653 | u32 acquire_timeout; | ||
| 654 | u32 acquire_value; | ||
| 655 | }; | ||
| 656 | std::array<u32, NUM_REGS> reg_array; | ||
| 657 | }; | ||
| 658 | } regs{}; | ||
| 659 | |||
| 660 | GPU& gpu; | 381 | GPU& gpu; |
| 661 | Core::System& system; | 382 | Core::System& system; |
| 662 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 383 | |
| 663 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||
| 664 | std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; | 384 | std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; |
| 665 | std::unique_ptr<VideoCore::RendererBase> renderer; | 385 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 666 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 386 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 667 | const bool use_nvdec; | 387 | const bool use_nvdec; |
| 668 | 388 | ||
| 669 | /// Mapping of command subchannels to their bound engine ids | 389 | std::shared_ptr<Control::ChannelState> host1x_channel; |
| 670 | std::array<EngineID, 8> bound_engines{}; | 390 | s32 new_channel_id{1}; |
| 671 | /// 3D engine | ||
| 672 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||
| 673 | /// 2D engine | ||
| 674 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||
| 675 | /// Compute engine | ||
| 676 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||
| 677 | /// DMA engine | ||
| 678 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||
| 679 | /// Inline memory engine | ||
| 680 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||
| 681 | /// Shader build notifier | 391 | /// Shader build notifier |
| 682 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | 392 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; |
| 683 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks | 393 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks |
| @@ -710,33 +420,10 @@ struct GPU::Impl { | |||
| 710 | VideoCommon::GPUThread::ThreadManager gpu_thread; | 420 | VideoCommon::GPUThread::ThreadManager gpu_thread; |
| 711 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | 421 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; |
| 712 | 422 | ||
| 713 | #define ASSERT_REG_POSITION(field_name, position) \ | 423 | std::unique_ptr<Tegra::Control::Scheduler> scheduler; |
| 714 | static_assert(offsetof(Regs, field_name) == position * 4, \ | 424 | std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels; |
| 715 | "Field " #field_name " has invalid position") | 425 | Tegra::Control::ChannelState* current_channel; |
| 716 | 426 | s32 bound_channel{-1}; | |
| 717 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 718 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 719 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 720 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 721 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 722 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 723 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 724 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 725 | |||
| 726 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 727 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 728 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 729 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 730 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 731 | |||
| 732 | #undef ASSERT_REG_POSITION | ||
| 733 | |||
| 734 | enum class GpuSemaphoreOperation { | ||
| 735 | AcquireEqual = 0x1, | ||
| 736 | WriteLong = 0x2, | ||
| 737 | AcquireGequal = 0x4, | ||
| 738 | AcquireMask = 0x8, | ||
| 739 | }; | ||
| 740 | }; | 427 | }; |
| 741 | 428 | ||
| 742 | GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) | 429 | GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) |
| @@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) | |||
| 744 | 431 | ||
| 745 | GPU::~GPU() = default; | 432 | GPU::~GPU() = default; |
| 746 | 433 | ||
| 747 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) { | 434 | std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() { |
| 748 | impl->BindRenderer(std::move(renderer)); | 435 | return impl->AllocateChannel(); |
| 436 | } | ||
| 437 | |||
| 438 | void GPU::InitChannel(Control::ChannelState& to_init) { | ||
| 439 | impl->InitChannel(to_init); | ||
| 749 | } | 440 | } |
| 750 | 441 | ||
| 751 | void GPU::CallMethod(const MethodCall& method_call) { | 442 | void GPU::BindChannel(s32 channel_id) { |
| 752 | impl->CallMethod(method_call); | 443 | impl->BindChannel(channel_id); |
| 753 | } | 444 | } |
| 754 | 445 | ||
| 755 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | 446 | void GPU::ReleaseChannel(Control::ChannelState& to_release) { |
| 756 | u32 methods_pending) { | 447 | impl->ReleaseChannel(to_release); |
| 757 | impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending); | 448 | } |
| 449 | |||
| 450 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) { | ||
| 451 | impl->BindRenderer(std::move(renderer)); | ||
| 758 | } | 452 | } |
| 759 | 453 | ||
| 760 | void GPU::FlushCommands() { | 454 | void GPU::FlushCommands() { |
| @@ -881,8 +575,8 @@ void GPU::ReleaseContext() { | |||
| 881 | impl->ReleaseContext(); | 575 | impl->ReleaseContext(); |
| 882 | } | 576 | } |
| 883 | 577 | ||
| 884 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | 578 | void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) { |
| 885 | impl->PushGPUEntries(std::move(entries)); | 579 | impl->PushGPUEntries(channel, std::move(entries)); |
| 886 | } | 580 | } |
| 887 | 581 | ||
| 888 | void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { | 582 | void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 42c91954f..74d55e074 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -89,57 +89,20 @@ class Maxwell3D; | |||
| 89 | class KeplerCompute; | 89 | class KeplerCompute; |
| 90 | } // namespace Engines | 90 | } // namespace Engines |
| 91 | 91 | ||
| 92 | enum class EngineID { | 92 | namespace Control { |
| 93 | FERMI_TWOD_A = 0x902D, // 2D Engine | 93 | struct ChannelState; |
| 94 | MAXWELL_B = 0xB197, // 3D Engine | 94 | } |
| 95 | KEPLER_COMPUTE_B = 0xB1C0, | ||
| 96 | KEPLER_INLINE_TO_MEMORY_B = 0xA140, | ||
| 97 | MAXWELL_DMA_COPY_A = 0xB0B5, | ||
| 98 | }; | ||
| 99 | 95 | ||
| 100 | class MemoryManager; | 96 | class MemoryManager; |
| 101 | 97 | ||
| 102 | class GPU final { | 98 | class GPU final { |
| 103 | public: | 99 | public: |
| 104 | struct MethodCall { | ||
| 105 | u32 method{}; | ||
| 106 | u32 argument{}; | ||
| 107 | u32 subchannel{}; | ||
| 108 | u32 method_count{}; | ||
| 109 | |||
| 110 | explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0) | ||
| 111 | : method(method_), argument(argument_), subchannel(subchannel_), | ||
| 112 | method_count(method_count_) {} | ||
| 113 | |||
| 114 | [[nodiscard]] bool IsLastCall() const { | ||
| 115 | return method_count <= 1; | ||
| 116 | } | ||
| 117 | }; | ||
| 118 | |||
| 119 | enum class FenceOperation : u32 { | ||
| 120 | Acquire = 0, | ||
| 121 | Increment = 1, | ||
| 122 | }; | ||
| 123 | |||
| 124 | union FenceAction { | ||
| 125 | u32 raw; | ||
| 126 | BitField<0, 1, FenceOperation> op; | ||
| 127 | BitField<8, 24, u32> syncpoint_id; | ||
| 128 | }; | ||
| 129 | |||
| 130 | explicit GPU(Core::System& system, bool is_async, bool use_nvdec); | 100 | explicit GPU(Core::System& system, bool is_async, bool use_nvdec); |
| 131 | ~GPU(); | 101 | ~GPU(); |
| 132 | 102 | ||
| 133 | /// Binds a renderer to the GPU. | 103 | /// Binds a renderer to the GPU. |
| 134 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); | 104 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); |
| 135 | 105 | ||
| 136 | /// Calls a GPU method. | ||
| 137 | void CallMethod(const MethodCall& method_call); | ||
| 138 | |||
| 139 | /// Calls a GPU multivalue method. | ||
| 140 | void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 141 | u32 methods_pending); | ||
| 142 | |||
| 143 | /// Flush all current written commands into the host GPU for execution. | 106 | /// Flush all current written commands into the host GPU for execution. |
| 144 | void FlushCommands(); | 107 | void FlushCommands(); |
| 145 | /// Synchronizes CPU writes with Host GPU memory. | 108 | /// Synchronizes CPU writes with Host GPU memory. |
| @@ -147,6 +110,14 @@ public: | |||
| 147 | /// Signal the ending of command list. | 110 | /// Signal the ending of command list. |
| 148 | void OnCommandListEnd(); | 111 | void OnCommandListEnd(); |
| 149 | 112 | ||
| 113 | std::shared_ptr<Control::ChannelState> AllocateChannel(); | ||
| 114 | |||
| 115 | void InitChannel(Control::ChannelState& to_init); | ||
| 116 | |||
| 117 | void BindChannel(s32 channel_id); | ||
| 118 | |||
| 119 | void ReleaseChannel(Control::ChannelState& to_release); | ||
| 120 | |||
| 150 | /// Request a host GPU memory flush from the CPU. | 121 | /// Request a host GPU memory flush from the CPU. |
| 151 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 122 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| 152 | 123 | ||
| @@ -226,7 +197,7 @@ public: | |||
| 226 | void ReleaseContext(); | 197 | void ReleaseContext(); |
| 227 | 198 | ||
| 228 | /// Push GPU command entries to be processed | 199 | /// Push GPU command entries to be processed |
| 229 | void PushGPUEntries(Tegra::CommandList&& entries); | 200 | void PushGPUEntries(s32 channel, Tegra::CommandList&& entries); |
| 230 | 201 | ||
| 231 | /// Push GPU command buffer entries to be processed | 202 | /// Push GPU command buffer entries to be processed |
| 232 | void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); | 203 | void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); |
| @@ -248,7 +219,7 @@ public: | |||
| 248 | 219 | ||
| 249 | private: | 220 | private: |
| 250 | struct Impl; | 221 | struct Impl; |
| 251 | std::unique_ptr<Impl> impl; | 222 | mutable std::unique_ptr<Impl> impl; |
| 252 | }; | 223 | }; |
| 253 | 224 | ||
| 254 | } // namespace Tegra | 225 | } // namespace Tegra |
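The gpu.h hunk above replaces direct MethodCall submission with a channel lifecycle: allocate a channel, let the GPU initialize its state, then push command lists against the channel id. A minimal sketch of how a caller could drive that interface follows; the helper function, variable names, and the assumption that ChannelState exposes its id as bind_id are illustrative, not taken from this patch.

#include <memory>
#include <utility>

#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h" // Tegra::CommandList
#include "video_core/gpu.h"

// Illustrative only: submit one command list on a freshly allocated channel.
// Error handling and channel reuse are omitted.
void SubmitOnOwnChannel(Tegra::GPU& gpu, Tegra::CommandList&& entries) {
    // Ask the GPU for a new channel and let it build the per-channel state
    // (DMA pusher, engine instances, memory manager binding).
    std::shared_ptr<Tegra::Control::ChannelState> channel = gpu.AllocateChannel();
    gpu.InitChannel(*channel);

    // Work is now tagged with a channel id instead of going through one global
    // pusher. bind_id is assumed here to be the channel's identifier.
    gpu.PushGPUEntries(channel->bind_id, std::move(entries));

    // When the guest closes the channel, release its state again.
    gpu.ReleaseChannel(*channel);
}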
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index f0e48cfbd..9844cde43 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/thread.h" | 8 | #include "common/thread.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/frontend/emu_window.h" | 10 | #include "core/frontend/emu_window.h" |
| 11 | #include "video_core/control/scheduler.h" | ||
| 11 | #include "video_core/dma_pusher.h" | 12 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 13 | #include "video_core/gpu_thread.h" | 14 | #include "video_core/gpu_thread.h" |
| @@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread { | |||
| 18 | /// Runs the GPU thread | 19 | /// Runs the GPU thread |
| 19 | static void RunThread(std::stop_token stop_token, Core::System& system, | 20 | static void RunThread(std::stop_token stop_token, Core::System& system, |
| 20 | VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | 21 | VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, |
| 21 | Tegra::DmaPusher& dma_pusher, SynchState& state) { | 22 | Tegra::Control::Scheduler& scheduler, SynchState& state) { |
| 22 | std::string name = "GPU"; | 23 | std::string name = "GPU"; |
| 23 | MicroProfileOnThreadCreate(name.c_str()); | 24 | MicroProfileOnThreadCreate(name.c_str()); |
| 24 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | 25 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); |
| @@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, | |||
| 36 | break; | 37 | break; |
| 37 | } | 38 | } |
| 38 | if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 39 | if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| 39 | dma_pusher.Push(std::move(submit_list->entries)); | 40 | scheduler.Push(submit_list->channel, std::move(submit_list->entries)); |
| 40 | dma_pusher.DispatchCalls(); | ||
| 41 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { | 41 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 42 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 42 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 43 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { | 43 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { |
| @@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default; | |||
| 68 | 68 | ||
| 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 70 | Core::Frontend::GraphicsContext& context, | 70 | Core::Frontend::GraphicsContext& context, |
| 71 | Tegra::DmaPusher& dma_pusher) { | 71 | Tegra::Control::Scheduler& scheduler) { |
| 72 | rasterizer = renderer.ReadRasterizer(); | 72 | rasterizer = renderer.ReadRasterizer(); |
| 73 | thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | 73 | thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), |
| 74 | std::ref(dma_pusher), std::ref(state)); | 74 | std::ref(scheduler), std::ref(state)); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 77 | void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { |
| 78 | PushCommand(SubmitListCommand(std::move(entries))); | 78 | PushCommand(SubmitListCommand(channel, std::move(entries))); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2f8210cb9..c5078a2b3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -15,7 +15,9 @@ | |||
| 15 | 15 | ||
| 16 | namespace Tegra { | 16 | namespace Tegra { |
| 17 | struct FramebufferConfig; | 17 | struct FramebufferConfig; |
| 18 | class DmaPusher; | 18 | namespace Control { |
| 19 | class Scheduler; | ||
| 20 | } | ||
| 19 | } // namespace Tegra | 21 | } // namespace Tegra |
| 20 | 22 | ||
| 21 | namespace Core { | 23 | namespace Core { |
| @@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread { | |||
| 34 | 36 | ||
| 35 | /// Command to signal to the GPU thread that a command list is ready for processing | 37 | /// Command to signal to the GPU thread that a command list is ready for processing |
| 36 | struct SubmitListCommand final { | 38 | struct SubmitListCommand final { |
| 37 | explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} | 39 | explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_) |
| 40 | : channel{channel_}, entries{std::move(entries_)} {} | ||
| 38 | 41 | ||
| 42 | s32 channel; | ||
| 39 | Tegra::CommandList entries; | 43 | Tegra::CommandList entries; |
| 40 | }; | 44 | }; |
| 41 | 45 | ||
| @@ -112,10 +116,10 @@ public: | |||
| 112 | 116 | ||
| 113 | /// Creates and starts the GPU thread. | 117 | /// Creates and starts the GPU thread. |
| 114 | void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | 118 | void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, |
| 115 | Tegra::DmaPusher& dma_pusher); | 119 | Tegra::Control::Scheduler& scheduler); |
| 116 | 120 | ||
| 117 | /// Push GPU command entries to be processed | 121 | /// Push GPU command entries to be processed |
| 118 | void SubmitList(Tegra::CommandList&& entries); | 122 | void SubmitList(s32 channel, Tegra::CommandList&& entries); |
| 119 | 123 | ||
| 120 | /// Swap buffers (render frame) | 124 | /// Swap buffers (render frame) |
| 121 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 125 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
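With the gpu_thread changes above, a submission carries its channel id and the GPU thread hands it to Tegra::Control::Scheduler instead of a single DmaPusher. The scheduler source is not part of this excerpt; the class below is only a sketch of the routing idea, and the assumption that each ChannelState owns its own dma_pusher is illustrative.

#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>

#include "common/common_types.h"
#include "video_core/control/channel_state.h"
#include "video_core/dma_pusher.h"

// Sketch only: route each submitted command list to the pusher of its channel.
class SchedulerSketch {
public:
    void DeclareChannel(s32 id, std::shared_ptr<Tegra::Control::ChannelState> state) {
        std::scoped_lock lock{guard};
        channels.emplace(id, std::move(state));
    }

    void Push(s32 channel, Tegra::CommandList&& entries) {
        std::scoped_lock lock{guard};
        auto& state = channels.at(channel);
        // Each channel keeps its own DMA pusher, so lists from different
        // channels no longer interleave through a single global queue.
        state->dma_pusher->Push(std::move(entries));
        state->dma_pusher->DispatchCalls();
    }

private:
    std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
    std::mutex guard;
};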
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index bf9eb735d..a3efd365e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s | |||
| 133 | // TryLockPage(page_entry, size); | 133 | // TryLockPage(page_entry, size); |
| 134 | auto& current_page = page_table[PageEntryIndex(gpu_addr)]; | 134 | auto& current_page = page_table[PageEntryIndex(gpu_addr)]; |
| 135 | 135 | ||
| 136 | if ((!current_page.IsValid() && page_entry.IsValid()) || | ||
| 137 | current_page.ToAddress() != page_entry.ToAddress()) { | ||
| 138 | rasterizer->ModifyGPUMemory(gpu_addr, size); | ||
| 139 | } | ||
| 140 | |||
| 141 | current_page = page_entry; | 136 | current_page = page_entry; |
| 142 | } | 137 | } |
| 143 | 138 | ||
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 889b606b3..eb68ea638 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/settings.h" | 19 | #include "common/settings.h" |
| 20 | #include "video_core/control/channel_state_cache.h" | ||
| 20 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 22 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| @@ -90,13 +91,10 @@ private: | |||
| 90 | }; | 91 | }; |
| 91 | 92 | ||
| 92 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | 93 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> |
| 93 | class QueryCacheBase { | 94 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 94 | public: | 95 | public: |
| 95 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, | 96 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_) |
| 96 | Tegra::Engines::Maxwell3D& maxwell3d_, | 97 | : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this), |
| 97 | Tegra::MemoryManager& gpu_memory_) | ||
| 98 | : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 99 | gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | ||
| 100 | VideoCore::QueryType::SamplesPassed}}} {} | 98 | VideoCore::QueryType::SamplesPassed}}} {} |
| 101 | 99 | ||
| 102 | void InvalidateRegion(VAddr addr, std::size_t size) { | 100 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| @@ -117,13 +115,13 @@ public: | |||
| 117 | */ | 115 | */ |
| 118 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | 116 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { |
| 119 | std::unique_lock lock{mutex}; | 117 | std::unique_lock lock{mutex}; |
| 120 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 118 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 121 | ASSERT(cpu_addr); | 119 | ASSERT(cpu_addr); |
| 122 | 120 | ||
| 123 | CachedQuery* query = TryGet(*cpu_addr); | 121 | CachedQuery* query = TryGet(*cpu_addr); |
| 124 | if (!query) { | 122 | if (!query) { |
| 125 | ASSERT_OR_EXECUTE(cpu_addr, return;); | 123 | ASSERT_OR_EXECUTE(cpu_addr, return;); |
| 126 | u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); | 124 | u8* const host_ptr = gpu_memory->GetPointer(gpu_addr); |
| 127 | 125 | ||
| 128 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 126 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); |
| 129 | } | 127 | } |
| @@ -137,7 +135,7 @@ public: | |||
| 137 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| 138 | void UpdateCounters() { | 136 | void UpdateCounters() { |
| 139 | std::unique_lock lock{mutex}; | 137 | std::unique_lock lock{mutex}; |
| 140 | const auto& regs = maxwell3d.regs; | 138 | const auto& regs = maxwell3d->regs; |
| 141 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | 139 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); |
| 142 | } | 140 | } |
| 143 | 141 | ||
| @@ -264,8 +262,6 @@ private: | |||
| 264 | static constexpr unsigned YUZU_PAGEBITS = 12; | 262 | static constexpr unsigned YUZU_PAGEBITS = 12; |
| 265 | 263 | ||
| 266 | VideoCore::RasterizerInterface& rasterizer; | 264 | VideoCore::RasterizerInterface& rasterizer; |
| 267 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 268 | Tegra::MemoryManager& gpu_memory; | ||
| 269 | 265 | ||
| 270 | std::recursive_mutex mutex; | 266 | std::recursive_mutex mutex; |
| 271 | 267 | ||
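QueryCacheBase now derives from VideoCommon::ChannelSetupCaches<ChannelInfo> and reads maxwell3d, kepler_compute and gpu_memory through pointers rather than holding references, so the active channel can be swapped underneath it. That base class is not shown in this excerpt; the fragment below only illustrates the pointer-swapping pattern with invented names.

namespace Tegra {
class MemoryManager;
namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines
} // namespace Tegra

// Conceptual sketch of a channel-aware cache: no lifetime references to one
// fixed engine set, just pointers that are redirected on channel bind.
class ChannelAwareCacheSketch {
public:
    void BindToChannel(Tegra::Engines::Maxwell3D* maxwell3d_,
                       Tegra::Engines::KeplerCompute* kepler_compute_,
                       Tegra::MemoryManager* gpu_memory_) {
        maxwell3d = maxwell3d_;
        kepler_compute = kepler_compute_;
        gpu_memory = gpu_memory_;
    }

protected:
    // Dereferenced as maxwell3d->regs, gpu_memory->GpuToCpuAddress(...), etc.,
    // matching the maxwell3d->/gpu_memory-> accesses in the hunks above.
    Tegra::Engines::Maxwell3D* maxwell3d = nullptr;
    Tegra::Engines::KeplerCompute* kepler_compute = nullptr;
    Tegra::MemoryManager* gpu_memory = nullptr;
};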
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a04a76481..8dacb2626 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -16,6 +16,9 @@ class MemoryManager; | |||
| 16 | namespace Engines { | 16 | namespace Engines { |
| 17 | class AccelerateDMAInterface; | 17 | class AccelerateDMAInterface; |
| 18 | } | 18 | } |
| 19 | namespace Control { | ||
| 20 | struct ChannelState; | ||
| 21 | } | ||
| 19 | } // namespace Tegra | 22 | } // namespace Tegra |
| 20 | 23 | ||
| 21 | namespace VideoCore { | 24 | namespace VideoCore { |
| @@ -137,5 +140,11 @@ public: | |||
| 137 | /// Initialize disk cached resources for the game being emulated | 140 | /// Initialize disk cached resources for the game being emulated |
| 138 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 141 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 139 | const DiskResourceLoadCallback& callback) {} | 142 | const DiskResourceLoadCallback& callback) {} |
| 143 | |||
| 144 | virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {} | ||
| 145 | |||
| 146 | virtual void BindChannel(Tegra::Control::ChannelState& channel) {} | ||
| 147 | |||
| 148 | virtual void ReleaseChannel(s32 channel_id) {} | ||
| 140 | }; | 149 | }; |
| 141 | } // namespace VideoCore | 150 | } // namespace VideoCore |
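The three new virtuals above default to no-ops, so backends opt in individually. A minimal sketch of an override follows; the class name and comments are illustrative, and the remaining pure-virtual members of RasterizerInterface are omitted, so the sketch stays abstract.

#include "common/common_types.h"
#include "video_core/control/channel_state.h"
#include "video_core/rasterizer_interface.h"

// Illustrative override only; not the RasterizerOpenGL/Vulkan code from this series.
class ChannelAwareRasterizer : public VideoCore::RasterizerInterface {
public:
    void InitializeChannel(Tegra::Control::ChannelState& channel) override {
        // Create per-channel entries in the backend caches.
    }
    void BindChannel(Tegra::Control::ChannelState& channel) override {
        // Repoint the caches at this channel's Maxwell3D/KeplerCompute/GMMU.
    }
    void ReleaseChannel(s32 channel_id) override {
        // Drop whatever the caches allocated for that channel.
    }
    // Remaining pure-virtual members intentionally left unimplemented here.
};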
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 6e82c2e28..c76446b60 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -12,7 +12,7 @@ namespace OpenGL { | |||
| 12 | 12 | ||
| 13 | GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} | 13 | GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} |
| 14 | 14 | ||
| 15 | GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_) | 15 | GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_) |
| 16 | : FenceBase{address_, payload_, is_stubbed_} {} | 16 | : FenceBase{address_, payload_, is_stubbed_} {} |
| 17 | 17 | ||
| 18 | GLInnerFence::~GLInnerFence() = default; | 18 | GLInnerFence::~GLInnerFence() = default; |
| @@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | |||
| 52 | return std::make_shared<GLInnerFence>(value, is_stubbed); | 52 | return std::make_shared<GLInnerFence>(value, is_stubbed); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | 55 | Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) { |
| 56 | return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | 56 | return std::make_shared<GLInnerFence>(addr, value, is_stubbed); |
| 57 | } | 57 | } |
| 58 | 58 | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 14ff00db2..fced8d002 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -17,7 +17,7 @@ namespace OpenGL { | |||
| 17 | class GLInnerFence : public VideoCommon::FenceBase { | 17 | class GLInnerFence : public VideoCommon::FenceBase { |
| 18 | public: | 18 | public: |
| 19 | explicit GLInnerFence(u32 payload_, bool is_stubbed_); | 19 | explicit GLInnerFence(u32 payload_, bool is_stubbed_); |
| 20 | explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_); | 20 | explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_); |
| 21 | ~GLInnerFence(); | 21 | ~GLInnerFence(); |
| 22 | 22 | ||
| 23 | void Queue(); | 23 | void Queue(); |
| @@ -41,7 +41,7 @@ public: | |||
| 41 | 41 | ||
| 42 | protected: | 42 | protected: |
| 43 | Fence CreateFence(u32 value, bool is_stubbed) override; | 43 | Fence CreateFence(u32 value, bool is_stubbed) override; |
| 44 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | 44 | Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; |
| 45 | void QueueFence(Fence& fence) override; | 45 | void QueueFence(Fence& fence) override; |
| 46 | bool IsFenceSignaled(Fence& fence) const override; | 46 | bool IsFenceSignaled(Fence& fence) const override; |
| 47 | void WaitFence(Fence& fence) override; | 47 | void WaitFence(Fence& fence) override; |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index ed40f5791..5070db441 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 26 | 26 | ||
| 27 | } // Anonymous namespace | 27 | } // Anonymous namespace |
| 28 | 28 | ||
| 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, | 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_) |
| 30 | Tegra::MemoryManager& gpu_memory_) | 30 | : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {} |
| 31 | : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {} | ||
| 32 | 31 | ||
| 33 | QueryCache::~QueryCache() = default; | 32 | QueryCache::~QueryCache() = default; |
| 34 | 33 | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 8a49f1ef0..14ce59990 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | |||
| 28 | class QueryCache final | 28 | class QueryCache final |
| 29 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 29 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 30 | public: | 30 | public: |
| 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, | 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_); |
| 32 | Tegra::MemoryManager& gpu_memory_); | ||
| 33 | ~QueryCache(); | 32 | ~QueryCache(); |
| 34 | 33 | ||
| 35 | OGLQuery AllocateQuery(VideoCore::QueryType type); | 34 | OGLQuery AllocateQuery(VideoCore::QueryType type); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a0d048b0b..e8d61bd41 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 60 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | 60 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), |
| 61 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | 61 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), |
| 62 | texture_cache_runtime(device, program_manager, state_tracker), | 62 | texture_cache_runtime(device, program_manager, state_tracker), |
| 63 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 63 | texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), |
| 64 | buffer_cache_runtime(device), | 64 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 65 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 65 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, |
| 66 | shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, | 66 | state_tracker, gpu.ShaderNotify()), |
| 67 | buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), | 67 | query_cache(*this), accelerate_dma(buffer_cache), |
| 68 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | ||
| 69 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} | 68 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} |
| 70 | 69 | ||
| 71 | RasterizerOpenGL::~RasterizerOpenGL() = default; | 70 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| @@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | |||
| 392 | gpu_memory.Write<u32>(addr, value); | 391 | gpu_memory.Write<u32>(addr, value); |
| 393 | return; | 392 | return; |
| 394 | } | 393 | } |
| 395 | fence_manager.SignalSemaphore(addr, value); | 394 | auto paddr = gpu_memory.GetPointer(addr); |
| 395 | fence_manager.SignalSemaphore(paddr, value); | ||
| 396 | } | 396 | } |
| 397 | 397 | ||
| 398 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { | 398 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { |
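SignalSemaphore above now resolves the GPU virtual address to a host pointer (gpu_memory.GetPointer) before handing it to the fence manager, matching the GPUVAddr -> u8* change in the fence types. The upshot is that releasing a fence can store the payload directly through the pointer instead of walking the (now per-channel) GPU page tables again; the struct below is only a sketch of that release path, not the FenceBase from this series.

#include <cstring>

#include "common/common_types.h"

// Sketch of a semaphore fence keyed by a host pointer.
struct SemaphoreFenceSketch {
    u8* address; // captured at signal time via gpu_memory.GetPointer(gpu_addr)
    u32 payload; // value the guest waits on

    void Release() const {
        // Equivalent to gpu_memory.Write<u32>(gpu_addr, payload), minus the
        // GPU-address translation.
        std::memcpy(address, &payload, sizeof(payload));
    }
};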
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0b8d8ec92..494581d0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs | |||
| 151 | } // Anonymous namespace | 151 | } // Anonymous namespace |
| 152 | 152 | ||
| 153 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 153 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 154 | Tegra::Engines::Maxwell3D& maxwell3d_, | 154 | const Device& device_, TextureCache& texture_cache_, |
| 155 | Tegra::Engines::KeplerCompute& kepler_compute_, | 155 | BufferCache& buffer_cache_, ProgramManager& program_manager_, |
| 156 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 156 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) |
| 157 | TextureCache& texture_cache_, BufferCache& buffer_cache_, | 157 | : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, |
| 158 | ProgramManager& program_manager_, StateTracker& state_tracker_, | 158 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, |
| 159 | VideoCore::ShaderNotify& shader_notify_) | 159 | state_tracker{state_tracker_}, shader_notify{shader_notify_}, |
| 160 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | 160 | use_asynchronous_shaders{device.UseAsynchronousShaders()}, |
| 161 | emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, | ||
| 162 | buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, | ||
| 163 | shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, | ||
| 164 | profile{ | 161 | profile{ |
| 165 | .supported_spirv = 0x00010000, | 162 | .supported_spirv = 0x00010000, |
| 166 | 163 | ||
| @@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { | |||
| 310 | current_pipeline = nullptr; | 307 | current_pipeline = nullptr; |
| 311 | return nullptr; | 308 | return nullptr; |
| 312 | } | 309 | } |
| 313 | const auto& regs{maxwell3d.regs}; | 310 | const auto& regs{maxwell3d->regs}; |
| 314 | graphics_key.raw = 0; | 311 | graphics_key.raw = 0; |
| 315 | graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | 312 | graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); |
| 316 | graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 | 313 | graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 |
| @@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n | |||
| 351 | } | 348 | } |
| 352 | // If something is using depth, we can assume that games are not rendering anything which | 349 | // If something is using depth, we can assume that games are not rendering anything which |
| 353 | // will be used one time. | 350 | // will be used one time. |
| 354 | if (maxwell3d.regs.zeta_enable) { | 351 | if (maxwell3d->regs.zeta_enable) { |
| 355 | return nullptr; | 352 | return nullptr; |
| 356 | } | 353 | } |
| 357 | // If games are using a small index count, we can assume these are full screen quads. | 354 | // If games are using a small index count, we can assume these are full screen quads. |
| 358 | // Usually these shaders are only used once for building textures so we can assume they | 355 | // Usually these shaders are only used once for building textures so we can assume they |
| 359 | // can't be built async | 356 | // can't be built async |
| 360 | if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { | 357 | if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { |
| 361 | return pipeline; | 358 | return pipeline; |
| 362 | } | 359 | } |
| 363 | return nullptr; | 360 | return nullptr; |
| @@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() { | |||
| 368 | if (!shader) { | 365 | if (!shader) { |
| 369 | return nullptr; | 366 | return nullptr; |
| 370 | } | 367 | } |
| 371 | const auto& qmd{kepler_compute.launch_description}; | 368 | const auto& qmd{kepler_compute->launch_description}; |
| 372 | const ComputePipelineKey key{ | 369 | const ComputePipelineKey key{ |
| 373 | .unique_hash = shader->unique_hash, | 370 | .unique_hash = shader->unique_hash, |
| 374 | .shared_memory_size = qmd.shared_alloc, | 371 | .shared_memory_size = qmd.shared_alloc, |
| @@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 481 | } | 478 | } |
| 482 | auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; | 479 | auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; |
| 483 | return std::make_unique<GraphicsPipeline>( | 480 | return std::make_unique<GraphicsPipeline>( |
| 484 | device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, | 481 | device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager, |
| 485 | thread_worker, &shader_notify, sources, sources_spirv, infos, key); | 482 | state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key); |
| 486 | 483 | ||
| 487 | } catch (Shader::Exception& exception) { | 484 | } catch (Shader::Exception& exception) { |
| 488 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); | 485 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); |
| @@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 491 | 488 | ||
| 492 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | 489 | std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( |
| 493 | const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { | 490 | const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { |
| 494 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | 491 | const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()}; |
| 495 | const auto& qmd{kepler_compute.launch_description}; | 492 | const auto& qmd{kepler_compute->launch_description}; |
| 496 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | 493 | ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; |
| 497 | env.SetCachedSize(shader->size_bytes); | 494 | env.SetCachedSize(shader->size_bytes); |
| 498 | 495 | ||
| 499 | main_pools.ReleaseContents(); | 496 | main_pools.ReleaseContents(); |
| @@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | |||
| 536 | break; | 533 | break; |
| 537 | } | 534 | } |
| 538 | 535 | ||
| 539 | return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, | 536 | return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory, |
| 540 | kepler_compute, program_manager, program.info, code, | 537 | *kepler_compute, program_manager, program.info, code, |
| 541 | code_spirv); | 538 | code_spirv); |
| 542 | } catch (Shader::Exception& exception) { | 539 | } catch (Shader::Exception& exception) { |
| 543 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); | 540 | LOG_ERROR(Render_OpenGL, "{}", exception.what()); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a14269dea..89f181fe3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | |||
| 30 | class ShaderCache : public VideoCommon::ShaderCache { | 30 | class ShaderCache : public VideoCommon::ShaderCache { |
| 31 | public: | 31 | public: |
| 32 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 32 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 33 | Tegra::Engines::Maxwell3D& maxwell3d_, | 33 | const Device& device_, TextureCache& texture_cache_, |
| 34 | Tegra::Engines::KeplerCompute& kepler_compute_, | 34 | BufferCache& buffer_cache_, ProgramManager& program_manager_, |
| 35 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 35 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); |
| 36 | TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 37 | ProgramManager& program_manager_, StateTracker& state_tracker_, | ||
| 38 | VideoCore::ShaderNotify& shader_notify_); | ||
| 39 | ~ShaderCache(); | 36 | ~ShaderCache(); |
| 40 | 37 | ||
| 41 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 38 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c78d0299..68c2bc34c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 95 | Core::Frontend::EmuWindow& emu_window, | 95 | Core::Frontend::EmuWindow& emu_window, |
| 96 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 96 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |
| 97 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | 97 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |
| 98 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | 98 | : RendererBase(emu_window, std::move(context_)), |
| 99 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), | 99 | telemetry_session(telemetry_session_), |
| 100 | cpu_memory(cpu_memory_), | ||
| 101 | gpu(gpu_), | ||
| 102 | library(OpenLibrary()), | ||
| 100 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 103 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 101 | true, Settings::values.renderer_debug.GetValue())), | 104 | true, Settings::values.renderer_debug.GetValue())), |
| 102 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 105 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), |
| 103 | surface(CreateSurface(instance, render_window)), | 106 | surface(CreateSurface(instance, render_window)), |
| 104 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), | 107 | device(CreateDevice(instance, dld, *surface)), |
| 105 | state_tracker(gpu), scheduler(device, state_tracker), | 108 | memory_allocator(device, false), |
| 109 | state_tracker(gpu), | ||
| 110 | scheduler(device, state_tracker), | ||
| 106 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | 111 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, |
| 107 | render_window.GetFramebufferLayout().height, false), | 112 | render_window.GetFramebufferLayout().height, false), |
| 108 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | 113 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, |
| 109 | screen_info), | 114 | screen_info), |
| 110 | rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, | 115 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, |
| 111 | memory_allocator, state_tracker, scheduler) { | 116 | state_tracker, scheduler) { |
| 112 | Report(); | 117 | Report(); |
| 113 | } catch (const vk::Exception& exception) { | 118 | } catch (const vk::Exception& exception) { |
| 114 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | 119 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index c249b34d4..301cbbabe 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -14,7 +14,7 @@ namespace Vulkan { | |||
| 14 | InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) | 14 | InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) |
| 15 | : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} | 15 | : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} |
| 16 | 16 | ||
| 17 | InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_) | 17 | InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_) |
| 18 | : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} | 18 | : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} |
| 19 | 19 | ||
| 20 | InnerFence::~InnerFence() = default; | 20 | InnerFence::~InnerFence() = default; |
| @@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { | |||
| 52 | return std::make_shared<InnerFence>(scheduler, value, is_stubbed); | 52 | return std::make_shared<InnerFence>(scheduler, value, is_stubbed); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | 55 | Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) { |
| 56 | return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); | 56 | return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); |
| 57 | } | 57 | } |
| 58 | 58 | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 7c0bbd80a..ea9e88052 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -26,7 +26,7 @@ class Scheduler; | |||
| 26 | class InnerFence : public VideoCommon::FenceBase { | 26 | class InnerFence : public VideoCommon::FenceBase { |
| 27 | public: | 27 | public: |
| 28 | explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); | 28 | explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); |
| 29 | explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_); | 29 | explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_); |
| 30 | ~InnerFence(); | 30 | ~InnerFence(); |
| 31 | 31 | ||
| 32 | void Queue(); | 32 | void Queue(); |
| @@ -51,7 +51,7 @@ public: | |||
| 51 | 51 | ||
| 52 | protected: | 52 | protected: |
| 53 | Fence CreateFence(u32 value, bool is_stubbed) override; | 53 | Fence CreateFence(u32 value, bool is_stubbed) override; |
| 54 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | 54 | Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; |
| 55 | void QueueFence(Fence& fence) override; | 55 | void QueueFence(Fence& fence) override; |
| 56 | bool IsFenceSignaled(Fence& fence) const override; | 56 | bool IsFenceSignaled(Fence& fence) const override; |
| 57 | void WaitFence(Fence& fence) override; | 57 | void WaitFence(Fence& fence) override; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index accbfc8e1..b1e0b96c4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | |||
| 259 | return std::memcmp(&rhs, this, Size()) == 0; | 259 | return std::memcmp(&rhs, this, Size()) == 0; |
| 260 | } | 260 | } |
| 261 | 261 | ||
| 262 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, | 262 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, |
| 263 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 264 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||
| 265 | Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | 263 | Scheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 266 | UpdateDescriptorQueue& update_descriptor_queue_, | 264 | UpdateDescriptorQueue& update_descriptor_queue_, |
| 267 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | 265 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 268 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) | 266 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |
| 269 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | 267 | : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, |
| 270 | device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, | 268 | descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, |
| 271 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, | 269 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, |
| 272 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, | 270 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, |
| 273 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, | 271 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, |
| 274 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), | 272 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), |
| 275 | serialization_thread(1, "VkPipelineSerialization") { | 273 | serialization_thread(1, "VkPipelineSerialization") { |
| @@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { | |||
| 337 | current_pipeline = nullptr; | 335 | current_pipeline = nullptr; |
| 338 | return nullptr; | 336 | return nullptr; |
| 339 | } | 337 | } |
| 340 | graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), | 338 | graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(), |
| 341 | device.IsExtVertexInputDynamicStateSupported()); | 339 | device.IsExtVertexInputDynamicStateSupported()); |
| 342 | 340 | ||
| 343 | if (current_pipeline) { | 341 | if (current_pipeline) { |
| @@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { | |||
| 357 | if (!shader) { | 355 | if (!shader) { |
| 358 | return nullptr; | 356 | return nullptr; |
| 359 | } | 357 | } |
| 360 | const auto& qmd{kepler_compute.launch_description}; | 358 | const auto& qmd{kepler_compute->launch_description}; |
| 361 | const ComputePipelineCacheKey key{ | 359 | const ComputePipelineCacheKey key{ |
| 362 | .unique_hash = shader->unique_hash, | 360 | .unique_hash = shader->unique_hash, |
| 363 | .shared_memory_size = qmd.shared_alloc, | 361 | .shared_memory_size = qmd.shared_alloc, |
| @@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const | |||
| 486 | } | 484 | } |
| 487 | // If something is using depth, we can assume that games are not rendering anything which | 485 | // If something is using depth, we can assume that games are not rendering anything which |
| 488 | // will be used one time. | 486 | // will be used one time. |
| 489 | if (maxwell3d.regs.zeta_enable) { | 487 | if (maxwell3d->regs.zeta_enable) { |
| 490 | return nullptr; | 488 | return nullptr; |
| 491 | } | 489 | } |
| 492 | // If games are using a small index count, we can assume these are full screen quads. | 490 | // If games are using a small index count, we can assume these are full screen quads. |
| 493 | // Usually these shaders are only used once for building textures so we can assume they | 491 | // Usually these shaders are only used once for building textures so we can assume they |
| 494 | // can't be built async | 492 | // can't be built async |
| 495 | if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { | 493 | if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { |
| 496 | return pipeline; | 494 | return pipeline; |
| 497 | } | 495 | } |
| 498 | return nullptr; | 496 | return nullptr; |
| @@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 558 | } | 556 | } |
| 559 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | 557 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; |
| 560 | return std::make_unique<GraphicsPipeline>( | 558 | return std::make_unique<GraphicsPipeline>( |
| 561 | maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, | 559 | *maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, |
| 562 | descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, | 560 | descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, |
| 563 | std::move(modules), infos); | 561 | std::move(modules), infos); |
| 564 | 562 | ||
| @@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 592 | 590 | ||
| 593 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | 591 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 594 | const ComputePipelineCacheKey& key, const ShaderInfo* shader) { | 592 | const ComputePipelineCacheKey& key, const ShaderInfo* shader) { |
| 595 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | 593 | const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()}; |
| 596 | const auto& qmd{kepler_compute.launch_description}; | 594 | const auto& qmd{kepler_compute->launch_description}; |
| 597 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | 595 | ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; |
| 598 | env.SetCachedSize(shader->size_bytes); | 596 | env.SetCachedSize(shader->size_bytes); |
| 599 | 597 | ||
| 600 | main_pools.ReleaseContents(); | 598 | main_pools.ReleaseContents(); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 127957dbf..61f9e9366 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -100,10 +100,8 @@ struct ShaderPools { | |||
| 100 | 100 | ||
| 101 | class PipelineCache : public VideoCommon::ShaderCache { | 101 | class PipelineCache : public VideoCommon::ShaderCache { |
| 102 | public: | 102 | public: |
| 103 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, | 103 | explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, |
| 104 | Tegra::Engines::KeplerCompute& kepler_compute, | 104 | DescriptorPool& descriptor_pool, |
| 105 | Tegra::MemoryManager& gpu_memory, const Device& device, | ||
| 106 | Scheduler& scheduler, DescriptorPool& descriptor_pool, | ||
| 107 | UpdateDescriptorQueue& update_descriptor_queue, | 105 | UpdateDescriptorQueue& update_descriptor_queue, |
| 108 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | 106 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
| 109 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); | 107 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2b859c6b8..393bbdf37 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { | |||
| 65 | usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; | 65 | usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, | 68 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, |
| 69 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 69 | Scheduler& scheduler_) |
| 70 | const Device& device_, Scheduler& scheduler_) | 70 | : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_}, |
| 71 | : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, | ||
| 72 | query_pools{ | 71 | query_pools{ |
| 73 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, | 72 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, |
| 74 | } {} | 73 | } {} |
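Dropping `Maxwell3D` and the GPU memory manager from this constructor follows the same pattern as the pipeline cache above: the Vulkan query cache is now built from host-side objects only, and the guest-side state is presumably supplied per channel through `QueryCacheBase`. A minimal usage sketch under that assumption, with the signature as declared in the header below:

```cpp
// Host-side construction only; the guest engine state is no longer injected here.
QueryCache query_cache{rasterizer, device, scheduler};

// The per-channel guest state is attached later through the channel hooks the
// rasterizer invokes further down in this diff:
//   query_cache.CreateChannel(channel);
//   query_cache.BindToChannel(channel.bind_id);
```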
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index b0d86c4f8..26762ee09 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -52,9 +52,8 @@ private: | |||
| 52 | class QueryCache final | 52 | class QueryCache final |
| 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 54 | public: | 54 | public: |
| 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, | 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_, |
| 56 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 56 | Scheduler& scheduler_); |
| 57 | const Device& device_, Scheduler& scheduler_); | ||
| 58 | ~QueryCache(); | 57 | ~QueryCache(); |
| 59 | 58 | ||
| 60 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); | 59 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7e40c2df1..5d9ff0589 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/microprofile.h" | 11 | #include "common/microprofile.h" |
| 12 | #include "common/scope_exit.h" | 12 | #include "common/scope_exit.h" |
| 13 | #include "common/settings.h" | 13 | #include "common/settings.h" |
| 14 | #include "video_core/control/channel_state.h" | ||
| 14 | #include "video_core/engines/kepler_compute.h" | 15 | #include "video_core/engines/kepler_compute.h" |
| 15 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 16 | #include "video_core/renderer_vulkan/blit_image.h" | 17 | #include "video_core/renderer_vulkan/blit_image.h" |
| @@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan | |||
| 148 | } // Anonymous namespace | 149 | } // Anonymous namespace |
| 149 | 150 | ||
| 150 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 151 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 151 | Tegra::MemoryManager& gpu_memory_, | ||
| 152 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | 152 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, |
| 153 | const Device& device_, MemoryAllocator& memory_allocator_, | 153 | const Device& device_, MemoryAllocator& memory_allocator_, |
| 154 | StateTracker& state_tracker_, Scheduler& scheduler_) | 154 | StateTracker& state_tracker_, Scheduler& scheduler_) |
| 155 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, | 155 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, |
| 156 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, | 156 | memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 157 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, | ||
| 158 | state_tracker{state_tracker_}, scheduler{scheduler_}, | ||
| 159 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 157 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 160 | update_descriptor_queue(device, scheduler), | 158 | update_descriptor_queue(device, scheduler), |
| 161 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 159 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| @@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 165 | memory_allocator, staging_pool, | 163 | memory_allocator, staging_pool, |
| 166 | blit_image, astc_decoder_pass, | 164 | blit_image, astc_decoder_pass, |
| 167 | render_pass_cache}, | 165 | render_pass_cache}, |
| 168 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 166 | texture_cache(texture_cache_runtime, *this), |
| 169 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 167 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 170 | update_descriptor_queue, descriptor_pool), | 168 | update_descriptor_queue, descriptor_pool), |
| 171 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 169 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 172 | pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 170 | pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 173 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, | 171 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 174 | texture_cache, gpu.ShaderNotify()), | 172 | query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache}, |
| 175 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | ||
| 176 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 173 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 177 | wfi_event(device.GetLogical().CreateEvent()) { | 174 | wfi_event(device.GetLogical().CreateEvent()) { |
| 178 | scheduler.SetQueryCache(query_cache); | 175 | scheduler.SetQueryCache(query_cache); |
| @@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 199 | 196 | ||
| 200 | UpdateDynamicStates(); | 197 | UpdateDynamicStates(); |
| 201 | 198 | ||
| 202 | const auto& regs{maxwell3d.regs}; | 199 | const auto& regs{maxwell3d->regs}; |
| 203 | const u32 num_instances{maxwell3d.mme_draw.instance_count}; | 200 | const u32 num_instances{maxwell3d->mme_draw.instance_count}; |
| 204 | const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; | 201 | const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; |
| 205 | scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { | 202 | scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { |
| 206 | if (draw_params.is_indexed) { | 203 | if (draw_params.is_indexed) { |
| @@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 218 | void RasterizerVulkan::Clear() { | 215 | void RasterizerVulkan::Clear() { |
| 219 | MICROPROFILE_SCOPE(Vulkan_Clearing); | 216 | MICROPROFILE_SCOPE(Vulkan_Clearing); |
| 220 | 217 | ||
| 221 | if (!maxwell3d.ShouldExecute()) { | 218 | if (!maxwell3d->ShouldExecute()) { |
| 222 | return; | 219 | return; |
| 223 | } | 220 | } |
| 224 | FlushWork(); | 221 | FlushWork(); |
| 225 | 222 | ||
| 226 | query_cache.UpdateCounters(); | 223 | query_cache.UpdateCounters(); |
| 227 | 224 | ||
| 228 | auto& regs = maxwell3d.regs; | 225 | auto& regs = maxwell3d->regs; |
| 229 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 226 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 230 | regs.clear_buffers.A; | 227 | regs.clear_buffers.A; |
| 231 | const bool use_depth = regs.clear_buffers.Z; | 228 | const bool use_depth = regs.clear_buffers.Z; |
| @@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() { | |||
| 339 | return; | 336 | return; |
| 340 | } | 337 | } |
| 341 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | 338 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; |
| 342 | pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); | 339 | pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); |
| 343 | 340 | ||
| 344 | const auto& qmd{kepler_compute.launch_description}; | 341 | const auto& qmd{kepler_compute->launch_description}; |
| 345 | const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; | 342 | const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; |
| 346 | scheduler.RequestOutsideRenderPassOperationContext(); | 343 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 347 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | 344 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); |
| @@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { | |||
| 451 | 448 | ||
| 452 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 449 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { |
| 453 | if (!gpu.IsAsync()) { | 450 | if (!gpu.IsAsync()) { |
| 454 | gpu_memory.Write<u32>(addr, value); | 451 | gpu_memory->Write<u32>(addr, value); |
| 455 | return; | 452 | return; |
| 456 | } | 453 | } |
| 457 | fence_manager.SignalSemaphore(addr, value); | 454 | auto paddr = gpu_memory->GetPointer(addr); |
| 455 | fence_manager.SignalSemaphore(paddr, value); | ||
| 458 | } | 456 | } |
| 459 | 457 | ||
| 460 | void RasterizerVulkan::SignalSyncPoint(u32 value) { | 458 | void RasterizerVulkan::SignalSyncPoint(u32 value) { |
| @@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() | |||
| 553 | 551 | ||
| 554 | void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 552 | void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 555 | std::span<u8> memory) { | 553 | std::span<u8> memory) { |
| 556 | auto cpu_addr = gpu_memory.GpuToCpuAddress(address); | 554 | auto cpu_addr = gpu_memory->GpuToCpuAddress(address); |
| 557 | if (!cpu_addr) [[unlikely]] { | 555 | if (!cpu_addr) [[unlikely]] { |
| 558 | gpu_memory.WriteBlock(address, memory.data(), copy_size); | 556 | gpu_memory->WriteBlock(address, memory.data(), copy_size); |
| 559 | return; | 557 | return; |
| 560 | } | 558 | } |
| 561 | gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); | 559 | gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); |
| 562 | { | 560 | { |
| 563 | std::unique_lock<std::mutex> lock{buffer_cache.mutex}; | 561 | std::unique_lock<std::mutex> lock{buffer_cache.mutex}; |
| 564 | if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { | 562 | if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { |
| @@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | |||
| 627 | } | 625 | } |
| 628 | 626 | ||
| 629 | void RasterizerVulkan::UpdateDynamicStates() { | 627 | void RasterizerVulkan::UpdateDynamicStates() { |
| 630 | auto& regs = maxwell3d.regs; | 628 | auto& regs = maxwell3d->regs; |
| 631 | UpdateViewportsState(regs); | 629 | UpdateViewportsState(regs); |
| 632 | UpdateScissorsState(regs); | 630 | UpdateScissorsState(regs); |
| 633 | UpdateDepthBias(regs); | 631 | UpdateDepthBias(regs); |
| @@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 651 | } | 649 | } |
| 652 | 650 | ||
| 653 | void RasterizerVulkan::BeginTransformFeedback() { | 651 | void RasterizerVulkan::BeginTransformFeedback() { |
| 654 | const auto& regs = maxwell3d.regs; | 652 | const auto& regs = maxwell3d->regs; |
| 655 | if (regs.tfb_enabled == 0) { | 653 | if (regs.tfb_enabled == 0) { |
| 656 | return; | 654 | return; |
| 657 | } | 655 | } |
| @@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 667 | } | 665 | } |
| 668 | 666 | ||
| 669 | void RasterizerVulkan::EndTransformFeedback() { | 667 | void RasterizerVulkan::EndTransformFeedback() { |
| 670 | const auto& regs = maxwell3d.regs; | 668 | const auto& regs = maxwell3d->regs; |
| 671 | if (regs.tfb_enabled == 0) { | 669 | if (regs.tfb_enabled == 0) { |
| 672 | return; | 670 | return; |
| 673 | } | 671 | } |
| @@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 917 | } | 915 | } |
| 918 | 916 | ||
| 919 | void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { | 917 | void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 920 | auto& dirty{maxwell3d.dirty.flags}; | 918 | auto& dirty{maxwell3d->dirty.flags}; |
| 921 | if (!dirty[Dirty::VertexInput]) { | 919 | if (!dirty[Dirty::VertexInput]) { |
| 922 | return; | 920 | return; |
| 923 | } | 921 | } |
| @@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 974 | }); | 972 | }); |
| 975 | } | 973 | } |
| 976 | 974 | ||
| 975 | void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { | ||
| 976 | CreateChannel(channel); | ||
| 977 | { | ||
| 978 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 979 | texture_cache.CreateChannel(channel); | ||
| 980 | buffer_cache.CreateChannel(channel); | ||
| 981 | } | ||
| 982 | pipeline_cache.CreateChannel(channel); | ||
| 983 | query_cache.CreateChannel(channel); | ||
| 984 | state_tracker.SetupTables(channel); | ||
| 985 | } | ||
| 986 | |||
| 987 | void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { | ||
| 988 | const s32 channel_id = channel.bind_id; | ||
| 989 | BindToChannel(channel_id); | ||
| 990 | { | ||
| 991 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 992 | texture_cache.BindToChannel(channel_id); | ||
| 993 | buffer_cache.BindToChannel(channel_id); | ||
| 994 | } | ||
| 995 | pipeline_cache.BindToChannel(channel_id); | ||
| 996 | query_cache.BindToChannel(channel_id); | ||
| 997 | state_tracker.ChangeChannel(channel); | ||
| 998 | scheduler.InvalidateState(); | ||
| 999 | } | ||
| 1000 | |||
| 1001 | void RasterizerVulkan::ReleaseChannel(s32 channel_id) { | ||
| 1002 | EraseChannel(channel_id); | ||
| 1003 | { | ||
| 1004 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 1005 | texture_cache.EraseChannel(channel_id); | ||
| 1006 | buffer_cache.EraseChannel(channel_id); | ||
| 1007 | } | ||
| 1008 | pipeline_cache.EraseChannel(channel_id); | ||
| 1009 | query_cache.EraseChannel(channel_id); | ||
| 1010 | } | ||
| 1011 | |||
| 977 | } // namespace Vulkan | 1012 | } // namespace Vulkan |
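The three new methods are where the per-channel design comes together: `InitializeChannel` registers a channel with every per-channel consumer (texture, buffer, pipeline and query caches) and builds the dirty-flag tables for that channel's Maxwell3D instance; `BindChannel` switches all of them plus the state tracker to the channel and invalidates the scheduler state so no stale dynamic state leaks across channels; `ReleaseChannel` unwinds the bookkeeping. A rough caller-side sketch of the expected order — how the `ChannelState` itself is produced is outside this diff and assumed here:

```cpp
// Hypothetical glue code: only the three rasterizer entry points below are part
// of this diff; the ownership and construction of ChannelState are assumed.
void RunChannelWork(Vulkan::RasterizerVulkan& rasterizer,
                    Tegra::Control::ChannelState& channel) {
    rasterizer.InitializeChannel(channel);       // once, when the channel is opened
    rasterizer.BindChannel(channel);             // before work from this channel executes
    // ... record draws / dispatches for the bound channel ...
    rasterizer.ReleaseChannel(channel.bind_id);  // when the channel is torn down
}
```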
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 0370ea39b..642fe6576 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <boost/container/static_vector.hpp> | 8 | #include <boost/container/static_vector.hpp> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/control/channel_state_cache.h" | ||
| 11 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 12 | #include "video_core/rasterizer_accelerated.h" | 13 | #include "video_core/rasterizer_accelerated.h" |
| 13 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| @@ -54,13 +55,13 @@ private: | |||
| 54 | BufferCache& buffer_cache; | 55 | BufferCache& buffer_cache; |
| 55 | }; | 56 | }; |
| 56 | 57 | ||
| 57 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 58 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, |
| 59 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 58 | public: | 60 | public: |
| 59 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 61 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 60 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 62 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, |
| 61 | ScreenInfo& screen_info_, const Device& device_, | 63 | const Device& device_, MemoryAllocator& memory_allocator_, |
| 62 | MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, | 64 | StateTracker& state_tracker_, Scheduler& scheduler_); |
| 63 | Scheduler& scheduler_); | ||
| 64 | ~RasterizerVulkan() override; | 65 | ~RasterizerVulkan() override; |
| 65 | 66 | ||
| 66 | void Draw(bool is_indexed, bool is_instanced) override; | 67 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -99,6 +100,12 @@ public: | |||
| 99 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 100 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 100 | const VideoCore::DiskResourceLoadCallback& callback) override; | 101 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 101 | 102 | ||
| 103 | void InitializeChannel(Tegra::Control::ChannelState& channel) override; | ||
| 104 | |||
| 105 | void BindChannel(Tegra::Control::ChannelState& channel) override; | ||
| 106 | |||
| 107 | void ReleaseChannel(s32 channel_id) override; | ||
| 108 | |||
| 102 | private: | 109 | private: |
| 103 | static constexpr size_t MAX_TEXTURES = 192; | 110 | static constexpr size_t MAX_TEXTURES = 192; |
| 104 | static constexpr size_t MAX_IMAGES = 48; | 111 | static constexpr size_t MAX_IMAGES = 48; |
| @@ -134,9 +141,6 @@ private: | |||
| 134 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | 141 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); |
| 135 | 142 | ||
| 136 | Tegra::GPU& gpu; | 143 | Tegra::GPU& gpu; |
| 137 | Tegra::MemoryManager& gpu_memory; | ||
| 138 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 139 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 140 | 144 | ||
| 141 | ScreenInfo& screen_info; | 145 | ScreenInfo& screen_info; |
| 142 | const Device& device; | 146 | const Device& device; |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 9ad096431..a87bf8dd3 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "video_core/control/channel_state.h" | ||
| 10 | #include "video_core/dirty_flags.h" | 11 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| @@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) { | |||
| 174 | } | 175 | } |
| 175 | } // Anonymous namespace | 176 | } // Anonymous namespace |
| 176 | 177 | ||
| 177 | StateTracker::StateTracker(Tegra::GPU& gpu) | 178 | void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) { |
| 178 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 179 | auto& tables{channel_state.maxwell_3d->dirty.tables}; |
| 179 | auto& tables{gpu.Maxwell3D().dirty.tables}; | ||
| 180 | SetupDirtyFlags(tables); | 180 | SetupDirtyFlags(tables); |
| 181 | SetupDirtyViewports(tables); | 181 | SetupDirtyViewports(tables); |
| 182 | SetupDirtyScissors(tables); | 182 | SetupDirtyScissors(tables); |
| @@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) | |||
| 199 | SetupDirtyVertexBindings(tables); | 199 | SetupDirtyVertexBindings(tables); |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) { | ||
| 203 | flags = &channel_state.maxwell_3d->dirty.flags; | ||
| 204 | } | ||
| 205 | |||
| 206 | StateTracker::StateTracker(Tegra::GPU& gpu) | ||
| 207 | : flags{}, invalidation_flags{MakeInvalidationFlags()} {} | ||
| 208 | |||
| 202 | } // namespace Vulkan | 209 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index a85bc1c10..9f8a887f9 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -10,6 +10,12 @@ | |||
| 10 | #include "video_core/dirty_flags.h" | 10 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 12 | 12 | ||
| 13 | namespace Tegra { | ||
| 14 | namespace Control { | ||
| 15 | struct ChannelState; | ||
| 16 | } | ||
| 17 | } // namespace Tegra | ||
| 18 | |||
| 13 | namespace Vulkan { | 19 | namespace Vulkan { |
| 14 | 20 | ||
| 15 | namespace Dirty { | 21 | namespace Dirty { |
| @@ -56,16 +62,16 @@ public: | |||
| 56 | explicit StateTracker(Tegra::GPU& gpu); | 62 | explicit StateTracker(Tegra::GPU& gpu); |
| 57 | 63 | ||
| 58 | void InvalidateCommandBufferState() { | 64 | void InvalidateCommandBufferState() { |
| 59 | flags |= invalidation_flags; | 65 | (*flags) |= invalidation_flags; |
| 60 | current_topology = INVALID_TOPOLOGY; | 66 | current_topology = INVALID_TOPOLOGY; |
| 61 | } | 67 | } |
| 62 | 68 | ||
| 63 | void InvalidateViewports() { | 69 | void InvalidateViewports() { |
| 64 | flags[Dirty::Viewports] = true; | 70 | (*flags)[Dirty::Viewports] = true; |
| 65 | } | 71 | } |
| 66 | 72 | ||
| 67 | void InvalidateScissors() { | 73 | void InvalidateScissors() { |
| 68 | flags[Dirty::Scissors] = true; | 74 | (*flags)[Dirty::Scissors] = true; |
| 69 | } | 75 | } |
| 70 | 76 | ||
| 71 | bool TouchViewports() { | 77 | bool TouchViewports() { |
| @@ -139,16 +145,20 @@ public: | |||
| 139 | return has_changed; | 145 | return has_changed; |
| 140 | } | 146 | } |
| 141 | 147 | ||
| 148 | void SetupTables(Tegra::Control::ChannelState& channel_state); | ||
| 149 | |||
| 150 | void ChangeChannel(Tegra::Control::ChannelState& channel_state); | ||
| 151 | |||
| 142 | private: | 152 | private: |
| 143 | static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); | 153 | static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); |
| 144 | 154 | ||
| 145 | bool Exchange(std::size_t id, bool new_value) const noexcept { | 155 | bool Exchange(std::size_t id, bool new_value) const noexcept { |
| 146 | const bool is_dirty = flags[id]; | 156 | const bool is_dirty = (*flags)[id]; |
| 147 | flags[id] = new_value; | 157 | (*flags)[id] = new_value; |
| 148 | return is_dirty; | 158 | return is_dirty; |
| 149 | } | 159 | } |
| 150 | 160 | ||
| 151 | Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; | 161 | Tegra::Engines::Maxwell3D::DirtyState::Flags* flags; |
| 152 | Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; | 162 | Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; |
| 153 | Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; | 163 | Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; |
| 154 | }; | 164 | }; |
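Storing the dirty-flag set as a pointer rather than a reference is what makes `ChangeChannel` possible: a reference is bound once at construction and can never be repointed, while the pointer can be reseated to whichever channel's Maxwell3D instance is currently bound. The flip side is an ordering requirement: the constructor now leaves `flags` null, so `ChangeChannel` (reached via `BindChannel` in the rasterizer) has to run before anything dereferences it. A stripped-down illustration of the reseating idea, not the actual tracker:

```cpp
#include <bitset>

// Minimal stand-in for the per-channel dirty flags.
using Flags = std::bitset<64>;

struct TinyTracker {
    Flags* flags = nullptr;                 // was a `Flags&` before: not reseatable

    void ChangeChannel(Flags& channel_flags) {
        flags = &channel_flags;             // follow whichever channel is bound
    }
    void InvalidateAll(const Flags& invalidation) {
        (*flags) |= invalidation;           // every access now pays one indirection
    }
};
```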
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 164e4ee0e..f53066579 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | 9 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 10 | #include "shader_recompiler/object_pool.h" | 10 | #include "shader_recompiler/object_pool.h" |
| 11 | #include "video_core/control/channel_state.h" | ||
| 11 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 12 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 13 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| @@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() { | |||
| 33 | RemovePendingShaders(); | 34 | RemovePendingShaders(); |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | 37 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} |
| 37 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 38 | Tegra::Engines::KeplerCompute& kepler_compute_) | ||
| 39 | : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||
| 40 | rasterizer{rasterizer_} {} | ||
| 41 | 38 | ||
| 42 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | 39 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { |
| 43 | auto& dirty{maxwell3d.dirty.flags}; | 40 | auto& dirty{maxwell3d->dirty.flags}; |
| 44 | if (!dirty[VideoCommon::Dirty::Shaders]) { | 41 | if (!dirty[VideoCommon::Dirty::Shaders]) { |
| 45 | return last_shaders_valid; | 42 | return last_shaders_valid; |
| 46 | } | 43 | } |
| 47 | dirty[VideoCommon::Dirty::Shaders] = false; | 44 | dirty[VideoCommon::Dirty::Shaders] = false; |
| 48 | 45 | ||
| 49 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 46 | const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()}; |
| 50 | for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { | 47 | for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { |
| 51 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | 48 | if (!maxwell3d->regs.IsShaderConfigEnabled(index)) { |
| 52 | unique_hashes[index] = 0; | 49 | unique_hashes[index] = 0; |
| 53 | continue; | 50 | continue; |
| 54 | } | 51 | } |
| 55 | const auto& shader_config{maxwell3d.regs.shader_config[index]}; | 52 | const auto& shader_config{maxwell3d->regs.shader_config[index]}; |
| 56 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | 53 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; |
| 57 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | 54 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; |
| 58 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | 55 | const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)}; |
| 59 | if (!cpu_shader_addr) { | 56 | if (!cpu_shader_addr) { |
| 60 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | 57 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); |
| 61 | last_shaders_valid = false; | 58 | last_shaders_valid = false; |
| @@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | |||
| 64 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; | 61 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; |
| 65 | if (!shader_info) { | 62 | if (!shader_info) { |
| 66 | const u32 start_address{shader_config.offset}; | 63 | const u32 start_address{shader_config.offset}; |
| 67 | GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; | 64 | GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address}; |
| 68 | shader_info = MakeShaderInfo(env, *cpu_shader_addr); | 65 | shader_info = MakeShaderInfo(env, *cpu_shader_addr); |
| 69 | } | 66 | } |
| 70 | shader_infos[index] = shader_info; | 67 | shader_infos[index] = shader_info; |
| @@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | |||
| 75 | } | 72 | } |
| 76 | 73 | ||
| 77 | const ShaderInfo* ShaderCache::ComputeShader() { | 74 | const ShaderInfo* ShaderCache::ComputeShader() { |
| 78 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | 75 | const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()}; |
| 79 | const auto& qmd{kepler_compute.launch_description}; | 76 | const auto& qmd{kepler_compute->launch_description}; |
| 80 | const GPUVAddr shader_addr{program_base + qmd.program_start}; | 77 | const GPUVAddr shader_addr{program_base + qmd.program_start}; |
| 81 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | 78 | const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)}; |
| 82 | if (!cpu_shader_addr) { | 79 | if (!cpu_shader_addr) { |
| 83 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | 80 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); |
| 84 | return nullptr; | 81 | return nullptr; |
| @@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() { | |||
| 86 | if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { | 83 | if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { |
| 87 | return shader; | 84 | return shader; |
| 88 | } | 85 | } |
| 89 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | 86 | ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start}; |
| 90 | return MakeShaderInfo(env, *cpu_shader_addr); | 87 | return MakeShaderInfo(env, *cpu_shader_addr); |
| 91 | } | 88 | } |
| 92 | 89 | ||
| 93 | void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, | 90 | void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, |
| 94 | const std::array<u64, NUM_PROGRAMS>& unique_hashes) { | 91 | const std::array<u64, NUM_PROGRAMS>& unique_hashes) { |
| 95 | size_t env_index{}; | 92 | size_t env_index{}; |
| 96 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 93 | const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()}; |
| 97 | for (size_t index = 0; index < NUM_PROGRAMS; ++index) { | 94 | for (size_t index = 0; index < NUM_PROGRAMS; ++index) { |
| 98 | if (unique_hashes[index] == 0) { | 95 | if (unique_hashes[index] == 0) { |
| 99 | continue; | 96 | continue; |
| 100 | } | 97 | } |
| 101 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | 98 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; |
| 102 | auto& env{result.envs[index]}; | 99 | auto& env{result.envs[index]}; |
| 103 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | 100 | const u32 start_address{maxwell3d->regs.shader_config[index].offset}; |
| 104 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | 101 | env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address}; |
| 105 | env.SetCachedSize(shader_infos[index]->size_bytes); | 102 | env.SetCachedSize(shader_infos[index]->size_bytes); |
| 106 | result.env_ptrs[env_index++] = &env; | 103 | result.env_ptrs[env_index++] = &env; |
| 107 | } | 104 | } |
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index f67cea8c4..a4391202d 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | 13 | ||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "video_core/control/channel_state_cache.h" | ||
| 15 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 16 | #include "video_core/shader_environment.h" | 17 | #include "video_core/shader_environment.h" |
| 17 | 18 | ||
| @@ -19,6 +20,10 @@ namespace Tegra { | |||
| 19 | class MemoryManager; | 20 | class MemoryManager; |
| 20 | } | 21 | } |
| 21 | 22 | ||
| 23 | namespace Tegra::Control { | ||
| 24 | struct ChannelState; | ||
| 25 | } | ||
| 26 | |||
| 22 | namespace VideoCommon { | 27 | namespace VideoCommon { |
| 23 | 28 | ||
| 24 | class GenericEnvironment; | 29 | class GenericEnvironment; |
| @@ -28,7 +33,7 @@ struct ShaderInfo { | |||
| 28 | size_t size_bytes{}; | 33 | size_t size_bytes{}; |
| 29 | }; | 34 | }; |
| 30 | 35 | ||
| 31 | class ShaderCache { | 36 | class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 32 | static constexpr u64 YUZU_PAGEBITS = 14; | 37 | static constexpr u64 YUZU_PAGEBITS = 14; |
| 33 | static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS; | 38 | static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS; |
| 34 | 39 | ||
| @@ -71,9 +76,7 @@ protected: | |||
| 71 | } | 76 | } |
| 72 | }; | 77 | }; |
| 73 | 78 | ||
| 74 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | 79 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); |
| 75 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 76 | Tegra::Engines::KeplerCompute& kepler_compute_); | ||
| 77 | 80 | ||
| 78 | /// @brief Update the hashes and information of shader stages | 81 | /// @brief Update the hashes and information of shader stages |
| 79 | /// @param unique_hashes Shader hashes to store into when a stage is enabled | 82 | /// @param unique_hashes Shader hashes to store into when a stage is enabled |
| @@ -88,10 +91,6 @@ protected: | |||
| 88 | void GetGraphicsEnvironments(GraphicsEnvironments& result, | 91 | void GetGraphicsEnvironments(GraphicsEnvironments& result, |
| 89 | const std::array<u64, NUM_PROGRAMS>& unique_hashes); | 92 | const std::array<u64, NUM_PROGRAMS>& unique_hashes); |
| 90 | 93 | ||
| 91 | Tegra::MemoryManager& gpu_memory; | ||
| 92 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 93 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 94 | |||
| 95 | std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; | 94 | std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; |
| 96 | bool last_shaders_valid = false; | 95 | bool last_shaders_valid = false; |
| 97 | 96 | ||
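The three engine/memory references removed here do not disappear; `ShaderCache` (like the rasterizer and the texture, buffer and query caches in this change) now derives from `VideoCommon::ChannelSetupCaches<ChannelInfo>`, and the `maxwell3d->`, `kepler_compute->` and `gpu_memory->` pointers used in shader_cache.cpp are presumably members of that base, repointed whenever a channel is bound. The base class itself is not shown in this part of the diff, so the following is only a guess at its shape:

```cpp
// Speculative outline of the channel-aware base; the real definition lives in
// video_core/control/channel_state_cache.h and may differ in detail.
template <class ChannelInfo>
class ChannelSetupCaches {
public:
    void CreateChannel(Tegra::Control::ChannelState& channel);  // allocate per-channel info
    void BindToChannel(s32 id);                                 // repoint the pointers below
    void EraseChannel(s32 id);                                  // drop the per-channel info

protected:
    Tegra::Engines::Maxwell3D* maxwell3d = nullptr;
    Tegra::Engines::KeplerCompute* kepler_compute = nullptr;
    Tegra::MemoryManager* gpu_memory = nullptr;
};
```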
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1f85ec9da..620565684 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -88,6 +88,9 @@ struct ImageBase { | |||
| 88 | u32 scale_rating = 0; | 88 | u32 scale_rating = 0; |
| 89 | u64 scale_tick = 0; | 89 | u64 scale_tick = 0; |
| 90 | bool has_scaled = false; | 90 | bool has_scaled = false; |
| 91 | |||
| 92 | size_t channel = 0; | ||
| 93 | |||
| 91 | ImageFlagBits flags = ImageFlagBits::CpuModified; | 94 | ImageFlagBits flags = ImageFlagBits::CpuModified; |
| 92 | 95 | ||
| 93 | GPUVAddr gpu_addr = 0; | 96 | GPUVAddr gpu_addr = 0; |
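Each cached image now carries the id of the channel it was created under (defaulting to 0). Since the texture cache below keeps per-channel descriptor state, a per-image channel tag is the natural way to relate an image back to the address space and engine state that produced it; how the field is actually consumed is not visible in this part of the diff. A hypothetical use, with `current_channel_id` taken from the channel-aware cache state shown below:

```cpp
// Hypothetical: tag a freshly inserted image with the currently bound channel
// (slot_images and current_channel_id both appear in the texture cache below).
Image& image = slot_images[image_id];
image.channel = current_channel_id;
```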
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1dbe01bc0..2731aead0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/alignment.h" | 8 | #include "common/alignment.h" |
| 9 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 10 | #include "video_core/control/channel_state.h" | ||
| 10 | #include "video_core/dirty_flags.h" | 11 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | 12 | #include "video_core/engines/kepler_compute.h" |
| 12 | #include "video_core/texture_cache/image_view_base.h" | 13 | #include "video_core/texture_cache/image_view_base.h" |
| @@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 29 | using namespace Common::Literals; | 30 | using namespace Common::Literals; |
| 30 | 31 | ||
| 31 | template <class P> | 32 | template <class P> |
| 32 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | 33 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) |
| 33 | Tegra::Engines::Maxwell3D& maxwell3d_, | 34 | : runtime{runtime_}, rasterizer{rasterizer_} { |
| 34 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 35 | Tegra::MemoryManager& gpu_memory_) | ||
| 36 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 37 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 38 | // Configure null sampler | 35 | // Configure null sampler |
| 39 | TSCEntry sampler_descriptor{}; | 36 | TSCEntry sampler_descriptor{}; |
| 40 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | 37 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); |
| @@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 42 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | 39 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); |
| 43 | sampler_descriptor.cubemap_anisotropy.Assign(1); | 40 | sampler_descriptor.cubemap_anisotropy.Assign(1); |
| 44 | 41 | ||
| 42 | // Setup channels | ||
| 43 | current_channel_id = UNSET_CHANNEL; | ||
| 44 | state = nullptr; | ||
| 45 | maxwell3d = nullptr; | ||
| 46 | kepler_compute = nullptr; | ||
| 47 | gpu_memory = nullptr; | ||
| 48 | |||
| 45 | // Make sure the first index is reserved for the null resources | 49 | // Make sure the first index is reserved for the null resources |
| 46 | // This way the null resource becomes a compile time constant | 50 | // This way the null resource becomes a compile time constant |
| 47 | void(slot_images.insert(NullImageParams{})); | 51 | void(slot_images.insert(NullImageParams{})); |
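Beyond nulling out the engine pointers, the constructor introduces `current_channel_id` and a `state` pointer, and the following hunks route every descriptor-table and lookup-map access (`graphics_image_table`, `compute_sampler_ids`, `image_views`, `samplers`, ...) through `state->`, giving each channel its own view of the TIC/TSC tables. The concrete type behind `state` is not shown in this part of the diff; the outline below is a guess derived only from the members accessed through it, with constructors and hash functors omitted:

```cpp
// Speculative outline of the per-channel texture cache state; the real type is
// presumably a TextureCache-specific ChannelInfo living alongside
// video_core/control/channel_state_cache.h and may differ in detail.
struct TextureCacheChannelState {
    DescriptorTable<TICEntry> graphics_image_table;
    DescriptorTable<TSCEntry> graphics_sampler_table;
    DescriptorTable<TICEntry> compute_image_table;
    DescriptorTable<TSCEntry> compute_sampler_table;

    std::vector<ImageViewId> graphics_image_view_ids;
    std::vector<SamplerId> graphics_sampler_ids;
    std::vector<ImageViewId> compute_image_view_ids;
    std::vector<SamplerId> compute_sampler_ids;

    std::unordered_map<TICEntry, ImageViewId> image_views;  // hash omitted
    std::unordered_map<TSCEntry, SamplerId> samplers;       // hash omitted
};
```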
| @@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 93 | const auto copies = FullDownloadCopies(image.info); | 97 | const auto copies = FullDownloadCopies(image.info); |
| 94 | image.DownloadMemory(map, copies); | 98 | image.DownloadMemory(map, copies); |
| 95 | runtime.Finish(); | 99 | runtime.Finish(); |
| 96 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 100 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 97 | } | 101 | } |
| 98 | if (True(image.flags & ImageFlagBits::Tracked)) { | 102 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 99 | UntrackImage(image, image_id); | 103 | UntrackImage(image, image_id); |
| @@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept { | |||
| 152 | template <class P> | 156 | template <class P> |
| 153 | template <bool has_blacklists> | 157 | template <bool has_blacklists> |
| 154 | void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { | 158 | void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { |
| 155 | FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); | 159 | FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids, |
| 160 | views); | ||
| 156 | } | 161 | } |
| 157 | 162 | ||
| 158 | template <class P> | 163 | template <class P> |
| 159 | void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { | 164 | void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { |
| 160 | FillImageViews<true>(compute_image_table, compute_image_view_ids, views); | 165 | FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views); |
| 161 | } | 166 | } |
| 162 | 167 | ||
| 163 | template <class P> | 168 | template <class P> |
| 164 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | 169 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { |
| 165 | if (index > graphics_sampler_table.Limit()) { | 170 | if (index > state->graphics_sampler_table.Limit()) { |
| 166 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | 171 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); |
| 167 | return &slot_samplers[NULL_SAMPLER_ID]; | 172 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 168 | } | 173 | } |
| 169 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | 174 | const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index); |
| 170 | SamplerId& id = graphics_sampler_ids[index]; | 175 | SamplerId& id = state->graphics_sampler_ids[index]; |
| 171 | if (is_new) { | 176 | if (is_new) { |
| 172 | id = FindSampler(descriptor); | 177 | id = FindSampler(descriptor); |
| 173 | } | 178 | } |
| @@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | |||
| 176 | 181 | ||
| 177 | template <class P> | 182 | template <class P> |
| 178 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | 183 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 179 | if (index > compute_sampler_table.Limit()) { | 184 | if (index > state->compute_sampler_table.Limit()) { |
| 180 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | 185 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); |
| 181 | return &slot_samplers[NULL_SAMPLER_ID]; | 186 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 182 | } | 187 | } |
| 183 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | 188 | const auto [descriptor, is_new] = state->compute_sampler_table.Read(index); |
| 184 | SamplerId& id = compute_sampler_ids[index]; | 189 | SamplerId& id = state->compute_sampler_ids[index]; |
| 185 | if (is_new) { | 190 | if (is_new) { |
| 186 | id = FindSampler(descriptor); | 191 | id = FindSampler(descriptor); |
| 187 | } | 192 | } |
| @@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | |||
| 191 | template <class P> | 196 | template <class P> |
| 192 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { | 197 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { |
| 193 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; | 198 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; |
| 194 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; | 199 | const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex; |
| 195 | const u32 tic_limit = maxwell3d.regs.tic.limit; | 200 | const u32 tic_limit = maxwell3d->regs.tic.limit; |
| 196 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; | 201 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit; |
| 197 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { | 202 | if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) { |
| 198 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | 203 | state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 199 | } | 204 | } |
| 200 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | 205 | if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { |
| 201 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | 206 | state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); |
| 202 | } | 207 | } |
| 203 | } | 208 | } |
| 204 | 209 | ||
| 205 | template <class P> | 210 | template <class P> |
| 206 | void TextureCache<P>::SynchronizeComputeDescriptors() { | 211 | void TextureCache<P>::SynchronizeComputeDescriptors() { |
| 207 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; | 212 | const bool linked_tsc = kepler_compute->launch_description.linked_tsc; |
| 208 | const u32 tic_limit = kepler_compute.regs.tic.limit; | 213 | const u32 tic_limit = kepler_compute->regs.tic.limit; |
| 209 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; | 214 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; |
| 210 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); | 215 | const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); |
| 211 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { | 216 | if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { |
| 212 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | 217 | state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 213 | } | 218 | } |
| 214 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | 219 | if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) { |
| 215 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | 220 | state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); |
| 216 | } | 221 | } |
| 217 | } | 222 | } |
| 218 | 223 | ||
| 219 | template <class P> | 224 | template <class P> |
| 220 | bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | 225 | bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { |
| 221 | auto& flags = maxwell3d.dirty.flags; | 226 | auto& flags = maxwell3d->dirty.flags; |
| 222 | u32 scale_rating = 0; | 227 | u32 scale_rating = 0; |
| 223 | bool rescaled = false; | 228 | bool rescaled = false; |
| 224 | std::array<ImageId, NUM_RT> tmp_color_images{}; | 229 | std::array<ImageId, NUM_RT> tmp_color_images{}; |
| @@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | |||
| 315 | template <class P> | 320 | template <class P> |
| 316 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | 321 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 317 | using namespace VideoCommon::Dirty; | 322 | using namespace VideoCommon::Dirty; |
| 318 | auto& flags = maxwell3d.dirty.flags; | 323 | auto& flags = maxwell3d->dirty.flags; |
| 319 | if (!flags[Dirty::RenderTargets]) { | 324 | if (!flags[Dirty::RenderTargets]) { |
| 320 | for (size_t index = 0; index < NUM_RT; ++index) { | 325 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 321 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | 326 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| @@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 342 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | 347 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); |
| 343 | 348 | ||
| 344 | for (size_t index = 0; index < NUM_RT; ++index) { | 349 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 345 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); | 350 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index)); |
| 346 | } | 351 | } |
| 347 | u32 up_scale = 1; | 352 | u32 up_scale = 1; |
| 348 | u32 down_shift = 0; | 353 | u32 down_shift = 0; |
| @@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 351 | down_shift = Settings::values.resolution_info.down_shift; | 356 | down_shift = Settings::values.resolution_info.down_shift; |
| 352 | } | 357 | } |
| 353 | render_targets.size = Extent2D{ | 358 | render_targets.size = Extent2D{ |
| 354 | (maxwell3d.regs.render_area.width * up_scale) >> down_shift, | 359 | (maxwell3d->regs.render_area.width * up_scale) >> down_shift, |
| 355 | (maxwell3d.regs.render_area.height * up_scale) >> down_shift, | 360 | (maxwell3d->regs.render_area.height * up_scale) >> down_shift, |
| 356 | }; | 361 | }; |
| 357 | 362 | ||
| 358 | flags[Dirty::DepthBiasGlobal] = true; | 363 | flags[Dirty::DepthBiasGlobal] = true; |
| @@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 458 | const auto copies = FullDownloadCopies(image.info); | 463 | const auto copies = FullDownloadCopies(image.info); |
| 459 | image.DownloadMemory(map, copies); | 464 | image.DownloadMemory(map, copies); |
| 460 | runtime.Finish(); | 465 | runtime.Finish(); |
| 461 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 466 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 462 | } | 467 | } |
| 463 | } | 468 | } |
| 464 | 469 | ||
| @@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 655 | for (const ImageId image_id : download_ids) { | 660 | for (const ImageId image_id : download_ids) { |
| 656 | const ImageBase& image = slot_images[image_id]; | 661 | const ImageBase& image = slot_images[image_id]; |
| 657 | const auto copies = FullDownloadCopies(image.info); | 662 | const auto copies = FullDownloadCopies(image.info); |
| 658 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | 663 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); |
| 659 | download_map.offset += image.unswizzled_size_bytes; | 664 | download_map.offset += image.unswizzled_size_bytes; |
| 660 | download_span = download_span.subspan(image.unswizzled_size_bytes); | 665 | download_span = download_span.subspan(image.unswizzled_size_bytes); |
| 661 | } | 666 | } |
| @@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 714 | const GPUVAddr gpu_addr = image.gpu_addr; | 719 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 715 | 720 | ||
| 716 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | 721 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { |
| 717 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | 722 | gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |
| 718 | const auto uploads = FullUploadSwizzles(image.info); | 723 | const auto uploads = FullUploadSwizzles(image.info); |
| 719 | runtime.AccelerateImageUpload(image, staging, uploads); | 724 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 720 | } else if (True(image.flags & ImageFlagBits::Converted)) { | 725 | } else if (True(image.flags & ImageFlagBits::Converted)) { |
| 721 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | 726 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); |
| 722 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | 727 | auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); |
| 723 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 728 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); |
| 724 | image.UploadMemory(staging, copies); | 729 | image.UploadMemory(staging, copies); |
| 725 | } else { | 730 | } else { |
| 726 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | 731 | const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); |
| 727 | image.UploadMemory(staging, copies); | 732 | image.UploadMemory(staging, copies); |
| 728 | } | 733 | } |
| 729 | } | 734 | } |
| 730 | 735 | ||
| 731 | template <class P> | 736 | template <class P> |
| 732 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | 737 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { |
| 733 | if (!IsValidEntry(gpu_memory, config)) { | 738 | if (!IsValidEntry(*gpu_memory, config)) { |
| 734 | return NULL_IMAGE_VIEW_ID; | 739 | return NULL_IMAGE_VIEW_ID; |
| 735 | } | 740 | } |
| 736 | const auto [pair, is_new] = image_views.try_emplace(config); | 741 | const auto [pair, is_new] = state->image_views.try_emplace(config); |
| 737 | ImageViewId& image_view_id = pair->second; | 742 | ImageViewId& image_view_id = pair->second; |
| 738 | if (is_new) { | 743 | if (is_new) { |
| 739 | image_view_id = CreateImageView(config); | 744 | image_view_id = CreateImageView(config); |
| @@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | |||
| 777 | template <class P> | 782 | template <class P> |
| 778 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | 783 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 779 | RelaxedOptions options) { | 784 | RelaxedOptions options) { |
| 780 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 785 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 781 | if (!cpu_addr) { | 786 | if (!cpu_addr) { |
| 782 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | 787 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 783 | if (!cpu_addr) { | 788 | if (!cpu_addr) { |
| 784 | return ImageId{}; | 789 | return ImageId{}; |
| 785 | } | 790 | } |
| @@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) { | |||
| 860 | image.scale_tick = frame_tick + 1; | 865 | image.scale_tick = frame_tick + 1; |
| 861 | } | 866 | } |
| 862 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | 867 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; |
| 863 | auto& dirty = maxwell3d.dirty.flags; | 868 | auto& dirty = maxwell3d->dirty.flags; |
| 864 | dirty[Dirty::RenderTargets] = true; | 869 | dirty[Dirty::RenderTargets] = true; |
| 865 | dirty[Dirty::ZetaBuffer] = true; | 870 | dirty[Dirty::ZetaBuffer] = true; |
| 866 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | 871 | for (size_t rt = 0; rt < NUM_RT; ++rt) { |
| @@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) { | |||
| 881 | image.image_view_ids.clear(); | 886 | image.image_view_ids.clear(); |
| 882 | image.image_view_infos.clear(); | 887 | image.image_view_infos.clear(); |
| 883 | if constexpr (ENABLE_VALIDATION) { | 888 | if constexpr (ENABLE_VALIDATION) { |
| 884 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | 889 | std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID); |
| 885 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | 890 | std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID); |
| 886 | } | 891 | } |
| 887 | graphics_image_table.Invalidate(); | 892 | state->graphics_image_table.Invalidate(); |
| 888 | compute_image_table.Invalidate(); | 893 | state->compute_image_table.Invalidate(); |
| 889 | has_deleted_images = true; | 894 | has_deleted_images = true; |
| 890 | } | 895 | } |
| 891 | 896 | ||
| @@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) { | |||
| 929 | template <class P> | 934 | template <class P> |
| 930 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 935 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 931 | RelaxedOptions options) { | 936 | RelaxedOptions options) { |
| 932 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 937 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 933 | if (!cpu_addr) { | 938 | if (!cpu_addr) { |
| 934 | const auto size = CalculateGuestSizeInBytes(info); | 939 | const auto size = CalculateGuestSizeInBytes(info); |
| 935 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | 940 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); |
| 936 | if (!cpu_addr) { | 941 | if (!cpu_addr) { |
| 937 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | 942 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; |
| 938 | virtual_invalid_space += Common::AlignUp(size, 32); | 943 | virtual_invalid_space += Common::AlignUp(size, 32); |
| @@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1050 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1055 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| 1051 | Image& new_image = slot_images[new_image_id]; | 1056 | Image& new_image = slot_images[new_image_id]; |
| 1052 | 1057 | ||
| 1053 | if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { | 1058 | if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { |
| 1054 | new_image.flags |= ImageFlagBits::Sparse; | 1059 | new_image.flags |= ImageFlagBits::Sparse; |
| 1055 | } | 1060 | } |
| 1056 | 1061 | ||
| @@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | |||
| 1192 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | 1197 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { |
| 1193 | return NULL_SAMPLER_ID; | 1198 | return NULL_SAMPLER_ID; |
| 1194 | } | 1199 | } |
| 1195 | const auto [pair, is_new] = samplers.try_emplace(config); | 1200 | const auto [pair, is_new] = state->samplers.try_emplace(config); |
| 1196 | if (is_new) { | 1201 | if (is_new) { |
| 1197 | pair->second = slot_samplers.insert(runtime, config); | 1202 | pair->second = slot_samplers.insert(runtime, config); |
| 1198 | } | 1203 | } |
| @@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | |||
| 1201 | 1206 | ||
| 1202 | template <class P> | 1207 | template <class P> |
| 1203 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | 1208 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { |
| 1204 | const auto& regs = maxwell3d.regs; | 1209 | const auto& regs = maxwell3d->regs; |
| 1205 | if (index >= regs.rt_control.count) { | 1210 | if (index >= regs.rt_control.count) { |
| 1206 | return ImageViewId{}; | 1211 | return ImageViewId{}; |
| 1207 | } | 1212 | } |
| @@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | |||
| 1219 | 1224 | ||
| 1220 | template <class P> | 1225 | template <class P> |
| 1221 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | 1226 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { |
| 1222 | const auto& regs = maxwell3d.regs; | 1227 | const auto& regs = maxwell3d->regs; |
| 1223 | if (!regs.zeta_enable) { | 1228 | if (!regs.zeta_enable) { |
| 1224 | return ImageViewId{}; | 1229 | return ImageViewId{}; |
| 1225 | } | 1230 | } |
| @@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu | |||
| 1321 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | 1326 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; |
| 1322 | boost::container::small_vector<ImageId, 8> images; | 1327 | boost::container::small_vector<ImageId, 8> images; |
| 1323 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | 1328 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { |
| 1324 | const auto it = gpu_page_table.find(page); | 1329 | const auto it = state->gpu_page_table.find(page); |
| 1325 | if (it == gpu_page_table.end()) { | 1330 | if (it == state->gpu_page_table.end()) { |
| 1326 | if constexpr (BOOL_BREAK) { | 1331 | if constexpr (BOOL_BREAK) { |
| 1327 | return false; | 1332 | return false; |
| 1328 | } else { | 1333 | } else { |
| @@ -1403,9 +1408,9 @@ template <typename Func> | |||
| 1403 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | 1408 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { |
| 1404 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | 1409 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; |
| 1405 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | 1410 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; |
| 1406 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | 1411 | const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); |
| 1407 | for (const auto& [gpu_addr, size] : segments) { | 1412 | for (const auto& [gpu_addr, size] : segments) { |
| 1408 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 1413 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1409 | ASSERT(cpu_addr); | 1414 | ASSERT(cpu_addr); |
| 1410 | if constexpr (RETURNS_BOOL) { | 1415 | if constexpr (RETURNS_BOOL) { |
| 1411 | if (func(gpu_addr, *cpu_addr, size)) { | 1416 | if (func(gpu_addr, *cpu_addr, size)) { |
| @@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1449 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | 1454 | image.lru_index = lru_cache.Insert(image_id, frame_tick); |
| 1450 | 1455 | ||
| 1451 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1456 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| 1452 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | 1457 | [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); }); |
| 1453 | if (False(image.flags & ImageFlagBits::Sparse)) { | 1458 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1454 | auto map_id = | 1459 | auto map_id = |
| 1455 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | 1460 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); |
| @@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1497 | } | 1502 | } |
| 1498 | image_ids.erase(vector_it); | 1503 | image_ids.erase(vector_it); |
| 1499 | }; | 1504 | }; |
| 1500 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1505 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { |
| 1501 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | 1506 | clear_page_table(page, state->gpu_page_table); |
| 1507 | }); | ||
| 1502 | if (False(image.flags & ImageFlagBits::Sparse)) { | 1508 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1503 | const auto map_id = image.map_view_id; | 1509 | const auto map_id = image.map_view_id; |
| 1504 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | 1510 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { |
| @@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { | |||
| 1631 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | 1637 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); |
| 1632 | 1638 | ||
| 1633 | // Mark render targets as dirty | 1639 | // Mark render targets as dirty |
| 1634 | auto& dirty = maxwell3d.dirty.flags; | 1640 | auto& dirty = maxwell3d->dirty.flags; |
| 1635 | dirty[Dirty::RenderTargets] = true; | 1641 | dirty[Dirty::RenderTargets] = true; |
| 1636 | dirty[Dirty::ZetaBuffer] = true; | 1642 | dirty[Dirty::ZetaBuffer] = true; |
| 1637 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | 1643 | for (size_t rt = 0; rt < NUM_RT; ++rt) { |
| @@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { | |||
| 1681 | if (alloc_images.empty()) { | 1687 | if (alloc_images.empty()) { |
| 1682 | image_allocs_table.erase(alloc_it); | 1688 | image_allocs_table.erase(alloc_it); |
| 1683 | } | 1689 | } |
| 1684 | if constexpr (ENABLE_VALIDATION) { | 1690 | for (auto& this_state : channel_storage) { |
| 1685 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | 1691 | if constexpr (ENABLE_VALIDATION) { |
| 1686 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | 1692 | std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); |
| 1693 | std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); | ||
| 1694 | } | ||
| 1695 | this_state.graphics_image_table.Invalidate(); | ||
| 1696 | this_state.compute_image_table.Invalidate(); | ||
| 1687 | } | 1697 | } |
| 1688 | graphics_image_table.Invalidate(); | ||
| 1689 | compute_image_table.Invalidate(); | ||
| 1690 | has_deleted_images = true; | 1698 | has_deleted_images = true; |
| 1691 | } | 1699 | } |
| 1692 | 1700 | ||
| 1693 | template <class P> | 1701 | template <class P> |
| 1694 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { | 1702 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { |
| 1695 | auto it = image_views.begin(); | 1703 | auto it = state->image_views.begin(); |
| 1696 | while (it != image_views.end()) { | 1704 | while (it != state->image_views.end()) { |
| 1697 | const auto found = std::ranges::find(removed_views, it->second); | 1705 | const auto found = std::ranges::find(removed_views, it->second); |
| 1698 | if (found != removed_views.end()) { | 1706 | if (found != removed_views.end()) { |
| 1699 | it = image_views.erase(it); | 1707 | it = state->image_views.erase(it); |
| 1700 | } else { | 1708 | } else { |
| 1701 | ++it; | 1709 | ++it; |
| 1702 | } | 1710 | } |
| @@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) { | |||
| 1943 | const ImageViewBase& image_view = slot_image_views[id]; | 1951 | const ImageViewBase& image_view = slot_image_views[id]; |
| 1944 | const ImageBase& image = slot_images[image_view.image_id]; | 1952 | const ImageBase& image = slot_images[image_view.image_id]; |
| 1945 | const Extent3D size = image_view.size; | 1953 | const Extent3D size = image_view.size; |
| 1946 | const auto& regs = maxwell3d.regs; | 1954 | const auto& regs = maxwell3d->regs; |
| 1947 | const auto& scissor = regs.scissor_test[0]; | 1955 | const auto& scissor = regs.scissor_test[0]; |
| 1948 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | 1956 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { |
| 1949 | // Images with multiple resources can't be cleared in a single call | 1957 | // Images with multiple resources can't be cleared in a single call |
| @@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) { | |||
| 1958 | scissor.max_y >= size.height; | 1966 | scissor.max_y >= size.height; |
| 1959 | } | 1967 | } |
| 1960 | 1968 | ||
| 1969 | template <class P> | ||
| 1970 | TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept | ||
| 1971 | : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, | ||
| 1972 | gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory}, | ||
| 1973 | graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{ | ||
| 1974 | gpu_memory} {} | ||
| 1975 | |||
| 1976 | template <class P> | ||
| 1977 | void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) { | ||
| 1978 | ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); | ||
| 1979 | auto new_id = [this, &channel]() { | ||
| 1980 | if (!free_channel_ids.empty()) { | ||
| 1981 | auto id = free_channel_ids.front(); | ||
| 1982 | free_channel_ids.pop_front(); | ||
| 1983 | new (&channel_storage[id]) ChannelInfo(channel); | ||
| 1984 | return id; | ||
| 1985 | } | ||
| 1986 | channel_storage.emplace_back(channel); | ||
| 1987 | return channel_storage.size() - 1; | ||
| 1988 | }(); | ||
| 1989 | channel_map.emplace(channel.bind_id, new_id); | ||
| 1990 | if (current_channel_id != UNSET_CHANNEL) { | ||
| 1991 | state = &channel_storage[current_channel_id]; | ||
| 1992 | } | ||
| 1993 | } | ||
| 1994 | |||
| 1995 | /// Bind a channel for execution. | ||
| 1996 | template <class P> | ||
| 1997 | void TextureCache<P>::BindToChannel(s32 id) { | ||
| 1998 | auto it = channel_map.find(id); | ||
| 1999 | ASSERT(it != channel_map.end() && id >= 0); | ||
| 2000 | current_channel_id = it->second; | ||
| 2001 | state = &channel_storage[current_channel_id]; | ||
| 2002 | maxwell3d = &state->maxwell3d; | ||
| 2003 | kepler_compute = &state->kepler_compute; | ||
| 2004 | gpu_memory = &state->gpu_memory; | ||
| 2005 | } | ||
| 2006 | |||
| 2007 | /// Erase channel's state. | ||
| 2008 | template <class P> | ||
| 2009 | void TextureCache<P>::EraseChannel(s32 id) { | ||
| 2010 | const auto it = channel_map.find(id); | ||
| 2011 | ASSERT(it != channel_map.end() && id >= 0); | ||
| 2012 | const auto this_id = it->second; | ||
| 2013 | free_channel_ids.push_back(this_id); | ||
| 2014 | channel_map.erase(it); | ||
| 2015 | if (this_id == current_channel_id) { | ||
| 2016 | current_channel_id = UNSET_CHANNEL; | ||
| 2017 | state = nullptr; | ||
| 2018 | maxwell3d = nullptr; | ||
| 2019 | kepler_compute = nullptr; | ||
| 2020 | gpu_memory = nullptr; | ||
| 2021 | } else if (current_channel_id != UNSET_CHANNEL) { | ||
| 2022 | state = &channel_storage[current_channel_id]; | ||
| 2023 | } | ||
| 2024 | } | ||
| 2025 | |||
| 1961 | } // namespace VideoCommon | 2026 | } // namespace VideoCommon |
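Note on the hunks above: the texture cache now keeps its per-channel data (engine references, descriptor tables, image-view/sampler maps, and the GPU page table) in a ChannelInfo slot inside a deque, recycles freed slots through free_channel_ids, and repoints the raw maxwell3d / kepler_compute / gpu_memory pointers whenever BindToChannel selects a slot. The following is a minimal standalone sketch of that slot-reuse pattern only, with a toy ChannelInfo and a hypothetical ChannelTracker driver; it mirrors the logic in the diff but is not the emulator's actual classes.

// Sketch of the channel-slot pattern: a deque of per-channel state, a free list
// of recycled slot indices, and a map from channel bind id to slot index.
// Toy types only; names follow the diff for readability.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <limits>
#include <unordered_map>

struct ChannelInfo {
    explicit ChannelInfo(std::int32_t bind_id_) : bind_id{bind_id_} {}
    std::int32_t bind_id;
    // The real ChannelInfo holds engine references, descriptor tables, caches...
};

class ChannelTracker {
public:
    static constexpr std::size_t UNSET_CHANNEL = std::numeric_limits<std::size_t>::max();

    void CreateChannel(std::int32_t bind_id) {
        assert(channel_map.find(bind_id) == channel_map.end() && bind_id >= 0);
        std::size_t new_id;
        if (!free_channel_ids.empty()) {
            // Reuse a slot left behind by an erased channel.
            new_id = free_channel_ids.front();
            free_channel_ids.pop_front();
            channel_storage[new_id] = ChannelInfo{bind_id};
        } else {
            new_id = channel_storage.size();
            channel_storage.emplace_back(bind_id);
        }
        channel_map.emplace(bind_id, new_id);
    }

    void BindToChannel(std::int32_t bind_id) {
        const auto it = channel_map.find(bind_id);
        assert(it != channel_map.end() && bind_id >= 0);
        current_channel_id = it->second;
        state = &channel_storage[current_channel_id];
    }

    void EraseChannel(std::int32_t bind_id) {
        const auto it = channel_map.find(bind_id);
        assert(it != channel_map.end() && bind_id >= 0);
        free_channel_ids.push_back(it->second); // slot becomes reusable
        if (it->second == current_channel_id) {
            current_channel_id = UNSET_CHANNEL;
            state = nullptr;
        }
        channel_map.erase(it);
    }

    ChannelInfo* state = nullptr;

private:
    std::deque<ChannelInfo> channel_storage;   // slots are stable: deque never relocates elements
    std::deque<std::size_t> free_channel_ids;  // indices of erased slots, reused first
    std::unordered_map<std::int32_t, std::size_t> channel_map;
    std::size_t current_channel_id = UNSET_CHANNEL;
};

A std::deque is used for channel_storage because push/emplace at the back does not invalidate references to existing elements, so the cached `state` pointer and the per-slot references held elsewhere stay valid while new channels are created; the diff's EraseChannel additionally re-resolves `state` from channel_storage when a different channel remains bound.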
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 7e6c6cef2..69efcb718 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <deque> | ||
| 7 | #include <limits> | ||
| 6 | #include <mutex> | 8 | #include <mutex> |
| 7 | #include <span> | 9 | #include <span> |
| 8 | #include <type_traits> | 10 | #include <type_traits> |
| @@ -26,6 +28,10 @@ | |||
| 26 | #include "video_core/texture_cache/types.h" | 28 | #include "video_core/texture_cache/types.h" |
| 27 | #include "video_core/textures/texture.h" | 29 | #include "video_core/textures/texture.h" |
| 28 | 30 | ||
| 31 | namespace Tegra::Control { | ||
| 32 | struct ChannelState; | ||
| 33 | } | ||
| 34 | |||
| 29 | namespace VideoCommon { | 35 | namespace VideoCommon { |
| 30 | 36 | ||
| 31 | using Tegra::Texture::SwizzleSource; | 37 | using Tegra::Texture::SwizzleSource; |
| @@ -58,6 +64,8 @@ class TextureCache { | |||
| 58 | /// True when the API can provide info about the memory of the device. | 64 | /// True when the API can provide info about the memory of the device. |
| 59 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 65 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 60 | 66 | ||
| 67 | static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; | ||
| 68 | |||
| 61 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | 69 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| 62 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; | 70 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; |
| 63 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; | 71 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; |
| @@ -85,8 +93,7 @@ class TextureCache { | |||
| 85 | }; | 93 | }; |
| 86 | 94 | ||
| 87 | public: | 95 | public: |
| 88 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, | 96 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); |
| 89 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); | ||
| 90 | 97 | ||
| 91 | /// Notify the cache that a new frame has been queued | 98 | /// Notify the cache that a new frame has been queued |
| 92 | void TickFrame(); | 99 | void TickFrame(); |
| @@ -171,6 +178,15 @@ public: | |||
| 171 | 178 | ||
| 172 | [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; | 179 | [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; |
| 173 | 180 | ||
| 181 | /// Create channel state. | ||
| 182 | void CreateChannel(struct Tegra::Control::ChannelState& channel); | ||
| 183 | |||
| 184 | /// Bind a channel for execution. | ||
| 185 | void BindToChannel(s32 id); | ||
| 186 | |||
| 187 | /// Erase channel's state. | ||
| 188 | void EraseChannel(s32 id); | ||
| 189 | |||
| 174 | std::mutex mutex; | 190 | std::mutex mutex; |
| 175 | 191 | ||
| 176 | private: | 192 | private: |
| @@ -338,31 +354,52 @@ private: | |||
| 338 | u64 GetScaledImageSizeBytes(ImageBase& image); | 354 | u64 GetScaledImageSizeBytes(ImageBase& image); |
| 339 | 355 | ||
| 340 | Runtime& runtime; | 356 | Runtime& runtime; |
| 341 | VideoCore::RasterizerInterface& rasterizer; | ||
| 342 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 343 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 344 | Tegra::MemoryManager& gpu_memory; | ||
| 345 | 357 | ||
| 346 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | 358 | struct ChannelInfo { |
| 347 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | 359 | ChannelInfo() = delete; |
| 348 | std::vector<SamplerId> graphics_sampler_ids; | 360 | ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept; |
| 349 | std::vector<ImageViewId> graphics_image_view_ids; | 361 | ChannelInfo(const ChannelInfo& state) = delete; |
| 362 | ChannelInfo& operator=(const ChannelInfo&) = delete; | ||
| 363 | ChannelInfo(ChannelInfo&& other) noexcept = default; | ||
| 364 | ChannelInfo& operator=(ChannelInfo&& other) noexcept = default; | ||
| 365 | |||
| 366 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 367 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 368 | Tegra::MemoryManager& gpu_memory; | ||
| 369 | |||
| 370 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | ||
| 371 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | ||
| 372 | std::vector<SamplerId> graphics_sampler_ids; | ||
| 373 | std::vector<ImageViewId> graphics_image_view_ids; | ||
| 350 | 374 | ||
| 351 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | 375 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; |
| 352 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | 376 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; |
| 353 | std::vector<SamplerId> compute_sampler_ids; | 377 | std::vector<SamplerId> compute_sampler_ids; |
| 354 | std::vector<ImageViewId> compute_image_view_ids; | 378 | std::vector<ImageViewId> compute_image_view_ids; |
| 379 | |||
| 380 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 381 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 382 | |||
| 383 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | ||
| 384 | }; | ||
| 385 | |||
| 386 | std::deque<ChannelInfo> channel_storage; | ||
| 387 | std::deque<size_t> free_channel_ids; | ||
| 388 | std::unordered_map<s32, size_t> channel_map; | ||
| 389 | |||
| 390 | ChannelInfo* state; | ||
| 391 | size_t current_channel_id{UNSET_CHANNEL}; | ||
| 392 | VideoCore::RasterizerInterface& rasterizer; | ||
| 393 | Tegra::Engines::Maxwell3D* maxwell3d; | ||
| 394 | Tegra::Engines::KeplerCompute* kepler_compute; | ||
| 395 | Tegra::MemoryManager* gpu_memory; | ||
| 355 | 396 | ||
| 356 | RenderTargets render_targets; | 397 | RenderTargets render_targets; |
| 357 | 398 | ||
| 358 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 359 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 360 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | 399 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; |
| 361 | 400 | ||
| 362 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; | 401 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; |
| 363 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | ||
| 364 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; | 402 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; |
| 365 | |||
| 366 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 403 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; |
| 367 | 404 | ||
| 368 | VAddr virtual_invalid_space{}; | 405 | VAddr virtual_invalid_space{}; |