Diffstat (limited to 'src'): 121 files changed, 2741 insertions(+), 1414 deletions(-)
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index 3c214ec00..2a1ae1bb3 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -8,6 +8,7 @@
 #include "audio_core/sink/sink_stream.h"
 #include "core/core.h"
 #include "core/core_timing.h"
+#include "core/guest_memory.h"
 #include "core/memory.h"
 
 #include "core/hle/kernel/k_process.h"
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index 911dae3c1..905613a5a 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -9,6 +9,7 @@
 #include "common/fixed_point.h"
 #include "common/logging/log.h"
 #include "common/scratch_buffer.h"
+#include "core/guest_memory.h"
 #include "core/memory.h"
 
 namespace AudioCore::Renderer {
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 0fc225aff..ae04c4d60 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -45,6 +45,7 @@ using f32 = float; ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point
 
 using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
+using DAddr = u64; ///< Represents a pointer in the device specific virtual address space.
 using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
 using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 16ddb5e90..4ff2c1bb7 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(core STATIC
     debugger/gdbstub_arch.h
     debugger/gdbstub.cpp
     debugger/gdbstub.h
+    device_memory_manager.h
+    device_memory_manager.inc
     device_memory.cpp
     device_memory.h
     file_sys/fssystem/fs_i_storage.h
@@ -609,6 +611,8 @@ add_library(core STATIC
     hle/service/ns/pdm_qry.h
     hle/service/nvdrv/core/container.cpp
     hle/service/nvdrv/core/container.h
+    hle/service/nvdrv/core/heap_mapper.cpp
+    hle/service/nvdrv/core/heap_mapper.h
     hle/service/nvdrv/core/nvmap.cpp
     hle/service/nvdrv/core/nvmap.h
     hle/service/nvdrv/core/syncpoint_manager.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 461eea9c8..2392fe136 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -28,6 +28,7 @@
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
+#include "core/gpu_dirty_memory_manager.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/hle/kernel/k_resource_limit.h"
@@ -565,6 +566,9 @@ struct System::Impl {
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
 
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+        gpu_dirty_memory_managers;
+
     std::deque<std::vector<u8>> user_channel;
 };
 
@@ -651,8 +655,14 @@ size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
-void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
+std::span<GPUDirtyMemoryManager> System::GetGPUDirtyMemoryManager() {
+    return impl->gpu_dirty_memory_managers;
+}
+
+void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) {
+    for (auto& manager : impl->gpu_dirty_memory_managers) {
+        manager.Gather(callback);
+    }
 }
 
 PerfStatsResults System::GetAndResetPerfStats() {
diff --git a/src/core/core.h b/src/core/core.h
index ba5add0dc..80446f385 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -8,6 +8,7 @@
 #include <functional>
 #include <memory>
 #include <mutex>
+#include <span>
 #include <string>
 #include <vector>
 
@@ -116,6 +117,7 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
+class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
 class SpeedLimiter;
@@ -224,7 +226,9 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+    std::span<GPUDirtyMemoryManager> GetGPUDirtyMemoryManager();
+
+    void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
 
diff --git a/src/core/device_memory.h b/src/core/device_memory.h
index 13388b73e..11bf0e326 100644
--- a/src/core/device_memory.h
+++ b/src/core/device_memory.h
@@ -32,6 +32,12 @@ public:
     }
 
     template <typename T>
+    PAddr GetRawPhysicalAddr(const T* ptr) const {
+        return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) -
+                                  reinterpret_cast<uintptr_t>(buffer.BackingBasePointer()));
+    }
+
+    template <typename T>
     T* GetPointer(Common::PhysicalAddress addr) {
         return reinterpret_cast<T*>(buffer.BackingBasePointer() +
                                     (GetInteger(addr) - DramMemoryMap::Base));
@@ -43,6 +49,16 @@ public:
                                     (GetInteger(addr) - DramMemoryMap::Base));
     }
 
+    template <typename T>
+    T* GetPointerFromRaw(PAddr addr) {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
+    template <typename T>
+    const T* GetPointerFromRaw(PAddr addr) const {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
     Common::HostMemory buffer;
 };
 
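Note: unlike GetPointer, the new raw-address helpers skip the DramMemoryMap::Base bias entirely; a raw PAddr here is just a byte offset from the start of the backing allocation. A minimal standalone sketch of the round-trip, with a plain byte buffer standing in for Common::HostMemory (all names below are illustrative, not from this patch):

#include <cassert>
#include <cstdint>

struct FakeDeviceMemory {
    std::uint8_t* base; // stands in for buffer.BackingBasePointer()

    template <typename T>
    std::uint64_t GetRawPhysicalAddr(const T* ptr) const {
        return static_cast<std::uint64_t>(reinterpret_cast<std::uintptr_t>(ptr) -
                                          reinterpret_cast<std::uintptr_t>(base));
    }

    template <typename T>
    T* GetPointerFromRaw(std::uint64_t addr) {
        return reinterpret_cast<T*>(base + addr);
    }
};

int main() {
    std::uint8_t backing[4096]{};
    FakeDeviceMemory mem{backing};
    std::uint8_t* p = &backing[128];
    const std::uint64_t raw = mem.GetRawPhysicalAddr(p); // 128
    assert(mem.GetPointerFromRaw<std::uint8_t>(raw) == p); // round-trips exactly
}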
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
new file mode 100644
index 000000000..ffeed46cc
--- /dev/null
+++ b/src/core/device_memory_manager.h
@@ -0,0 +1,211 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <deque>
+#include <memory>
+#include <mutex>
+
+#include "common/common_types.h"
+#include "common/scratch_buffer.h"
+#include "common/virtual_buffer.h"
+
+namespace Core {
+
+constexpr size_t DEVICE_PAGEBITS = 12ULL;
+constexpr size_t DEVICE_PAGESIZE = 1ULL << DEVICE_PAGEBITS;
+constexpr size_t DEVICE_PAGEMASK = DEVICE_PAGESIZE - 1ULL;
+
+class DeviceMemory;
+
+namespace Memory {
+class Memory;
+}
+
+template <typename DTraits>
+struct DeviceMemoryManagerAllocator;
+
+struct Asid {
+    size_t id;
+};
+
+template <typename Traits>
+class DeviceMemoryManager {
+    using DeviceInterface = typename Traits::DeviceInterface;
+    using DeviceMethods = typename Traits::DeviceMethods;
+
+public:
+    DeviceMemoryManager(const DeviceMemory& device_memory);
+    ~DeviceMemoryManager();
+
+    void BindInterface(DeviceInterface* device_inter);
+
+    DAddr Allocate(size_t size);
+    void AllocateFixed(DAddr start, size_t size);
+    void Free(DAddr start, size_t size);
+
+    void Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, bool track = false);
+
+    void Unmap(DAddr address, size_t size);
+
+    void TrackContinuityImpl(DAddr address, VAddr virtual_address, size_t size, Asid asid);
+    void TrackContinuity(DAddr address, VAddr virtual_address, size_t size, Asid asid) {
+        std::scoped_lock lk(mapping_guard);
+        TrackContinuityImpl(address, virtual_address, size, asid);
+    }
+
+    // Write / Read
+    template <typename T>
+    T* GetPointer(DAddr address);
+
+    template <typename T>
+    const T* GetPointer(DAddr address) const;
+
+    template <typename Func>
+    void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        DAddr subbits = static_cast<DAddr>(address & page_mask);
+        const u32 base = compressed_device_addr[(address >> page_bits)];
+        if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
+            const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
+            operation(d_address);
+            return;
+        }
+        InnerGatherDeviceAddresses(buffer, address);
+        for (u32 value : buffer) {
+            operation((static_cast<DAddr>(value) << page_bits) + subbits);
+        }
+    }
+
+    template <typename Func>
+    void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        PAddr address = GetRawPhysicalAddr<u8>(p);
+        ApplyOpOnPAddr(address, buffer, operation);
+    }
+
+    PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
+        PAddr subbits = static_cast<PAddr>(address & page_mask);
+        auto paddr = compressed_physical_ptr[(address >> page_bits)];
+        if (paddr == 0) {
+            return 0;
+        }
+        return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
+    }
+
+    template <typename T>
+    void Write(DAddr address, T value);
+
+    template <typename T>
+    T Read(DAddr address) const;
+
+    u8* GetSpan(const DAddr src_addr, const std::size_t size);
+    const u8* GetSpan(const DAddr src_addr, const std::size_t size) const;
+
+    void ReadBlock(DAddr address, void* dest_pointer, size_t size);
+    void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
+    void WriteBlock(DAddr address, const void* src_pointer, size_t size);
+    void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
+
+    Asid RegisterProcess(Memory::Memory* memory);
+    void UnregisterProcess(Asid id);
+
+    void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
+
+    static constexpr size_t AS_BITS = Traits::device_virtual_bits;
+
+private:
+    static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
+    static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
+    static constexpr size_t physical_min_bits = 32;
+    static constexpr size_t physical_max_bits = 33;
+    static constexpr size_t page_bits = 12;
+    static constexpr size_t page_size = 1ULL << page_bits;
+    static constexpr size_t page_mask = page_size - 1ULL;
+    static constexpr u32 physical_address_base = 1U << page_bits;
+    static constexpr u32 MULTI_FLAG_BITS = 31;
+    static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
+    static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
+
+    template <typename T>
+    T* GetPointerFromRaw(PAddr addr) {
+        return reinterpret_cast<T*>(physical_base + addr);
+    }
+
+    template <typename T>
+    const T* GetPointerFromRaw(PAddr addr) const {
+        return reinterpret_cast<T*>(physical_base + addr);
+    }
+
+    template <typename T>
+    PAddr GetRawPhysicalAddr(const T* ptr) const {
+        return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - physical_base);
+    }
+
+    void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
+                   auto increment);
+
+    void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
+
+    std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
+
+    const uintptr_t physical_base;
+    DeviceInterface* device_inter;
+    Common::VirtualBuffer<u32> compressed_physical_ptr;
+    Common::VirtualBuffer<u32> compressed_device_addr;
+    Common::VirtualBuffer<u32> continuity_tracker;
+
+    // Process memory interfaces
+
+    std::deque<size_t> id_pool;
+    std::deque<Memory::Memory*> registered_processes;
+
+    // Memory protection management
+
+    static constexpr size_t guest_max_as_bits = 39;
+    static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
+    static constexpr size_t guest_mask = guest_as_size - 1ULL;
+    static constexpr size_t asid_start_bit = guest_max_as_bits;
+
+    std::pair<Asid, VAddr> ExtractCPUBacking(size_t page_index) {
+        auto content = cpu_backing_address[page_index];
+        const VAddr address = content & guest_mask;
+        const Asid asid{static_cast<size_t>(content >> asid_start_bit)};
+        return std::make_pair(asid, address);
+    }
+
+    void InsertCPUBacking(size_t page_index, VAddr address, Asid asid) {
+        cpu_backing_address[page_index] = address | (asid.id << asid_start_bit);
+    }
+
+    Common::VirtualBuffer<VAddr> cpu_backing_address;
+    static constexpr size_t subentries = 8 / sizeof(u8);
+    static constexpr size_t subentries_mask = subentries - 1;
+    class CounterEntry final {
+    public:
+        CounterEntry() = default;
+
+        std::atomic_uint8_t& Count(std::size_t page) {
+            return values[page & subentries_mask];
+        }
+
+        const std::atomic_uint8_t& Count(std::size_t page) const {
+            return values[page & subentries_mask];
+        }
+
+    private:
+        std::array<std::atomic_uint8_t, subentries> values{};
+    };
+    static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
+                  "CounterEntry should be 8 bytes!");
+
+    static constexpr size_t num_counter_entries =
+        (1ULL << (device_virtual_bits - page_bits)) / subentries;
+    using CachedPages = std::array<CounterEntry, num_counter_entries>;
+    std::unique_ptr<CachedPages> cached_pages;
+    std::mutex counter_guard;
+    std::mutex mapping_guard;
+};
+
+} // namespace Core
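Note: this header never defines a concrete Traits type; it only requires that Traits expose DeviceInterface, DeviceMethods, and device_virtual_bits. A hypothetical traits definition sketching that contract (all names below are illustrative, not part of this patch):

// Hypothetical: what a device backend hands to DeviceMemoryManager<Traits>.
struct ExampleDeviceTraits {
    // Object notified of flushes/invalidations (FlushRegion, InvalidateRegion).
    using DeviceInterface = ExampleDeviceMemoryInterface;
    // Provider of static callbacks such as MarkRegionCaching.
    using DeviceMethods = ExampleDeviceMethods;
    // A 2^34-byte device virtual address space.
    static constexpr size_t device_virtual_bits = 34;
};

// The template bodies live in device_memory_manager.inc (added below), so a
// backend .cpp would include the .inc and explicitly instantiate:
// template class Core::DeviceMemoryManager<ExampleDeviceTraits>;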
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
new file mode 100644
index 000000000..8ce122872
--- /dev/null
+++ b/src/core/device_memory_manager.inc
@@ -0,0 +1,582 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <atomic>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "common/address_space.h"
+#include "common/address_space.inc"
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/div_ceil.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "core/device_memory.h"
+#include "core/device_memory_manager.h"
+#include "core/memory.h"
+
+namespace Core {
+
+namespace {
+
+class MultiAddressContainer {
+public:
+    MultiAddressContainer() = default;
+    ~MultiAddressContainer() = default;
+
+    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
+        buffer.resize(8);
+        buffer.resize(0);
+        size_t index = 0;
+        const auto add_value = [&](u32 value) {
+            buffer[index] = value;
+            index++;
+            buffer.resize(index);
+        };
+
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        add_value(current->value);
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+            add_value(current->value);
+        }
+    }
+
+    u32 Register(u32 value) {
+        return RegisterImplementation(value);
+    }
+
+    void Register(u32 value, u32 start_entry) {
+        auto entry_id = RegisterImplementation(value);
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        current->next_entry = entry_id;
+    }
+
+    std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
+        u32 iter_entry = start_entry;
+        Entry* previous{};
+        Entry* current = &storage[iter_entry - 1];
+        Entry* next{};
+        bool more_than_one_remaining = false;
+        u32 result_start{start_entry};
+        size_t count = 0;
+        while (current->value != value) {
+            count++;
+            previous = current;
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        // Find next
+        u32 next_entry = current->next_entry;
+        if (next_entry != 0) {
+            next = &storage[next_entry - 1];
+            more_than_one_remaining = next->next_entry != 0 || previous != nullptr;
+        }
+        if (previous) {
+            previous->next_entry = next_entry;
+        } else {
+            result_start = next_entry;
+        }
+        free_entries.emplace_back(iter_entry);
+        return std::make_pair(more_than_one_remaining || count > 1, result_start);
+    }
+
+    u32 ReleaseEntry(u32 start_entry) {
+        Entry* current = &storage[start_entry - 1];
+        free_entries.emplace_back(start_entry);
+        return current->value;
+    }
+
+private:
+    u32 RegisterImplementation(u32 value) {
+        auto entry_id = GetNewEntry();
+        auto& entry = storage[entry_id - 1];
+        entry.next_entry = 0;
+        entry.value = value;
+        return entry_id;
+    }
+    u32 GetNewEntry() {
+        if (!free_entries.empty()) {
+            u32 result = free_entries.front();
+            free_entries.pop_front();
+            return result;
+        }
+        storage.emplace_back();
+        u32 new_entry = static_cast<u32>(storage.size());
+        return new_entry;
+    }
+
+    struct Entry {
+        u32 next_entry{};
+        u32 value{};
+    };
+
+    std::deque<Entry> storage;
+    std::deque<u32> free_entries;
+};
+
+struct EmptyAllocator {
+    EmptyAllocator([[maybe_unused]] DAddr address) {}
+};
+
+} // namespace
+
+template <typename DTraits>
+struct DeviceMemoryManagerAllocator {
+    static constexpr size_t device_virtual_bits = DTraits::device_virtual_bits;
+    static constexpr DAddr first_address = 1ULL << Memory::YUZU_PAGEBITS;
+    static constexpr DAddr max_device_area = 1ULL << device_virtual_bits;
+
+    DeviceMemoryManagerAllocator() : main_allocator(first_address) {}
+
+    Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
+    MultiAddressContainer multi_dev_address;
+
+    /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
+    template <bool pin_area>
+    [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
+        return addr >= 0 && addr + size <= max_device_area;
+    }
+
+    DAddr Allocate(size_t size) {
+        return main_allocator.Allocate(size);
+    }
+
+    void AllocateFixed(DAddr b_address, size_t b_size) {
+        main_allocator.AllocateFixed(b_address, b_size);
+    }
+
+    void Free(DAddr b_address, size_t b_size) {
+        main_allocator.Free(b_address, b_size);
+    }
+};
+
+template <typename Traits>
+DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
+    : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
+      device_inter{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
+      compressed_device_addr(1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
+                                               Settings::MemoryLayout::Memory_4Gb
+                                           ? physical_min_bits
+                                           : physical_max_bits) -
+                                      Memory::YUZU_PAGEBITS)),
+      continuity_tracker(device_as_size >> Memory::YUZU_PAGEBITS),
+      cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
+    impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
+    cached_pages = std::make_unique<CachedPages>();
+
+    const size_t total_virtual = device_as_size >> Memory::YUZU_PAGEBITS;
+    for (size_t i = 0; i < total_virtual; i++) {
+        compressed_physical_ptr[i] = 0;
+        continuity_tracker[i] = 1;
+        cpu_backing_address[i] = 0;
+    }
+    const size_t total_phys = 1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
+                                                Settings::MemoryLayout::Memory_4Gb
+                                            ? physical_min_bits
+                                            : physical_max_bits) -
+                                       Memory::YUZU_PAGEBITS);
+    for (size_t i = 0; i < total_phys; i++) {
+        compressed_device_addr[i] = 0;
+    }
+}
+
+template <typename Traits>
+DeviceMemoryManager<Traits>::~DeviceMemoryManager() = default;
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::BindInterface(DeviceInterface* device_inter_) {
+    device_inter = device_inter_;
+}
+
+template <typename Traits>
+DAddr DeviceMemoryManager<Traits>::Allocate(size_t size) {
+    return impl->Allocate(size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::AllocateFixed(DAddr start, size_t size) {
+    return impl->AllocateFixed(start, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
+    impl->Free(start, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
+                                      Asid asid, bool track) {
+    Core::Memory::Memory* process_memory = registered_processes[asid.id];
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    std::scoped_lock lk(mapping_guard);
+    for (size_t i = 0; i < num_pages; i++) {
+        const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
+        auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
+        if (ptr == nullptr) [[unlikely]] {
+            compressed_physical_ptr[start_page_d + i] = 0;
+            continue;
+        }
+        auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
+        compressed_physical_ptr[start_page_d + i] = phys_addr;
+        InsertCPUBacking(start_page_d + i, new_vaddress, asid);
+        const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+        const u32 new_dev = static_cast<u32>(start_page_d + i);
+        if (base_dev == 0) [[likely]] {
+            compressed_device_addr[phys_addr - 1U] = new_dev;
+            continue;
+        }
+        u32 start_id = base_dev & MULTI_MASK;
+        if ((base_dev >> MULTI_FLAG_BITS) == 0) {
+            start_id = impl->multi_dev_address.Register(base_dev);
+            compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
+        }
+        impl->multi_dev_address.Register(new_dev, start_id);
+    }
+    if (track) {
+        TrackContinuityImpl(address, virtual_address, size, asid);
+    }
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    device_inter->InvalidateRegion(address, size);
+    std::scoped_lock lk(mapping_guard);
+    for (size_t i = 0; i < num_pages; i++) {
+        auto phys_addr = compressed_physical_ptr[start_page_d + i];
+        compressed_physical_ptr[start_page_d + i] = 0;
+        cpu_backing_address[start_page_d + i] = 0;
+        if (phys_addr != 0) [[likely]] {
+            const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+            if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
+                compressed_device_addr[phys_addr - 1] = 0;
+                continue;
+            }
+            const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
+                static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
+            if (!more_entries) {
+                compressed_device_addr[phys_addr - 1] =
+                    impl->multi_dev_address.ReleaseEntry(new_start);
+                continue;
+            }
+            compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
+        }
+    }
+}
+template <typename Traits>
+void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
+                                                      size_t size, Asid asid) {
+    Core::Memory::Memory* process_memory = registered_processes[asid.id];
+    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
+    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
+    uintptr_t last_ptr = 0;
+    size_t page_count = 1;
+    for (size_t i = num_pages; i > 0; i--) {
+        size_t index = i - 1;
+        const VAddr new_vaddress = virtual_address + index * Memory::YUZU_PAGESIZE;
+        const uintptr_t new_ptr = reinterpret_cast<uintptr_t>(
+            process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)));
+        if (new_ptr + page_size == last_ptr) {
+            page_count++;
+        } else {
+            page_count = 1;
+        }
+        last_ptr = new_ptr;
+        continuity_tracker[start_page_d + index] = static_cast<u32>(page_count);
+    }
+}
+template <typename Traits>
+u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) {
+    size_t page_index = src_addr >> page_bits;
+    size_t subbits = src_addr & page_mask;
+    if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
+        return GetPointer<u8>(src_addr);
+    }
+    return nullptr;
+}
+
+template <typename Traits>
+const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) const {
+    size_t page_index = src_addr >> page_bits;
+    size_t subbits = src_addr & page_mask;
+    if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
+        return GetPointer<u8>(src_addr);
+    }
+    return nullptr;
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
+                                                             PAddr address) {
+    size_t phys_addr = address >> page_bits;
+    std::scoped_lock lk(mapping_guard);
+    u32 backing = compressed_device_addr[phys_addr];
+    if ((backing >> MULTI_FLAG_BITS) != 0) {
+        impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
+        return;
+    }
+    buffer.resize(1);
+    buffer[0] = backing;
+}
+
+template <typename Traits>
+template <typename T>
+T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
+    const size_t index = address >> Memory::YUZU_PAGEBITS;
+    const size_t offset = address & Memory::YUZU_PAGEMASK;
+    auto phys_addr = compressed_physical_ptr[index];
+    if (phys_addr == 0) [[unlikely]] {
+        return nullptr;
+    }
+    return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
+                                offset);
+}
+
+template <typename Traits>
+template <typename T>
+const T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) const {
+    const size_t index = address >> Memory::YUZU_PAGEBITS;
+    const size_t offset = address & Memory::YUZU_PAGEMASK;
+    auto phys_addr = compressed_physical_ptr[index];
+    if (phys_addr == 0) [[unlikely]] {
+        return nullptr;
+    }
+    return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
+                                offset);
+}
+
+template <typename Traits>
+template <typename T>
+void DeviceMemoryManager<Traits>::Write(DAddr address, T value) {
+    T* ptr = GetPointer<T>(address);
+    if (!ptr) [[unlikely]] {
+        return;
+    }
+    std::memcpy(ptr, &value, sizeof(T));
+}
+
+template <typename Traits>
+template <typename T>
+T DeviceMemoryManager<Traits>::Read(DAddr address) const {
+    const T* ptr = GetPointer<T>(address);
+    T result{};
+    if (!ptr) [[unlikely]] {
+        return result;
+    }
+    std::memcpy(&result, ptr, sizeof(T));
+    return result;
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped,
+                                            auto on_memory, auto increment) {
+    std::size_t remaining_size = size;
+    std::size_t page_index = addr >> Memory::YUZU_PAGEBITS;
+    std::size_t page_offset = addr & Memory::YUZU_PAGEMASK;
+
+    while (remaining_size) {
+        const size_t next_pages = static_cast<std::size_t>(continuity_tracker[page_index]);
+        const std::size_t copy_amount =
+            std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size);
+        const auto current_vaddr =
+            static_cast<u64>((page_index << Memory::YUZU_PAGEBITS) + page_offset);
+        SCOPE_EXIT({
+            page_index += next_pages;
+            page_offset = 0;
+            increment(copy_amount);
+            remaining_size -= copy_amount;
+        });
+
+        auto phys_addr = compressed_physical_ptr[page_index];
+        if (phys_addr == 0) {
+            on_unmapped(copy_amount, current_vaddr);
+            continue;
+        }
+        auto* mem_ptr = GetPointerFromRaw<u8>(
+            (static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + page_offset);
+        on_memory(copy_amount, mem_ptr);
+    }
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
+    device_inter->FlushRegion(address, size);
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+            std::memset(dest_pointer, 0, copy_amount);
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+        },
+        [&](size_t copy_amount, u8* const dst_ptr) {
+            std::memcpy(dst_ptr, src_pointer, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
+        });
+    device_inter->InvalidateRegion(address, size);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+            std::memset(dest_pointer, 0, copy_amount);
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
+                                                   size_t size) {
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            LOG_ERROR(
+                HW_Memory,
+                "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                current_vaddr, address, size);
+        },
+        [&](size_t copy_amount, u8* const dst_ptr) {
+            std::memcpy(dst_ptr, src_pointer, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
+        });
+}
+
+template <typename Traits>
+Asid DeviceMemoryManager<Traits>::RegisterProcess(Memory::Memory* memory_device_inter) {
+    size_t new_id{};
+    if (!id_pool.empty()) {
+        new_id = id_pool.front();
+        id_pool.pop_front();
+        registered_processes[new_id] = memory_device_inter;
+    } else {
+        registered_processes.emplace_back(memory_device_inter);
+        new_id = registered_processes.size() - 1U;
+    }
+    return Asid{new_id};
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
+    registered_processes[asid.id] = nullptr;
+    id_pool.push_front(asid.id);
+}
+
+template <typename Traits>
+void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
+    std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
+    const auto Lock = [&] {
+        if (!lk) {
+            lk.lock();
+        }
+    };
+    u64 uncache_begin = 0;
+    u64 cache_begin = 0;
+    u64 uncache_bytes = 0;
+    u64 cache_bytes = 0;
+    const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
+
+    std::atomic_thread_fence(std::memory_order_acquire);
+    const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
+    size_t page = addr >> Memory::YUZU_PAGEBITS;
+    auto [asid, base_vaddress] = ExtractCPUBacking(page);
+    size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
+    auto* memory_device_inter = registered_processes[asid.id];
+    for (; page != page_end; ++page) {
+        std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
+
+        if (delta > 0) {
+            ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
+                       "Count may overflow!");
+        } else if (delta < 0) {
+            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
+        } else {
+            ASSERT_MSG(false, "Delta must be non-zero!");
+        }
+
+        // Adds or subtracts 1, as count is a unsigned 8-bit value
+        count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
+
+        // Assume delta is either -1 or 1
+        if (count.load(std::memory_order::relaxed) == 0) {
+            if (uncache_bytes == 0) {
+                uncache_begin = vpage;
+            }
+            uncache_bytes += Memory::YUZU_PAGESIZE;
+        } else if (uncache_bytes > 0) {
+            Lock();
+            MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
+                              uncache_bytes, false);
+            uncache_bytes = 0;
+        }
+        if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
+            if (cache_bytes == 0) {
+                cache_begin = vpage;
+            }
+            cache_bytes += Memory::YUZU_PAGESIZE;
+        } else if (cache_bytes > 0) {
+            Lock();
+            MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
+                              true);
+            cache_bytes = 0;
+        }
+        vpage++;
+    }
+    if (uncache_bytes > 0) {
+        Lock();
+        MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
+                          false);
+    }
+    if (cache_bytes > 0) {
+        Lock();
+        MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
+                          true);
+    }
+}
+
+} // namespace Core
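Note: two compact encodings carry this file. compressed_physical_ptr stores the physical page number plus one, so 0 can mean "unmapped", and compressed_device_addr sets bit 31 (MULTI_FLAG) when several device pages map one physical page, in which case the low 31 bits are a 1-based chain id into MultiAddressContainer rather than a device page number. A standalone decode sketch of both conventions (illustrative, mirroring the constants above):

#include <cstdint>

constexpr std::uint32_t MULTI_FLAG_BITS = 31;
constexpr std::uint32_t MULTI_FLAG = 1U << MULTI_FLAG_BITS;
constexpr std::uint32_t MULTI_MASK = ~MULTI_FLAG;
constexpr std::size_t page_bits = 12;

// Decode a compressed_physical_ptr entry; 0 encodes "unmapped".
bool DecodePhysical(std::uint32_t entry, std::uint64_t& page_base) {
    if (entry == 0) {
        return false; // page not mapped
    }
    page_base = static_cast<std::uint64_t>(entry - 1) << page_bits;
    return true;
}

// Classify a compressed_device_addr entry.
bool IsMultiMapped(std::uint32_t entry) {
    return (entry >> MULTI_FLAG_BITS) != 0; // top bit set: chain id follows
}

std::uint32_t ChainStartId(std::uint32_t entry) {
    return entry & MULTI_MASK; // 1-based index into the container's storage
}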
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9687531e8..cc8fc176f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -10,7 +10,7 @@
 #include <utility>
 #include <vector>
 
-#include "core/memory.h"
+#include "core/device_memory_manager.h"
 
 namespace Core {
 
@@ -23,7 +23,7 @@ public:
 
     ~GPUDirtyMemoryManager() = default;
 
-    void Collect(VAddr address, size_t size) {
+    void Collect(PAddr address, size_t size) {
         TransformAddress t = BuildTransform(address, size);
         TransformAddress tmp, original;
         do {
@@ -47,7 +47,7 @@ public:
             std::memory_order_relaxed));
     }
 
-    void Gather(std::function<void(VAddr, size_t)>& callback) {
+    void Gather(std::function<void(PAddr, size_t)>& callback) {
         {
             std::scoped_lock lk(guard);
             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
@@ -65,7 +65,7 @@ public:
                 mask = mask >> empty_bits;
 
                 const size_t continuous_bits = std::countr_one(mask);
-                callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
+                callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
                          continuous_bits << align_bits);
                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
                 offset += continuous_bits << align_bits;
@@ -80,7 +80,7 @@ private:
         u32 mask;
     };
 
-    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
+    constexpr static size_t page_bits = DEVICE_PAGEBITS - 1;
     constexpr static size_t page_size = 1ULL << page_bits;
     constexpr static size_t page_mask = page_size - 1;
 
@@ -89,7 +89,7 @@ private:
     constexpr static size_t align_mask = align_size - 1;
     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
 
-    bool IsValid(VAddr address) {
+    bool IsValid(PAddr address) {
        return address < (1ULL << 39);
     }
 
@@ -103,7 +103,7 @@ private:
         return mask;
     }
 
-    TransformAddress BuildTransform(VAddr address, size_t size) {
+    TransformAddress BuildTransform(PAddr address, size_t size) {
         const size_t minor_address = address & page_mask;
         const size_t minor_bit = minor_address >> align_bits;
         const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
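Note: with Collect/Gather now keyed on PAddr, callers drain the per-core managers through the new System::GatherGPUDirtyMemory seen earlier. A sketch of the call pattern; the callback body and handler name are illustrative, not from this patch:

// Assumed context: a Core::System& system in scope.
std::function<void(PAddr, size_t)> on_dirty = [&](PAddr addr, size_t size) {
    // Forward each dirty physical range to whatever consumes it,
    // e.g. a rasterizer cache invalidation path.
    HandleDirtyRange(addr, size); // hypothetical handler
};
system.GatherGPUDirtyMemory(on_dirty);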
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h
new file mode 100644
index 000000000..7ee18c126
--- /dev/null
+++ b/src/core/guest_memory.h
@@ -0,0 +1,214 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <iterator>
+#include <memory>
+#include <optional>
+#include <span>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/scratch_buffer.h"
+
+namespace Core::Memory {
+
+enum GuestMemoryFlags : u32 {
+    Read = 1 << 0,
+    Write = 1 << 1,
+    Safe = 1 << 2,
+    Cached = 1 << 3,
+
+    SafeRead = Read | Safe,
+    SafeWrite = Write | Safe,
+    SafeReadWrite = SafeRead | SafeWrite,
+    SafeReadCachedWrite = SafeReadWrite | Cached,
+
+    UnsafeRead = Read,
+    UnsafeWrite = Write,
+    UnsafeReadWrite = UnsafeRead | UnsafeWrite,
+    UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
+};
+
+namespace {
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemory {
+    using iterator = T*;
+    using const_iterator = const T*;
+    using value_type = T;
+    using element_type = T;
+    using iterator_category = std::contiguous_iterator_tag;
+
+public:
+    GuestMemory() = delete;
+    explicit GuestMemory(M& memory, u64 addr, std::size_t size,
+                         Common::ScratchBuffer<T>* backup = nullptr)
+        : m_memory{memory}, m_addr{addr}, m_size{size} {
+        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
+        if constexpr (FLAGS & GuestMemoryFlags::Read) {
+            Read(addr, size, backup);
+        }
+    }
+
+    ~GuestMemory() = default;
+
+    T* data() noexcept {
+        return m_data_span.data();
+    }
+
+    const T* data() const noexcept {
+        return m_data_span.data();
+    }
+
+    size_t size() const noexcept {
+        return m_size;
+    }
+
+    size_t size_bytes() const noexcept {
+        return this->size() * sizeof(T);
+    }
+
+    [[nodiscard]] T* begin() noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] const T* begin() const noexcept {
+        return this->data();
+    }
+
+    [[nodiscard]] T* end() noexcept {
+        return this->data() + this->size();
+    }
+
+    [[nodiscard]] const T* end() const noexcept {
+        return this->data() + this->size();
+    }
+
+    T& operator[](size_t index) noexcept {
+        return m_data_span[index];
+    }
+
+    const T& operator[](size_t index) const noexcept {
+        return m_data_span[index];
+    }
+
+    void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
+        m_addr = addr;
+        m_size = size;
+        m_addr_changed = true;
+    }
+
+    std::span<T> Read(u64 addr, std::size_t size,
+                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
+        m_addr = addr;
+        m_size = size;
+        if (m_size == 0) {
+            m_is_data_copy = true;
+            return {};
+        }
+
+        if (this->TrySetSpan()) {
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.FlushRegion(m_addr, this->size_bytes());
+            }
+        } else {
+            if (backup) {
+                backup->resize_destructive(this->size());
+                m_data_span = *backup;
+            } else {
+                m_data_copy.resize(this->size());
+                m_data_span = std::span(m_data_copy);
+            }
+            m_is_data_copy = true;
+            m_span_valid = true;
+            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
+            } else {
+                m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
+            }
+        }
+        return m_data_span;
+    }
+
+    void Write(std::span<T> write_data) noexcept {
+        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+            m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
+        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+            m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
+        } else {
+            m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
+        }
+    }
+
+    bool TrySetSpan() noexcept {
+        if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
+            m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
+            m_span_valid = true;
+            return true;
+        }
+        return false;
+    }
+
+protected:
+    bool IsDataCopy() const noexcept {
+        return m_is_data_copy;
+    }
+
+    bool AddressChanged() const noexcept {
+        return m_addr_changed;
+    }
+
+    M& m_memory;
+    u64 m_addr{};
+    size_t m_size{};
+    std::span<T> m_data_span{};
+    std::vector<T> m_data_copy{};
+    bool m_span_valid{false};
+    bool m_is_data_copy{false};
+    bool m_addr_changed{false};
+};
+
+template <typename M, typename T, GuestMemoryFlags FLAGS>
+class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
+public:
+    GuestMemoryScoped() = delete;
+    explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
+                               Common::ScratchBuffer<T>* backup = nullptr)
+        : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
+        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
+            if (!this->TrySetSpan()) {
+                if (backup) {
+                    this->m_data_span = *backup;
+                    this->m_span_valid = true;
+                    this->m_is_data_copy = true;
+                }
+            }
+        }
+    }
+
+    ~GuestMemoryScoped() {
+        if constexpr (FLAGS & GuestMemoryFlags::Write) {
+            if (this->size() == 0) [[unlikely]] {
+                return;
+            }
+
+            if (this->AddressChanged() || this->IsDataCopy()) {
+                ASSERT(this->m_span_valid);
+                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
+                    this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
+                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
+                    this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
+                } else {
+                    this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
+                }
+            } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
+                                 (FLAGS & GuestMemoryFlags::Cached)) {
+                this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
+            }
+        }
+    }
+};
+} // namespace
+
+} // namespace Core::Memory
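Note: GuestMemory reads eagerly in its constructor when the Read flag is set (a direct span over guest memory when the range is contiguous, a copy otherwise), and GuestMemoryScoped writes back and/or invalidates in its destructor when Write is set, so a read-modify-write is just a scoped object. A hypothetical alias and call site; the concrete aliases over Core::Memory::Memory are defined elsewhere in this patchset:

// Hypothetical alias: scoped, safe read/write view of CPU guest memory.
template <typename T>
using CpuGuestMemoryScoped =
    Core::Memory::GuestMemoryScoped<Core::Memory::Memory, T,
                                    Core::Memory::GuestMemoryFlags::SafeReadWrite>;

void IncrementWords(Core::Memory::Memory& memory, u64 addr, size_t count) {
    CpuGuestMemoryScoped<u32> words(memory, addr, count);
    for (u32& w : words) {
        ++w; // mutates the direct span, or the backing copy
    }
    // Destructor writes the copy back, or invalidates the region when the
    // guest memory was written through the span directly.
}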
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 53735a225..0b08e877e 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "common/scope_exit.h" | 5 | #include "common/scope_exit.h" |
| 6 | #include "common/settings.h" | 6 | #include "common/settings.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/gpu_dirty_memory_manager.h" | ||
| 8 | #include "core/hle/kernel/k_process.h" | 9 | #include "core/hle/kernel/k_process.h" |
| 9 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | 10 | #include "core/hle/kernel/k_scoped_resource_reservation.h" |
| 10 | #include "core/hle/kernel/k_shared_memory.h" | 11 | #include "core/hle/kernel/k_shared_memory.h" |
| @@ -320,7 +321,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa | |||
| 320 | 321 | ||
| 321 | // Ensure our memory is initialized. | 322 | // Ensure our memory is initialized. |
| 322 | m_memory.SetCurrentPageTable(*this); | 323 | m_memory.SetCurrentPageTable(*this); |
| 323 | m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); | 324 | m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager()); |
| 324 | 325 | ||
| 325 | // Ensure we can insert the code region. | 326 | // Ensure we can insert the code region. |
| 326 | R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize, | 327 | R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize, |
| @@ -417,7 +418,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, | |||
| 417 | 418 | ||
| 418 | // Ensure our memory is initialized. | 419 | // Ensure our memory is initialized. |
| 419 | m_memory.SetCurrentPageTable(*this); | 420 | m_memory.SetCurrentPageTable(*this); |
| 420 | m_memory.SetGPUDirtyManagers(m_dirty_memory_managers); | 421 | m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager()); |
| 421 | 422 | ||
| 422 | // Ensure we can insert the code region. | 423 | // Ensure we can insert the code region. |
| 423 | R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code), | 424 | R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code), |
| @@ -1141,8 +1142,7 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} | |||
| 1141 | KProcess::KProcess(KernelCore& kernel) | 1142 | KProcess::KProcess(KernelCore& kernel) |
| 1142 | : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, | 1143 | : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, |
| 1143 | m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, | 1144 | m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, |
| 1144 | m_handle_table{kernel}, m_dirty_memory_managers{}, | 1145 | m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {} |
| 1145 | m_exclusive_monitor{}, m_memory{kernel.System()} {} | ||
| 1146 | KProcess::~KProcess() = default; | 1146 | KProcess::~KProcess() = default; |
| 1147 | 1147 | ||
| 1148 | Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, | 1148 | Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, |
| @@ -1324,10 +1324,4 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT | |||
| 1324 | return true; | 1324 | return true; |
| 1325 | } | 1325 | } |
| 1326 | 1326 | ||
| 1327 | void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | ||
| 1328 | for (auto& manager : m_dirty_memory_managers) { | ||
| 1329 | manager.Gather(callback); | ||
| 1330 | } | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | } // namespace Kernel | 1327 | } // namespace Kernel |
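These hunks move GPU dirty tracking from a per-KProcess array of managers to a single system-owned set, which is why KProcess::GatherGPUDirtyMemory disappears above. A sketch of the gather-and-drain interface those call sites rely on; DirtyTracker is an illustrative reduction, not yuzu's GPUDirtyMemoryManager (which shards ranges across CPU cores rather than taking one global lock):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <utility>
#include <vector>

// Accumulates dirty guest ranges; the GPU side drains them on each flush.
class DirtyTracker {
public:
    void Mark(uint64_t addr, size_t size) {
        std::scoped_lock lk{guard};
        ranges.emplace_back(addr, size);
    }

    // Hand every pending range to the callback, then forget them.
    void Gather(const std::function<void(uint64_t, size_t)>& callback) {
        std::scoped_lock lk{guard};
        for (const auto& [addr, size] : ranges) {
            callback(addr, size);
        }
        ranges.clear();
    }

private:
    std::mutex guard;
    std::vector<std::pair<uint64_t, size_t>> ranges;
};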
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 53c0e3316..ab1358a12 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | 7 | ||
| 8 | #include "core/arm/arm_interface.h" | 8 | #include "core/arm/arm_interface.h" |
| 9 | #include "core/file_sys/program_metadata.h" | 9 | #include "core/file_sys/program_metadata.h" |
| 10 | #include "core/gpu_dirty_memory_manager.h" | ||
| 11 | #include "core/hle/kernel/code_set.h" | 10 | #include "core/hle/kernel/code_set.h" |
| 12 | #include "core/hle/kernel/k_address_arbiter.h" | 11 | #include "core/hle/kernel/k_address_arbiter.h" |
| 13 | #include "core/hle/kernel/k_capabilities.h" | 12 | #include "core/hle/kernel/k_capabilities.h" |
| @@ -128,7 +127,6 @@ private: | |||
| 128 | #ifdef HAS_NCE | 127 | #ifdef HAS_NCE |
| 129 | std::unordered_map<u64, u64> m_post_handlers{}; | 128 | std::unordered_map<u64, u64> m_post_handlers{}; |
| 130 | #endif | 129 | #endif |
| 131 | std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers; | ||
| 132 | std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor; | 130 | std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor; |
| 133 | Core::Memory::Memory m_memory; | 131 | Core::Memory::Memory m_memory; |
| 134 | 132 | ||
| @@ -511,8 +509,6 @@ public: | |||
| 511 | return m_memory; | 509 | return m_memory; |
| 512 | } | 510 | } |
| 513 | 511 | ||
| 514 | void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); | ||
| 515 | |||
| 516 | Core::ExclusiveMonitor& GetExclusiveMonitor() const { | 512 | Core::ExclusiveMonitor& GetExclusiveMonitor() const { |
| 517 | return *m_exclusive_monitor; | 513 | return *m_exclusive_monitor; |
| 518 | } | 514 | } |
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index 3f38ceb03..e491dd260 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "common/scratch_buffer.h" | 14 | #include "common/scratch_buffer.h" |
| 15 | #include "core/guest_memory.h" | ||
| 15 | #include "core/hle/kernel/k_auto_object.h" | 16 | #include "core/hle/kernel/k_auto_object.h" |
| 16 | #include "core/hle/kernel/k_handle_table.h" | 17 | #include "core/hle/kernel/k_handle_table.h" |
| 17 | #include "core/hle/kernel/k_process.h" | 18 | #include "core/hle/kernel/k_process.h" |
| @@ -23,19 +24,6 @@ | |||
| 23 | #include "core/hle/service/ipc_helpers.h" | 24 | #include "core/hle/service/ipc_helpers.h" |
| 24 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 25 | 26 | ||
| 26 | namespace { | ||
| 27 | static thread_local std::array read_buffer_data_a{ | ||
| 28 | Common::ScratchBuffer<u8>(), | ||
| 29 | Common::ScratchBuffer<u8>(), | ||
| 30 | Common::ScratchBuffer<u8>(), | ||
| 31 | }; | ||
| 32 | static thread_local std::array read_buffer_data_x{ | ||
| 33 | Common::ScratchBuffer<u8>(), | ||
| 34 | Common::ScratchBuffer<u8>(), | ||
| 35 | Common::ScratchBuffer<u8>(), | ||
| 36 | }; | ||
| 37 | } // Anonymous namespace | ||
| 38 | |||
| 39 | namespace Service { | 27 | namespace Service { |
| 40 | 28 | ||
| 41 | SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) | 29 | SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) |
| @@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons | |||
| 343 | } | 331 | } |
| 344 | 332 | ||
| 345 | std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { | 333 | std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { |
| 346 | static thread_local std::array read_buffer_a{ | 334 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 347 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 348 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 349 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 350 | }; | ||
| 351 | 335 | ||
| 352 | ASSERT_OR_EXECUTE_MSG( | 336 | ASSERT_OR_EXECUTE_MSG( |
| 353 | BufferDescriptorA().size() > buffer_index, { return {}; }, | 337 | BufferDescriptorA().size() > buffer_index, { return {}; }, |
| 354 | "BufferDescriptorA invalid buffer_index {}", buffer_index); | 338 | "BufferDescriptorA invalid buffer_index {}", buffer_index); |
| 355 | auto& read_buffer = read_buffer_a[buffer_index]; | 339 | return gm.Read(BufferDescriptorA()[buffer_index].Address(), |
| 356 | return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | 340 | BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); |
| 357 | BufferDescriptorA()[buffer_index].Size(), | ||
| 358 | &read_buffer_data_a[buffer_index]); | ||
| 359 | } | 341 | } |
| 360 | 342 | ||
| 361 | std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { | 343 | std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { |
| 362 | static thread_local std::array read_buffer_x{ | 344 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 363 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 364 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 365 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 366 | }; | ||
| 367 | 345 | ||
| 368 | ASSERT_OR_EXECUTE_MSG( | 346 | ASSERT_OR_EXECUTE_MSG( |
| 369 | BufferDescriptorX().size() > buffer_index, { return {}; }, | 347 | BufferDescriptorX().size() > buffer_index, { return {}; }, |
| 370 | "BufferDescriptorX invalid buffer_index {}", buffer_index); | 348 | "BufferDescriptorX invalid buffer_index {}", buffer_index); |
| 371 | auto& read_buffer = read_buffer_x[buffer_index]; | 349 | return gm.Read(BufferDescriptorX()[buffer_index].Address(), |
| 372 | return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | 350 | BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); |
| 373 | BufferDescriptorX()[buffer_index].Size(), | ||
| 374 | &read_buffer_data_x[buffer_index]); | ||
| 375 | } | 351 | } |
| 376 | 352 | ||
| 377 | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | 353 | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { |
| 378 | static thread_local std::array read_buffer_a{ | 354 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 379 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 380 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 381 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 382 | }; | ||
| 383 | static thread_local std::array read_buffer_x{ | ||
| 384 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 385 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 386 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 387 | }; | ||
| 388 | 355 | ||
| 389 | const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && | 356 | const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && |
| 390 | BufferDescriptorA()[buffer_index].Size()}; | 357 | BufferDescriptorA()[buffer_index].Size()}; |
| @@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons | |||
| 401 | ASSERT_OR_EXECUTE_MSG( | 368 | ASSERT_OR_EXECUTE_MSG( |
| 402 | BufferDescriptorA().size() > buffer_index, { return {}; }, | 369 | BufferDescriptorA().size() > buffer_index, { return {}; }, |
| 403 | "BufferDescriptorA invalid buffer_index {}", buffer_index); | 370 | "BufferDescriptorA invalid buffer_index {}", buffer_index); |
| 404 | auto& read_buffer = read_buffer_a[buffer_index]; | 371 | return gm.Read(BufferDescriptorA()[buffer_index].Address(), |
| 405 | return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | 372 | BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); |
| 406 | BufferDescriptorA()[buffer_index].Size(), | ||
| 407 | &read_buffer_data_a[buffer_index]); | ||
| 408 | } else { | 373 | } else { |
| 409 | ASSERT_OR_EXECUTE_MSG( | 374 | ASSERT_OR_EXECUTE_MSG( |
| 410 | BufferDescriptorX().size() > buffer_index, { return {}; }, | 375 | BufferDescriptorX().size() > buffer_index, { return {}; }, |
| 411 | "BufferDescriptorX invalid buffer_index {}", buffer_index); | 376 | "BufferDescriptorX invalid buffer_index {}", buffer_index); |
| 412 | auto& read_buffer = read_buffer_x[buffer_index]; | 377 | return gm.Read(BufferDescriptorX()[buffer_index].Address(), |
| 413 | return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | 378 | BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); |
| 414 | BufferDescriptorX()[buffer_index].Size(), | ||
| 415 | &read_buffer_data_x[buffer_index]); | ||
| 416 | } | 379 | } |
| 417 | } | 380 | } |
| 418 | 381 | ||
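The deleted blocks kept function-local static thread_local CpuGuestMemory objects, which bound the Memory reference of whichever HLERequestContext first ran on a given thread; once Memory became per-process, that cached reference could point at the wrong (or a destroyed) process. The replacement constructs a throwaway wrapper per call and keeps only the scratch storage as mutable members of the context. A simplified standalone sketch of the corrected pattern, with illustrative names (GuestView, RequestContext):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

// Stand-in guest address space: a toy 4 KiB flat buffer.
struct Memory {
    std::vector<uint8_t> backing = std::vector<uint8_t>(4096);
};

// Transient reader: constructed per call, so it always binds the calling
// context's Memory instead of whatever Memory a thread saw first.
class GuestView {
public:
    explicit GuestView(Memory& mem) : m_mem{mem} {}

    std::span<const uint8_t> Read(uint64_t addr, size_t size, std::vector<uint8_t>* backing) {
        backing->resize(size);
        std::memcpy(backing->data(), m_mem.backing.data() + addr, size);
        return *backing;
    }

private:
    Memory& m_mem;
};

class RequestContext {
public:
    explicit RequestContext(Memory& mem) : memory{mem} {}

    std::span<const uint8_t> ReadBufferA(size_t index) const {
        GuestView view{memory};                     // rebuilt on every call
        return view.Read(0, 16, &scratch_a[index]); // member storage outlives the view
    }

private:
    Memory& memory;
    mutable std::array<std::vector<uint8_t>, 3> scratch_a{};
};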
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h index 440737db5..8329d7265 100644 --- a/src/core/hle/service/hle_ipc.h +++ b/src/core/hle/service/hle_ipc.h | |||
| @@ -41,6 +41,8 @@ class KernelCore; | |||
| 41 | class KHandleTable; | 41 | class KHandleTable; |
| 42 | class KProcess; | 42 | class KProcess; |
| 43 | class KServerSession; | 43 | class KServerSession; |
| 44 | template <typename T> | ||
| 45 | class KScopedAutoObject; | ||
| 44 | class KThread; | 46 | class KThread; |
| 45 | } // namespace Kernel | 47 | } // namespace Kernel |
| 46 | 48 | ||
| @@ -424,6 +426,9 @@ private: | |||
| 424 | 426 | ||
| 425 | Kernel::KernelCore& kernel; | 427 | Kernel::KernelCore& kernel; |
| 426 | Core::Memory::Memory& memory; | 428 | Core::Memory::Memory& memory; |
| 429 | |||
| 430 | mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{}; | ||
| 431 | mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{}; | ||
| 427 | }; | 432 | }; |
| 428 | 433 | ||
| 429 | } // namespace Service | 434 | } // namespace Service |
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp index 37ca24f5d..21ef57d27 100644 --- a/src/core/hle/service/nvdrv/core/container.cpp +++ b/src/core/hle/service/nvdrv/core/container.cpp | |||
| @@ -2,27 +2,135 @@ | |||
| 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors | 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later | 3 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <deque> | ||
| 7 | #include <mutex> | ||
| 8 | |||
| 9 | #include "core/hle/kernel/k_process.h" | ||
| 5 | #include "core/hle/service/nvdrv/core/container.h" | 10 | #include "core/hle/service/nvdrv/core/container.h" |
| 11 | #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||
| 6 | #include "core/hle/service/nvdrv/core/nvmap.h" | 12 | #include "core/hle/service/nvdrv/core/nvmap.h" |
| 7 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 13 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| 14 | #include "core/memory.h" | ||
| 8 | #include "video_core/host1x/host1x.h" | 15 | #include "video_core/host1x/host1x.h" |
| 9 | 16 | ||
| 10 | namespace Service::Nvidia::NvCore { | 17 | namespace Service::Nvidia::NvCore { |
| 11 | 18 | ||
| 19 | Session::Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_) | ||
| 20 | : id{id_}, process{process_}, asid{asid_}, has_preallocated_area{}, mapper{}, is_active{} {} | ||
| 21 | |||
| 22 | Session::~Session() = default; | ||
| 23 | |||
| 12 | struct ContainerImpl { | 24 | struct ContainerImpl { |
| 13 | explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_) | 25 | explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_) |
| 14 | : file{host1x_}, manager{host1x_}, device_file_data{} {} | 26 | : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {} |
| 27 | Tegra::Host1x::Host1x& host1x; | ||
| 15 | NvMap file; | 28 | NvMap file; |
| 16 | SyncpointManager manager; | 29 | SyncpointManager manager; |
| 17 | Container::Host1xDeviceFileData device_file_data; | 30 | Container::Host1xDeviceFileData device_file_data; |
| 31 | std::deque<Session> sessions; | ||
| 32 | size_t new_ids{}; | ||
| 33 | std::deque<size_t> id_pool; | ||
| 34 | std::mutex session_guard; | ||
| 18 | }; | 35 | }; |
| 19 | 36 | ||
| 20 | Container::Container(Tegra::Host1x::Host1x& host1x_) { | 37 | Container::Container(Tegra::Host1x::Host1x& host1x_) { |
| 21 | impl = std::make_unique<ContainerImpl>(host1x_); | 38 | impl = std::make_unique<ContainerImpl>(*this, host1x_); |
| 22 | } | 39 | } |
| 23 | 40 | ||
| 24 | Container::~Container() = default; | 41 | Container::~Container() = default; |
| 25 | 42 | ||
| 43 | SessionId Container::OpenSession(Kernel::KProcess* process) { | ||
| 44 | using namespace Common::Literals; | ||
| 45 | |||
| 46 | std::scoped_lock lk(impl->session_guard); | ||
| 47 | for (auto& session : impl->sessions) { | ||
| 48 | if (!session.is_active) { | ||
| 49 | continue; | ||
| 50 | } | ||
| 51 | if (session.process == process) { | ||
| 52 | return session.id; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | size_t new_id{}; | ||
| 56 | auto* memory_interface = &process->GetMemory(); | ||
| 57 | auto& smmu = impl->host1x.MemoryManager(); | ||
| 58 | auto asid = smmu.RegisterProcess(memory_interface); | ||
| 59 | if (!impl->id_pool.empty()) { | ||
| 60 | new_id = impl->id_pool.front(); | ||
| 61 | impl->id_pool.pop_front(); | ||
| 62 | impl->sessions[new_id] = Session{SessionId{new_id}, process, asid}; | ||
| 63 | } else { | ||
| 64 | new_id = impl->new_ids++; | ||
| 65 | impl->sessions.emplace_back(SessionId{new_id}, process, asid); | ||
| 66 | } | ||
| 67 | auto& session = impl->sessions[new_id]; | ||
| 68 | session.is_active = true; | ||
| 69 | // Optimization: preallocate a device mapping for the application's largest heap block | ||
| 70 | if (process->IsApplication()) { | ||
| 71 | auto& page_table = process->GetPageTable().GetBasePageTable(); | ||
| 72 | auto heap_start = page_table.GetHeapRegionStart(); | ||
| 73 | |||
| 74 | Kernel::KProcessAddress cur_addr = heap_start; | ||
| 75 | size_t region_size = 0; | ||
| 76 | VAddr region_start = 0; | ||
| 77 | while (true) { | ||
| 78 | Kernel::KMemoryInfo mem_info{}; | ||
| 79 | Kernel::Svc::PageInfo page_info{}; | ||
| 80 | R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info), | ||
| 81 | cur_addr)); | ||
| 82 | auto svc_mem_info = mem_info.GetSvcMemoryInfo(); | ||
| 83 | |||
| 84 | // Check if this memory block is heap. | ||
| 85 | if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { | ||
| 86 | if (svc_mem_info.size > region_size) { | ||
| 87 | region_size = svc_mem_info.size; | ||
| 88 | region_start = svc_mem_info.base_address; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | // Check if we're done. | ||
| 93 | const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size; | ||
| 94 | if (next_address <= GetInteger(cur_addr)) { | ||
| 95 | break; | ||
| 96 | } | ||
| 97 | |||
| 98 | cur_addr = next_address; | ||
| 99 | } | ||
| 100 | session.has_preallocated_area = false; | ||
| 101 | auto start_region = region_size >= 32_MiB ? smmu.Allocate(region_size) : 0; | ||
| 102 | if (start_region != 0) { | ||
| 103 | session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size, | ||
| 104 | asid, impl->host1x); | ||
| 105 | smmu.TrackContinuity(start_region, region_start, region_size, asid); | ||
| 106 | session.has_preallocated_area = true; | ||
| 107 | LOG_DEBUG(Debug, "Preallocation created!"); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | return SessionId{new_id}; | ||
| 111 | } | ||
| 112 | |||
| 113 | void Container::CloseSession(SessionId session_id) { | ||
| 114 | std::scoped_lock lk(impl->session_guard); | ||
| 115 | auto& session = impl->sessions[session_id.id]; | ||
| 116 | auto& smmu = impl->host1x.MemoryManager(); | ||
| 117 | if (session.has_preallocated_area) { | ||
| 118 | const DAddr region_start = session.mapper->GetRegionStart(); | ||
| 119 | const size_t region_size = session.mapper->GetRegionSize(); | ||
| 120 | session.mapper.reset(); | ||
| 121 | smmu.Free(region_start, region_size); | ||
| 122 | session.has_preallocated_area = false; | ||
| 123 | } | ||
| 124 | session.is_active = false; | ||
| 125 | smmu.UnregisterProcess(impl->sessions[session_id.id].asid); | ||
| 126 | impl->id_pool.emplace_front(session_id.id); | ||
| 127 | } | ||
| 128 | |||
| 129 | Session* Container::GetSession(SessionId session_id) { | ||
| 130 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 131 | return &impl->sessions[session_id.id]; | ||
| 132 | } | ||
| 133 | |||
| 26 | NvMap& Container::GetNvMapFile() { | 134 | NvMap& Container::GetNvMapFile() { |
| 27 | return impl->file; | 135 | return impl->file; |
| 28 | } | 136 | } |
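OpenSession and CloseSession above amount to a small free-list id allocator: a closed session's id goes back to the front of id_pool and is handed out again before new_ids is bumped, so the sessions deque stays dense and SessionId values stay small. The pattern in isolation; IdPool is an illustrative reduction:

#include <cstddef>
#include <deque>
#include <mutex>

// Free-list id allocation: recycled ids are preferred over minting new ones.
class IdPool {
public:
    size_t Open() {
        std::scoped_lock lk{guard};
        if (!free_ids.empty()) {
            const size_t id = free_ids.front();
            free_ids.pop_front();
            return id;
        }
        return next_id++;
    }

    void Close(size_t id) {
        std::scoped_lock lk{guard};
        free_ids.emplace_front(id);
    }

private:
    std::mutex guard;
    std::deque<size_t> free_ids;
    size_t next_id{};
};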
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h index b4b63ac90..b4d3938a8 100644 --- a/src/core/hle/service/nvdrv/core/container.h +++ b/src/core/hle/service/nvdrv/core/container.h | |||
| @@ -8,24 +8,56 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <unordered_map> | 9 | #include <unordered_map> |
| 10 | 10 | ||
| 11 | #include "core/device_memory_manager.h" | ||
| 11 | #include "core/hle/service/nvdrv/nvdata.h" | 12 | #include "core/hle/service/nvdrv/nvdata.h" |
| 12 | 13 | ||
| 14 | namespace Kernel { | ||
| 15 | class KProcess; | ||
| 16 | } | ||
| 17 | |||
| 13 | namespace Tegra::Host1x { | 18 | namespace Tegra::Host1x { |
| 14 | class Host1x; | 19 | class Host1x; |
| 15 | } // namespace Tegra::Host1x | 20 | } // namespace Tegra::Host1x |
| 16 | 21 | ||
| 17 | namespace Service::Nvidia::NvCore { | 22 | namespace Service::Nvidia::NvCore { |
| 18 | 23 | ||
| 24 | class HeapMapper; | ||
| 19 | class NvMap; | 25 | class NvMap; |
| 20 | class SyncpointManager; | 26 | class SyncpointManager; |
| 21 | 27 | ||
| 22 | struct ContainerImpl; | 28 | struct ContainerImpl; |
| 23 | 29 | ||
| 30 | struct SessionId { | ||
| 31 | size_t id; | ||
| 32 | }; | ||
| 33 | |||
| 34 | struct Session { | ||
| 35 | Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_); | ||
| 36 | ~Session(); | ||
| 37 | |||
| 38 | Session(const Session&) = delete; | ||
| 39 | Session& operator=(const Session&) = delete; | ||
| 40 | Session(Session&&) = default; | ||
| 41 | Session& operator=(Session&&) = default; | ||
| 42 | |||
| 43 | SessionId id; | ||
| 44 | Kernel::KProcess* process; | ||
| 45 | Core::Asid asid; | ||
| 46 | bool has_preallocated_area{}; | ||
| 47 | std::unique_ptr<HeapMapper> mapper{}; | ||
| 48 | bool is_active{}; | ||
| 49 | }; | ||
| 50 | |||
| 24 | class Container { | 51 | class Container { |
| 25 | public: | 52 | public: |
| 26 | explicit Container(Tegra::Host1x::Host1x& host1x); | 53 | explicit Container(Tegra::Host1x::Host1x& host1x); |
| 27 | ~Container(); | 54 | ~Container(); |
| 28 | 55 | ||
| 56 | SessionId OpenSession(Kernel::KProcess* process); | ||
| 57 | void CloseSession(SessionId id); | ||
| 58 | |||
| 59 | Session* GetSession(SessionId id); | ||
| 60 | |||
| 29 | NvMap& GetNvMapFile(); | 61 | NvMap& GetNvMapFile(); |
| 30 | 62 | ||
| 31 | const NvMap& GetNvMapFile() const; | 63 | const NvMap& GetNvMapFile() const; |
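Session deletes its copy operations because it owns a unique_ptr<HeapMapper>, yet the slot reuse in Container's deque still works because the move operations stay defaulted. A compile-checkable reduction of that design choice; Mapper and Session here are stand-ins:

#include <deque>
#include <memory>
#include <utility>

struct Mapper {};

// Move-only because it owns its mapper; copies are deleted, moves defaulted.
struct Session {
    Session() = default;
    Session(const Session&) = delete;
    Session& operator=(const Session&) = delete;
    Session(Session&&) = default;
    Session& operator=(Session&&) = default;

    std::unique_ptr<Mapper> mapper{};
};

int main() {
    std::deque<Session> sessions;
    sessions.emplace_back();              // grow with a fresh slot
    Session replacement{};
    sessions[0] = std::move(replacement); // reuse a pooled slot by move
    return 0;
}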
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp new file mode 100644 index 000000000..096dc5deb --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp | |||
| @@ -0,0 +1,175 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <mutex> | ||
| 5 | |||
| 6 | #include <boost/container/small_vector.hpp> | ||
| 7 | #define BOOST_NO_MT | ||
| 8 | #include <boost/pool/detail/mutex.hpp> | ||
| 9 | #undef BOOST_NO_MT | ||
| 10 | #include <boost/icl/interval.hpp> | ||
| 11 | #include <boost/icl/interval_base_set.hpp> | ||
| 12 | #include <boost/icl/interval_set.hpp> | ||
| 13 | #include <boost/icl/split_interval_map.hpp> | ||
| 14 | #include <boost/pool/pool.hpp> | ||
| 15 | #include <boost/pool/pool_alloc.hpp> | ||
| 16 | #include <boost/pool/poolfwd.hpp> | ||
| 17 | |||
| 18 | #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||
| 19 | #include "video_core/host1x/host1x.h" | ||
| 20 | |||
| 21 | namespace boost { | ||
| 22 | template <typename T> | ||
| 23 | class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace Service::Nvidia::NvCore { | ||
| 27 | |||
| 28 | using IntervalCompare = std::less<DAddr>; | ||
| 29 | using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; | ||
| 30 | using IntervalAllocator = boost::fast_pool_allocator<DAddr>; | ||
| 31 | using IntervalSet = boost::icl::interval_set<DAddr>; | ||
| 32 | using IntervalType = typename IntervalSet::interval_type; | ||
| 33 | |||
| 34 | template <typename Type> | ||
| 35 | struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { | ||
| 36 | // types | ||
| 37 | typedef counter_add_functor<Type> type; | ||
| 38 | typedef boost::icl::identity_based_inplace_combine<Type> base_type; | ||
| 39 | |||
| 40 | // public member functions | ||
| 41 | void operator()(Type& current, const Type& added) const { | ||
| 42 | current += added; | ||
| 43 | if (current < base_type::identity_element()) { | ||
| 44 | current = base_type::identity_element(); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | // public static functions | ||
| 49 | static void version(Type&){}; | ||
| 50 | }; | ||
| 51 | |||
| 52 | using OverlapCombine = counter_add_functor<int>; | ||
| 53 | using OverlapSection = boost::icl::inter_section<int>; | ||
| 54 | using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; | ||
| 55 | |||
| 56 | struct HeapMapper::HeapMapperInternal { | ||
| 57 | HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {} | ||
| 58 | ~HeapMapperInternal() = default; | ||
| 59 | |||
| 60 | template <typename Func> | ||
| 61 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||
| 62 | Func&& func) { | ||
| 63 | const DAddr start_address = cpu_addr; | ||
| 64 | const DAddr end_address = start_address + size; | ||
| 65 | const IntervalType search_interval{start_address, end_address}; | ||
| 66 | auto it = current_range.lower_bound(search_interval); | ||
| 67 | if (it == current_range.end()) { | ||
| 68 | return; | ||
| 69 | } | ||
| 70 | auto end_it = current_range.upper_bound(search_interval); | ||
| 71 | for (; it != end_it; it++) { | ||
| 72 | auto& inter = it->first; | ||
| 73 | DAddr inter_addr_end = inter.upper(); | ||
| 74 | DAddr inter_addr = inter.lower(); | ||
| 75 | if (inter_addr_end > end_address) { | ||
| 76 | inter_addr_end = end_address; | ||
| 77 | } | ||
| 78 | if (inter_addr < start_address) { | ||
| 79 | inter_addr = start_address; | ||
| 80 | } | ||
| 81 | func(inter_addr, inter_addr_end, it->second); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | void RemoveEachInOverlapCounter(OverlapCounter& current_range, | ||
| 86 | const IntervalType search_interval, int subtract_value) { | ||
| 87 | bool any_removals = false; | ||
| 88 | current_range.add(std::make_pair(search_interval, subtract_value)); | ||
| 89 | do { | ||
| 90 | any_removals = false; | ||
| 91 | auto it = current_range.lower_bound(search_interval); | ||
| 92 | if (it == current_range.end()) { | ||
| 93 | return; | ||
| 94 | } | ||
| 95 | auto end_it = current_range.upper_bound(search_interval); | ||
| 96 | for (; it != end_it; it++) { | ||
| 97 | if (it->second <= 0) { | ||
| 98 | any_removals = true; | ||
| 99 | current_range.erase(it); | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } while (any_removals); | ||
| 104 | } | ||
| 105 | |||
| 106 | IntervalSet base_set; | ||
| 107 | OverlapCounter mapping_overlaps; | ||
| 108 | Tegra::MaxwellDeviceMemoryManager& device_memory; | ||
| 109 | std::mutex guard; | ||
| 110 | }; | ||
| 111 | |||
| 112 | HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid, | ||
| 113 | Tegra::Host1x::Host1x& host1x) | ||
| 114 | : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_asid{asid} { | ||
| 115 | m_internal = std::make_unique<HeapMapperInternal>(host1x); | ||
| 116 | } | ||
| 117 | |||
| 118 | HeapMapper::~HeapMapper() { | ||
| 119 | m_internal->device_memory.Unmap(m_daddress, m_size); | ||
| 120 | } | ||
| 121 | |||
| 122 | DAddr HeapMapper::Map(VAddr start, size_t size) { | ||
| 123 | std::scoped_lock lk(m_internal->guard); | ||
| 124 | m_internal->base_set.clear(); | ||
| 125 | const IntervalType interval{start, start + size}; | ||
| 126 | m_internal->base_set.insert(interval); | ||
| 127 | m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, | ||
| 128 | [this](VAddr start_addr, VAddr end_addr, int) { | ||
| 129 | const IntervalType other{start_addr, end_addr}; | ||
| 130 | m_internal->base_set.subtract(other); | ||
| 131 | }); | ||
| 132 | if (!m_internal->base_set.empty()) { | ||
| 133 | auto it = m_internal->base_set.begin(); | ||
| 134 | auto end_it = m_internal->base_set.end(); | ||
| 135 | for (; it != end_it; it++) { | ||
| 136 | const VAddr inter_addr_end = it->upper(); | ||
| 137 | const VAddr inter_addr = it->lower(); | ||
| 138 | const size_t offset = inter_addr - m_vaddress; | ||
| 139 | const size_t sub_size = inter_addr_end - inter_addr; | ||
| 140 | m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size, | ||
| 141 | m_asid); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | m_internal->mapping_overlaps += std::make_pair(interval, 1); | ||
| 145 | m_internal->base_set.clear(); | ||
| 146 | return m_daddress + (start - m_vaddress); | ||
| 147 | } | ||
| 148 | |||
| 149 | void HeapMapper::Unmap(VAddr start, size_t size) { | ||
| 150 | std::scoped_lock lk(m_internal->guard); | ||
| 151 | m_internal->base_set.clear(); | ||
| 152 | m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size, | ||
| 153 | [this](VAddr start_addr, VAddr end_addr, int value) { | ||
| 154 | if (value <= 1) { | ||
| 155 | const IntervalType other{start_addr, end_addr}; | ||
| 156 | m_internal->base_set.insert(other); | ||
| 157 | } | ||
| 158 | }); | ||
| 159 | if (!m_internal->base_set.empty()) { | ||
| 160 | auto it = m_internal->base_set.begin(); | ||
| 161 | auto end_it = m_internal->base_set.end(); | ||
| 162 | for (; it != end_it; it++) { | ||
| 163 | const VAddr inter_addr_end = it->upper(); | ||
| 164 | const VAddr inter_addr = it->lower(); | ||
| 165 | const size_t offset = inter_addr - m_vaddress; | ||
| 166 | const size_t sub_size = inter_addr_end - inter_addr; | ||
| 167 | m_internal->device_memory.Unmap(m_daddress + offset, sub_size); | ||
| 168 | } | ||
| 169 | } | ||
| 170 | const IntervalType to_remove{start, start + size}; | ||
| 171 | m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1); | ||
| 172 | m_internal->base_set.clear(); | ||
| 173 | } | ||
| 174 | |||
| 175 | } // namespace Service::Nvidia::NvCore | ||
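ForEachInOverlapCounter and RemoveEachInOverlapCounter implement reference-counted mapping on top of a boost::icl split_interval_map with a saturating add combiner: Map only maps sub-ranges not already covered, and Unmap only unmaps sub-ranges whose count returns to zero. The counting idea is easier to see with a plain joining interval_map, where adding -1 removes zero-count segments automatically; a sketch under that simplification (the real code keeps split semantics so segment borders are preserved):

#include <cstdint>
#include <iostream>
#include <utility>

#include <boost/icl/interval_map.hpp>

using DAddr = std::uint64_t;
using Counter = boost::icl::interval_map<DAddr, int>;

int main() {
    Counter overlaps;
    const auto window = [](DAddr lo, DAddr hi) {
        return boost::icl::interval<DAddr>::right_open(lo, hi);
    };

    // Two pins cover overlapping windows of the same heap.
    overlaps += std::make_pair(window(0x1000, 0x3000), 1);
    overlaps += std::make_pair(window(0x2000, 0x4000), 1);
    // Now: [0x1000,0x2000)->1, [0x2000,0x3000)->2, [0x3000,0x4000)->1.
    for (const auto& [range, count] : overlaps) {
        std::cout << std::hex << range.lower() << ".." << range.upper() << " -> " << count << '\n';
    }

    // Releasing the second pin: segments that reach 0 vanish automatically,
    // which is what makes "unmap only where the count hits zero" cheap.
    overlaps += std::make_pair(window(0x2000, 0x4000), -1);
    return 0;
}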
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h new file mode 100644 index 000000000..491a12e4f --- /dev/null +++ b/src/core/hle/service/nvdrv/core/heap_mapper.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/device_memory_manager.h" | ||
| 10 | |||
| 11 | namespace Tegra::Host1x { | ||
| 12 | class Host1x; | ||
| 13 | } // namespace Tegra::Host1x | ||
| 14 | |||
| 15 | namespace Service::Nvidia::NvCore { | ||
| 16 | |||
| 17 | class HeapMapper { | ||
| 18 | public: | ||
| 19 | HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid, | ||
| 20 | Tegra::Host1x::Host1x& host1x); | ||
| 21 | ~HeapMapper(); | ||
| 22 | |||
| 23 | bool IsInBounds(VAddr start, size_t size) const { | ||
| 24 | VAddr end = start + size; | ||
| 25 | return start >= m_vaddress && end <= (m_vaddress + m_size); | ||
| 26 | } | ||
| 27 | |||
| 28 | DAddr Map(VAddr start, size_t size); | ||
| 29 | |||
| 30 | void Unmap(VAddr start, size_t size); | ||
| 31 | |||
| 32 | DAddr GetRegionStart() const { | ||
| 33 | return m_daddress; | ||
| 34 | } | ||
| 35 | |||
| 36 | size_t GetRegionSize() const { | ||
| 37 | return m_size; | ||
| 38 | } | ||
| 39 | |||
| 40 | private: | ||
| 41 | struct HeapMapperInternal; | ||
| 42 | VAddr m_vaddress; | ||
| 43 | DAddr m_daddress; | ||
| 44 | size_t m_size; | ||
| 45 | Core::Asid m_asid; | ||
| 46 | std::unique_ptr<HeapMapperInternal> m_internal; | ||
| 47 | }; | ||
| 48 | |||
| 49 | } // namespace Service::Nvidia::NvCore | ||
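Because the mapper mirrors one contiguous guest heap window at a fixed device base, Map's return value is pure offset arithmetic guarded by IsInBounds. A worked example with made-up addresses:

#include <cassert>
#include <cstddef>
#include <cstdint>

using VAddr = std::uint64_t;
using DAddr = std::uint64_t;

int main() {
    const VAddr heap_va = 0x8000'0000;    // guest heap base (m_vaddress)
    const DAddr heap_da = 0x2000'0000;    // mirrored device base (m_daddress)
    const size_t heap_size = 32ull << 20; // matches the 32 MiB preallocation gate

    const VAddr start = heap_va + 0x0123'4000;
    assert(start >= heap_va && start + 0x1000 <= heap_va + heap_size); // IsInBounds
    const DAddr mapped = heap_da + (start - heap_va);                  // Map's return value
    assert(mapped == 0x2123'4000);
    return 0;
}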
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index 0ca05257e..1b59c6b15 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp | |||
| @@ -2,14 +2,19 @@ | |||
| 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors | 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later | 3 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 4 | 4 | ||
| 5 | #include <functional> | ||
| 6 | |||
| 5 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/hle/service/nvdrv/core/container.h" | ||
| 11 | #include "core/hle/service/nvdrv/core/heap_mapper.h" | ||
| 8 | #include "core/hle/service/nvdrv/core/nvmap.h" | 12 | #include "core/hle/service/nvdrv/core/nvmap.h" |
| 9 | #include "core/memory.h" | 13 | #include "core/memory.h" |
| 10 | #include "video_core/host1x/host1x.h" | 14 | #include "video_core/host1x/host1x.h" |
| 11 | 15 | ||
| 12 | using Core::Memory::YUZU_PAGESIZE; | 16 | using Core::Memory::YUZU_PAGESIZE; |
| 17 | constexpr size_t BIG_PAGE_SIZE = YUZU_PAGESIZE * 16; | ||
| 13 | 18 | ||
| 14 | namespace Service::Nvidia::NvCore { | 19 | namespace Service::Nvidia::NvCore { |
| 15 | NvMap::Handle::Handle(u64 size_, Id id_) | 20 | NvMap::Handle::Handle(u64 size_, Id id_) |
| @@ -17,9 +22,9 @@ NvMap::Handle::Handle(u64 size_, Id id_) | |||
| 17 | flags.raw = 0; | 22 | flags.raw = 0; |
| 18 | } | 23 | } |
| 19 | 24 | ||
| 20 | NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { | 25 | NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, |
| 26 | NvCore::SessionId pSessionId) { | ||
| 21 | std::scoped_lock lock(mutex); | 27 | std::scoped_lock lock(mutex); |
| 22 | |||
| 23 | // Handles cannot be allocated twice | 28 | // Handles cannot be allocated twice |
| 24 | if (allocated) { | 29 | if (allocated) { |
| 25 | return NvResult::AccessDenied; | 30 | return NvResult::AccessDenied; |
| @@ -28,6 +33,7 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) | |||
| 28 | flags = pFlags; | 33 | flags = pFlags; |
| 29 | kind = pKind; | 34 | kind = pKind; |
| 30 | align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign; | 35 | align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign; |
| 36 | session_id = pSessionId; | ||
| 31 | 37 | ||
| 32 | // This flag is only applicable for handles with an address passed | 38 | // This flag is only applicable for handles with an address passed |
| 33 | if (pAddress) { | 39 | if (pAddress) { |
| @@ -63,7 +69,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) { | |||
| 63 | return NvResult::Success; | 69 | return NvResult::Success; |
| 64 | } | 70 | } |
| 65 | 71 | ||
| 66 | NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {} | 72 | NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {} |
| 67 | 73 | ||
| 68 | void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) { | 74 | void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) { |
| 69 | std::scoped_lock lock(handles_lock); | 75 | std::scoped_lock lock(handles_lock); |
| @@ -78,12 +84,30 @@ void NvMap::UnmapHandle(Handle& handle_description) { | |||
| 78 | handle_description.unmap_queue_entry.reset(); | 84 | handle_description.unmap_queue_entry.reset(); |
| 79 | } | 85 | } |
| 80 | 86 | ||
| 87 | // Free and unmap the handle from Host1x GMMU | ||
| 88 | if (handle_description.pin_virt_address) { | ||
| 89 | host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address), | ||
| 90 | handle_description.aligned_size); | ||
| 91 | host1x.Allocator().Free(handle_description.pin_virt_address, | ||
| 92 | static_cast<u32>(handle_description.aligned_size)); | ||
| 93 | handle_description.pin_virt_address = 0; | ||
| 94 | } | ||
| 95 | |||
| 81 | // Free and unmap the handle from the SMMU | 96 | // Free and unmap the handle from the SMMU |
| 82 | host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address), | 97 | const size_t map_size = handle_description.aligned_size; |
| 83 | handle_description.aligned_size); | 98 | if (!handle_description.in_heap) { |
| 84 | host1x.Allocator().Free(handle_description.pin_virt_address, | 99 | auto& smmu = host1x.MemoryManager(); |
| 85 | static_cast<u32>(handle_description.aligned_size)); | 100 | size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE); |
| 86 | handle_description.pin_virt_address = 0; | 101 | smmu.Unmap(handle_description.d_address, map_size); |
| 102 | smmu.Free(handle_description.d_address, static_cast<size_t>(aligned_up)); | ||
| 103 | handle_description.d_address = 0; | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | const VAddr vaddress = handle_description.address; | ||
| 107 | auto* session = core.GetSession(handle_description.session_id); | ||
| 108 | session->mapper->Unmap(vaddress, map_size); | ||
| 109 | handle_description.d_address = 0; | ||
| 110 | handle_description.in_heap = false; | ||
| 87 | } | 111 | } |
| 88 | 112 | ||
| 89 | bool NvMap::TryRemoveHandle(const Handle& handle_description) { | 113 | bool NvMap::TryRemoveHandle(const Handle& handle_description) { |
| @@ -124,22 +148,33 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) { | |||
| 124 | } | 148 | } |
| 125 | } | 149 | } |
| 126 | 150 | ||
| 127 | VAddr NvMap::GetHandleAddress(Handle::Id handle) { | 151 | DAddr NvMap::GetHandleAddress(Handle::Id handle) { |
| 128 | std::scoped_lock lock(handles_lock); | 152 | std::scoped_lock lock(handles_lock); |
| 129 | try { | 153 | try { |
| 130 | return handles.at(handle)->address; | 154 | return handles.at(handle)->d_address; |
| 131 | } catch (std::out_of_range&) { | 155 | } catch (std::out_of_range&) { |
| 132 | return 0; | 156 | return 0; |
| 133 | } | 157 | } |
| 134 | } | 158 | } |
| 135 | 159 | ||
| 136 | u32 NvMap::PinHandle(NvMap::Handle::Id handle) { | 160 | DAddr NvMap::PinHandle(NvMap::Handle::Id handle, bool low_area_pin) { |
| 137 | auto handle_description{GetHandle(handle)}; | 161 | auto handle_description{GetHandle(handle)}; |
| 138 | if (!handle_description) [[unlikely]] { | 162 | if (!handle_description) [[unlikely]] { |
| 139 | return 0; | 163 | return 0; |
| 140 | } | 164 | } |
| 141 | 165 | ||
| 142 | std::scoped_lock lock(handle_description->mutex); | 166 | std::scoped_lock lock(handle_description->mutex); |
| 167 | const auto map_low_area = [&] { | ||
| 168 | if (handle_description->pin_virt_address == 0) { | ||
| 169 | auto& gmmu_allocator = host1x.Allocator(); | ||
| 170 | auto& gmmu = host1x.GMMU(); | ||
| 171 | u32 address = | ||
| 172 | gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size)); | ||
| 173 | gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address, | ||
| 174 | handle_description->aligned_size); | ||
| 175 | handle_description->pin_virt_address = address; | ||
| 176 | } | ||
| 177 | }; | ||
| 143 | if (!handle_description->pins) { | 178 | if (!handle_description->pins) { |
| 144 | // If we're in the unmap queue we can just remove ourselves and return since we're already | 179 | // If we're in the unmap queue we can just remove ourselves and return since we're already |
| 145 | // mapped | 180 | // mapped |
| @@ -151,37 +186,58 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) { | |||
| 151 | unmap_queue.erase(*handle_description->unmap_queue_entry); | 186 | unmap_queue.erase(*handle_description->unmap_queue_entry); |
| 152 | handle_description->unmap_queue_entry.reset(); | 187 | handle_description->unmap_queue_entry.reset(); |
| 153 | 188 | ||
| 189 | if (low_area_pin) { | ||
| 190 | map_low_area(); | ||
| 191 | handle_description->pins++; | ||
| 192 | return static_cast<DAddr>(handle_description->pin_virt_address); | ||
| 193 | } | ||
| 194 | |||
| 154 | handle_description->pins++; | 195 | handle_description->pins++; |
| 155 | return handle_description->pin_virt_address; | 196 | return handle_description->d_address; |
| 156 | } | 197 | } |
| 157 | } | 198 | } |
| 158 | 199 | ||
| 200 | using namespace std::placeholders; | ||
| 159 | // If not then allocate some space and map it | 201 | // If not then allocate some space and map it |
| 160 | u32 address{}; | 202 | DAddr address{}; |
| 161 | auto& smmu_allocator = host1x.Allocator(); | 203 | auto& smmu = host1x.MemoryManager(); |
| 162 | auto& smmu_memory_manager = host1x.MemoryManager(); | 204 | auto* session = core.GetSession(handle_description->session_id); |
| 163 | while ((address = smmu_allocator.Allocate( | 205 | const VAddr vaddress = handle_description->address; |
| 164 | static_cast<u32>(handle_description->aligned_size))) == 0) { | 206 | const size_t map_size = handle_description->aligned_size; |
| 165 | // Free handles until the allocation succeeds | 207 | if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) { |
| 166 | std::scoped_lock queueLock(unmap_queue_lock); | 208 | handle_description->d_address = session->mapper->Map(vaddress, map_size); |
| 167 | if (auto freeHandleDesc{unmap_queue.front()}) { | 209 | handle_description->in_heap = true; |
| 168 | // Handles in the unmap queue are guaranteed not to be pinned so don't bother | 210 | } else { |
| 169 | // checking if they are before unmapping | 211 | size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE); |
| 170 | std::scoped_lock freeLock(freeHandleDesc->mutex); | 212 | while ((address = smmu.Allocate(aligned_up)) == 0) { |
| 171 | if (handle_description->pin_virt_address) | 213 | // Free handles until the allocation succeeds |
| 172 | UnmapHandle(*freeHandleDesc); | 214 | std::scoped_lock queueLock(unmap_queue_lock); |
| 173 | } else { | 215 | if (auto freeHandleDesc{unmap_queue.front()}) { |
| 174 | LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | 216 | // Handles in the unmap queue are guaranteed not to be pinned so don't bother |
| 217 | // checking if they are before unmapping | ||
| 218 | std::scoped_lock freeLock(freeHandleDesc->mutex); | ||
| 219 | if (handle_description->d_address) | ||
| 220 | UnmapHandle(*freeHandleDesc); | ||
| 221 | } else { | ||
| 222 | LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | ||
| 223 | } | ||
| 175 | } | 224 | } |
| 225 | |||
| 226 | handle_description->d_address = address; | ||
| 227 | smmu.Map(address, vaddress, map_size, session->asid, true); | ||
| 228 | handle_description->in_heap = false; | ||
| 176 | } | 229 | } |
| 230 | } | ||
| 177 | 231 | ||
| 178 | smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address, | 232 | if (low_area_pin) { |
| 179 | handle_description->aligned_size); | 233 | map_low_area(); |
| 180 | handle_description->pin_virt_address = address; | ||
| 181 | } | 234 | } |
| 182 | 235 | ||
| 183 | handle_description->pins++; | 236 | handle_description->pins++; |
| 184 | return handle_description->pin_virt_address; | 237 | if (low_area_pin) { |
| 238 | return static_cast<DAddr>(handle_description->pin_virt_address); | ||
| 239 | } | ||
| 240 | return handle_description->d_address; | ||
| 185 | } | 241 | } |
| 186 | 242 | ||
| 187 | void NvMap::UnpinHandle(Handle::Id handle) { | 243 | void NvMap::UnpinHandle(Handle::Id handle) { |
| @@ -232,7 +288,7 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna | |||
| 232 | LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); | 288 | LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); |
| 233 | } else if (handle_description->dupes == 0) { | 289 | } else if (handle_description->dupes == 0) { |
| 234 | // Force unmap the handle | 290 | // Force unmap the handle |
| 235 | if (handle_description->pin_virt_address) { | 291 | if (handle_description->d_address) { |
| 236 | std::scoped_lock queueLock(unmap_queue_lock); | 292 | std::scoped_lock queueLock(unmap_queue_lock); |
| 237 | UnmapHandle(*handle_description); | 293 | UnmapHandle(*handle_description); |
| 238 | } | 294 | } |
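PinHandle now has two mapping paths: a handle whose guest range lies inside the session's preallocated heap window is served by the HeapMapper and flagged in_heap, while everything else takes a big-page-aligned SMMU allocation, with the eviction loop shown above kicking in when the allocator is exhausted. The branch reduced to its essentials; the names and std::function parameters are illustrative:

#include <cstddef>
#include <cstdint>
#include <functional>

using VAddr = std::uint64_t;
using DAddr = std::uint64_t;

struct Handle {
    VAddr address;
    std::size_t size;
    DAddr d_address;
    bool in_heap;
};

// Two-path pin: heap-window handles go through the session's mapper,
// everything else gets a fresh SMMU range. The real code also retries the
// SMMU allocation after evicting unpinned handles from the unmap queue.
DAddr Pin(Handle& h, bool in_prealloc_window,
          const std::function<DAddr(VAddr, std::size_t)>& heap_map,
          const std::function<DAddr(std::size_t)>& smmu_alloc) {
    if (in_prealloc_window) {
        h.d_address = heap_map(h.address, h.size);
        h.in_heap = true;
    } else {
        h.d_address = smmu_alloc(h.size);
        h.in_heap = false;
    }
    return h.d_address;
}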
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index a8e573890..d7f695845 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | 14 | ||
| 15 | #include "common/bit_field.h" | 15 | #include "common/bit_field.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "core/hle/service/nvdrv/core/container.h" | ||
| 17 | #include "core/hle/service/nvdrv/nvdata.h" | 18 | #include "core/hle/service/nvdrv/nvdata.h" |
| 18 | 19 | ||
| 19 | namespace Tegra { | 20 | namespace Tegra { |
| @@ -25,6 +26,8 @@ class Host1x; | |||
| 25 | } // namespace Tegra | 26 | } // namespace Tegra |
| 26 | 27 | ||
| 27 | namespace Service::Nvidia::NvCore { | 28 | namespace Service::Nvidia::NvCore { |
| 29 | |||
| 30 | class Container; | ||
| 28 | /** | 31 | /** |
| 29 | * @brief The nvmap core class holds the global state for nvmap and provides methods to manage | 32 | * @brief The nvmap core class holds the global state for nvmap and provides methods to manage |
| 30 | * handles | 33 | * handles |
| @@ -48,7 +51,7 @@ public: | |||
| 48 | using Id = u32; | 51 | using Id = u32; |
| 49 | Id id; //!< A globally unique identifier for this handle | 52 | Id id; //!< A globally unique identifier for this handle |
| 50 | 53 | ||
| 51 | s32 pins{}; | 54 | s64 pins{}; |
| 52 | u32 pin_virt_address{}; | 55 | u32 pin_virt_address{}; |
| 53 | std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; | 56 | std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; |
| 54 | 57 | ||
| @@ -61,15 +64,18 @@ public: | |||
| 61 | } flags{}; | 64 | } flags{}; |
| 62 | static_assert(sizeof(Flags) == sizeof(u32)); | 65 | static_assert(sizeof(Flags) == sizeof(u32)); |
| 63 | 66 | ||
| 64 | u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to, | 67 | VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to, |
| 65 | //!< this can also be in the nvdrv tmem | 68 | //!< this can also be in the nvdrv tmem |
| 66 | bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC | 69 | bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC |
| 67 | //!< call | 70 | //!< call |
| 68 | 71 | ||
| 69 | u8 kind{}; //!< Used for memory compression | 72 | u8 kind{}; //!< Used for memory compression |
| 70 | bool allocated{}; //!< If the handle has been allocated with `Alloc` | 73 | bool allocated{}; //!< If the handle has been allocated with `Alloc` |
| 74 | bool in_heap{}; | ||
| 75 | NvCore::SessionId session_id{}; | ||
| 71 | 76 | ||
| 72 | u64 dma_map_addr{}; //! remove me after implementing pinning. | 77 | DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds |
| 78 | //!< to, this can also be in the nvdrv tmem | ||
| 73 | 79 | ||
| 74 | Handle(u64 size, Id id); | 80 | Handle(u64 size, Id id); |
| 75 | 81 | ||
| @@ -77,7 +83,8 @@ public: | |||
| 77 | * @brief Sets up the handle with the given memory config, can allocate memory from the tmem | 83 | * @brief Sets up the handle with the given memory config, can allocate memory from the tmem |
| 78 | * if a 0 address is passed | 84 | * if a 0 address is passed |
| 79 | */ | 85 | */ |
| 80 | [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress); | 86 | [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, |
| 87 | NvCore::SessionId pSessionId); | ||
| 81 | 88 | ||
| 82 | /** | 89 | /** |
| 83 | * @brief Increases the dupe counter of the handle for the given session | 90 | * @brief Increases the dupe counter of the handle for the given session |
| @@ -108,7 +115,7 @@ public: | |||
| 108 | bool can_unlock; //!< If the address region is ready to be unlocked | 115 | bool can_unlock; //!< If the address region is ready to be unlocked |
| 109 | }; | 116 | }; |
| 110 | 117 | ||
| 111 | explicit NvMap(Tegra::Host1x::Host1x& host1x); | 118 | explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x); |
| 112 | 119 | ||
| 113 | /** | 120 | /** |
| 114 | * @brief Creates an unallocated handle of the given size | 121 | * @brief Creates an unallocated handle of the given size |
| @@ -117,7 +124,7 @@ public: | |||
| 117 | 124 | ||
| 118 | std::shared_ptr<Handle> GetHandle(Handle::Id handle); | 125 | std::shared_ptr<Handle> GetHandle(Handle::Id handle); |
| 119 | 126 | ||
| 120 | VAddr GetHandleAddress(Handle::Id handle); | 127 | DAddr GetHandleAddress(Handle::Id handle); |
| 121 | 128 | ||
| 122 | /** | 129 | /** |
| 123 | * @brief Maps a handle into the SMMU address space | 130 | * @brief Maps a handle into the SMMU address space |
| @@ -125,7 +132,7 @@ public: | |||
| 125 | * number of calls to `UnpinHandle` | 132 | * number of calls to `UnpinHandle` |
| 126 | * @return The SMMU virtual address that the handle has been mapped to | 133 | * @return The SMMU virtual address that the handle has been mapped to |
| 127 | */ | 134 | */ |
| 128 | u32 PinHandle(Handle::Id handle); | 135 | DAddr PinHandle(Handle::Id handle, bool low_area_pin); |
| 129 | 136 | ||
| 130 | /** | 137 | /** |
| 131 | * @brief When this has been called an equal number of times to `PinHandle` for the supplied | 138 | * @brief When this has been called an equal number of times to `PinHandle` for the supplied |
| @@ -172,5 +179,7 @@ private: | |||
| 172 | * @return If the handle was removed from the map | 179 | * @return If the handle was removed from the map |
| 173 | */ | 180 | */ |
| 174 | bool TryRemoveHandle(const Handle& handle_description); | 181 | bool TryRemoveHandle(const Handle& handle_description); |
| 182 | |||
| 183 | Container& core; | ||
| 175 | }; | 184 | }; |
| 176 | } // namespace Service::Nvidia::NvCore | 185 | } // namespace Service::Nvidia::NvCore |
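The pin count (widened to s64 here) backs a simple contract: a handle stays device-mapped while pins > 0, and on the last unpin it is queued for deferred unmapping rather than torn down eagerly, so a prompt re-pin is cheap. The state machine in isolation; PinState is an illustrative stand-in for the fields on NvMap::Handle, whose real queue is the std::list in NvMap:

#include <cstdint>

struct PinState {
    std::int64_t pins{};
    bool queued_for_unmap{};
};

void Pin(PinState& s) {
    if (s.queued_for_unmap) {
        s.queued_for_unmap = false; // still mapped: just leave the queue
    }
    ++s.pins;
}

void Unpin(PinState& s) {
    if (--s.pins == 0) {
        s.queued_for_unmap = true; // deferred unmap; may be evicted later
    }
}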
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index a04538d5d..8adaddc60 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvdrv/core/container.h" | ||
| 10 | #include "core/hle/service/nvdrv/nvdata.h" | 11 | #include "core/hle/service/nvdrv/nvdata.h" |
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
| @@ -62,7 +63,7 @@ public: | |||
| 62 | * Called once a device is opened | 63 | * Called once a device is opened |
| 63 | * @param fd The device fd | 64 | * @param fd The device fd |
| 64 | */ | 65 | */ |
| 65 | virtual void OnOpen(DeviceFD fd) = 0; | 66 | virtual void OnOpen(NvCore::SessionId session_id, DeviceFD fd) = 0; |
| 66 | 67 | ||
| 67 | /** | 68 | /** |
| 68 | * Called once a device is closed | 69 | * Called once a device is closed |
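Every device's OnOpen now receives the nvdrv SessionId, so a device can later resolve per-process state (address space, heap mapper) instead of assuming a single global process. A minimal device showing the plumbing; ExampleDevice is hypothetical:

#include <cstddef>

struct SessionId {
    std::size_t id;
};
using DeviceFD = int;

class ExampleDevice {
public:
    void OnOpen(SessionId session_id, DeviceFD fd) {
        session = session_id; // remember which process opened us
    }
    void OnClose(DeviceFD fd) {}

private:
    SessionId session{};
};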
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 05a43d8dc..c1ebbd62d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -35,14 +35,14 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in | |||
| 35 | return NvResult::NotImplemented; | 35 | return NvResult::NotImplemented; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | void nvdisp_disp0::OnOpen(DeviceFD fd) {} | 38 | void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 39 | void nvdisp_disp0::OnClose(DeviceFD fd) {} | 39 | void nvdisp_disp0::OnClose(DeviceFD fd) {} |
| 40 | 40 | ||
| 41 | void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, | 41 | void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, |
| 42 | u32 height, u32 stride, android::BufferTransformFlags transform, | 42 | u32 height, u32 stride, android::BufferTransformFlags transform, |
| 43 | const Common::Rectangle<int>& crop_rect, | 43 | const Common::Rectangle<int>& crop_rect, |
| 44 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | 44 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { |
| 45 | const VAddr addr = nvmap.GetHandleAddress(buffer_handle); | 45 | const DAddr addr = nvmap.GetHandleAddress(buffer_handle); |
| 46 | LOG_TRACE(Service, | 46 | LOG_TRACE(Service, |
| 47 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | 47 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", |
| 48 | addr, offset, width, height, stride, format); | 48 | addr, offset, width, height, stride, format); |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index daee05fe8..5f13a50a2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -32,7 +32,7 @@ public: | |||
| 32 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 32 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 33 | std::span<u8> inline_output) override; | 33 | std::span<u8> inline_output) override; |
| 34 | 34 | ||
| 35 | void OnOpen(DeviceFD fd) override; | 35 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 36 | void OnClose(DeviceFD fd) override; | 36 | void OnClose(DeviceFD fd) override; |
| 37 | 37 | ||
| 38 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 38 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 6b3639008..e6646ba04 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -86,7 +86,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i | |||
| 86 | return NvResult::NotImplemented; | 86 | return NvResult::NotImplemented; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | void nvhost_as_gpu::OnOpen(DeviceFD fd) {} | 89 | void nvhost_as_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 90 | void nvhost_as_gpu::OnClose(DeviceFD fd) {} | 90 | void nvhost_as_gpu::OnClose(DeviceFD fd) {} |
| 91 | 91 | ||
| 92 | NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { | 92 | NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { |
| @@ -206,6 +206,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { | |||
| 206 | static_cast<u32>(aligned_size >> page_size_bits)); | 206 | static_cast<u32>(aligned_size >> page_size_bits)); |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | nvmap.UnpinHandle(mapping->handle); | ||
| 210 | |||
| 209 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state | 211 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state |
| 210 | // Only FreeSpace can unmap them fully | 212 | // Only FreeSpace can unmap them fully |
| 211 | if (mapping->sparse_alloc) { | 213 | if (mapping->sparse_alloc) { |
| @@ -293,12 +295,12 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | |||
| 293 | return NvResult::BadValue; | 295 | return NvResult::BadValue; |
| 294 | } | 296 | } |
| 295 | 297 | ||
| 296 | VAddr cpu_address{static_cast<VAddr>( | 298 | DAddr base = nvmap.PinHandle(entry.handle, false); |
| 297 | handle->address + | 299 | DAddr device_address{static_cast<DAddr>( |
| 298 | (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; | 300 | base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; |
| 299 | 301 | ||
| 300 | gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), | 302 | gmmu->Map(virtual_address, device_address, size, |
| 301 | use_big_pages); | 303 | static_cast<Tegra::PTEKind>(entry.kind), use_big_pages); |
| 302 | } | 304 | } |
| 303 | } | 305 | } |
| 304 | 306 | ||
| @@ -331,9 +333,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 331 | } | 333 | } |
| 332 | 334 | ||
| 333 | u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; | 335 | u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; |
| 334 | VAddr cpu_address{mapping->ptr + params.buffer_offset}; | 336 | VAddr device_address{mapping->ptr + params.buffer_offset}; |
| 335 | 337 | ||
| 336 | gmmu->Map(gpu_address, cpu_address, params.mapping_size, | 338 | gmmu->Map(gpu_address, device_address, params.mapping_size, |
| 337 | static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); | 339 | static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); |
| 338 | 340 | ||
| 339 | return NvResult::Success; | 341 | return NvResult::Success; |
| @@ -349,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 349 | return NvResult::BadValue; | 351 | return NvResult::BadValue; |
| 350 | } | 352 | } |
| 351 | 353 | ||
| 352 | VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; | 354 | DAddr device_address{ |
| 355 | static_cast<DAddr>(nvmap.PinHandle(params.handle, false) + params.buffer_offset)}; | ||
| 353 | u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; | 356 | u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; |
| 354 | 357 | ||
| 355 | bool big_page{[&]() { | 358 | bool big_page{[&]() { |
| @@ -373,15 +376,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 373 | } | 376 | } |
| 374 | 377 | ||
| 375 | const bool use_big_pages = alloc->second.big_pages && big_page; | 378 | const bool use_big_pages = alloc->second.big_pages && big_page; |
| 376 | gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), | 379 | gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind), |
| 377 | use_big_pages); | 380 | use_big_pages); |
| 378 | 381 | ||
| 379 | auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, | 382 | auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, |
| 380 | use_big_pages, alloc->second.sparse)}; | 383 | true, use_big_pages, alloc->second.sparse)}; |
| 381 | alloc->second.mappings.push_back(mapping); | 384 | alloc->second.mappings.push_back(mapping); |
| 382 | mapping_map[params.offset] = mapping; | 385 | mapping_map[params.offset] = mapping; |
| 383 | } else { | 386 | } else { |
| 384 | |||
| 385 | auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | 387 | auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; |
| 386 | u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; | 388 | u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; |
| 387 | u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | 389 | u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; |
| @@ -394,11 +396,11 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 394 | return NvResult::InsufficientMemory; | 396 | return NvResult::InsufficientMemory; |
| 395 | } | 397 | } |
| 396 | 398 | ||
| 397 | gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), | 399 | gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size), |
| 398 | static_cast<Tegra::PTEKind>(params.kind), big_page); | 400 | static_cast<Tegra::PTEKind>(params.kind), big_page); |
| 399 | 401 | ||
| 400 | auto mapping{ | 402 | auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, |
| 401 | std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; | 403 | false, big_page, false)}; |
| 402 | mapping_map[params.offset] = mapping; | 404 | mapping_map[params.offset] = mapping; |
| 403 | } | 405 | } |
| 404 | 406 | ||
| @@ -433,6 +435,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | |||
| 433 | gmmu->Unmap(params.offset, mapping->size); | 435 | gmmu->Unmap(params.offset, mapping->size); |
| 434 | } | 436 | } |
| 435 | 437 | ||
| 438 | nvmap.UnpinHandle(mapping->handle); | ||
| 439 | |||
| 436 | mapping_map.erase(params.offset); | 440 | mapping_map.erase(params.offset); |
| 437 | } catch (const std::out_of_range&) { | 441 | } catch (const std::out_of_range&) { |
| 438 | LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); | 442 | LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); |
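The nvhost_as_gpu hunks above establish the pattern used throughout the series: every path that hands a buffer to the GMMU first pins its nvmap handle to obtain a stable DAddr (PinHandle with a bool second argument, per the signatures in this diff), and every unmap path releases that pin. Below is a minimal model of that contract with stand-in names; NvMapModel is not NvCore::NvMap, and the idea that the bool selects a low, 32-bit-reachable pin area is an inference from the nvdec path further down, not something the diff states.

    // nvmap_pin_sketch.cpp - a model of the PinHandle/UnpinHandle contract.
    #include <cstdint>
    #include <unordered_map>

    using DAddr = std::uint64_t;

    class NvMapModel {
    public:
        // First pin assigns a stable device address; later pins are
        // refcounted so repeated map ioctls on the same handle stay balanced.
        DAddr PinHandle(std::uint32_t handle, bool low_area_pin) {
            auto& e = entries_[handle];
            if (e.pins++ == 0) {
                e.daddr = low_area_pin ? next_low_ : next_high_;
                (low_area_pin ? next_low_ : next_high_) += 0x10000;
            }
            return e.daddr;
        }
        // Last unpin makes the device address reclaimable again.
        void UnpinHandle(std::uint32_t handle) {
            auto& e = entries_[handle];
            if (e.pins > 0 && --e.pins == 0) {
                e.daddr = 0;
            }
        }
    private:
        struct Entry { DAddr daddr{}; int pins{}; };
        std::unordered_map<std::uint32_t, Entry> entries_;
        DAddr next_low_{0x10000};         // reachable in 32 bits
        DAddr next_high_{0x100000000ULL}; // above 4 GiB
    };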
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 79a21683d..7d0a99988 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -55,7 +55,7 @@ public: | |||
| 55 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 55 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 56 | std::span<u8> inline_output) override; | 56 | std::span<u8> inline_output) override; |
| 57 | 57 | ||
| 58 | void OnOpen(DeviceFD fd) override; | 58 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 59 | void OnClose(DeviceFD fd) override; | 59 | void OnClose(DeviceFD fd) override; |
| 60 | 60 | ||
| 61 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 61 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
| @@ -159,16 +159,18 @@ private: | |||
| 159 | NvCore::NvMap& nvmap; | 159 | NvCore::NvMap& nvmap; |
| 160 | 160 | ||
| 161 | struct Mapping { | 161 | struct Mapping { |
| 162 | VAddr ptr; | 162 | NvCore::NvMap::Handle::Id handle; |
| 163 | DAddr ptr; | ||
| 163 | u64 offset; | 164 | u64 offset; |
| 164 | u64 size; | 165 | u64 size; |
| 165 | bool fixed; | 166 | bool fixed; |
| 166 | bool big_page; // Only valid if fixed == false | 167 | bool big_page; // Only valid if fixed == false |
| 167 | bool sparse_alloc; | 168 | bool sparse_alloc; |
| 168 | 169 | ||
| 169 | Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) | 170 | Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_, |
| 170 | : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), | 171 | bool big_page_, bool sparse_alloc_) |
| 171 | sparse_alloc(sparse_alloc_) {} | 172 | : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), |
| 173 | big_page(big_page_), sparse_alloc(sparse_alloc_) {} | ||
| 172 | }; | 174 | }; |
| 173 | 175 | ||
| 174 | struct Allocation { | 176 | struct Allocation { |
| @@ -212,9 +214,6 @@ private: | |||
| 212 | bool initialised{}; | 214 | bool initialised{}; |
| 213 | } vm; | 215 | } vm; |
| 214 | std::shared_ptr<Tegra::MemoryManager> gmmu; | 216 | std::shared_ptr<Tegra::MemoryManager> gmmu; |
| 215 | |||
| 216 | // s32 channel{}; | ||
| 217 | // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; | ||
| 218 | }; | 217 | }; |
| 219 | 218 | ||
| 220 | } // namespace Service::Nvidia::Devices | 219 | } // namespace Service::Nvidia::Devices |
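One detail worth noting in the header change: Mapping now records the nvmap handle id precisely because the pin must outlive any single ioctl. It is taken in MapBufferEx or Remap and released by a later UnmapBuffer or FreeMappingLocked, so scope-based cleanup cannot express it; the id has to travel with the mapping. A sketch of the unmap side, building on the NvMapModel above (MappingModel is a stand-in, not the real struct):

    // The pin spans two ioctls, so the handle id stored at map time is
    // the only way the unmap path can find what to release.
    struct MappingModel {
        std::uint32_t handle; // which nvmap handle holds the pin
        DAddr ptr;            // pinned device address
        std::uint64_t size;
    };

    void UnmapModel(NvMapModel& nvmap, const MappingModel& m) {
        // the real code first calls gmmu->Unmap(offset, m.size)
        nvmap.UnpinHandle(m.handle); // pairs with PinHandle from the map ioctl
    }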
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index b8dd34e24..250d01de3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -76,7 +76,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inp | |||
| 76 | return NvResult::NotImplemented; | 76 | return NvResult::NotImplemented; |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void nvhost_ctrl::OnOpen(DeviceFD fd) {} | 79 | void nvhost_ctrl::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 80 | 80 | ||
| 81 | void nvhost_ctrl::OnClose(DeviceFD fd) {} | 81 | void nvhost_ctrl::OnClose(DeviceFD fd) {} |
| 82 | 82 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 992124b60..403f1a746 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | |||
| @@ -32,7 +32,7 @@ public: | |||
| 32 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 32 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 33 | std::span<u8> inline_output) override; | 33 | std::span<u8> inline_output) override; |
| 34 | 34 | ||
| 35 | void OnOpen(DeviceFD fd) override; | 35 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 36 | void OnClose(DeviceFD fd) override; | 36 | void OnClose(DeviceFD fd) override; |
| 37 | 37 | ||
| 38 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 38 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 3e0c96456..ddd85678b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | |||
| @@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> | |||
| 82 | return NvResult::NotImplemented; | 82 | return NvResult::NotImplemented; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} | 85 | void nvhost_ctrl_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 86 | void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} | 86 | void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} |
| 87 | 87 | ||
| 88 | NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) { | 88 | NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index d170299bd..d2ab05b21 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | |||
| @@ -28,7 +28,7 @@ public: | |||
| 28 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 28 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 29 | std::span<u8> inline_output) override; | 29 | std::span<u8> inline_output) override; |
| 30 | 30 | ||
| 31 | void OnOpen(DeviceFD fd) override; | 31 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 32 | void OnClose(DeviceFD fd) override; | 32 | void OnClose(DeviceFD fd) override; |
| 33 | 33 | ||
| 34 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 34 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index b0395c2f0..bf12d69a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -120,7 +120,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu | |||
| 120 | return NvResult::NotImplemented; | 120 | return NvResult::NotImplemented; |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | void nvhost_gpu::OnOpen(DeviceFD fd) {} | 123 | void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 124 | void nvhost_gpu::OnClose(DeviceFD fd) {} | 124 | void nvhost_gpu::OnClose(DeviceFD fd) {} |
| 125 | 125 | ||
| 126 | NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) { | 126 | NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 88fd228ff..e34a978db 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -47,7 +47,7 @@ public: | |||
| 47 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 47 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 48 | std::span<u8> inline_output) override; | 48 | std::span<u8> inline_output) override; |
| 49 | 49 | ||
| 50 | void OnOpen(DeviceFD fd) override; | 50 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 51 | void OnClose(DeviceFD fd) override; | 51 | void OnClose(DeviceFD fd) override; |
| 52 | 52 | ||
| 53 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 53 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index f43914e1b..2c0ac2a46 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | |||
| @@ -35,7 +35,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in | |||
| 35 | case 0x7: | 35 | case 0x7: |
| 36 | return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output); | 36 | return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output); |
| 37 | case 0x9: | 37 | case 0x9: |
| 38 | return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output); | 38 | return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output, fd); |
| 39 | case 0xa: | 39 | case 0xa: |
| 40 | return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output); | 40 | return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output); |
| 41 | default: | 41 | default: |
| @@ -68,9 +68,10 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in | |||
| 68 | return NvResult::NotImplemented; | 68 | return NvResult::NotImplemented; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void nvhost_nvdec::OnOpen(DeviceFD fd) { | 71 | void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { |
| 72 | LOG_INFO(Service_NVDRV, "NVDEC video stream started"); | 72 | LOG_INFO(Service_NVDRV, "NVDEC video stream started"); |
| 73 | system.SetNVDECActive(true); | 73 | system.SetNVDECActive(true); |
| 74 | sessions[fd] = session_id; | ||
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | void nvhost_nvdec::OnClose(DeviceFD fd) { | 77 | void nvhost_nvdec::OnClose(DeviceFD fd) { |
| @@ -81,6 +82,10 @@ void nvhost_nvdec::OnClose(DeviceFD fd) { | |||
| 81 | system.GPU().ClearCdmaInstance(iter->second); | 82 | system.GPU().ClearCdmaInstance(iter->second); |
| 82 | } | 83 | } |
| 83 | system.SetNVDECActive(false); | 84 | system.SetNVDECActive(false); |
| 85 | auto it = sessions.find(fd); | ||
| 86 | if (it != sessions.end()) { | ||
| 87 | sessions.erase(it); | ||
| 88 | } | ||
| 84 | } | 89 | } |
| 85 | 90 | ||
| 86 | } // namespace Service::Nvidia::Devices | 91 | } // namespace Service::Nvidia::Devices |
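nvhost_nvdec's OnOpen/OnClose now thread the caller's session through a per-fd table, and nvhost_vic and nvmap below do the same. A condensed model of that bookkeeping, with plain-integer stand-ins for DeviceFD and SessionId:

    // session_table_sketch.cpp - models the per-fd session map, not yuzu code.
    #include <cstdint>
    #include <unordered_map>

    using DeviceFD = std::int32_t;
    struct SessionId { std::size_t id; };

    class DeviceModel {
    public:
        void OnOpen(SessionId session_id, DeviceFD fd) {
            sessions_[fd] = session_id; // remember which client opened this fd
        }
        void OnClose(DeviceFD fd) {
            // erase-by-key is equivalent to the find-then-erase in the diff:
            // std::unordered_map::erase(key) is a no-op for absent keys
            sessions_.erase(fd);
        }
    private:
        std::unordered_map<DeviceFD, SessionId> sessions_;
    };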
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index ad2233c49..627686757 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | |||
| @@ -20,7 +20,7 @@ public: | |||
| 20 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 20 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 21 | std::span<u8> inline_output) override; | 21 | std::span<u8> inline_output) override; |
| 22 | 22 | ||
| 23 | void OnOpen(DeviceFD fd) override; | 23 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 24 | void OnClose(DeviceFD fd) override; | 24 | void OnClose(DeviceFD fd) override; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 74c701b95..a0a7bfa40 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/hle/kernel/k_process.h" | ||
| 11 | #include "core/hle/service/nvdrv/core/container.h" | 12 | #include "core/hle/service/nvdrv/core/container.h" |
| 12 | #include "core/hle/service/nvdrv/core/nvmap.h" | 13 | #include "core/hle/service/nvdrv/core/nvmap.h" |
| 13 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 14 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| @@ -95,6 +96,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
| 95 | offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); | 96 | offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); |
| 96 | 97 | ||
| 97 | auto& gpu = system.GPU(); | 98 | auto& gpu = system.GPU(); |
| 99 | auto* session = core.GetSession(sessions[fd]); | ||
| 100 | |||
| 98 | if (gpu.UseNvdec()) { | 101 | if (gpu.UseNvdec()) { |
| 99 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | 102 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { |
| 100 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; | 103 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; |
| @@ -106,8 +109,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
| 106 | const auto object = nvmap.GetHandle(cmd_buffer.memory_id); | 109 | const auto object = nvmap.GetHandle(cmd_buffer.memory_id); |
| 107 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | 110 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |
| 108 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | 111 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); |
| 109 | system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | 112 | session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), |
| 110 | cmdlist.size() * sizeof(u32)); | 113 | cmdlist.size() * sizeof(u32)); |
| 111 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); | 114 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); |
| 112 | } | 115 | } |
| 113 | // Some games expect command_buffers to be written back | 116 | // Some games expect command_buffers to be written back |
| @@ -133,10 +136,12 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { | |||
| 133 | return NvResult::Success; | 136 | return NvResult::Success; |
| 134 | } | 137 | } |
| 135 | 138 | ||
| 136 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) { | 139 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, |
| 140 | DeviceFD fd) { | ||
| 137 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); | 141 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); |
| 138 | for (size_t i = 0; i < num_entries; i++) { | 142 | for (size_t i = 0; i < num_entries; i++) { |
| 139 | entries[i].map_address = nvmap.PinHandle(entries[i].map_handle); | 143 | DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, true); |
| 144 | entries[i].map_address = static_cast<u32>(pin_address); | ||
| 140 | } | 145 | } |
| 141 | 146 | ||
| 142 | return NvResult::Success; | 147 | return NvResult::Success; |
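MapBuffer above is where the pin actually happens for nvdec and vic buffers: each entry's handle is pinned and the resulting device address is written back into a 32-bit ioctl field, which is presumably why this path pins with the second argument set to true (a low pin area) while the GPU paths pass false; again an inference, not something the diff spells out. Reusing the NvMapModel sketch:

    #include <span>

    struct MapBufferEntryModel {
        std::uint32_t map_handle;
        std::uint32_t map_address; // only 32 bits of room in the ioctl
    };

    void MapBuffers(NvMapModel& nvmap, std::span<MapBufferEntryModel> entries) {
        for (auto& e : entries) {
            const DAddr pinned = nvmap.PinHandle(e.map_handle, /*low_area_pin=*/true);
            e.map_address = static_cast<std::uint32_t>(pinned); // must fit below 4 GiB
        }
    }

The companion Submit change reads command lists through the owning session's process memory rather than the global application memory, which is what makes the driver usable from clients other than the application.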
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 7ce748e18..900db81d2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <deque> | 6 | #include <deque> |
| 7 | #include <unordered_map> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 9 | |||
| 8 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 9 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 10 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 12 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| @@ -111,7 +113,7 @@ protected: | |||
| 111 | NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd); | 113 | NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd); |
| 112 | NvResult GetSyncpoint(IoctlGetSyncpoint& params); | 114 | NvResult GetSyncpoint(IoctlGetSyncpoint& params); |
| 113 | NvResult GetWaitbase(IoctlGetWaitbase& params); | 115 | NvResult GetWaitbase(IoctlGetWaitbase& params); |
| 114 | NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); | 116 | NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd); |
| 115 | NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); | 117 | NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); |
| 116 | NvResult SetSubmitTimeout(u32 timeout); | 118 | NvResult SetSubmitTimeout(u32 timeout); |
| 117 | 119 | ||
| @@ -125,6 +127,7 @@ protected: | |||
| 125 | NvCore::NvMap& nvmap; | 127 | NvCore::NvMap& nvmap; |
| 126 | NvCore::ChannelType channel_type; | 128 | NvCore::ChannelType channel_type; |
| 127 | std::array<u32, MaxSyncPoints> device_syncpoints{}; | 129 | std::array<u32, MaxSyncPoints> device_syncpoints{}; |
| 130 | std::unordered_map<DeviceFD, NvCore::SessionId> sessions; | ||
| 128 | }; | 131 | }; |
| 129 | }; // namespace Devices | 132 | }; // namespace Devices |
| 130 | } // namespace Service::Nvidia | 133 | } // namespace Service::Nvidia |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 9e6b86458..f87d53f12 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | |||
| @@ -44,7 +44,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in | |||
| 44 | return NvResult::NotImplemented; | 44 | return NvResult::NotImplemented; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void nvhost_nvjpg::OnOpen(DeviceFD fd) {} | 47 | void nvhost_nvjpg::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} |
| 48 | void nvhost_nvjpg::OnClose(DeviceFD fd) {} | 48 | void nvhost_nvjpg::OnClose(DeviceFD fd) {} |
| 49 | 49 | ||
| 50 | NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) { | 50 | NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) { |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 790c97f6a..def9c254d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | |||
| @@ -22,7 +22,7 @@ public: | |||
| 22 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 22 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 23 | std::span<u8> inline_output) override; | 23 | std::span<u8> inline_output) override; |
| 24 | 24 | ||
| 25 | void OnOpen(DeviceFD fd) override; | 25 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 26 | void OnClose(DeviceFD fd) override; | 26 | void OnClose(DeviceFD fd) override; |
| 27 | 27 | ||
| 28 | private: | 28 | private: |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 87f8d7c22..bf090f5eb 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | |||
| @@ -33,7 +33,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu | |||
| 33 | case 0x3: | 33 | case 0x3: |
| 34 | return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output); | 34 | return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output); |
| 35 | case 0x9: | 35 | case 0x9: |
| 36 | return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output); | 36 | return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output, fd); |
| 37 | case 0xa: | 37 | case 0xa: |
| 38 | return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output); | 38 | return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output); |
| 39 | default: | 39 | default: |
| @@ -68,7 +68,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu | |||
| 68 | return NvResult::NotImplemented; | 68 | return NvResult::NotImplemented; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void nvhost_vic::OnOpen(DeviceFD fd) {} | 71 | void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { |
| 72 | sessions[fd] = session_id; | ||
| 73 | } | ||
| 72 | 74 | ||
| 73 | void nvhost_vic::OnClose(DeviceFD fd) { | 75 | void nvhost_vic::OnClose(DeviceFD fd) { |
| 74 | auto& host1x_file = core.Host1xDeviceFile(); | 76 | auto& host1x_file = core.Host1xDeviceFile(); |
| @@ -76,6 +78,7 @@ void nvhost_vic::OnClose(DeviceFD fd) { | |||
| 76 | if (iter != host1x_file.fd_to_id.end()) { | 78 | if (iter != host1x_file.fd_to_id.end()) { |
| 77 | system.GPU().ClearCdmaInstance(iter->second); | 79 | system.GPU().ClearCdmaInstance(iter->second); |
| 78 | } | 80 | } |
| 81 | sessions.erase(fd); | ||
| 79 | } | 82 | } |
| 80 | 83 | ||
| 81 | } // namespace Service::Nvidia::Devices | 84 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index cadbcb0a5..0cc04354a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h | |||
| @@ -19,7 +19,7 @@ public: | |||
| 19 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 19 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 20 | std::span<u8> inline_output) override; | 20 | std::span<u8> inline_output) override; |
| 21 | 21 | ||
| 22 | void OnOpen(DeviceFD fd) override; | 22 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 23 | void OnClose(DeviceFD fd) override; | 23 | void OnClose(DeviceFD fd) override; |
| 24 | }; | 24 | }; |
| 25 | } // namespace Service::Nvidia::Devices | 25 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 71b2e62ec..da61a3bfe 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -36,9 +36,9 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, | |||
| 36 | case 0x3: | 36 | case 0x3: |
| 37 | return WrapFixed(this, &nvmap::IocFromId, input, output); | 37 | return WrapFixed(this, &nvmap::IocFromId, input, output); |
| 38 | case 0x4: | 38 | case 0x4: |
| 39 | return WrapFixed(this, &nvmap::IocAlloc, input, output); | 39 | return WrapFixed(this, &nvmap::IocAlloc, input, output, fd); |
| 40 | case 0x5: | 40 | case 0x5: |
| 41 | return WrapFixed(this, &nvmap::IocFree, input, output); | 41 | return WrapFixed(this, &nvmap::IocFree, input, output, fd); |
| 42 | case 0x9: | 42 | case 0x9: |
| 43 | return WrapFixed(this, &nvmap::IocParam, input, output); | 43 | return WrapFixed(this, &nvmap::IocParam, input, output); |
| 44 | case 0xe: | 44 | case 0xe: |
| @@ -67,8 +67,15 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, st | |||
| 67 | return NvResult::NotImplemented; | 67 | return NvResult::NotImplemented; |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | void nvmap::OnOpen(DeviceFD fd) {} | 70 | void nvmap::OnOpen(NvCore::SessionId session_id, DeviceFD fd) { |
| 71 | void nvmap::OnClose(DeviceFD fd) {} | 71 | sessions[fd] = session_id; |
| 72 | } | ||
| 73 | void nvmap::OnClose(DeviceFD fd) { | ||
| 74 | auto it = sessions.find(fd); | ||
| 75 | if (it != sessions.end()) { | ||
| 76 | sessions.erase(it); | ||
| 77 | } | ||
| 78 | } | ||
| 72 | 79 | ||
| 73 | NvResult nvmap::IocCreate(IocCreateParams& params) { | 80 | NvResult nvmap::IocCreate(IocCreateParams& params) { |
| 74 | LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); | 81 | LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); |
| @@ -87,7 +94,7 @@ NvResult nvmap::IocCreate(IocCreateParams& params) { | |||
| 87 | return NvResult::Success; | 94 | return NvResult::Success; |
| 88 | } | 95 | } |
| 89 | 96 | ||
| 90 | NvResult nvmap::IocAlloc(IocAllocParams& params) { | 97 | NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) { |
| 91 | LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); | 98 | LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); |
| 92 | 99 | ||
| 93 | if (!params.handle) { | 100 | if (!params.handle) { |
| @@ -116,15 +123,15 @@ NvResult nvmap::IocAlloc(IocAllocParams& params) { | |||
| 116 | return NvResult::InsufficientMemory; | 123 | return NvResult::InsufficientMemory; |
| 117 | } | 124 | } |
| 118 | 125 | ||
| 119 | const auto result = | 126 | const auto result = handle_description->Alloc(params.flags, params.align, params.kind, |
| 120 | handle_description->Alloc(params.flags, params.align, params.kind, params.address); | 127 | params.address, sessions[fd]); |
| 121 | if (result != NvResult::Success) { | 128 | if (result != NvResult::Success) { |
| 122 | LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); | 129 | LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); |
| 123 | return result; | 130 | return result; |
| 124 | } | 131 | } |
| 125 | bool is_out_io{}; | 132 | bool is_out_io{}; |
| 126 | ASSERT(system.ApplicationProcess() | 133 | auto process = container.GetSession(sessions[fd])->process; |
| 127 | ->GetPageTable() | 134 | ASSERT(process->GetPageTable() |
| 128 | .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address, | 135 | .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address, |
| 129 | handle_description->size, | 136 | handle_description->size, |
| 130 | Kernel::KMemoryPermission::None, true, false) | 137 | Kernel::KMemoryPermission::None, true, false) |
| @@ -224,7 +231,7 @@ NvResult nvmap::IocParam(IocParamParams& params) { | |||
| 224 | return NvResult::Success; | 231 | return NvResult::Success; |
| 225 | } | 232 | } |
| 226 | 233 | ||
| 227 | NvResult nvmap::IocFree(IocFreeParams& params) { | 234 | NvResult nvmap::IocFree(IocFreeParams& params, DeviceFD fd) { |
| 228 | LOG_DEBUG(Service_NVDRV, "called"); | 235 | LOG_DEBUG(Service_NVDRV, "called"); |
| 229 | 236 | ||
| 230 | if (!params.handle) { | 237 | if (!params.handle) { |
| @@ -233,9 +240,9 @@ NvResult nvmap::IocFree(IocFreeParams& params) { | |||
| 233 | } | 240 | } |
| 234 | 241 | ||
| 235 | if (auto freeInfo{file.FreeHandle(params.handle, false)}) { | 242 | if (auto freeInfo{file.FreeHandle(params.handle, false)}) { |
| 243 | auto process = container.GetSession(sessions[fd])->process; | ||
| 236 | if (freeInfo->can_unlock) { | 244 | if (freeInfo->can_unlock) { |
| 237 | ASSERT(system.ApplicationProcess() | 245 | ASSERT(process->GetPageTable() |
| 238 | ->GetPageTable() | ||
| 239 | .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size) | 246 | .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size) |
| 240 | .IsSuccess()); | 247 | .IsSuccess()); |
| 241 | } | 248 | } |
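The nvmap ioctls now resolve the page table through the calling fd's session instead of assuming the application process, so LockForMapDeviceAddressSpace and UnlockForDeviceAddressSpace act on whichever process actually owns the memory. A stand-in for that lookup, reusing the DeviceFD/SessionId models above (ProcessModel and SessionModel are placeholders, not yuzu types):

    #include <vector>

    struct ProcessModel { /* owns a page table */ };
    struct SessionModel { ProcessModel* process; };

    ProcessModel* OwningProcess(const std::unordered_map<DeviceFD, SessionId>& sessions,
                                const std::vector<SessionModel>& session_table,
                                DeviceFD fd) {
        // The real code indexes with sessions[fd], which default-inserts for
        // an unknown fd; at() is used here so a missing OnOpen fails loudly.
        const SessionId sid = sessions.at(fd);
        return session_table[sid.id].process;
    }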
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 049c11028..d07d85f88 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h | |||
| @@ -33,7 +33,7 @@ public: | |||
| 33 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, | 33 | NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, |
| 34 | std::span<u8> inline_output) override; | 34 | std::span<u8> inline_output) override; |
| 35 | 35 | ||
| 36 | void OnOpen(DeviceFD fd) override; | 36 | void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; |
| 37 | void OnClose(DeviceFD fd) override; | 37 | void OnClose(DeviceFD fd) override; |
| 38 | 38 | ||
| 39 | enum class HandleParameterType : u32_le { | 39 | enum class HandleParameterType : u32_le { |
| @@ -100,11 +100,11 @@ public: | |||
| 100 | static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); | 100 | static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); |
| 101 | 101 | ||
| 102 | NvResult IocCreate(IocCreateParams& params); | 102 | NvResult IocCreate(IocCreateParams& params); |
| 103 | NvResult IocAlloc(IocAllocParams& params); | 103 | NvResult IocAlloc(IocAllocParams& params, DeviceFD fd); |
| 104 | NvResult IocGetId(IocGetIdParams& params); | 104 | NvResult IocGetId(IocGetIdParams& params); |
| 105 | NvResult IocFromId(IocFromIdParams& params); | 105 | NvResult IocFromId(IocFromIdParams& params); |
| 106 | NvResult IocParam(IocParamParams& params); | 106 | NvResult IocParam(IocParamParams& params); |
| 107 | NvResult IocFree(IocFreeParams& params); | 107 | NvResult IocFree(IocFreeParams& params, DeviceFD fd); |
| 108 | 108 | ||
| 109 | private: | 109 | private: |
| 110 | /// Id to use for the next handle that is created. | 110 | /// Id to use for the next handle that is created. |
| @@ -115,6 +115,7 @@ private: | |||
| 115 | 115 | ||
| 116 | NvCore::Container& container; | 116 | NvCore::Container& container; |
| 117 | NvCore::NvMap& file; | 117 | NvCore::NvMap& file; |
| 118 | std::unordered_map<DeviceFD, NvCore::SessionId> sessions; | ||
| 118 | }; | 119 | }; |
| 119 | 120 | ||
| 120 | } // namespace Service::Nvidia::Devices | 121 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 9e46ee8dd..cb256e5b4 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -45,13 +45,22 @@ void EventInterface::FreeEvent(Kernel::KEvent* event) { | |||
| 45 | void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) { | 45 | void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) { |
| 46 | auto server_manager = std::make_unique<ServerManager>(system); | 46 | auto server_manager = std::make_unique<ServerManager>(system); |
| 47 | auto module = std::make_shared<Module>(system); | 47 | auto module = std::make_shared<Module>(system); |
| 48 | server_manager->RegisterNamedService("nvdrv", std::make_shared<NVDRV>(system, module, "nvdrv")); | 48 | const auto NvdrvInterfaceFactoryForApplication = [&, module] { |
| 49 | server_manager->RegisterNamedService("nvdrv:a", | 49 | return std::make_shared<NVDRV>(system, module, "nvdrv"); |
| 50 | std::make_shared<NVDRV>(system, module, "nvdrv:a")); | 50 | }; |
| 51 | server_manager->RegisterNamedService("nvdrv:s", | 51 | const auto NvdrvInterfaceFactoryForApplets = [&, module] { |
| 52 | std::make_shared<NVDRV>(system, module, "nvdrv:s")); | 52 | return std::make_shared<NVDRV>(system, module, "nvdrv:a"); |
| 53 | server_manager->RegisterNamedService("nvdrv:t", | 53 | }; |
| 54 | std::make_shared<NVDRV>(system, module, "nvdrv:t")); | 54 | const auto NvdrvInterfaceFactoryForSysmodules = [&, module] { |
| 55 | return std::make_shared<NVDRV>(system, module, "nvdrv:s"); | ||
| 56 | }; | ||
| 57 | const auto NvdrvInterfaceFactoryForTesting = [&, module] { | ||
| 58 | return std::make_shared<NVDRV>(system, module, "nvdrv:t"); | ||
| 59 | }; | ||
| 60 | server_manager->RegisterNamedService("nvdrv", NvdrvInterfaceFactoryForApplication); | ||
| 61 | server_manager->RegisterNamedService("nvdrv:a", NvdrvInterfaceFactoryForApplets); | ||
| 62 | server_manager->RegisterNamedService("nvdrv:s", NvdrvInterfaceFactoryForSysmodules); | ||
| 63 | server_manager->RegisterNamedService("nvdrv:t", NvdrvInterfaceFactoryForTesting); | ||
| 55 | server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system)); | 64 | server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system)); |
| 56 | nvnflinger.SetNVDrvInstance(module); | 65 | nvnflinger.SetNVDrvInstance(module); |
| 57 | ServerManager::RunServer(std::move(server_manager)); | 66 | ServerManager::RunServer(std::move(server_manager)); |
| @@ -113,7 +122,7 @@ NvResult Module::VerifyFD(DeviceFD fd) const { | |||
| 113 | return NvResult::Success; | 122 | return NvResult::Success; |
| 114 | } | 123 | } |
| 115 | 124 | ||
| 116 | DeviceFD Module::Open(const std::string& device_name) { | 125 | DeviceFD Module::Open(const std::string& device_name, NvCore::SessionId session_id) { |
| 117 | auto it = builders.find(device_name); | 126 | auto it = builders.find(device_name); |
| 118 | if (it == builders.end()) { | 127 | if (it == builders.end()) { |
| 119 | LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name); | 128 | LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name); |
| @@ -124,7 +133,7 @@ DeviceFD Module::Open(const std::string& device_name) { | |||
| 124 | auto& builder = it->second; | 133 | auto& builder = it->second; |
| 125 | auto device = builder(fd)->second; | 134 | auto device = builder(fd)->second; |
| 126 | 135 | ||
| 127 | device->OnOpen(fd); | 136 | device->OnOpen(session_id, fd); |
| 128 | 137 | ||
| 129 | return fd; | 138 | return fd; |
| 130 | } | 139 | } |
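The service registration rework matters for the same reason: RegisterNamedService now takes a factory, so every client connection gets a fresh NVDRV interface while all instances share one Module. A simplified model of that shape (ServerManagerModel and friends are stand-ins):

    // factory_registration_sketch.cpp - not the real ServerManager API.
    #include <functional>
    #include <memory>
    #include <string>
    #include <unordered_map>

    struct ModuleModel {};
    struct InterfaceModel { std::shared_ptr<ModuleModel> module; std::string name; };
    using Factory = std::function<std::shared_ptr<InterfaceModel>()>;

    class ServerManagerModel {
    public:
        void RegisterNamedService(std::string name, Factory factory) {
            factories_[std::move(name)] = std::move(factory);
        }
        std::shared_ptr<InterfaceModel> Connect(const std::string& name) {
            return factories_.at(name)(); // new instance per connection
        }
    private:
        std::unordered_map<std::string, Factory> factories_;
    };

Per-connection instances are what make the session_id member added to nvdrv_interface.h meaningful; a single shared instance could only ever track one client.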
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index d8622b3ca..c594f0e5e 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h | |||
| @@ -77,7 +77,7 @@ public: | |||
| 77 | NvResult VerifyFD(DeviceFD fd) const; | 77 | NvResult VerifyFD(DeviceFD fd) const; |
| 78 | 78 | ||
| 79 | /// Opens a device node and returns a file descriptor to it. | 79 | /// Opens a device node and returns a file descriptor to it. |
| 80 | DeviceFD Open(const std::string& device_name); | 80 | DeviceFD Open(const std::string& device_name, NvCore::SessionId session_id); |
| 81 | 81 | ||
| 82 | /// Sends an ioctl command to the specified file descriptor. | 82 | /// Sends an ioctl command to the specified file descriptor. |
| 83 | NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output); | 83 | NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output); |
| @@ -93,6 +93,10 @@ public: | |||
| 93 | 93 | ||
| 94 | NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event); | 94 | NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event); |
| 95 | 95 | ||
| 96 | NvCore::Container& GetContainer() { | ||
| 97 | return container; | ||
| 98 | } | ||
| 99 | |||
| 96 | private: | 100 | private: |
| 97 | friend class EventInterface; | 101 | friend class EventInterface; |
| 98 | friend class Service::Nvnflinger::Nvnflinger; | 102 | friend class Service::Nvnflinger::Nvnflinger; |
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index c8a880e84..6e4825313 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp | |||
| @@ -3,8 +3,10 @@ | |||
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later | 3 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | 5 | #include "common/logging/log.h" |
| 6 | #include "common/scope_exit.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/hle/kernel/k_event.h" | 8 | #include "core/hle/kernel/k_event.h" |
| 9 | #include "core/hle/kernel/k_process.h" | ||
| 8 | #include "core/hle/kernel/k_readable_event.h" | 10 | #include "core/hle/kernel/k_readable_event.h" |
| 9 | #include "core/hle/service/ipc_helpers.h" | 11 | #include "core/hle/service/ipc_helpers.h" |
| 10 | #include "core/hle/service/nvdrv/nvdata.h" | 12 | #include "core/hle/service/nvdrv/nvdata.h" |
| @@ -37,7 +39,7 @@ void NVDRV::Open(HLERequestContext& ctx) { | |||
| 37 | return; | 39 | return; |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | DeviceFD fd = nvdrv->Open(device_name); | 42 | DeviceFD fd = nvdrv->Open(device_name, session_id); |
| 41 | 43 | ||
| 42 | rb.Push<DeviceFD>(fd); | 44 | rb.Push<DeviceFD>(fd); |
| 43 | rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed); | 45 | rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed); |
| @@ -150,12 +152,29 @@ void NVDRV::Close(HLERequestContext& ctx) { | |||
| 150 | 152 | ||
| 151 | void NVDRV::Initialize(HLERequestContext& ctx) { | 153 | void NVDRV::Initialize(HLERequestContext& ctx) { |
| 152 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); | 154 | LOG_WARNING(Service_NVDRV, "(STUBBED) called"); |
| 155 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 156 | SCOPE_EXIT({ | ||
| 157 | rb.Push(ResultSuccess); | ||
| 158 | rb.PushEnum(NvResult::Success); | ||
| 159 | }); | ||
| 153 | 160 | ||
| 154 | is_initialized = true; | 161 | if (is_initialized) { |
| 162 | // No need to initialize again | ||
| 163 | return; | ||
| 164 | } | ||
| 155 | 165 | ||
| 156 | IPC::ResponseBuilder rb{ctx, 3}; | 166 | IPC::RequestParser rp{ctx}; |
| 157 | rb.Push(ResultSuccess); | 167 | const auto process_handle{ctx.GetCopyHandle(0)}; |
| 158 | rb.PushEnum(NvResult::Success); | 168 | // The transfer memory is lent to nvdrv as a work buffer since nvdrv is |
| 169 | // unable to allocate as much memory on its own. For HLE it's unnecessary to handle it | ||
| 170 | [[maybe_unused]] const auto transfer_memory_handle{ctx.GetCopyHandle(1)}; | ||
| 171 | [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); | ||
| 172 | |||
| 173 | auto& container = nvdrv->GetContainer(); | ||
| 174 | auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle); | ||
| 175 | session_id = container.OpenSession(process.GetPointerUnsafe()); | ||
| 176 | |||
| 177 | is_initialized = true; | ||
| 159 | } | 178 | } |
| 160 | 179 | ||
| 161 | void NVDRV::QueryEvent(HLERequestContext& ctx) { | 180 | void NVDRV::QueryEvent(HLERequestContext& ctx) { |
| @@ -242,6 +261,9 @@ NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char* | |||
| 242 | RegisterHandlers(functions); | 261 | RegisterHandlers(functions); |
| 243 | } | 262 | } |
| 244 | 263 | ||
| 245 | NVDRV::~NVDRV() = default; | 264 | NVDRV::~NVDRV() { |
| 265 | auto& container = nvdrv->GetContainer(); | ||
| 266 | container.CloseSession(session_id); | ||
| 267 | } | ||
| 246 | 268 | ||
| 247 | } // namespace Service::Nvidia | 269 | } // namespace Service::Nvidia |
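Initialize now performs the real session handshake: it pulls the client's process handle out of the IPC copy handles, opens a container session for that process, and the destructor closes it again. The early-return restructuring relies on a scope guard so the already-initialized path and the full path push the same reply; a minimal stand-in for that pattern (not yuzu's SCOPE_EXIT macro):

    #include <utility>

    template <typename F>
    struct ScopeExit {
        F f;
        ~ScopeExit() { f(); }
    };
    template <typename F>
    ScopeExit<F> MakeScopeExit(F&& f) { return {std::forward<F>(f)}; }

    bool is_initialized = false;

    void Initialize(/* ctx omitted */) {
        auto reply = MakeScopeExit([] { /* push ResultSuccess + NvResult::Success */ });
        if (is_initialized) {
            return; // guard still sends the reply
        }
        // ... open a container session for the client process here ...
        is_initialized = true;
    }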
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 6e98115dc..f2195ae1e 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h | |||
| @@ -35,6 +35,7 @@ private: | |||
| 35 | 35 | ||
| 36 | u64 pid{}; | 36 | u64 pid{}; |
| 37 | bool is_initialized{}; | 37 | bool is_initialized{}; |
| 38 | NvCore::SessionId session_id{}; | ||
| 38 | Common::ScratchBuffer<u8> output_buffer; | 39 | Common::ScratchBuffer<u8> output_buffer; |
| 39 | Common::ScratchBuffer<u8> inline_output_buffer; | 40 | Common::ScratchBuffer<u8> inline_output_buffer; |
| 40 | }; | 41 | }; |
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp index 2fef6cc1a..86e272b41 100644 --- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp | |||
| @@ -87,19 +87,20 @@ Result CreateNvMapHandle(u32* out_nv_map_handle, Nvidia::Devices::nvmap& nvmap, | |||
| 87 | R_SUCCEED(); | 87 | R_SUCCEED(); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle) { | 90 | Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Nvidia::DeviceFD nvmap_fd) { |
| 91 | // Free the handle. | 91 | // Free the handle. |
| 92 | Nvidia::Devices::nvmap::IocFreeParams free_params{ | 92 | Nvidia::Devices::nvmap::IocFreeParams free_params{ |
| 93 | .handle = handle, | 93 | .handle = handle, |
| 94 | }; | 94 | }; |
| 95 | R_UNLESS(nvmap.IocFree(free_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); | 95 | R_UNLESS(nvmap.IocFree(free_params, nvmap_fd) == Nvidia::NvResult::Success, |
| 96 | VI::ResultOperationFailed); | ||
| 96 | 97 | ||
| 97 | // We succeeded. | 98 | // We succeeded. |
| 98 | R_SUCCEED(); | 99 | R_SUCCEED(); |
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer, | 102 | Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer, |
| 102 | u32 size) { | 103 | u32 size, Nvidia::DeviceFD nvmap_fd) { |
| 103 | // Assign the allocated memory to the handle. | 104 | // Assign the allocated memory to the handle. |
| 104 | Nvidia::Devices::nvmap::IocAllocParams alloc_params{ | 105 | Nvidia::Devices::nvmap::IocAllocParams alloc_params{ |
| 105 | .handle = handle, | 106 | .handle = handle, |
| @@ -109,16 +110,16 @@ Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::Proce | |||
| 109 | .kind = 0, | 110 | .kind = 0, |
| 110 | .address = GetInteger(buffer), | 111 | .address = GetInteger(buffer), |
| 111 | }; | 112 | }; |
| 112 | R_UNLESS(nvmap.IocAlloc(alloc_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); | 113 | R_UNLESS(nvmap.IocAlloc(alloc_params, nvmap_fd) == Nvidia::NvResult::Success, |
| 114 | VI::ResultOperationFailed); | ||
| 113 | 115 | ||
| 114 | // We succeeded. | 116 | // We succeeded. |
| 115 | R_SUCCEED(); | 117 | R_SUCCEED(); |
| 116 | } | 118 | } |
| 117 | 119 | ||
| 118 | Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, | 120 | Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd, |
| 119 | Common::ProcessAddress buffer, u32 size) { | 121 | Common::ProcessAddress buffer, u32 size) { |
| 120 | // Get the nvmap device. | 122 | // Get the nvmap device. |
| 121 | auto nvmap_fd = nvdrv.Open("/dev/nvmap"); | ||
| 122 | auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd); | 123 | auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd); |
| 123 | ASSERT(nvmap != nullptr); | 124 | ASSERT(nvmap != nullptr); |
| 124 | 125 | ||
| @@ -127,11 +128,11 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, | |||
| 127 | 128 | ||
| 128 | // Ensure we maintain a clean state on failure. | 129 | // Ensure we maintain a clean state on failure. |
| 129 | ON_RESULT_FAILURE { | 130 | ON_RESULT_FAILURE { |
| 130 | ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle))); | 131 | ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle, nvmap_fd))); |
| 131 | }; | 132 | }; |
| 132 | 133 | ||
| 133 | // Assign the allocated memory to the handle. | 134 | // Assign the allocated memory to the handle. |
| 134 | R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size)); | 135 | R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd)); |
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888; | 138 | constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888; |
| @@ -197,9 +198,13 @@ Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u | |||
| 197 | std::addressof(m_buffer_page_group), m_system, | 198 | std::addressof(m_buffer_page_group), m_system, |
| 198 | SharedBufferSize)); | 199 | SharedBufferSize)); |
| 199 | 200 | ||
| 201 | auto& container = m_nvdrv->GetContainer(); | ||
| 202 | m_session_id = container.OpenSession(m_system.ApplicationProcess()); | ||
| 203 | m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id); | ||
| 204 | |||
| 200 | // Create an nvmap handle for the buffer and assign the memory to it. | 205 | // Create an nvmap handle for the buffer and assign the memory to it. |
| 201 | R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, map_address, | 206 | R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd, |
| 202 | SharedBufferSize)); | 207 | map_address, SharedBufferSize)); |
| 203 | 208 | ||
| 204 | // Record the display id. | 209 | // Record the display id. |
| 205 | m_display_id = display_id; | 210 | m_display_id = display_id; |
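FbShareBufferManager follows the same discipline: it opens one container session for the application process and one /dev/nvmap fd during Initialize, then threads that fd through AllocNvMapHandle and FreeNvMapHandle instead of opening a fresh fd inside AllocateHandleForBuffer as before. A toy model of the caching, reusing the DeviceFD/SessionId stand-ins above (DriverModel stands in for Nvidia::Module):

    #include <string>

    struct DriverModel {
        DeviceFD next_fd{3};
        DeviceFD Open(const std::string& name, SessionId session) {
            return next_fd++; // the real driver builds the device behind the fd
        }
    };

    struct ShareBufferManagerModel {
        DeviceFD nvmap_fd{-1};
        void Initialize(DriverModel& drv, SessionId session) {
            nvmap_fd = drv.Open("/dev/nvmap", session); // opened once, reused
        }
    };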
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h index c809c01b4..033bf4bbe 100644 --- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h +++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "common/math_util.h" | 6 | #include "common/math_util.h" |
| 7 | #include "core/hle/service/nvdrv/core/container.h" | ||
| 8 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 7 | #include "core/hle/service/nvnflinger/nvnflinger.h" | 9 | #include "core/hle/service/nvnflinger/nvnflinger.h" |
| 8 | #include "core/hle/service/nvnflinger/ui/fence.h" | 10 | #include "core/hle/service/nvnflinger/ui/fence.h" |
| 9 | 11 | ||
| @@ -53,7 +55,8 @@ private: | |||
| 53 | u64 m_layer_id = 0; | 55 | u64 m_layer_id = 0; |
| 54 | u32 m_buffer_nvmap_handle = 0; | 56 | u32 m_buffer_nvmap_handle = 0; |
| 55 | SharedMemoryPoolLayout m_pool_layout = {}; | 57 | SharedMemoryPoolLayout m_pool_layout = {}; |
| 56 | | 58 | Nvidia::DeviceFD m_nvmap_fd = {}; |
| 59 | Nvidia::NvCore::SessionId m_session_id = {}; | ||
| 57 | std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group; | 60 | std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group; |
| 58 | 61 | ||
| 59 | std::mutex m_guard; | 62 | std::mutex m_guard; |
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index af6591370..71d6fdb0c 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp | |||
| @@ -124,7 +124,7 @@ void Nvnflinger::ShutdownLayers() { | |||
| 124 | 124 | ||
| 125 | void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { | 125 | void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { |
| 126 | nvdrv = std::move(instance); | 126 | nvdrv = std::move(instance); |
| 127 | disp_fd = nvdrv->Open("/dev/nvdisp_disp0"); | 127 | disp_fd = nvdrv->Open("/dev/nvdisp_disp0", {}); |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) { | 130 | std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) { |
diff --git a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp index ce70946ec..ede2a1193 100644 --- a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp +++ b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp | |||
| @@ -22,11 +22,13 @@ GraphicBuffer::GraphicBuffer(Service::Nvidia::NvCore::NvMap& nvmap, | |||
| 22 | : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) { | 22 | : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) { |
| 23 | if (this->BufferId() > 0) { | 23 | if (this->BufferId() > 0) { |
| 24 | m_nvmap->DuplicateHandle(this->BufferId(), true); | 24 | m_nvmap->DuplicateHandle(this->BufferId(), true); |
| 25 | m_nvmap->PinHandle(this->BufferId(), false); | ||
| 25 | } | 26 | } |
| 26 | } | 27 | } |
| 27 | 28 | ||
| 28 | GraphicBuffer::~GraphicBuffer() { | 29 | GraphicBuffer::~GraphicBuffer() { |
| 29 | if (m_nvmap != nullptr && this->BufferId() > 0) { | 30 | if (m_nvmap != nullptr && this->BufferId() > 0) { |
| 31 | m_nvmap->UnpinHandle(this->BufferId()); | ||
| 30 | m_nvmap->FreeHandle(this->BufferId(), true); | 32 | m_nvmap->FreeHandle(this->BufferId(), true); |
| 31 | } | 33 | } |
| 32 | } | 34 | } |
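GraphicBuffer closes the loop on the consumer side: a buffer is duplicated and pinned when constructed, and unpinned before its handle reference is dropped, so the device address stays valid for exactly the object's lifetime. In terms of the NvMapModel sketch:

    class GraphicBufferModel {
    public:
        GraphicBufferModel(NvMapModel& map, std::uint32_t id) : map_{map}, id_{id} {
            // the real code also calls DuplicateHandle(id) to keep the handle alive
            map_.PinHandle(id_, false); // stable device address for scanout
        }
        ~GraphicBufferModel() {
            map_.UnpinHandle(id_); // release the device mapping first...
            // ...then the real code drops the duplicated handle reference
        }
        GraphicBufferModel(const GraphicBufferModel&) = delete;
        GraphicBufferModel& operator=(const GraphicBufferModel&) = delete;
    private:
        NvMapModel& map_;
        std::uint32_t id_;
    };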
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8176a41be..1c218566f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -24,6 +24,8 @@ | |||
| 24 | #include "core/hle/kernel/k_process.h" | 24 | #include "core/hle/kernel/k_process.h" |
| 25 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 26 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 27 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 28 | #include "video_core/host1x/host1x.h" | ||
| 27 | #include "video_core/rasterizer_download_area.h" | 29 | #include "video_core/rasterizer_download_area.h" |
| 28 | 30 | ||
| 29 | namespace Core::Memory { | 31 | namespace Core::Memory { |
| @@ -637,17 +639,6 @@ struct Memory::Impl { | |||
| 637 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), | 639 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), |
| 638 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | 640 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); |
| 639 | 641 | ||
| 640 | // During boot, current_page_table might not be set yet, in which case we need not flush | ||
| 641 | if (system.IsPoweredOn()) { | ||
| 642 | auto& gpu = system.GPU(); | ||
| 643 | for (u64 i = 0; i < size; i++) { | ||
| 644 | const auto page = base + i; | ||
| 645 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | ||
| 646 | gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | } | ||
| 650 | |||
| 651 | const auto end = base + size; | 642 | const auto end = base + size; |
| 652 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | 643 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| 653 | base + page_table.pointers.size()); | 644 | base + page_table.pointers.size()); |
| @@ -811,21 +802,33 @@ struct Memory::Impl { | |||
| 811 | return true; | 802 | return true; |
| 812 | } | 803 | } |
| 813 | 804 | ||
| 814 | void HandleRasterizerDownload(VAddr address, size_t size) { | 805 | void HandleRasterizerDownload(VAddr v_address, size_t size) { |
| 806 | const auto* p = GetPointerImpl( | ||
| 807 | v_address, []() {}, []() {}); | ||
| 808 | if (!gpu_device_memory) [[unlikely]] { | ||
| 809 | gpu_device_memory = &system.Host1x().MemoryManager(); | ||
| 810 | } | ||
| 815 | const size_t core = system.GetCurrentHostThreadID(); | 811 | const size_t core = system.GetCurrentHostThreadID(); |
| 816 | auto& current_area = rasterizer_read_areas[core]; | 812 | auto& current_area = rasterizer_read_areas[core]; |
| 817 | const VAddr end_address = address + size; | 813 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 818 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 814 | const DAddr end_address = address + size; |
| 819 | [[likely]] { | 815 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 820 | return; | 816 | [[likely]] { |
| 821 | } | 817 | return; |
| 822 | current_area = system.GPU().OnCPURead(address, size); | 818 | } |
| 819 | current_area = system.GPU().OnCPURead(address, size); | ||
| 820 | }); | ||
| 823 | } | 821 | } |
| 824 | 822 | ||
| 825 | void HandleRasterizerWrite(VAddr address, size_t size) { | 823 | void HandleRasterizerWrite(VAddr v_address, size_t size) { |
| 824 | const auto* p = GetPointerImpl( | ||
| 825 | v_address, []() {}, []() {}); | ||
| 826 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; | 826 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 827 | const size_t core = std::min(system.GetCurrentHostThreadID(), | 827 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 828 | sys_core); // any other calls threads go to syscore. | 828 | sys_core); // any other calls threads go to syscore. |
| 829 | if (!gpu_device_memory) [[unlikely]] { | ||
| 830 | gpu_device_memory = &system.Host1x().MemoryManager(); | ||
| 831 | } | ||
| 829 | // Guard on sys_core; | 832 | // Guard on sys_core; |
| 830 | if (core == sys_core) [[unlikely]] { | 833 | if (core == sys_core) [[unlikely]] { |
| 831 | sys_core_guard.lock(); | 834 | sys_core_guard.lock(); |
| @@ -835,36 +838,53 @@ struct Memory::Impl { | |||
| 835 | sys_core_guard.unlock(); | 838 | sys_core_guard.unlock(); |
| 836 | } | 839 | } |
| 837 | }); | 840 | }); |
| 838 | auto& current_area = rasterizer_write_areas[core]; | 841 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 839 | VAddr subaddress = address >> YUZU_PAGEBITS; | 842 | auto& current_area = rasterizer_write_areas[core]; |
| 840 | bool do_collection = current_area.last_address == subaddress; | 843 | PAddr subaddress = address >> YUZU_PAGEBITS; |
| 841 | if (!do_collection) [[unlikely]] { | 844 | bool do_collection = current_area.last_address == subaddress; |
| 842 | do_collection = system.GPU().OnCPUWrite(address, size); | 845 | if (!do_collection) [[unlikely]] { |
| 843 | if (!do_collection) { | 846 | do_collection = system.GPU().OnCPUWrite(address, size); |
| 844 | return; | 847 | if (!do_collection) { |
| 848 | return; | ||
| 849 | } | ||
| 850 | current_area.last_address = subaddress; | ||
| 845 | } | 851 | } |
| 846 | current_area.last_address = subaddress; | 852 | gpu_dirty_managers[core].Collect(address, size); |
| 847 | } | 853 | }); |
| 848 | gpu_dirty_managers[core].Collect(address, size); | ||
| 849 | } | 854 | } |
| 850 | 855 | ||
| 851 | struct GPUDirtyState { | 856 | struct GPUDirtyState { |
| 852 | VAddr last_address; | 857 | PAddr last_address; |
| 853 | }; | 858 | }; |
| 854 | 859 | ||
| 855 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | 860 | void InvalidateGPUMemory(u8* p, size_t size) { |
| 856 | system.GPU().InvalidateRegion(GetInteger(dest_addr), size); | 861 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 857 | } | 862 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 858 | 863 | sys_core); // Any other calling threads go to sys_core. | |
| 859 | void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | 864 | if (!gpu_device_memory) [[unlikely]] { |
| 860 | system.GPU().FlushRegion(GetInteger(dest_addr), size); | 865 | gpu_device_memory = &system.Host1x().MemoryManager(); |
| 866 | } | ||
| 867 | // Guard on sys_core. | ||
| 868 | if (core == sys_core) [[unlikely]] { | ||
| 869 | sys_core_guard.lock(); | ||
| 870 | } | ||
| 871 | SCOPE_EXIT({ | ||
| 872 | if (core == sys_core) [[unlikely]] { | ||
| 873 | sys_core_guard.unlock(); | ||
| 874 | } | ||
| 875 | }); | ||
| 876 | auto& gpu = system.GPU(); | ||
| 877 | gpu_device_memory->ApplyOpOnPointer( | ||
| 878 | p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); }); | ||
| 861 | } | 879 | } |
| 862 | 880 | ||
| 863 | Core::System& system; | 881 | Core::System& system; |
| 882 | Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{}; | ||
| 864 | Common::PageTable* current_page_table = nullptr; | 883 | Common::PageTable* current_page_table = nullptr; |
| 865 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | 884 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> |
| 866 | rasterizer_read_areas{}; | 885 | rasterizer_read_areas{}; |
| 867 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | 886 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; |
| 887 | std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{}; | ||
| 868 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | 888 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; |
| 869 | std::mutex sys_core_guard; | 889 | std::mutex sys_core_guard; |
| 870 | 890 | ||
| @@ -1059,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) | |||
| 1059 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); | 1079 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); |
| 1060 | } | 1080 | } |
| 1061 | 1081 | ||
| 1062 | void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1063 | impl->InvalidateRegion(dest_addr, size); | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1067 | impl->FlushRegion(dest_addr, size); | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | 1082 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { |
| 1071 | [[maybe_unused]] bool mapped = true; | 1083 | [[maybe_unused]] bool mapped = true; |
| 1072 | [[maybe_unused]] bool rasterizer = false; | 1084 | [[maybe_unused]] bool rasterizer = false; |
| @@ -1078,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | |||
| 1078 | GetInteger(vaddr)); | 1090 | GetInteger(vaddr)); |
| 1079 | mapped = false; | 1091 | mapped = false; |
| 1080 | }, | 1092 | }, |
| 1081 | [&] { | 1093 | [&] { rasterizer = true; }); |
| 1082 | impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); | 1094 | if (rasterizer) { |
| 1083 | rasterizer = true; | 1095 | impl->InvalidateGPUMemory(ptr, size); |
| 1084 | }); | 1096 | } |
| 1085 | 1097 | ||
| 1086 | #ifdef __linux__ | 1098 | #ifdef __linux__ |
| 1087 | if (!rasterizer && mapped) { | 1099 | if (!rasterizer && mapped) { |
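The memory.cpp hunks above replace direct system.GPU().FlushRegion/InvalidateRegion calls with a two-step pattern: resolve the guest virtual address to a host pointer via GetPointerImpl, then let the lazily-fetched Tegra::MaxwellDeviceMemoryManager translate that pointer back into device addresses. A minimal sketch of the pattern, assuming only what the call sites show (ApplyOpOnPointer takes a host pointer, a per-core scratch buffer, and a DAddr callback; the helper name and the gpu_dirty_memory_manager.h include path are hypothetical):

    #include "common/scratch_buffer.h"
    #include "core/gpu_dirty_memory_manager.h" // path assumed
    #include "video_core/host1x/gpu_device_memory_manager.h"

    // Hypothetical helper mirroring the rewritten HandleRasterizerWrite flow.
    // ApplyOpOnPointer is assumed to invoke the callback once per device
    // address range backing the host pointer, falling back to `scratch`
    // when the span is not contiguous in device space.
    void CollectGpuWrite(Tegra::MaxwellDeviceMemoryManager& mm,
                         Common::ScratchBuffer<u32>& scratch,
                         Core::GPUDirtyMemoryManager& dirty,
                         u8* host_ptr, size_t size) {
        mm.ApplyOpOnPointer(host_ptr, scratch, [&](DAddr address) {
            dirty.Collect(address, size); // dirty tracking is now keyed by DAddr
        });
    }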
diff --git a/src/core/memory.h b/src/core/memory.h index dddfaf4a4..f7e6b297f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include "common/scratch_buffer.h" | 13 | #include "common/scratch_buffer.h" |
| 14 | #include "common/typed_address.h" | 14 | #include "common/typed_address.h" |
| 15 | #include "core/guest_memory.h" | ||
| 15 | #include "core/hle/result.h" | 16 | #include "core/hle/result.h" |
| 16 | 17 | ||
| 17 | namespace Common { | 18 | namespace Common { |
| @@ -486,10 +487,10 @@ public: | |||
| 486 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | 487 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |
| 487 | 488 | ||
| 488 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | 489 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); |
| 489 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); | 490 | |
| 490 | bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); | 491 | bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); |
| 492 | |||
| 491 | bool InvalidateSeparateHeap(void* fault_address); | 493 | bool InvalidateSeparateHeap(void* fault_address); |
| 492 | void FlushRegion(Common::ProcessAddress dest_addr, size_t size); | ||
| 493 | 494 | ||
| 494 | private: | 495 | private: |
| 495 | Core::System& system; | 496 | Core::System& system; |
| @@ -498,209 +499,9 @@ private: | |||
| 498 | std::unique_ptr<Impl> impl; | 499 | std::unique_ptr<Impl> impl; |
| 499 | }; | 500 | }; |
| 500 | 501 | ||
| 501 | enum GuestMemoryFlags : u32 { | ||
| 502 | Read = 1 << 0, | ||
| 503 | Write = 1 << 1, | ||
| 504 | Safe = 1 << 2, | ||
| 505 | Cached = 1 << 3, | ||
| 506 | |||
| 507 | SafeRead = Read | Safe, | ||
| 508 | SafeWrite = Write | Safe, | ||
| 509 | SafeReadWrite = SafeRead | SafeWrite, | ||
| 510 | SafeReadCachedWrite = SafeReadWrite | Cached, | ||
| 511 | |||
| 512 | UnsafeRead = Read, | ||
| 513 | UnsafeWrite = Write, | ||
| 514 | UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||
| 515 | UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||
| 516 | }; | ||
| 517 | |||
| 518 | namespace { | ||
| 519 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 520 | class GuestMemory { | ||
| 521 | using iterator = T*; | ||
| 522 | using const_iterator = const T*; | ||
| 523 | using value_type = T; | ||
| 524 | using element_type = T; | ||
| 525 | using iterator_category = std::contiguous_iterator_tag; | ||
| 526 | |||
| 527 | public: | ||
| 528 | GuestMemory() = delete; | ||
| 529 | explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||
| 530 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 531 | : m_memory{memory}, m_addr{addr}, m_size{size} { | ||
| 532 | static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||
| 533 | if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||
| 534 | Read(addr, size, backup); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | ~GuestMemory() = default; | ||
| 539 | |||
| 540 | T* data() noexcept { | ||
| 541 | return m_data_span.data(); | ||
| 542 | } | ||
| 543 | |||
| 544 | const T* data() const noexcept { | ||
| 545 | return m_data_span.data(); | ||
| 546 | } | ||
| 547 | |||
| 548 | size_t size() const noexcept { | ||
| 549 | return m_size; | ||
| 550 | } | ||
| 551 | |||
| 552 | size_t size_bytes() const noexcept { | ||
| 553 | return this->size() * sizeof(T); | ||
| 554 | } | ||
| 555 | |||
| 556 | [[nodiscard]] T* begin() noexcept { | ||
| 557 | return this->data(); | ||
| 558 | } | ||
| 559 | |||
| 560 | [[nodiscard]] const T* begin() const noexcept { | ||
| 561 | return this->data(); | ||
| 562 | } | ||
| 563 | |||
| 564 | [[nodiscard]] T* end() noexcept { | ||
| 565 | return this->data() + this->size(); | ||
| 566 | } | ||
| 567 | |||
| 568 | [[nodiscard]] const T* end() const noexcept { | ||
| 569 | return this->data() + this->size(); | ||
| 570 | } | ||
| 571 | |||
| 572 | T& operator[](size_t index) noexcept { | ||
| 573 | return m_data_span[index]; | ||
| 574 | } | ||
| 575 | |||
| 576 | const T& operator[](size_t index) const noexcept { | ||
| 577 | return m_data_span[index]; | ||
| 578 | } | ||
| 579 | |||
| 580 | void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||
| 581 | m_addr = addr; | ||
| 582 | m_size = size; | ||
| 583 | m_addr_changed = true; | ||
| 584 | } | ||
| 585 | |||
| 586 | std::span<T> Read(u64 addr, std::size_t size, | ||
| 587 | Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||
| 588 | m_addr = addr; | ||
| 589 | m_size = size; | ||
| 590 | if (m_size == 0) { | ||
| 591 | m_is_data_copy = true; | ||
| 592 | return {}; | ||
| 593 | } | ||
| 594 | |||
| 595 | if (this->TrySetSpan()) { | ||
| 596 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 597 | m_memory.FlushRegion(m_addr, this->size_bytes()); | ||
| 598 | } | ||
| 599 | } else { | ||
| 600 | if (backup) { | ||
| 601 | backup->resize_destructive(this->size()); | ||
| 602 | m_data_span = *backup; | ||
| 603 | } else { | ||
| 604 | m_data_copy.resize(this->size()); | ||
| 605 | m_data_span = std::span(m_data_copy); | ||
| 606 | } | ||
| 607 | m_is_data_copy = true; | ||
| 608 | m_span_valid = true; | ||
| 609 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 610 | m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||
| 611 | } else { | ||
| 612 | m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||
| 613 | } | ||
| 614 | } | ||
| 615 | return m_data_span; | ||
| 616 | } | ||
| 617 | |||
| 618 | void Write(std::span<T> write_data) noexcept { | ||
| 619 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 620 | m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||
| 621 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 622 | m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||
| 623 | } else { | ||
| 624 | m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||
| 625 | } | ||
| 626 | } | ||
| 627 | |||
| 628 | bool TrySetSpan() noexcept { | ||
| 629 | if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||
| 630 | m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||
| 631 | m_span_valid = true; | ||
| 632 | return true; | ||
| 633 | } | ||
| 634 | return false; | ||
| 635 | } | ||
| 636 | |||
| 637 | protected: | ||
| 638 | bool IsDataCopy() const noexcept { | ||
| 639 | return m_is_data_copy; | ||
| 640 | } | ||
| 641 | |||
| 642 | bool AddressChanged() const noexcept { | ||
| 643 | return m_addr_changed; | ||
| 644 | } | ||
| 645 | |||
| 646 | M& m_memory; | ||
| 647 | u64 m_addr{}; | ||
| 648 | size_t m_size{}; | ||
| 649 | std::span<T> m_data_span{}; | ||
| 650 | std::vector<T> m_data_copy{}; | ||
| 651 | bool m_span_valid{false}; | ||
| 652 | bool m_is_data_copy{false}; | ||
| 653 | bool m_addr_changed{false}; | ||
| 654 | }; | ||
| 655 | |||
| 656 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 657 | class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||
| 658 | public: | ||
| 659 | GuestMemoryScoped() = delete; | ||
| 660 | explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||
| 661 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 662 | : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||
| 663 | if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||
| 664 | if (!this->TrySetSpan()) { | ||
| 665 | if (backup) { | ||
| 666 | this->m_data_span = *backup; | ||
| 667 | this->m_span_valid = true; | ||
| 668 | this->m_is_data_copy = true; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | } | ||
| 672 | } | ||
| 673 | |||
| 674 | ~GuestMemoryScoped() { | ||
| 675 | if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||
| 676 | if (this->size() == 0) [[unlikely]] { | ||
| 677 | return; | ||
| 678 | } | ||
| 679 | |||
| 680 | if (this->AddressChanged() || this->IsDataCopy()) { | ||
| 681 | ASSERT(this->m_span_valid); | ||
| 682 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 683 | this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||
| 684 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 685 | this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||
| 686 | } else { | ||
| 687 | this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||
| 688 | } | ||
| 689 | } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || | ||
| 690 | (FLAGS & GuestMemoryFlags::Cached)) { | ||
| 691 | this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||
| 692 | } | ||
| 693 | } | ||
| 694 | } | ||
| 695 | }; | ||
| 696 | } // namespace | ||
| 697 | |||
| 698 | template <typename T, GuestMemoryFlags FLAGS> | 502 | template <typename T, GuestMemoryFlags FLAGS> |
| 699 | using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; | 503 | using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>; |
| 700 | template <typename T, GuestMemoryFlags FLAGS> | 504 | template <typename T, GuestMemoryFlags FLAGS> |
| 701 | using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; | 505 | using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>; |
| 702 | template <typename T, GuestMemoryFlags FLAGS> | 506 | |
| 703 | using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||
| 704 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 705 | using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||
| 706 | } // namespace Core::Memory | 507 | } // namespace Core::Memory |
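The large removal above relocates the GuestMemory/GuestMemoryScoped templates into the new core/guest_memory.h (included at the top of this header), leaving only the CPU aliases behind, now spelled with the fully qualified Core::Memory::Memory so the shared template can be reused for device memory elsewhere. Call sites keep the same shape; a short usage sketch, where memory, addr and count are placeholder values:

    // Read-only view: per the (relocated) template body above, Safe reads
    // flush cached GPU data before copying or mapping on construction.
    Core::Memory::CpuGuestMemory<u32, Core::Memory::GuestMemoryFlags::SafeRead>
        in(memory, addr, count);
    const u32 first = in[0];

    // Scoped read/write view: reads now, then writes back (or invalidates
    // the GPU range) in ~GuestMemoryScoped.
    Core::Memory::CpuGuestMemoryScoped<u32,
                                       Core::Memory::GuestMemoryFlags::SafeReadWrite>
        io(memory, addr, count);
    io[0] = first + 1;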
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 618793668..0e559a590 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -24,9 +24,8 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE; | |||
| 24 | class RasterizerInterface { | 24 | class RasterizerInterface { |
| 25 | public: | 25 | public: |
| 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | 27 | const u64 page_start{addr >> Core::DEVICE_PAGEBITS}; |
| 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | 28 | const u64 page_end{(addr + size + Core::DEVICE_PAGESIZE - 1) >> Core::DEVICE_PAGEBITS}; |
| 29 | Core::Memory::YUZU_PAGEBITS}; | ||
| 30 | for (u64 page = page_start; page < page_end; ++page) { | 29 | for (u64 page = page_start; page < page_end; ++page) { |
| 31 | int& value = page_table[page]; | 30 | int& value = page_table[page]; |
| 32 | value += delta; | 31 | value += delta; |
| @@ -40,7 +39,7 @@ public: | |||
| 40 | } | 39 | } |
| 41 | 40 | ||
| 42 | [[nodiscard]] int Count(VAddr addr) const noexcept { | 41 | [[nodiscard]] int Count(VAddr addr) const noexcept { |
| 43 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | 42 | const auto it = page_table.find(addr >> Core::DEVICE_PAGEBITS); |
| 44 | return it == page_table.end() ? 0 : it->second; | 43 | return it == page_table.end() ? 0 : it->second; |
| 45 | } | 44 | } |
| 46 | 45 | ||
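The tracker test now does its page counting with the device-space constants. Assuming Core::DEVICE_PAGEBITS keeps the old 4 KiB granularity (12 bits, same as the YUZU_PAGESIZE it replaces; an assumption, since the constant's definition is not part of this hunk), the range math above is unchanged:

    // Worked example of UpdatePagesCachedCount's range math with
    // DEVICE_PAGEBITS == 12 (4 KiB pages), addr = 0x1234, size = 0x2000:
    //   page_start = 0x1234 >> 12                    = 1
    //   page_end   = (0x1234 + 0x2000 + 0xFFF) >> 12 = 4
    // so pages 1, 2 and 3 each get `delta` applied to their counts.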
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c22c7631c..5ed0ad0ed 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -71,6 +71,8 @@ add_library(video_core STATIC | |||
| 71 | host1x/ffmpeg/ffmpeg.h | 71 | host1x/ffmpeg/ffmpeg.h |
| 72 | host1x/control.cpp | 72 | host1x/control.cpp |
| 73 | host1x/control.h | 73 | host1x/control.h |
| 74 | host1x/gpu_device_memory_manager.cpp | ||
| 75 | host1x/gpu_device_memory_manager.h | ||
| 74 | host1x/host1x.cpp | 76 | host1x/host1x.cpp |
| 75 | host1x/host1x.h | 77 | host1x/host1x.h |
| 76 | host1x/nvdec.cpp | 78 | host1x/nvdec.cpp |
| @@ -93,6 +95,7 @@ add_library(video_core STATIC | |||
| 93 | gpu.h | 95 | gpu.h |
| 94 | gpu_thread.cpp | 96 | gpu_thread.cpp |
| 95 | gpu_thread.h | 97 | gpu_thread.h |
| 98 | guest_memory.h | ||
| 96 | invalidation_accumulator.h | 99 | invalidation_accumulator.h |
| 97 | memory_manager.cpp | 100 | memory_manager.cpp |
| 98 | memory_manager.h | 101 | memory_manager.h |
| @@ -105,8 +108,6 @@ add_library(video_core STATIC | |||
| 105 | query_cache/query_stream.h | 108 | query_cache/query_stream.h |
| 106 | query_cache/types.h | 109 | query_cache/types.h |
| 107 | query_cache.h | 110 | query_cache.h |
| 108 | rasterizer_accelerated.cpp | ||
| 109 | rasterizer_accelerated.h | ||
| 110 | rasterizer_interface.h | 111 | rasterizer_interface.h |
| 111 | renderer_base.cpp | 112 | renderer_base.cpp |
| 112 | renderer_base.h | 113 | renderer_base.h |
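The build changes pair the new host1x/gpu_device_memory_manager.* and guest_memory.h with the removal of rasterizer_accelerated.*, whose cached-page accounting moves into the generic device memory manager. Based on how it is used throughout this diff, the new header plausibly reduces to a traits instantiation along these lines (a sketch of the assumed shape, not the file's verbatim contents):

    // Assumed shape of host1x/gpu_device_memory_manager.h: bind the generic
    // Core::DeviceMemoryManager to Maxwell-specific traits.
    namespace Tegra {
    struct MaxwellDeviceMethods;

    struct MaxwellDeviceTraits {
        static constexpr size_t device_virtual_bits = 34; // illustrative value
        using DeviceInterface = VideoCore::RasterizerInterface;
        using DeviceMethods = MaxwellDeviceMethods;
    };

    using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
    } // namespace Tegra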
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 0bb3bf8ae..40e98e395 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -33,13 +33,12 @@ struct NullBufferParams {}; | |||
| 33 | * | 33 | * |
| 34 | * The buffer size and address is forcefully aligned to CPU page boundaries. | 34 | * The buffer size and address is forcefully aligned to CPU page boundaries. |
| 35 | */ | 35 | */ |
| 36 | template <class RasterizerInterface> | ||
| 37 | class BufferBase { | 36 | class BufferBase { |
| 38 | public: | 37 | public: |
| 39 | static constexpr u64 BASE_PAGE_BITS = 16; | 38 | static constexpr u64 BASE_PAGE_BITS = 16; |
| 40 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; | 39 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |
| 41 | 40 | ||
| 42 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | 41 | explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_) |
| 43 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | 42 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} |
| 44 | 43 | ||
| 45 | explicit BufferBase(NullBufferParams) {} | 44 | explicit BufferBase(NullBufferParams) {} |
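With rasterizer tracking gone, BufferBase stops being a template over RasterizerInterface and its constructor shrinks to an address plus a size. A before/after sketch (variable names illustrative):

    // Before: BufferBase<RasterizerInterface> buf(rasterizer, cpu_addr, bytes);
    // After: a buffer is plain address + size; cached-page tracking lives in
    // the device memory manager instead.
    BufferBase buf(cpu_addr, size_bytes);      // VAddr, u64
    BufferBase null_buf(NullBufferParams{});   // sentinel slot, unchanged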
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6d1fc3887..b4bf369d1 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -8,16 +8,16 @@ | |||
| 8 | #include <numeric> | 8 | #include <numeric> |
| 9 | 9 | ||
| 10 | #include "video_core/buffer_cache/buffer_cache_base.h" | 10 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 11 | #include "video_core/guest_memory.h" | ||
| 12 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 11 | 13 | ||
| 12 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 13 | 15 | ||
| 14 | using Core::Memory::YUZU_PAGESIZE; | 16 | using Core::DEVICE_PAGESIZE; |
| 15 | 17 | ||
| 16 | template <class P> | 18 | template <class P> |
| 17 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 19 | BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_) |
| 18 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) | 20 | : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} { |
| 19 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{ | ||
| 20 | rasterizer} { | ||
| 21 | // Ensure the first slot is used for the null buffer | 21 | // Ensure the first slot is used for the null buffer |
| 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 23 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| @@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 29 | return; | 29 | return; |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | 32 | const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 33 | const s64 min_spacing_expected = device_memory - 1_GiB; | 33 | const s64 min_spacing_expected = device_local_memory - 1_GiB; |
| 34 | const s64 min_spacing_critical = device_memory - 512_MiB; | 34 | const s64 min_spacing_critical = device_local_memory - 512_MiB; |
| 35 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | 35 | const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); |
| 36 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | 36 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 37 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | 37 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 38 | minimum_memory = static_cast<u64>( | 38 | minimum_memory = static_cast<u64>( |
| 39 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | 39 | std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), |
| 40 | DEFAULT_EXPECTED_MEMORY)); | 40 | DEFAULT_EXPECTED_MEMORY)); |
| 41 | critical_memory = static_cast<u64>( | 41 | critical_memory = static_cast<u64>( |
| 42 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | 42 | std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), |
| 43 | DEFAULT_CRITICAL_MEMORY)); | 43 | DEFAULT_CRITICAL_MEMORY)); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| @@ -105,71 +105,71 @@ void BufferCache<P>::TickFrame() { | |||
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | template <class P> | 107 | template <class P> |
| 108 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | 108 | void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) { |
| 109 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | 109 | if (memory_tracker.IsRegionGpuModified(device_addr, size)) { |
| 110 | const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; | 110 | const IntervalType subtract_interval{device_addr, device_addr + size}; |
| 111 | ClearDownload(subtract_interval); | 111 | ClearDownload(subtract_interval); |
| 112 | common_ranges.subtract(subtract_interval); | 112 | common_ranges.subtract(subtract_interval); |
| 113 | } | 113 | } |
| 114 | memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); | 114 | memory_tracker.MarkRegionAsCpuModified(device_addr, size); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | template <class P> | 117 | template <class P> |
| 118 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 118 | void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) { |
| 119 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | 119 | const bool is_dirty = IsRegionRegistered(device_addr, size); |
| 120 | if (!is_dirty) { | 120 | if (!is_dirty) { |
| 121 | return; | 121 | return; |
| 122 | } | 122 | } |
| 123 | VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); | 123 | DAddr aligned_start = Common::AlignDown(device_addr, DEVICE_PAGESIZE); |
| 124 | VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); | 124 | DAddr aligned_end = Common::AlignUp(device_addr + size, DEVICE_PAGESIZE); |
| 125 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | 125 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { |
| 126 | WriteMemory(cpu_addr, size); | 126 | WriteMemory(device_addr, size); |
| 127 | return; | 127 | return; |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | tmp_buffer.resize_destructive(size); | 130 | tmp_buffer.resize_destructive(size); |
| 131 | cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); | 131 | device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size); |
| 132 | 132 | ||
| 133 | InlineMemoryImplementation(cpu_addr, size, tmp_buffer); | 133 | InlineMemoryImplementation(device_addr, size, tmp_buffer); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | template <class P> | 136 | template <class P> |
| 137 | bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | 137 | bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) { |
| 138 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | 138 | const bool is_dirty = IsRegionRegistered(device_addr, size); |
| 139 | if (!is_dirty) { | 139 | if (!is_dirty) { |
| 140 | return false; | 140 | return false; |
| 141 | } | 141 | } |
| 142 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | 142 | if (memory_tracker.IsRegionGpuModified(device_addr, size)) { |
| 143 | return true; | 143 | return true; |
| 144 | } | 144 | } |
| 145 | WriteMemory(cpu_addr, size); | 145 | WriteMemory(device_addr, size); |
| 146 | return false; | 146 | return false; |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | template <class P> | 149 | template <class P> |
| 150 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, | 150 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr, |
| 151 | u64 size) { | 151 | u64 size) { |
| 152 | std::optional<VideoCore::RasterizerDownloadArea> area{}; | 152 | std::optional<VideoCore::RasterizerDownloadArea> area{}; |
| 153 | area.emplace(); | 153 | area.emplace(); |
| 154 | VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE); | 154 | DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::DEVICE_PAGESIZE); |
| 155 | VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 155 | DAddr device_addr_end_aligned = Common::AlignUp(device_addr + size, Core::DEVICE_PAGESIZE); |
| 156 | area->start_address = cpu_addr_start_aligned; | 156 | area->start_address = device_addr_start_aligned; |
| 157 | area->end_address = cpu_addr_end_aligned; | 157 | area->end_address = device_addr_end_aligned; |
| 158 | if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) { | 158 | if (memory_tracker.IsRegionPreflushable(device_addr, size)) { |
| 159 | area->preemtive = true; | 159 | area->preemtive = true; |
| 160 | return area; | 160 | return area; |
| 161 | }; | 161 | }; |
| 162 | area->preemtive = | 162 | area->preemtive = !IsRegionGpuModified(device_addr_start_aligned, |
| 163 | !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned); | 163 | device_addr_end_aligned - device_addr_start_aligned); |
| 164 | memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned, | 164 | memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned, |
| 165 | cpu_addr_end_aligned - cpu_addr_start_aligned); | 165 | device_addr_end_aligned - device_addr_start_aligned); |
| 166 | return area; | 166 | return area; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | template <class P> | 169 | template <class P> |
| 170 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 170 | void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) { |
| 171 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 171 | ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) { |
| 172 | DownloadBufferMemory(buffer, cpu_addr, size); | 172 | DownloadBufferMemory(buffer, device_addr, size); |
| 173 | }); | 173 | }); |
| 174 | } | 174 | } |
| 175 | 175 | ||
| @@ -184,8 +184,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | |||
| 184 | 184 | ||
| 185 | template <class P> | 185 | template <class P> |
| 186 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | 186 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { |
| 187 | const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); | 187 | const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); |
| 188 | const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); | 188 | const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); |
| 189 | if (!cpu_src_address || !cpu_dest_address) { | 189 | if (!cpu_src_address || !cpu_dest_address) { |
| 190 | return false; | 190 | return false; |
| 191 | } | 191 | } |
| @@ -216,10 +216,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 216 | }}; | 216 | }}; |
| 217 | 217 | ||
| 218 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | 218 | boost::container::small_vector<IntervalType, 4> tmp_intervals; |
| 219 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | 219 | auto mirror = [&](DAddr base_address, DAddr base_address_end) { |
| 220 | const u64 size = base_address_end - base_address; | 220 | const u64 size = base_address_end - base_address; |
| 221 | const VAddr diff = base_address - *cpu_src_address; | 221 | const DAddr diff = base_address - *cpu_src_address; |
| 222 | const VAddr new_base_address = *cpu_dest_address + diff; | 222 | const DAddr new_base_address = *cpu_dest_address + diff; |
| 223 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 223 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 224 | tmp_intervals.push_back(add_interval); | 224 | tmp_intervals.push_back(add_interval); |
| 225 | uncommitted_ranges.add(add_interval); | 225 | uncommitted_ranges.add(add_interval); |
| @@ -239,15 +239,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 239 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 239 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( | 242 | Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> |
| 243 | cpu_memory, *cpu_src_address, amount, &tmp_buffer); | 243 | tmp(device_memory, *cpu_src_address, amount, &tmp_buffer); |
| 244 | tmp.SetAddressAndSize(*cpu_dest_address, amount); | 244 | tmp.SetAddressAndSize(*cpu_dest_address, amount); |
| 245 | return true; | 245 | return true; |
| 246 | } | 246 | } |
| 247 | 247 | ||
| 248 | template <class P> | 248 | template <class P> |
| 249 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | 249 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { |
| 250 | const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); | 250 | const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); |
| 251 | if (!cpu_dst_address) { | 251 | if (!cpu_dst_address) { |
| 252 | return false; | 252 | return false; |
| 253 | } | 253 | } |
| @@ -273,23 +273,23 @@ template <class P> | |||
| 273 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | 273 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
| 274 | ObtainBufferSynchronize sync_info, | 274 | ObtainBufferSynchronize sync_info, |
| 275 | ObtainBufferOperation post_op) { | 275 | ObtainBufferOperation post_op) { |
| 276 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 276 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 277 | if (!cpu_addr) { | 277 | if (!device_addr) { |
| 278 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | 278 | return {&slot_buffers[NULL_BUFFER_ID], 0}; |
| 279 | } | 279 | } |
| 280 | return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); | 280 | return ObtainCPUBuffer(*device_addr, size, sync_info, post_op); |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | template <class P> | 283 | template <class P> |
| 284 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | 284 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( |
| 285 | VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { | 285 | DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { |
| 286 | const BufferId buffer_id = FindBuffer(cpu_addr, size); | 286 | const BufferId buffer_id = FindBuffer(device_addr, size); |
| 287 | Buffer& buffer = slot_buffers[buffer_id]; | 287 | Buffer& buffer = slot_buffers[buffer_id]; |
| 288 | 288 | ||
| 289 | // synchronize op | 289 | // synchronize op |
| 290 | switch (sync_info) { | 290 | switch (sync_info) { |
| 291 | case ObtainBufferSynchronize::FullSynchronize: | 291 | case ObtainBufferSynchronize::FullSynchronize: |
| 292 | SynchronizeBuffer(buffer, cpu_addr, size); | 292 | SynchronizeBuffer(buffer, device_addr, size); |
| 293 | break; | 293 | break; |
| 294 | default: | 294 | default: |
| 295 | break; | 295 | break; |
| @@ -297,12 +297,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | |||
| 297 | 297 | ||
| 298 | switch (post_op) { | 298 | switch (post_op) { |
| 299 | case ObtainBufferOperation::MarkAsWritten: | 299 | case ObtainBufferOperation::MarkAsWritten: |
| 300 | MarkWrittenBuffer(buffer_id, cpu_addr, size); | 300 | MarkWrittenBuffer(buffer_id, device_addr, size); |
| 301 | break; | 301 | break; |
| 302 | case ObtainBufferOperation::DiscardWrite: { | 302 | case ObtainBufferOperation::DiscardWrite: { |
| 303 | VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); | 303 | DAddr device_addr_start = Common::AlignDown(device_addr, 64); |
| 304 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); | 304 | DAddr device_addr_end = Common::AlignUp(device_addr + size, 64); |
| 305 | IntervalType interval{cpu_addr_start, cpu_addr_end}; | 305 | IntervalType interval{device_addr_start, device_addr_end}; |
| 306 | ClearDownload(interval); | 306 | ClearDownload(interval); |
| 307 | common_ranges.subtract(interval); | 307 | common_ranges.subtract(interval); |
| 308 | break; | 308 | break; |
| @@ -311,15 +311,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | |||
| 311 | break; | 311 | break; |
| 312 | } | 312 | } |
| 313 | 313 | ||
| 314 | return {&buffer, buffer.Offset(cpu_addr)}; | 314 | return {&buffer, buffer.Offset(device_addr)}; |
| 315 | } | 315 | } |
| 316 | 316 | ||
| 317 | template <class P> | 317 | template <class P> |
| 318 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 318 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 319 | u32 size) { | 319 | u32 size) { |
| 320 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 320 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 321 | const Binding binding{ | 321 | const Binding binding{ |
| 322 | .cpu_addr = *cpu_addr, | 322 | .device_addr = *device_addr, |
| 323 | .size = size, | 323 | .size = size, |
| 324 | .buffer_id = BufferId{}, | 324 | .buffer_id = BufferId{}, |
| 325 | }; | 325 | }; |
| @@ -555,16 +555,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 555 | for (const IntervalSet& intervals : committed_ranges) { | 555 | for (const IntervalSet& intervals : committed_ranges) { |
| 556 | for (auto& interval : intervals) { | 556 | for (auto& interval : intervals) { |
| 557 | const std::size_t size = interval.upper() - interval.lower(); | 557 | const std::size_t size = interval.upper() - interval.lower(); |
| 558 | const VAddr cpu_addr = interval.lower(); | 558 | const DAddr device_addr = interval.lower(); |
| 559 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 559 | ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 560 | const VAddr buffer_start = buffer.CpuAddr(); | 560 | const DAddr buffer_start = buffer.CpuAddr(); |
| 561 | const VAddr buffer_end = buffer_start + buffer.SizeBytes(); | 561 | const DAddr buffer_end = buffer_start + buffer.SizeBytes(); |
| 562 | const VAddr new_start = std::max(buffer_start, cpu_addr); | 562 | const DAddr new_start = std::max(buffer_start, device_addr); |
| 563 | const VAddr new_end = std::min(buffer_end, cpu_addr + size); | 563 | const DAddr new_end = std::min(buffer_end, device_addr + size); |
| 564 | memory_tracker.ForEachDownloadRange( | 564 | memory_tracker.ForEachDownloadRange( |
| 565 | new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { | 565 | new_start, new_end - new_start, false, |
| 566 | const VAddr buffer_addr = buffer.CpuAddr(); | 566 | [&](u64 device_addr_out, u64 range_size) { |
| 567 | const auto add_download = [&](VAddr start, VAddr end) { | 567 | const DAddr buffer_addr = buffer.CpuAddr(); |
| 568 | const auto add_download = [&](DAddr start, DAddr end) { | ||
| 568 | const u64 new_offset = start - buffer_addr; | 569 | const u64 new_offset = start - buffer_addr; |
| 569 | const u64 new_size = end - start; | 570 | const u64 new_size = end - start; |
| 570 | downloads.push_back({ | 571 | downloads.push_back({ |
| @@ -582,7 +583,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 582 | largest_copy = std::max(largest_copy, new_size); | 583 | largest_copy = std::max(largest_copy, new_size); |
| 583 | }; | 584 | }; |
| 584 | 585 | ||
| 585 | ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); | 586 | ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download); |
| 586 | }); | 587 | }); |
| 587 | }); | 588 | }); |
| 588 | } | 589 | } |
| @@ -605,8 +606,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 605 | BufferCopy second_copy{copy}; | 606 | BufferCopy second_copy{copy}; |
| 606 | Buffer& buffer = slot_buffers[buffer_id]; | 607 | Buffer& buffer = slot_buffers[buffer_id]; |
| 607 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; | 608 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 608 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); | 609 | DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset); |
| 609 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; | 610 | const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size}; |
| 610 | async_downloads += std::make_pair(base_interval, 1); | 611 | async_downloads += std::make_pair(base_interval, 1); |
| 611 | buffer.MarkUsage(copy.src_offset, copy.size); | 612 | buffer.MarkUsage(copy.src_offset, copy.size); |
| 612 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); | 613 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| @@ -635,11 +636,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 635 | runtime.Finish(); | 636 | runtime.Finish(); |
| 636 | for (const auto& [copy, buffer_id] : downloads) { | 637 | for (const auto& [copy, buffer_id] : downloads) { |
| 637 | const Buffer& buffer = slot_buffers[buffer_id]; | 638 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 638 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 639 | const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; |
| 639 | // Undo the modified offset | 640 | // Undo the modified offset |
| 640 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 641 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 641 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 642 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; |
| 642 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 643 | device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size); |
| 643 | } | 644 | } |
| 644 | } else { | 645 | } else { |
| 645 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 646 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| @@ -647,8 +648,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 647 | Buffer& buffer = slot_buffers[buffer_id]; | 648 | Buffer& buffer = slot_buffers[buffer_id]; |
| 648 | buffer.ImmediateDownload(copy.src_offset, | 649 | buffer.ImmediateDownload(copy.src_offset, |
| 649 | immediate_buffer.subspan(0, copy.size)); | 650 | immediate_buffer.subspan(0, copy.size)); |
| 650 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 651 | const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; |
| 651 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 652 | device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); |
| 652 | } | 653 | } |
| 653 | } | 654 | } |
| 654 | } | 655 | } |
| @@ -681,19 +682,19 @@ void BufferCache<P>::PopAsyncBuffers() { | |||
| 681 | u8* base = async_buffer->mapped_span.data(); | 682 | u8* base = async_buffer->mapped_span.data(); |
| 682 | const size_t base_offset = async_buffer->offset; | 683 | const size_t base_offset = async_buffer->offset; |
| 683 | for (const auto& copy : downloads) { | 684 | for (const auto& copy : downloads) { |
| 684 | const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); | 685 | const DAddr device_addr = static_cast<DAddr>(copy.src_offset); |
| 685 | const u64 dst_offset = copy.dst_offset - base_offset; | 686 | const u64 dst_offset = copy.dst_offset - base_offset; |
| 686 | const u8* read_mapped_memory = base + dst_offset; | 687 | const u8* read_mapped_memory = base + dst_offset; |
| 687 | ForEachInOverlapCounter( | 688 | ForEachInOverlapCounter( |
| 688 | async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { | 689 | async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) { |
| 689 | cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], | 690 | device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr], |
| 690 | end - start); | 691 | end - start); |
| 691 | if (count == 1) { | 692 | if (count == 1) { |
| 692 | const IntervalType base_interval{start, end}; | 693 | const IntervalType base_interval{start, end}; |
| 693 | common_ranges.subtract(base_interval); | 694 | common_ranges.subtract(base_interval); |
| 694 | } | 695 | } |
| 695 | }); | 696 | }); |
| 696 | const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | 697 | const IntervalType subtract_interval{device_addr, device_addr + copy.size}; |
| 697 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | 698 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); |
| 698 | } | 699 | } |
| 699 | async_buffers_death_ring.emplace_back(*async_buffer); | 700 | async_buffers_death_ring.emplace_back(*async_buffer); |
| @@ -703,15 +704,15 @@ void BufferCache<P>::PopAsyncBuffers() { | |||
| 703 | } | 704 | } |
| 704 | 705 | ||
| 705 | template <class P> | 706 | template <class P> |
| 706 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 707 | bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { |
| 707 | bool is_dirty = false; | 708 | bool is_dirty = false; |
| 708 | ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); | 709 | ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; }); |
| 709 | return is_dirty; | 710 | return is_dirty; |
| 710 | } | 711 | } |
| 711 | 712 | ||
| 712 | template <class P> | 713 | template <class P> |
| 713 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | 714 | bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) { |
| 714 | const VAddr end_addr = addr + size; | 715 | const DAddr end_addr = addr + size; |
| 715 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); | 716 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 716 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { | 717 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { |
| 717 | const BufferId buffer_id = page_table[page]; | 718 | const BufferId buffer_id = page_table[page]; |
| @@ -720,8 +721,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 720 | continue; | 721 | continue; |
| 721 | } | 722 | } |
| 722 | Buffer& buffer = slot_buffers[buffer_id]; | 723 | Buffer& buffer = slot_buffers[buffer_id]; |
| 723 | const VAddr buf_start_addr = buffer.CpuAddr(); | 724 | const DAddr buf_start_addr = buffer.CpuAddr(); |
| 724 | const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); | 725 | const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); |
| 725 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | 726 | if (buf_start_addr < end_addr && addr < buf_end_addr) { |
| 726 | return true; | 727 | return true; |
| 727 | } | 728 | } |
| @@ -731,7 +732,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 731 | } | 732 | } |
| 732 | 733 | ||
| 733 | template <class P> | 734 | template <class P> |
| 734 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 735 | bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) { |
| 735 | return memory_tracker.IsRegionCpuModified(addr, size); | 736 | return memory_tracker.IsRegionCpuModified(addr, size); |
| 736 | } | 737 | } |
| 737 | 738 | ||
| @@ -739,7 +740,7 @@ template <class P> | |||
| 739 | void BufferCache<P>::BindHostIndexBuffer() { | 740 | void BufferCache<P>::BindHostIndexBuffer() { |
| 740 | Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; | 741 | Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; |
| 741 | TouchBuffer(buffer, channel_state->index_buffer.buffer_id); | 742 | TouchBuffer(buffer, channel_state->index_buffer.buffer_id); |
| 742 | const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); | 743 | const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr); |
| 743 | const u32 size = channel_state->index_buffer.size; | 744 | const u32 size = channel_state->index_buffer.size; |
| 744 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 745 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 745 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { | 746 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| @@ -754,7 +755,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 754 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); | 755 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); |
| 755 | } | 756 | } |
| 756 | } else { | 757 | } else { |
| 757 | SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); | 758 | SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size); |
| 758 | } | 759 | } |
| 759 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | 760 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { |
| 760 | const u32 new_offset = | 761 | const u32 new_offset = |
| @@ -777,7 +778,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 777 | const Binding& binding = channel_state->vertex_buffers[index]; | 778 | const Binding& binding = channel_state->vertex_buffers[index]; |
| 778 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 779 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 779 | TouchBuffer(buffer, binding.buffer_id); | 780 | TouchBuffer(buffer, binding.buffer_id); |
| 780 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 781 | SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
| 781 | if (!flags[Dirty::VertexBuffer0 + index]) { | 782 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 782 | continue; | 783 | continue; |
| 783 | } | 784 | } |
| @@ -797,7 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 797 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 798 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 798 | 799 | ||
| 799 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; | 800 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; |
| 800 | const u32 offset = buffer.Offset(binding.cpu_addr); | 801 | const u32 offset = buffer.Offset(binding.device_addr); |
| 801 | buffer.MarkUsage(offset, binding.size); | 802 | buffer.MarkUsage(offset, binding.size); |
| 802 | 803 | ||
| 803 | host_bindings.buffers.push_back(&buffer); | 804 | host_bindings.buffers.push_back(&buffer); |
| @@ -814,7 +815,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() { | |||
| 814 | const auto bind_buffer = [this](const Binding& binding) { | 815 | const auto bind_buffer = [this](const Binding& binding) { |
| 815 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 816 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 816 | TouchBuffer(buffer, binding.buffer_id); | 817 | TouchBuffer(buffer, binding.buffer_id); |
| 817 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 818 | SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
| 818 | }; | 819 | }; |
| 819 | if (current_draw_indirect->include_count) { | 820 | if (current_draw_indirect->include_count) { |
| 820 | bind_buffer(channel_state->count_buffer_binding); | 821 | bind_buffer(channel_state->count_buffer_binding); |
| @@ -842,13 +843,13 @@ template <class P> | |||
| 842 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, | 843 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, |
| 843 | bool needs_bind) { | 844 | bool needs_bind) { |
| 844 | const Binding& binding = channel_state->uniform_buffers[stage][index]; | 845 | const Binding& binding = channel_state->uniform_buffers[stage][index]; |
| 845 | const VAddr cpu_addr = binding.cpu_addr; | 846 | const DAddr device_addr = binding.device_addr; |
| 846 | const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); | 847 | const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); |
| 847 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 848 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 848 | TouchBuffer(buffer, binding.buffer_id); | 849 | TouchBuffer(buffer, binding.buffer_id); |
| 849 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 850 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 850 | size <= channel_state->uniform_buffer_skip_cache_size && | 851 | size <= channel_state->uniform_buffer_skip_cache_size && |
| 851 | !memory_tracker.IsRegionGpuModified(cpu_addr, size); | 852 | !memory_tracker.IsRegionGpuModified(device_addr, size); |
| 852 | if (use_fast_buffer) { | 853 | if (use_fast_buffer) { |
| 853 | if constexpr (IS_OPENGL) { | 854 | if constexpr (IS_OPENGL) { |
| 854 | if (runtime.HasFastBufferSubData()) { | 855 | if (runtime.HasFastBufferSubData()) { |
| @@ -862,7 +863,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 862 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; | 863 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; |
| 863 | runtime.BindFastUniformBuffer(stage, binding_index, size); | 864 | runtime.BindFastUniformBuffer(stage, binding_index, size); |
| 864 | } | 865 | } |
| 865 | const auto span = ImmediateBufferWithData(cpu_addr, size); | 866 | const auto span = ImmediateBufferWithData(device_addr, size); |
| 866 | runtime.PushFastUniformBuffer(stage, binding_index, span); | 867 | runtime.PushFastUniformBuffer(stage, binding_index, span); |
| 867 | return; | 868 | return; |
| 868 | } | 869 | } |
| @@ -873,11 +874,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 873 | } | 874 | } |
| 874 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan | 875 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan |
| 875 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); | 876 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); |
| 876 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 877 | device_memory.ReadBlockUnsafe(device_addr, span.data(), size); |
| 877 | return; | 878 | return; |
| 878 | } | 879 | } |
| 879 | // Classic cached path | 880 | // Classic cached path |
| 880 | const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); | 881 | const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size); |
| 881 | if (sync_cached) { | 882 | if (sync_cached) { |
| 882 | ++channel_state->uniform_cache_hits[0]; | 883 | ++channel_state->uniform_cache_hits[0]; |
| 883 | } | 884 | } |
| @@ -892,7 +893,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 892 | if (!needs_bind) { | 893 | if (!needs_bind) { |
| 893 | return; | 894 | return; |
| 894 | } | 895 | } |
| 895 | const u32 offset = buffer.Offset(cpu_addr); | 896 | const u32 offset = buffer.Offset(device_addr); |
| 896 | if constexpr (IS_OPENGL) { | 897 | if constexpr (IS_OPENGL) { |
| 897 | // Fast buffer will be unbound | 898 | // Fast buffer will be unbound |
| 898 | channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | 899 | channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); |
| @@ -920,14 +921,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 920 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 921 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 921 | TouchBuffer(buffer, binding.buffer_id); | 922 | TouchBuffer(buffer, binding.buffer_id); |
| 922 | const u32 size = binding.size; | 923 | const u32 size = binding.size; |
| 923 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 924 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 924 | 925 | ||
| 925 | const u32 offset = buffer.Offset(binding.cpu_addr); | 926 | const u32 offset = buffer.Offset(binding.device_addr); |
| 926 | buffer.MarkUsage(offset, size); | 927 | buffer.MarkUsage(offset, size); |
| 927 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; | 928 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; |
| 928 | 929 | ||
| 929 | if (is_written) { | 930 | if (is_written) { |
| 930 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 931 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 931 | } | 932 | } |
| 932 | 933 | ||
| 933 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | 934 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { |
| @@ -945,14 +946,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | |||
| 945 | const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; | 946 | const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; |
| 946 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 947 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 947 | const u32 size = binding.size; | 948 | const u32 size = binding.size; |
| 948 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 949 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 949 | 950 | ||
| 950 | const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; | 951 | const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; |
| 951 | if (is_written) { | 952 | if (is_written) { |
| 952 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 953 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 953 | } | 954 | } |
| 954 | 955 | ||
| 955 | const u32 offset = buffer.Offset(binding.cpu_addr); | 956 | const u32 offset = buffer.Offset(binding.device_addr); |
| 956 | const PixelFormat format = binding.format; | 957 | const PixelFormat format = binding.format; |
| 957 | buffer.MarkUsage(offset, size); | 958 | buffer.MarkUsage(offset, size); |
| 958 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 959 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| @@ -982,11 +983,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 982 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 983 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 983 | TouchBuffer(buffer, binding.buffer_id); | 984 | TouchBuffer(buffer, binding.buffer_id); |
| 984 | const u32 size = binding.size; | 985 | const u32 size = binding.size; |
| 985 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 986 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 986 | 987 | ||
| 987 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 988 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 988 | 989 | ||
| 989 | const u32 offset = buffer.Offset(binding.cpu_addr); | 990 | const u32 offset = buffer.Offset(binding.device_addr); |
| 990 | buffer.MarkUsage(offset, size); | 991 | buffer.MarkUsage(offset, size); |
| 991 | host_bindings.buffers.push_back(&buffer); | 992 | host_bindings.buffers.push_back(&buffer); |
| 992 | host_bindings.offsets.push_back(offset); | 993 | host_bindings.offsets.push_back(offset); |
| @@ -1011,9 +1012,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1011 | TouchBuffer(buffer, binding.buffer_id); | 1012 | TouchBuffer(buffer, binding.buffer_id); |
| 1012 | const u32 size = | 1013 | const u32 size = |
| 1013 | std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); | 1014 | std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); |
| 1014 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1015 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1015 | 1016 | ||
| 1016 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1017 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1017 | buffer.MarkUsage(offset, size); | 1018 | buffer.MarkUsage(offset, size); |
| 1018 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1019 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 1019 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); | 1020 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); |
| @@ -1032,15 +1033,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1032 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1033 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1033 | TouchBuffer(buffer, binding.buffer_id); | 1034 | TouchBuffer(buffer, binding.buffer_id); |
| 1034 | const u32 size = binding.size; | 1035 | const u32 size = binding.size; |
| 1035 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1036 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1036 | 1037 | ||
| 1037 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1038 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1038 | buffer.MarkUsage(offset, size); | 1039 | buffer.MarkUsage(offset, size); |
| 1039 | const bool is_written = | 1040 | const bool is_written = |
| 1040 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; | 1041 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; |
| 1041 | 1042 | ||
| 1042 | if (is_written) { | 1043 | if (is_written) { |
| 1043 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 1044 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 1044 | } | 1045 | } |
| 1045 | 1046 | ||
| 1046 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | 1047 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { |
| @@ -1058,15 +1059,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { | |||
| 1058 | const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; | 1059 | const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; |
| 1059 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1060 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1060 | const u32 size = binding.size; | 1061 | const u32 size = binding.size; |
| 1061 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1062 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1062 | 1063 | ||
| 1063 | const bool is_written = | 1064 | const bool is_written = |
| 1064 | ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; | 1065 | ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; |
| 1065 | if (is_written) { | 1066 | if (is_written) { |
| 1066 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 1067 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 1067 | } | 1068 | } |
| 1068 | 1069 | ||
| 1069 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1070 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1070 | const PixelFormat format = binding.format; | 1071 | const PixelFormat format = binding.format; |
| 1071 | buffer.MarkUsage(offset, size); | 1072 | buffer.MarkUsage(offset, size); |
| 1072 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 1073 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| @@ -1131,7 +1132,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1131 | inline_buffer_id = CreateBuffer(0, buffer_size); | 1132 | inline_buffer_id = CreateBuffer(0, buffer_size); |
| 1132 | } | 1133 | } |
| 1133 | channel_state->index_buffer = Binding{ | 1134 | channel_state->index_buffer = Binding{ |
| 1134 | .cpu_addr = 0, | 1135 | .device_addr = 0, |
| 1135 | .size = inline_index_size, | 1136 | .size = inline_index_size, |
| 1136 | .buffer_id = inline_buffer_id, | 1137 | .buffer_id = inline_buffer_id, |
| 1137 | }; | 1138 | }; |
| @@ -1140,19 +1141,19 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1140 | 1141 | ||
| 1141 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); | 1142 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); |
| 1142 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); | 1143 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); |
| 1143 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1144 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1144 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1145 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1145 | const u32 draw_size = | 1146 | const u32 draw_size = |
| 1146 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); | 1147 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); |
| 1147 | const u32 size = std::min(address_size, draw_size); | 1148 | const u32 size = std::min(address_size, draw_size); |
| 1148 | if (size == 0 || !cpu_addr) { | 1149 | if (size == 0 || !device_addr) { |
| 1149 | channel_state->index_buffer = NULL_BINDING; | 1150 | channel_state->index_buffer = NULL_BINDING; |
| 1150 | return; | 1151 | return; |
| 1151 | } | 1152 | } |
| 1152 | channel_state->index_buffer = Binding{ | 1153 | channel_state->index_buffer = Binding{ |
| 1153 | .cpu_addr = *cpu_addr, | 1154 | .device_addr = *device_addr, |
| 1154 | .size = size, | 1155 | .size = size, |
| 1155 | .buffer_id = FindBuffer(*cpu_addr, size), | 1156 | .buffer_id = FindBuffer(*device_addr, size), |
| 1156 | }; | 1157 | }; |
| 1157 | } | 1158 | } |
| 1158 | 1159 | ||
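The binding updates above all reduce to one translate-then-bind pattern: resolve the GPU virtual address to a DAddr, fall back to NULL_BINDING when the translation fails or the size is zero, and only then resolve a buffer id. A minimal standalone sketch of that pattern, with a hypothetical Translate() stub standing in for gpu_memory->GpuToCpuAddress() and a plain int standing in for BufferId:

#include <cstdint>
#include <iostream>
#include <optional>

using DAddr = std::uint64_t;
using GPUVAddr = std::uint64_t;

struct Binding {
    DAddr device_addr{};
    std::uint32_t size{};
    int buffer_id{}; // 0 plays the role of NULL_BUFFER_ID in this sketch
};

constexpr Binding NULL_BINDING{};

// Hypothetical stand-in for MemoryManager::GpuToCpuAddress: unmapped
// addresses yield std::nullopt, mapped ones a device address.
std::optional<DAddr> Translate(GPUVAddr gpu_addr) {
    if (gpu_addr == 0) {
        return std::nullopt;
    }
    return DAddr{gpu_addr + 0x1000}; // fake fixed-offset mapping
}

Binding MakeBinding(GPUVAddr gpu_addr, std::uint32_t size) {
    const std::optional<DAddr> device_addr = Translate(gpu_addr);
    if (size == 0 || !device_addr) {
        return NULL_BINDING; // same fallback UpdateIndexBuffer takes
    }
    return Binding{.device_addr = *device_addr, .size = size, .buffer_id = 1};
}

int main() {
    const Binding ok = MakeBinding(0x2000, 64);
    const Binding bad = MakeBinding(0, 64);
    std::cout << std::hex << ok.device_addr << ' ' << std::dec << bad.size << '\n';
}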
| @@ -1178,19 +1179,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1178 | const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; | 1179 | const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; |
| 1179 | const GPUVAddr gpu_addr_begin = array.Address(); | 1180 | const GPUVAddr gpu_addr_begin = array.Address(); |
| 1180 | const GPUVAddr gpu_addr_end = limit.Address() + 1; | 1181 | const GPUVAddr gpu_addr_end = limit.Address() + 1; |
| 1181 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1182 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1182 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1183 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1183 | u32 size = address_size; // TODO: Analyze stride and number of vertices | 1184 | u32 size = address_size; // TODO: Analyze stride and number of vertices |
| 1184 | if (array.enable == 0 || size == 0 || !cpu_addr) { | 1185 | if (array.enable == 0 || size == 0 || !device_addr) { |
| 1185 | channel_state->vertex_buffers[index] = NULL_BINDING; | 1186 | channel_state->vertex_buffers[index] = NULL_BINDING; |
| 1186 | return; | 1187 | return; |
| 1187 | } | 1188 | } |
| 1188 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1189 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1189 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); | 1190 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1190 | } | 1191 | } |
| 1191 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 1192 | const BufferId buffer_id = FindBuffer(*device_addr, size); |
| 1192 | channel_state->vertex_buffers[index] = Binding{ | 1193 | channel_state->vertex_buffers[index] = Binding{ |
| 1193 | .cpu_addr = *cpu_addr, | 1194 | .device_addr = *device_addr, |
| 1194 | .size = size, | 1195 | .size = size, |
| 1195 | .buffer_id = buffer_id, | 1196 | .buffer_id = buffer_id, |
| 1196 | }; | 1197 | }; |
| @@ -1199,15 +1200,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1199 | template <class P> | 1200 | template <class P> |
| 1200 | void BufferCache<P>::UpdateDrawIndirect() { | 1201 | void BufferCache<P>::UpdateDrawIndirect() { |
| 1201 | const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { | 1202 | const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { |
| 1202 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1203 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1203 | if (!cpu_addr) { | 1204 | if (!device_addr) { |
| 1204 | binding = NULL_BINDING; | 1205 | binding = NULL_BINDING; |
| 1205 | return; | 1206 | return; |
| 1206 | } | 1207 | } |
| 1207 | binding = Binding{ | 1208 | binding = Binding{ |
| 1208 | .cpu_addr = *cpu_addr, | 1209 | .device_addr = *device_addr, |
| 1209 | .size = static_cast<u32>(size), | 1210 | .size = static_cast<u32>(size), |
| 1210 | .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), | 1211 | .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)), |
| 1211 | }; | 1212 | }; |
| 1212 | }; | 1213 | }; |
| 1213 | if (current_draw_indirect->include_count) { | 1214 | if (current_draw_indirect->include_count) { |
| @@ -1231,7 +1232,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | |||
| 1231 | channel_state->dirty_uniform_buffers[stage] |= 1U << index; | 1232 | channel_state->dirty_uniform_buffers[stage] |= 1U << index; |
| 1232 | } | 1233 | } |
| 1233 | // Resolve buffer | 1234 | // Resolve buffer |
| 1234 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1235 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1235 | }); | 1236 | }); |
| 1236 | } | 1237 | } |
| 1237 | 1238 | ||
| @@ -1240,7 +1241,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { | |||
| 1240 | ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { | 1241 | ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { |
| 1241 | // Resolve buffer | 1242 | // Resolve buffer |
| 1242 | Binding& binding = channel_state->storage_buffers[stage][index]; | 1243 | Binding& binding = channel_state->storage_buffers[stage][index]; |
| 1243 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1244 | const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1244 | binding.buffer_id = buffer_id; | 1245 | binding.buffer_id = buffer_id; |
| 1245 | }); | 1246 | }); |
| 1246 | } | 1247 | } |
| @@ -1249,7 +1250,7 @@ template <class P> | |||
| 1249 | void BufferCache<P>::UpdateTextureBuffers(size_t stage) { | 1250 | void BufferCache<P>::UpdateTextureBuffers(size_t stage) { |
| 1250 | ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { | 1251 | ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { |
| 1251 | Binding& binding = channel_state->texture_buffers[stage][index]; | 1252 | Binding& binding = channel_state->texture_buffers[stage][index]; |
| 1252 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1253 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1253 | }); | 1254 | }); |
| 1254 | } | 1255 | } |
| 1255 | 1256 | ||
| @@ -1268,14 +1269,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { | |||
| 1268 | const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; | 1269 | const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; |
| 1269 | const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; | 1270 | const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; |
| 1270 | const u32 size = binding.size; | 1271 | const u32 size = binding.size; |
| 1271 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1272 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1272 | if (binding.enable == 0 || size == 0 || !cpu_addr) { | 1273 | if (binding.enable == 0 || size == 0 || !device_addr) { |
| 1273 | channel_state->transform_feedback_buffers[index] = NULL_BINDING; | 1274 | channel_state->transform_feedback_buffers[index] = NULL_BINDING; |
| 1274 | return; | 1275 | return; |
| 1275 | } | 1276 | } |
| 1276 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 1277 | const BufferId buffer_id = FindBuffer(*device_addr, size); |
| 1277 | channel_state->transform_feedback_buffers[index] = Binding{ | 1278 | channel_state->transform_feedback_buffers[index] = Binding{ |
| 1278 | .cpu_addr = *cpu_addr, | 1279 | .device_addr = *device_addr, |
| 1279 | .size = size, | 1280 | .size = size, |
| 1280 | .buffer_id = buffer_id, | 1281 | .buffer_id = buffer_id, |
| 1281 | }; | 1282 | }; |
| @@ -1289,13 +1290,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() { | |||
| 1289 | const auto& launch_desc = kepler_compute->launch_description; | 1290 | const auto& launch_desc = kepler_compute->launch_description; |
| 1290 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { | 1291 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { |
| 1291 | const auto& cbuf = launch_desc.const_buffer_config[index]; | 1292 | const auto& cbuf = launch_desc.const_buffer_config[index]; |
| 1292 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); | 1293 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); |
| 1293 | if (cpu_addr) { | 1294 | if (device_addr) { |
| 1294 | binding.cpu_addr = *cpu_addr; | 1295 | binding.device_addr = *device_addr; |
| 1295 | binding.size = cbuf.size; | 1296 | binding.size = cbuf.size; |
| 1296 | } | 1297 | } |
| 1297 | } | 1298 | } |
| 1298 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1299 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1299 | }); | 1300 | }); |
| 1300 | } | 1301 | } |
| 1301 | 1302 | ||
| @@ -1304,7 +1305,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { | |||
| 1304 | ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { | 1305 | ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { |
| 1305 | // Resolve buffer | 1306 | // Resolve buffer |
| 1306 | Binding& binding = channel_state->compute_storage_buffers[index]; | 1307 | Binding& binding = channel_state->compute_storage_buffers[index]; |
| 1307 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1308 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1308 | }); | 1309 | }); |
| 1309 | } | 1310 | } |
| 1310 | 1311 | ||
| @@ -1312,45 +1313,63 @@ template <class P> | |||
| 1312 | void BufferCache<P>::UpdateComputeTextureBuffers() { | 1313 | void BufferCache<P>::UpdateComputeTextureBuffers() { |
| 1313 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { | 1314 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { |
| 1314 | Binding& binding = channel_state->compute_texture_buffers[index]; | 1315 | Binding& binding = channel_state->compute_texture_buffers[index]; |
| 1315 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1316 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1316 | }); | 1317 | }); |
| 1317 | } | 1318 | } |
| 1318 | 1319 | ||
| 1319 | template <class P> | 1320 | template <class P> |
| 1320 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1321 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) { |
| 1321 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); | 1322 | memory_tracker.MarkRegionAsGpuModified(device_addr, size); |
| 1322 | 1323 | ||
| 1323 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1324 | const IntervalType base_interval{device_addr, device_addr + size}; |
| 1324 | common_ranges.add(base_interval); | 1325 | common_ranges.add(base_interval); |
| 1325 | uncommitted_ranges.add(base_interval); | 1326 | uncommitted_ranges.add(base_interval); |
| 1326 | } | 1327 | } |
| 1327 | 1328 | ||
| 1328 | template <class P> | 1329 | template <class P> |
| 1329 | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | 1330 | BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) { |
| 1330 | if (cpu_addr == 0) { | 1331 | if (device_addr == 0) { |
| 1331 | return NULL_BUFFER_ID; | 1332 | return NULL_BUFFER_ID; |
| 1332 | } | 1333 | } |
| 1333 | const u64 page = cpu_addr >> CACHING_PAGEBITS; | 1334 | const u64 page = device_addr >> CACHING_PAGEBITS; |
| 1334 | const BufferId buffer_id = page_table[page]; | 1335 | const BufferId buffer_id = page_table[page]; |
| 1335 | if (!buffer_id) { | 1336 | if (!buffer_id) { |
| 1336 | return CreateBuffer(cpu_addr, size); | 1337 | return CreateBuffer(device_addr, size); |
| 1337 | } | 1338 | } |
| 1338 | const Buffer& buffer = slot_buffers[buffer_id]; | 1339 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 1339 | if (buffer.IsInBounds(cpu_addr, size)) { | 1340 | if (buffer.IsInBounds(device_addr, size)) { |
| 1340 | return buffer_id; | 1341 | return buffer_id; |
| 1341 | } | 1342 | } |
| 1342 | return CreateBuffer(cpu_addr, size); | 1343 | return CreateBuffer(device_addr, size); |
| 1343 | } | 1344 | } |
| 1344 | 1345 | ||
| 1345 | template <class P> | 1346 | template <class P> |
| 1346 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1347 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr, |
| 1347 | u32 wanted_size) { | 1348 | u32 wanted_size) { |
| 1348 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | 1349 | static constexpr int STREAM_LEAP_THRESHOLD = 16; |
| 1349 | boost::container::small_vector<BufferId, 16> overlap_ids; | 1350 | boost::container::small_vector<BufferId, 16> overlap_ids; |
| 1350 | VAddr begin = cpu_addr; | 1351 | DAddr begin = device_addr; |
| 1351 | VAddr end = cpu_addr + wanted_size; | 1352 | DAddr end = device_addr + wanted_size; |
| 1352 | int stream_score = 0; | 1353 | int stream_score = 0; |
| 1353 | bool has_stream_leap = false; | 1354 | bool has_stream_leap = false; |
| 1355 | auto expand_begin = [&](DAddr add_value) { | ||
| 1356 | static constexpr DAddr min_page = CACHING_PAGESIZE + Core::DEVICE_PAGESIZE; | ||
| 1357 | if (add_value > begin - min_page) { | ||
| 1358 | begin = min_page; | ||
| 1359 | device_addr = Core::DEVICE_PAGESIZE; | ||
| 1360 | return; | ||
| 1361 | } | ||
| 1362 | begin -= add_value; | ||
| 1363 | device_addr = begin - CACHING_PAGESIZE; | ||
| 1364 | }; | ||
| 1365 | auto expand_end = [&](DAddr add_value) { | ||
| 1366 | static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS; | ||
| 1367 | if (add_value > max_page - end) { | ||
| 1368 | end = max_page; | ||
| 1369 | return; | ||
| 1370 | } | ||
| 1371 | end += add_value; | ||
| 1372 | }; | ||
| 1354 | if (begin == 0) { | 1373 | if (begin == 0) { |
| 1355 | return OverlapResult{ | 1374 | return OverlapResult{ |
| 1356 | .ids = std::move(overlap_ids), | 1375 | .ids = std::move(overlap_ids), |
| @@ -1359,9 +1378,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1359 | .has_stream_leap = has_stream_leap, | 1378 | .has_stream_leap = has_stream_leap, |
| 1360 | }; | 1379 | }; |
| 1361 | } | 1380 | } |
| 1362 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); | 1381 | for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1363 | cpu_addr += CACHING_PAGESIZE) { | 1382 | device_addr += CACHING_PAGESIZE) { |
| 1364 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; | 1383 | const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; |
| 1365 | if (!overlap_id) { | 1384 | if (!overlap_id) { |
| 1366 | continue; | 1385 | continue; |
| 1367 | } | 1386 | } |
| @@ -1371,12 +1390,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1371 | } | 1390 | } |
| 1372 | overlap_ids.push_back(overlap_id); | 1391 | overlap_ids.push_back(overlap_id); |
| 1373 | overlap.Pick(); | 1392 | overlap.Pick(); |
| 1374 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | 1393 | const DAddr overlap_device_addr = overlap.CpuAddr(); |
| 1375 | const bool expands_left = overlap_cpu_addr < begin; | 1394 | const bool expands_left = overlap_device_addr < begin; |
| 1376 | if (expands_left) { | 1395 | if (expands_left) { |
| 1377 | begin = overlap_cpu_addr; | 1396 | begin = overlap_device_addr; |
| 1378 | } | 1397 | } |
| 1379 | const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); | 1398 | const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); |
| 1380 | const bool expands_right = overlap_end > end; | 1399 | const bool expands_right = overlap_end > end; |
| 1381 | if (overlap_end > end) { | 1400 | if (overlap_end > end) { |
| 1382 | end = overlap_end; | 1401 | end = overlap_end; |
| @@ -1387,11 +1406,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1387 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1406 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1388 | has_stream_leap = true; | 1407 | has_stream_leap = true; |
| 1389 | if (expands_right) { | 1408 | if (expands_right) { |
| 1390 | begin -= CACHING_PAGESIZE * 256; | 1409 | expand_begin(CACHING_PAGESIZE * 128); |
| 1391 | cpu_addr = begin - CACHING_PAGESIZE; | ||
| 1392 | } | 1410 | } |
| 1393 | if (expands_left) { | 1411 | if (expands_left) { |
| 1394 | end += CACHING_PAGESIZE * 256; | 1412 | expand_end(CACHING_PAGESIZE * 128); |
| 1395 | } | 1413 | } |
| 1396 | } | 1414 | } |
| 1397 | } | 1415 | } |
| @@ -1424,13 +1442,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1424 | } | 1442 | } |
| 1425 | 1443 | ||
| 1426 | template <class P> | 1444 | template <class P> |
| 1427 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1445 | BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) { |
| 1428 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | 1446 | DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); |
| 1429 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | 1447 | device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); |
| 1430 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | 1448 | wanted_size = static_cast<u32>(device_addr_end - device_addr); |
| 1431 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1449 | const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); |
| 1432 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1450 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1433 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1451 | const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size); |
| 1434 | auto& new_buffer = slot_buffers[new_buffer_id]; | 1452 | auto& new_buffer = slot_buffers[new_buffer_id]; |
| 1435 | const size_t size_bytes = new_buffer.SizeBytes(); | 1453 | const size_t size_bytes = new_buffer.SizeBytes(); |
| 1436 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); | 1454 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); |
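CreateBuffer first widens the requested range outward to whole caching pages, so every buffer the cache creates starts and ends on a CACHING_PAGESIZE boundary before overlaps are resolved. A small self-contained sketch of that alignment step, with Common::AlignDown/AlignUp re-derived locally and an assumed 64 KiB caching page:

#include <cstdint>
#include <iostream>

using DAddr = std::uint64_t;

constexpr DAddr kCachingPageSize = 1ULL << 16; // assumed CACHING_PAGESIZE

constexpr DAddr AlignDown(DAddr value, DAddr align) {
    return value & ~(align - 1); // align must be a power of two
}
constexpr DAddr AlignUp(DAddr value, DAddr align) {
    return AlignDown(value + align - 1, align);
}

int main() {
    DAddr device_addr = 0x1'2345;
    std::uint32_t wanted_size = 0x100;
    // Same widening CreateBuffer performs before calling ResolveOverlaps.
    const DAddr device_addr_end = AlignUp(device_addr + wanted_size, kCachingPageSize);
    device_addr = AlignDown(device_addr, kCachingPageSize);
    wanted_size = static_cast<std::uint32_t>(device_addr_end - device_addr);
    std::cout << std::hex << "0x" << device_addr << " +0x" << wanted_size << '\n';
}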
| @@ -1465,10 +1483,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1465 | total_used_memory -= Common::AlignUp(size, 1024); | 1483 | total_used_memory -= Common::AlignUp(size, 1024); |
| 1466 | lru_cache.Free(buffer.getLRUID()); | 1484 | lru_cache.Free(buffer.getLRUID()); |
| 1467 | } | 1485 | } |
| 1468 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1486 | const DAddr device_addr_begin = buffer.CpuAddr(); |
| 1469 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1487 | const DAddr device_addr_end = device_addr_begin + size; |
| 1470 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; | 1488 | const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; |
| 1471 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); | 1489 | const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); |
| 1472 | for (u64 page = page_begin; page != page_end; ++page) { | 1490 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1473 | if constexpr (insert) { | 1491 | if constexpr (insert) { |
| 1474 | page_table[page] = buffer_id; | 1492 | page_table[page] = buffer_id; |
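Register and Unregister both land in ChangeRegister, which walks every caching page the buffer spans and stamps (or clears) its id in the page table; FindBuffer later answers lookups from that same table. A compact sketch of the page loop, with a sparse map standing in for the cache's flat page_table array:

#include <cstdint>
#include <iostream>
#include <unordered_map>

using DAddr = std::uint64_t;
using BufferId = std::uint32_t;

constexpr DAddr kCachingPageSize = 1ULL << 16; // assumed CACHING_PAGESIZE

constexpr std::uint64_t DivCeil(DAddr value, DAddr divisor) {
    return (value + divisor - 1) / divisor;
}

// Mirrors ChangeRegister<insert>: stamp or clear the id on each spanned page.
void SetPages(std::unordered_map<std::uint64_t, BufferId>& table, DAddr begin,
              std::uint64_t size, BufferId id, bool insert) {
    const std::uint64_t page_begin = begin / kCachingPageSize;
    const std::uint64_t page_end = DivCeil(begin + size, kCachingPageSize);
    for (std::uint64_t page = page_begin; page != page_end; ++page) {
        if (insert) {
            table[page] = id;
        } else {
            table.erase(page);
        }
    }
}

int main() {
    std::unordered_map<std::uint64_t, BufferId> page_table;
    SetPages(page_table, 0x1'8000, 0x2'0000, 7, true);
    std::cout << page_table.size() << " pages registered\n"; // 3
    SetPages(page_table, 0x1'8000, 0x2'0000, 7, false);
    std::cout << page_table.size() << " pages registered\n"; // 0
}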
| @@ -1486,15 +1504,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1486 | } | 1504 | } |
| 1487 | 1505 | ||
| 1488 | template <class P> | 1506 | template <class P> |
| 1489 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1507 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) { |
| 1490 | boost::container::small_vector<BufferCopy, 4> copies; | 1508 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1491 | u64 total_size_bytes = 0; | 1509 | u64 total_size_bytes = 0; |
| 1492 | u64 largest_copy = 0; | 1510 | u64 largest_copy = 0; |
| 1493 | VAddr buffer_start = buffer.CpuAddr(); | 1511 | DAddr buffer_start = buffer.CpuAddr(); |
| 1494 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1512 | memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1495 | copies.push_back(BufferCopy{ | 1513 | copies.push_back(BufferCopy{ |
| 1496 | .src_offset = total_size_bytes, | 1514 | .src_offset = total_size_bytes, |
| 1497 | .dst_offset = cpu_addr_out - buffer_start, | 1515 | .dst_offset = device_addr_out - buffer_start, |
| 1498 | .size = range_size, | 1516 | .size = range_size, |
| 1499 | }); | 1517 | }); |
| 1500 | total_size_bytes += range_size; | 1518 | total_size_bytes += range_size; |
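SynchronizeBuffer packs each dirty range reported by the memory tracker back to back into staging memory: src_offset is the running byte total, dst_offset the range's offset inside the buffer, and largest_copy later sizes the bounce buffer on the immediate path. A sketch of that packing over hypothetical dirty ranges:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using DAddr = std::uint64_t;

struct BufferCopy {
    std::uint64_t src_offset;
    std::uint64_t dst_offset;
    std::uint64_t size;
};

int main() {
    const DAddr buffer_start = 0x10'0000; // buffer.CpuAddr() in the cache
    // Hypothetical (device_addr, size) ranges the tracker would report.
    const std::vector<std::pair<DAddr, std::uint64_t>> dirty{
        {0x10'0100, 0x80}, {0x10'0400, 0x200}};

    std::vector<BufferCopy> copies;
    std::uint64_t total_size_bytes = 0;
    std::uint64_t largest_copy = 0;
    for (const auto& [device_addr_out, range_size] : dirty) {
        copies.push_back({.src_offset = total_size_bytes,
                          .dst_offset = device_addr_out - buffer_start,
                          .size = range_size});
        total_size_bytes += range_size;
        largest_copy = std::max(largest_copy, range_size);
    }
    std::cout << copies.size() << " copies, " << total_size_bytes
              << " bytes, largest " << largest_copy << '\n';
}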
| @@ -1526,14 +1544,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1526 | std::span<u8> immediate_buffer; | 1544 | std::span<u8> immediate_buffer; |
| 1527 | for (const BufferCopy& copy : copies) { | 1545 | for (const BufferCopy& copy : copies) { |
| 1528 | std::span<const u8> upload_span; | 1546 | std::span<const u8> upload_span; |
| 1529 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1547 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1530 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1548 | if (IsRangeGranular(device_addr, copy.size)) { |
| 1531 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1549 | upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size); |
| 1532 | } else { | 1550 | } else { |
| 1533 | if (immediate_buffer.empty()) { | 1551 | if (immediate_buffer.empty()) { |
| 1534 | immediate_buffer = ImmediateBuffer(largest_copy); | 1552 | immediate_buffer = ImmediateBuffer(largest_copy); |
| 1535 | } | 1553 | } |
| 1536 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1554 | device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); |
| 1537 | upload_span = immediate_buffer.subspan(0, copy.size); | 1555 | upload_span = immediate_buffer.subspan(0, copy.size); |
| 1538 | } | 1556 | } |
| 1539 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | 1557 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| @@ -1550,8 +1568,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1550 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1568 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1551 | for (BufferCopy& copy : copies) { | 1569 | for (BufferCopy& copy : copies) { |
| 1552 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1570 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1553 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1571 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1554 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1572 | device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size); |
| 1555 | 1573 | ||
| 1556 | // Apply the staging offset | 1574 | // Apply the staging offset |
| 1557 | copy.src_offset += upload_staging.offset; | 1575 | copy.src_offset += upload_staging.offset; |
| @@ -1562,14 +1580,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1562 | } | 1580 | } |
| 1563 | 1581 | ||
| 1564 | template <class P> | 1582 | template <class P> |
| 1565 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | 1583 | bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size, |
| 1566 | std::span<const u8> inlined_buffer) { | 1584 | std::span<const u8> inlined_buffer) { |
| 1567 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); | 1585 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); |
| 1568 | if (!is_dirty) { | 1586 | if (!is_dirty) { |
| 1569 | return false; | 1587 | return false; |
| 1570 | } | 1588 | } |
| 1571 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); | 1589 | DAddr aligned_start = Common::AlignDown(dest_address, DEVICE_PAGESIZE); |
| 1572 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | 1590 | DAddr aligned_end = Common::AlignUp(dest_address + copy_size, DEVICE_PAGESIZE); |
| 1573 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | 1591 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { |
| 1574 | return false; | 1592 | return false; |
| 1575 | } | 1593 | } |
| @@ -1580,7 +1598,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1580 | } | 1598 | } |
| 1581 | 1599 | ||
| 1582 | template <class P> | 1600 | template <class P> |
| 1583 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 1601 | void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 1584 | std::span<const u8> inlined_buffer) { | 1602 | std::span<const u8> inlined_buffer) { |
| 1585 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | 1603 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |
| 1586 | ClearDownload(subtract_interval); | 1604 | ClearDownload(subtract_interval); |
| @@ -1612,14 +1630,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | |||
| 1612 | } | 1630 | } |
| 1613 | 1631 | ||
| 1614 | template <class P> | 1632 | template <class P> |
| 1615 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | 1633 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) { |
| 1616 | boost::container::small_vector<BufferCopy, 1> copies; | 1634 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1617 | u64 total_size_bytes = 0; | 1635 | u64 total_size_bytes = 0; |
| 1618 | u64 largest_copy = 0; | 1636 | u64 largest_copy = 0; |
| 1619 | memory_tracker.ForEachDownloadRangeAndClear( | 1637 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1620 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1638 | device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1621 | const VAddr buffer_addr = buffer.CpuAddr(); | 1639 | const DAddr buffer_addr = buffer.CpuAddr(); |
| 1622 | const auto add_download = [&](VAddr start, VAddr end) { | 1640 | const auto add_download = [&](DAddr start, DAddr end) { |
| 1623 | const u64 new_offset = start - buffer_addr; | 1641 | const u64 new_offset = start - buffer_addr; |
| 1624 | const u64 new_size = end - start; | 1642 | const u64 new_size = end - start; |
| 1625 | copies.push_back(BufferCopy{ | 1643 | copies.push_back(BufferCopy{ |
| @@ -1634,8 +1652,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1634 | largest_copy = std::max(largest_copy, new_size); | 1652 | largest_copy = std::max(largest_copy, new_size); |
| 1635 | }; | 1653 | }; |
| 1636 | 1654 | ||
| 1637 | const VAddr start_address = cpu_addr_out; | 1655 | const DAddr start_address = device_addr_out; |
| 1638 | const VAddr end_address = start_address + range_size; | 1656 | const DAddr end_address = start_address + range_size; |
| 1639 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); | 1657 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1640 | const IntervalType subtract_interval{start_address, end_address}; | 1658 | const IntervalType subtract_interval{start_address, end_address}; |
| 1641 | ClearDownload(subtract_interval); | 1659 | ClearDownload(subtract_interval); |
| @@ -1658,18 +1676,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1658 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); | 1676 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); |
| 1659 | runtime.Finish(); | 1677 | runtime.Finish(); |
| 1660 | for (const BufferCopy& copy : copies) { | 1678 | for (const BufferCopy& copy : copies) { |
| 1661 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1679 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1662 | // Undo the modified offset | 1680 | // Undo the modified offset |
| 1663 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 1681 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 1664 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | 1682 | const u8* copy_mapped_memory = mapped_memory + dst_offset; |
| 1665 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | 1683 | device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size); |
| 1666 | } | 1684 | } |
| 1667 | } else { | 1685 | } else { |
| 1668 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 1686 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| 1669 | for (const BufferCopy& copy : copies) { | 1687 | for (const BufferCopy& copy : copies) { |
| 1670 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 1688 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); |
| 1671 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1689 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1672 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | 1690 | device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size); |
| 1673 | } | 1691 | } |
| 1674 | } | 1692 | } |
| 1675 | } | 1693 | } |
| @@ -1758,20 +1776,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1758 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); | 1776 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); |
| 1759 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; | 1777 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; |
| 1760 | 1778 | ||
| 1761 | const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); | 1779 | const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); |
| 1762 | if (!aligned_cpu_addr || size == 0) { | 1780 | if (!aligned_device_addr || size == 0) { |
| 1763 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1781 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 1764 | return NULL_BINDING; | 1782 | return NULL_BINDING; |
| 1765 | } | 1783 | } |
| 1766 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1784 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1767 | ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", | 1785 | ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}", |
| 1768 | cbuf_index); | 1786 | cbuf_index); |
| 1769 | // The end address used for size calculation does not need to be aligned | 1787 | // The end address used for size calculation does not need to be aligned |
| 1770 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1788 | const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::DEVICE_PAGESIZE); |
| 1771 | 1789 | ||
| 1772 | const Binding binding{ | 1790 | const Binding binding{ |
| 1773 | .cpu_addr = *aligned_cpu_addr, | 1791 | .device_addr = *aligned_device_addr, |
| 1774 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), | 1792 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr), |
| 1775 | .buffer_id = BufferId{}, | 1793 | .buffer_id = BufferId{}, |
| 1776 | }; | 1794 | }; |
| 1777 | return binding; | 1795 | return binding; |
| @@ -1780,15 +1798,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1780 | template <class P> | 1798 | template <class P> |
| 1781 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 1799 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 1782 | PixelFormat format) { | 1800 | PixelFormat format) { |
| 1783 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1801 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1784 | TextureBufferBinding binding; | 1802 | TextureBufferBinding binding; |
| 1785 | if (!cpu_addr || size == 0) { | 1803 | if (!device_addr || size == 0) { |
| 1786 | binding.cpu_addr = 0; | 1804 | binding.device_addr = 0; |
| 1787 | binding.size = 0; | 1805 | binding.size = 0; |
| 1788 | binding.buffer_id = NULL_BUFFER_ID; | 1806 | binding.buffer_id = NULL_BUFFER_ID; |
| 1789 | binding.format = PixelFormat::Invalid; | 1807 | binding.format = PixelFormat::Invalid; |
| 1790 | } else { | 1808 | } else { |
| 1791 | binding.cpu_addr = *cpu_addr; | 1809 | binding.device_addr = *device_addr; |
| 1792 | binding.size = size; | 1810 | binding.size = size; |
| 1793 | binding.buffer_id = BufferId{}; | 1811 | binding.buffer_id = BufferId{}; |
| 1794 | binding.format = format; | 1812 | binding.format = format; |
| @@ -1797,14 +1815,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, | |||
| 1797 | } | 1815 | } |
| 1798 | 1816 | ||
| 1799 | template <class P> | 1817 | template <class P> |
| 1800 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | 1818 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) { |
| 1801 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | 1819 | u8* const base_pointer = device_memory.GetPointer<u8>(device_addr); |
| 1802 | if (IsRangeGranular(cpu_addr, size) || | 1820 | if (IsRangeGranular(device_addr, size) || |
| 1803 | base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { | 1821 | base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) { |
| 1804 | return std::span(base_pointer, size); | 1822 | return std::span(base_pointer, size); |
| 1805 | } else { | 1823 | } else { |
| 1806 | const std::span<u8> span = ImmediateBuffer(size); | 1824 | const std::span<u8> span = ImmediateBuffer(size); |
| 1807 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 1825 | device_memory.ReadBlockUnsafe(device_addr, span.data(), size); |
| 1808 | return span; | 1826 | return span; |
| 1809 | } | 1827 | } |
| 1810 | } | 1828 | } |
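ImmediateBufferWithData hands out a zero-copy span only when the range is contiguous in host memory, detected by checking that base_pointer + size lands exactly where GetPointer(device_addr + size) does; otherwise it bounces through a scratch buffer. A toy sketch of that contiguity test over an identity-mapped backing array (a real device mapping can be discontiguous, which is when the bounce path fires; memcpy stands in for ReadBlockUnsafe):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <span>
#include <vector>

using DAddr = std::uint64_t;

// Toy backing store; in this sketch every address is identity-mapped.
std::array<std::uint8_t, 0x4000> backing{};
std::uint8_t* GetPointer(DAddr addr) {
    return backing.data() + addr;
}

std::span<const std::uint8_t> ViewOrCopy(DAddr addr, std::size_t size,
                                         std::vector<std::uint8_t>& bounce) {
    std::uint8_t* const base = GetPointer(addr);
    if (base + size == GetPointer(addr + size)) {
        return {base, size}; // contiguous: no copy needed
    }
    bounce.resize(size);
    std::memcpy(bounce.data(), base, size); // bounce path
    return {bounce.data(), size};
}

int main() {
    std::vector<std::uint8_t> bounce;
    const auto span = ViewOrCopy(0x100, 0x40, bounce);
    std::cout << span.size() << " bytes viewed\n";
}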
| @@ -1828,13 +1846,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) | |||
| 1828 | template <class P> | 1846 | template <class P> |
| 1829 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { | 1847 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { |
| 1830 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; | 1848 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; |
| 1831 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); | 1849 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr)); |
| 1832 | } | 1850 | } |
| 1833 | 1851 | ||
| 1834 | template <class P> | 1852 | template <class P> |
| 1835 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { | 1853 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { |
| 1836 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; | 1854 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; |
| 1837 | return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); | 1855 | return std::make_pair(&buffer, |
| 1856 | buffer.Offset(channel_state->indirect_buffer_binding.device_addr)); | ||
| 1838 | } | 1857 | } |
| 1839 | 1858 | ||
| 1840 | } // namespace VideoCommon | 1859 | } // namespace VideoCommon |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index d6d696d8c..80dbb81e7 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include "common/microprofile.h" | 32 | #include "common/microprofile.h" |
| 33 | #include "common/scope_exit.h" | 33 | #include "common/scope_exit.h" |
| 34 | #include "common/settings.h" | 34 | #include "common/settings.h" |
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | 35 | #include "video_core/buffer_cache/buffer_base.h" |
| 37 | #include "video_core/control/channel_state_cache.h" | 36 | #include "video_core/control/channel_state_cache.h" |
| 38 | #include "video_core/delayed_destruction_ring.h" | 37 | #include "video_core/delayed_destruction_ring.h" |
| @@ -41,7 +40,6 @@ | |||
| 41 | #include "video_core/engines/kepler_compute.h" | 40 | #include "video_core/engines/kepler_compute.h" |
| 42 | #include "video_core/engines/maxwell_3d.h" | 41 | #include "video_core/engines/maxwell_3d.h" |
| 43 | #include "video_core/memory_manager.h" | 42 | #include "video_core/memory_manager.h" |
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | 43 | #include "video_core/surface.h" |
| 46 | #include "video_core/texture_cache/slot_vector.h" | 44 | #include "video_core/texture_cache/slot_vector.h" |
| 47 | #include "video_core/texture_cache/types.h" | 45 | #include "video_core/texture_cache/types.h" |
| @@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0}; | |||
| 94 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | 92 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); |
| 95 | 93 | ||
| 96 | struct Binding { | 94 | struct Binding { |
| 97 | VAddr cpu_addr{}; | 95 | DAddr device_addr{}; |
| 98 | u32 size{}; | 96 | u32 size{}; |
| 99 | BufferId buffer_id; | 97 | BufferId buffer_id; |
| 100 | }; | 98 | }; |
| @@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding { | |||
| 104 | }; | 102 | }; |
| 105 | 103 | ||
| 106 | static constexpr Binding NULL_BINDING{ | 104 | static constexpr Binding NULL_BINDING{ |
| 107 | .cpu_addr = 0, | 105 | .device_addr = 0, |
| 108 | .size = 0, | 106 | .size = 0, |
| 109 | .buffer_id = NULL_BUFFER_ID, | 107 | .buffer_id = NULL_BUFFER_ID, |
| 110 | }; | 108 | }; |
| @@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 204 | using Async_Buffer = typename P::Async_Buffer; | 202 | using Async_Buffer = typename P::Async_Buffer; |
| 205 | using MemoryTracker = typename P::MemoryTracker; | 203 | using MemoryTracker = typename P::MemoryTracker; |
| 206 | 204 | ||
| 207 | using IntervalCompare = std::less<VAddr>; | 205 | using IntervalCompare = std::less<DAddr>; |
| 208 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | 206 | using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; |
| 209 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | 207 | using IntervalAllocator = boost::fast_pool_allocator<DAddr>; |
| 210 | using IntervalSet = boost::icl::interval_set<VAddr>; | 208 | using IntervalSet = boost::icl::interval_set<DAddr>; |
| 211 | using IntervalType = typename IntervalSet::interval_type; | 209 | using IntervalType = typename IntervalSet::interval_type; |
| 212 | 210 | ||
| 213 | template <typename Type> | 211 | template <typename Type> |
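With these typedefs, all of the cache's range bookkeeping (common_ranges, uncommitted downloads, the split overlap counter) is keyed on DAddr through Boost.ICL containers. A minimal sketch of the interval_set semantics the cache depends on, where overlapping adds coalesce and subtraction splits a range the way ClearDownload trims one (assumes Boost headers are available, as the cache already requires):

#include <cstdint>
#include <iostream>
#include <boost/icl/interval_set.hpp>

using DAddr = std::uint64_t;
using IntervalSet = boost::icl::interval_set<DAddr>;
using IntervalType = IntervalSet::interval_type;

int main() {
    IntervalSet ranges;
    // Overlapping adds coalesce into one interval [0x1000, 0x3000)...
    ranges.add(IntervalType::right_open(0x1000, 0x2000));
    ranges.add(IntervalType::right_open(0x1800, 0x3000));
    // ...and subtraction splits it into two, as ClearDownload does.
    ranges.subtract(IntervalType::right_open(0x1400, 0x1600));
    for (const auto& interval : ranges) {
        std::cout << std::hex << interval.lower() << '-' << interval.upper() << '\n';
    }
}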
| @@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 230 | 228 | ||
| 231 | using OverlapCombine = counter_add_functor<int>; | 229 | using OverlapCombine = counter_add_functor<int>; |
| 232 | using OverlapSection = boost::icl::inter_section<int>; | 230 | using OverlapSection = boost::icl::inter_section<int>; |
| 233 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 231 | using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; |
| 234 | 232 | ||
| 235 | struct OverlapResult { | 233 | struct OverlapResult { |
| 236 | boost::container::small_vector<BufferId, 16> ids; | 234 | boost::container::small_vector<BufferId, 16> ids; |
| 237 | VAddr begin; | 235 | DAddr begin; |
| 238 | VAddr end; | 236 | DAddr end; |
| 239 | bool has_stream_leap = false; | 237 | bool has_stream_leap = false; |
| 240 | }; | 238 | }; |
| 241 | 239 | ||
| 242 | public: | 240 | public: |
| 243 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 241 | explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); |
| 244 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 245 | 242 | ||
| 246 | void TickFrame(); | 243 | void TickFrame(); |
| 247 | 244 | ||
| 248 | void WriteMemory(VAddr cpu_addr, u64 size); | 245 | void WriteMemory(DAddr device_addr, u64 size); |
| 249 | 246 | ||
| 250 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 247 | void CachedWriteMemory(DAddr device_addr, u64 size); |
| 251 | 248 | ||
| 252 | bool OnCPUWrite(VAddr cpu_addr, u64 size); | 249 | bool OnCPUWrite(DAddr device_addr, u64 size); |
| 253 | 250 | ||
| 254 | void DownloadMemory(VAddr cpu_addr, u64 size); | 251 | void DownloadMemory(DAddr device_addr, u64 size); |
| 255 | 252 | ||
| 256 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 253 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size); |
| 257 | 254 | ||
| 258 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | 255 | bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |
| 259 | 256 | ||
| 260 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | 257 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 261 | 258 | ||
| @@ -300,7 +297,7 @@ public: | |||
| 300 | ObtainBufferSynchronize sync_info, | 297 | ObtainBufferSynchronize sync_info, |
| 301 | ObtainBufferOperation post_op); | 298 | ObtainBufferOperation post_op); |
| 302 | 299 | ||
| 303 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | 300 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size, |
| 304 | ObtainBufferSynchronize sync_info, | 301 | ObtainBufferSynchronize sync_info, |
| 305 | ObtainBufferOperation post_op); | 302 | ObtainBufferOperation post_op); |
| 306 | void FlushCachedWrites(); | 303 | void FlushCachedWrites(); |
| @@ -326,13 +323,13 @@ public: | |||
| 326 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | 323 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); |
| 327 | 324 | ||
| 328 | /// Return true when a CPU region is modified from the GPU | 325 | /// Return true when a CPU region is modified from the GPU |
| 329 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 326 | [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); |
| 330 | 327 | ||
| 331 | /// Return true when a region is registered on the cache | 328 | /// Return true when a region is registered on the cache |
| 332 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | 329 | [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size); |
| 333 | 330 | ||
| 334 | /// Return true when a CPU region is modified from the CPU | 331 | /// Return true when a CPU region is modified from the CPU |
| 335 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 332 | [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size); |
| 336 | 333 | ||
| 337 | void SetDrawIndirect( | 334 | void SetDrawIndirect( |
| 338 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | 335 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { |
| @@ -366,9 +363,9 @@ private: | |||
| 366 | } | 363 | } |
| 367 | 364 | ||
| 368 | template <typename Func> | 365 | template <typename Func> |
| 369 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | 366 | void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) { |
| 370 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | 367 | const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); |
| 371 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | 368 | for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { |
| 372 | const BufferId buffer_id = page_table[page]; | 369 | const BufferId buffer_id = page_table[page]; |
| 373 | if (!buffer_id) { | 370 | if (!buffer_id) { |
| 374 | ++page; | 371 | ++page; |
| @@ -377,15 +374,15 @@ private: | |||
| 377 | Buffer& buffer = slot_buffers[buffer_id]; | 374 | Buffer& buffer = slot_buffers[buffer_id]; |
| 378 | func(buffer_id, buffer); | 375 | func(buffer_id, buffer); |
| 379 | 376 | ||
| 380 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 377 | const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); |
| 381 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | 378 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 382 | } | 379 | } |
| 383 | } | 380 | } |
| 384 | 381 | ||
| 385 | template <typename Func> | 382 | template <typename Func> |
| 386 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | 383 | void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { |
| 387 | const VAddr start_address = cpu_addr; | 384 | const DAddr start_address = device_addr; |
| 388 | const VAddr end_address = start_address + size; | 385 | const DAddr end_address = start_address + size; |
| 389 | const IntervalType search_interval{start_address, end_address}; | 386 | const IntervalType search_interval{start_address, end_address}; |
| 390 | auto it = current_range.lower_bound(search_interval); | 387 | auto it = current_range.lower_bound(search_interval); |
| 391 | if (it == current_range.end()) { | 388 | if (it == current_range.end()) { |
| @@ -393,8 +390,8 @@ private: | |||
| 393 | } | 390 | } |
| 394 | auto end_it = current_range.upper_bound(search_interval); | 391 | auto end_it = current_range.upper_bound(search_interval); |
| 395 | for (; it != end_it; it++) { | 392 | for (; it != end_it; it++) { |
| 396 | VAddr inter_addr_end = it->upper(); | 393 | DAddr inter_addr_end = it->upper(); |
| 397 | VAddr inter_addr = it->lower(); | 394 | DAddr inter_addr = it->lower(); |
| 398 | if (inter_addr_end > end_address) { | 395 | if (inter_addr_end > end_address) { |
| 399 | inter_addr_end = end_address; | 396 | inter_addr_end = end_address; |
| 400 | } | 397 | } |
| @@ -406,10 +403,10 @@ private: | |||
| 406 | } | 403 | } |
| 407 | 404 | ||
| 408 | template <typename Func> | 405 | template <typename Func> |
| 409 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | 406 | void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, |
| 410 | Func&& func) { | 407 | Func&& func) { |
| 411 | const VAddr start_address = cpu_addr; | 408 | const DAddr start_address = device_addr; |
| 412 | const VAddr end_address = start_address + size; | 409 | const DAddr end_address = start_address + size; |
| 413 | const IntervalType search_interval{start_address, end_address}; | 410 | const IntervalType search_interval{start_address, end_address}; |
| 414 | auto it = current_range.lower_bound(search_interval); | 411 | auto it = current_range.lower_bound(search_interval); |
| 415 | if (it == current_range.end()) { | 412 | if (it == current_range.end()) { |
| @@ -418,8 +415,8 @@ private: | |||
| 418 | auto end_it = current_range.upper_bound(search_interval); | 415 | auto end_it = current_range.upper_bound(search_interval); |
| 419 | for (; it != end_it; it++) { | 416 | for (; it != end_it; it++) { |
| 420 | auto& inter = it->first; | 417 | auto& inter = it->first; |
| 421 | VAddr inter_addr_end = inter.upper(); | 418 | DAddr inter_addr_end = inter.upper(); |
| 422 | VAddr inter_addr = inter.lower(); | 419 | DAddr inter_addr = inter.lower(); |
| 423 | if (inter_addr_end > end_address) { | 420 | if (inter_addr_end > end_address) { |
| 424 | inter_addr_end = end_address; | 421 | inter_addr_end = end_address; |
| 425 | } | 422 | } |
| @@ -451,9 +448,9 @@ private: | |||
| 451 | } while (any_removals); | 448 | } while (any_removals); |
| 452 | } | 449 | } |
| 453 | 450 | ||
| 454 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | 451 | static bool IsRangeGranular(DAddr device_addr, size_t size) { |
| 455 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | 452 | return (device_addr & ~Core::DEVICE_PAGEMASK) == |
| 456 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | 453 | ((device_addr + size) & ~Core::DEVICE_PAGEMASK); |
| 457 | } | 454 | } |
| 458 | 455 | ||
| 459 | void RunGarbageCollector(); | 456 | void RunGarbageCollector(); |
| @@ -508,15 +505,15 @@ private: | |||
| 508 | 505 | ||
| 509 | void UpdateComputeTextureBuffers(); | 506 | void UpdateComputeTextureBuffers(); |
| 510 | 507 | ||
| 511 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | 508 | void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size); |
| 512 | 509 | ||
| 513 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | 510 | [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size); |
| 514 | 511 | ||
| 515 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | 512 | [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size); |
| 516 | 513 | ||
| 517 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | 514 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); |
| 518 | 515 | ||
| 519 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | 516 | [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size); |
| 520 | 517 | ||
| 521 | void Register(BufferId buffer_id); | 518 | void Register(BufferId buffer_id); |
| 522 | 519 | ||
| @@ -527,7 +524,7 @@ private: | |||
| 527 | 524 | ||
| 528 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | 525 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; |
| 529 | 526 | ||
| 530 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 527 | bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size); |
| 531 | 528 | ||
| 532 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 529 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 533 | std::span<BufferCopy> copies); | 530 | std::span<BufferCopy> copies); |
| @@ -539,7 +536,7 @@ private: | |||
| 539 | 536 | ||
| 540 | void DownloadBufferMemory(Buffer& buffer_id); | 537 | void DownloadBufferMemory(Buffer& buffer_id); |
| 541 | 538 | ||
| 542 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | 539 | void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size); |
| 543 | 540 | ||
| 544 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | 541 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); |
| 545 | 542 | ||
| @@ -549,7 +546,7 @@ private: | |||
| 549 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 546 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 550 | PixelFormat format); | 547 | PixelFormat format); |
| 551 | 548 | ||
| 552 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | 549 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size); |
| 553 | 550 | ||
| 554 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | 551 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); |
| 555 | 552 | ||
| @@ -557,11 +554,10 @@ private: | |||
| 557 | 554 | ||
| 558 | void ClearDownload(IntervalType subtract_interval); | 555 | void ClearDownload(IntervalType subtract_interval); |
| 559 | 556 | ||
| 560 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 557 | void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 561 | std::span<const u8> inlined_buffer); | 558 | std::span<const u8> inlined_buffer); |
| 562 | 559 | ||
| 563 | VideoCore::RasterizerInterface& rasterizer; | 560 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 564 | Core::Memory::Memory& cpu_memory; | ||
| 565 | 561 | ||
| 566 | SlotVector<Buffer> slot_buffers; | 562 | SlotVector<Buffer> slot_buffers; |
| 567 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 563 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| @@ -598,7 +594,7 @@ private: | |||
| 598 | u64 critical_memory = 0; | 594 | u64 critical_memory = 0; |
| 599 | BufferId inline_buffer_id; | 595 | BufferId inline_buffer_id; |
| 600 | 596 | ||
| 601 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 597 | std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table; |
| 602 | Common::ScratchBuffer<u8> tmp_buffer; | 598 | Common::ScratchBuffer<u8> tmp_buffer; |
| 603 | }; | 599 | }; |
| 604 | 600 | ||
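The ForEachInRangeSet and ForEachInOverlapCounter helpers above share one idiom: look up the queried window in an interval container, then clamp every stored interval to that window before reporting it to the callback. A minimal standalone sketch of that walk, assuming IntervalSet is a boost::icl::interval_set<DAddr> with right-open intervals (consistent with the lower()/upper() accessors visible in the hunk):

    #include <algorithm>
    #include <cstdint>
    #include <boost/icl/interval_set.hpp>

    using DAddr = std::uint64_t;
    using IntervalSet = boost::icl::interval_set<DAddr>;
    using IntervalType = IntervalSet::interval_type;

    template <typename Func>
    void ForEachClamped(IntervalSet& set, DAddr device_addr, std::uint64_t size, Func&& func) {
        const DAddr start_address = device_addr;
        const DAddr end_address = start_address + size;
        const IntervalType search_interval{start_address, end_address};
        auto it = set.lower_bound(search_interval);
        if (it == set.end()) {
            return;
        }
        const auto end_it = set.upper_bound(search_interval);
        for (; it != end_it; ++it) {
            // Clamp each stored interval to the queried window, exactly as the
            // buffer cache does before handing (address, size) to the callback.
            const DAddr lo = std::max(it->lower(), start_address);
            const DAddr hi = std::min(it->upper(), end_address);
            func(lo, hi - lo);
        }
    }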
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index 6c1c8287b..c95eed1f6 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -17,19 +17,19 @@ | |||
| 17 | 17 | ||
| 18 | namespace VideoCommon { | 18 | namespace VideoCommon { |
| 19 | 19 | ||
| 20 | template <class RasterizerInterface> | 20 | template <typename DeviceTracker> |
| 21 | class MemoryTrackerBase { | 21 | class MemoryTrackerBase { |
| 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 39; | 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 34; |
| 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; | 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; |
| 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; |
| 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; |
| 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); |
| 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; | 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; |
| 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; |
| 29 | using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | 29 | using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>; |
| 30 | 30 | ||
| 31 | public: | 31 | public: |
| 32 | MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | 32 | MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {} |
| 33 | ~MemoryTrackerBase() = default; | 33 | ~MemoryTrackerBase() = default; |
| 34 | 34 | ||
| 35 | /// Returns the inclusive CPU modified range in a begin end pair | 35 | /// Returns the inclusive CPU modified range in a begin end pair |
| @@ -74,7 +74,7 @@ public: | |||
| 74 | }); | 74 | }); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | /// Mark region as CPU modified, notifying the rasterizer about this change | 77 | /// Mark region as CPU modified, notifying the device_tracker about this change |
| 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 79 | IteratePages<true>(dirty_cpu_addr, query_size, | 79 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 80 | [](Manager* manager, u64 offset, size_t size) { | 80 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -83,7 +83,7 @@ public: | |||
| 83 | }); | 83 | }); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /// Unmark region as CPU modified, notifying the rasterizer about this change | 86 | /// Unmark region as CPU modified, notifying the device_tracker about this change |
| 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 88 | IteratePages<true>(dirty_cpu_addr, query_size, | 88 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 89 | [](Manager* manager, u64 offset, size_t size) { | 89 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -139,7 +139,7 @@ public: | |||
| 139 | }); | 139 | }); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | /// Flushes cached CPU writes, and notify the rasterizer about the deltas | 142 | /// Flushes cached CPU writes, and notify the device_tracker about the deltas |
| 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { |
| 144 | IteratePages<false>(query_cpu_addr, query_size, | 144 | IteratePages<false>(query_cpu_addr, query_size, |
| 145 | [](Manager* manager, [[maybe_unused]] u64 offset, | 145 | [](Manager* manager, [[maybe_unused]] u64 offset, |
| @@ -280,7 +280,7 @@ private: | |||
| 280 | manager_pool.emplace_back(); | 280 | manager_pool.emplace_back(); |
| 281 | auto& last_pool = manager_pool.back(); | 281 | auto& last_pool = manager_pool.back(); |
| 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { |
| 283 | new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | 283 | new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE); |
| 284 | free_managers.push_back(&last_pool[i]); | 284 | free_managers.push_back(&last_pool[i]); |
| 285 | } | 285 | } |
| 286 | return on_return(); | 286 | return on_return(); |
| @@ -293,7 +293,7 @@ private: | |||
| 293 | 293 | ||
| 294 | std::unordered_set<u32> cached_pages; | 294 | std::unordered_set<u32> cached_pages; |
| 295 | 295 | ||
| 296 | RasterizerInterface* rasterizer = nullptr; | 296 | DeviceTracker* device_tracker = nullptr; |
| 297 | }; | 297 | }; |
| 298 | 298 | ||
| 299 | } // namespace VideoCommon | 299 | } // namespace VideoCommon |
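MAX_CPU_PAGE_BITS dropping from 39 to 34 here matches device_virtual_bits = 34 in MaxwellDeviceTraits later in this diff, and the derived constants shrink with it; the buffer cache page_table above makes the same (1ULL << 39) to (1ULL << 34) change. A compile-time sketch of the sizing arithmetic:

    // Sketch of the tracker sizing math after the move to the 34-bit device space.
    #include <cstddef>

    constexpr std::size_t MAX_CPU_PAGE_BITS = 34; // was 39 for the CPU address space
    constexpr std::size_t HIGHER_PAGE_BITS = 22;
    constexpr std::size_t HIGHER_PAGE_SIZE = std::size_t{1} << HIGHER_PAGE_BITS; // 4 MiB regions
    constexpr std::size_t NUM_HIGH_PAGES = std::size_t{1} << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);

    static_assert(NUM_HIGH_PAGES == 4096);  // the 39-bit space needed 131072 entries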
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..3db9d8b42 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -13,12 +13,12 @@ | |||
| 13 | #include "common/common_funcs.h" | 13 | #include "common/common_funcs.h" |
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/div_ceil.h" | 15 | #include "common/div_ceil.h" |
| 16 | #include "core/memory.h" | 16 | #include "video_core/host1x/gpu_device_memory_manager.h" |
| 17 | 17 | ||
| 18 | namespace VideoCommon { | 18 | namespace VideoCommon { |
| 19 | 19 | ||
| 20 | constexpr u64 PAGES_PER_WORD = 64; | 20 | constexpr u64 PAGES_PER_WORD = 64; |
| 21 | constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | 21 | constexpr u64 BYTES_PER_PAGE = Core::DEVICE_PAGESIZE; |
| 22 | constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | 22 | constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; |
| 23 | 23 | ||
| 24 | enum class Type { | 24 | enum class Type { |
| @@ -163,11 +163,11 @@ struct Words { | |||
| 163 | WordsArray<stack_words> preflushable; | 163 | WordsArray<stack_words> preflushable; |
| 164 | }; | 164 | }; |
| 165 | 165 | ||
| 166 | template <class RasterizerInterface, size_t stack_words = 1> | 166 | template <class DeviceTracker, size_t stack_words = 1> |
| 167 | class WordManager { | 167 | class WordManager { |
| 168 | public: | 168 | public: |
| 169 | explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | 169 | explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes) |
| 170 | : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | 170 | : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {} |
| 171 | 171 | ||
| 172 | explicit WordManager() = default; | 172 | explicit WordManager() = default; |
| 173 | 173 | ||
| @@ -279,7 +279,7 @@ public: | |||
| 279 | } | 279 | } |
| 280 | 280 | ||
| 281 | /** | 281 | /** |
| 282 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | 282 | * Loop over each page in the given range, turn off those bits and notify the tracker if |
| 283 | * needed. Call the given function on each turned off range. | 283 | * needed. Call the given function on each turned off range. |
| 284 | * | 284 | * |
| 285 | * @param query_cpu_range Base CPU address to loop over | 285 | * @param query_cpu_range Base CPU address to loop over |
| @@ -459,26 +459,26 @@ private: | |||
| 459 | } | 459 | } |
| 460 | 460 | ||
| 461 | /** | 461 | /** |
| 462 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | 462 | * Notify tracker about changes in the CPU tracking state of a word in the buffer |
| 463 | * | 463 | * |
| 464 | * @param word_index Index to the word to notify to the rasterizer | 464 | * @param word_index Index to the word to notify to the tracker |
| 465 | * @param current_bits Current state of the word | 465 | * @param current_bits Current state of the word |
| 466 | * @param new_bits New state of the word | 466 | * @param new_bits New state of the word |
| 467 | * | 467 | * |
| 468 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | 468 | * @tparam add_to_tracker True when the tracker should start tracking the new pages |
| 469 | */ | 469 | */ |
| 470 | template <bool add_to_rasterizer> | 470 | template <bool add_to_tracker> |
| 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |
| 472 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | 472 | u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; |
| 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { |
| 475 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | 475 | tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE, |
| 476 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | 476 | add_to_tracker ? 1 : -1); |
| 477 | }); | 477 | }); |
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | VAddr cpu_addr = 0; | 480 | VAddr cpu_addr = 0; |
| 481 | RasterizerInterface* rasterizer = nullptr; | 481 | DeviceTracker* tracker = nullptr; |
| 482 | Words<stack_words> words; | 482 | Words<stack_words> words; |
| 483 | }; | 483 | }; |
| 484 | 484 | ||
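NotifyRasterizer (kept under its old name despite the rename to tracker) computes changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits, then walks contiguous runs of set bits, turning each run into an (offset, size) page range for UpdatePagesCachedCount. A self-contained sketch of that run walk, assuming the IteratePages helper (whose body is outside this hunk) behaves as a countr_zero/countr_one scan:

    // Walk contiguous set-bit runs in a 64-page tracking word.
    // std::countr_zero / std::countr_one require C++20 <bit>.
    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    template <typename Func>
    void IteratePageRuns(std::uint64_t changed_bits, Func&& func) {
        while (changed_bits != 0) {
            const int offset = std::countr_zero(changed_bits);          // first changed page
            const int size = std::countr_one(changed_bits >> offset);   // run length in pages
            func(static_cast<std::size_t>(offset), static_cast<std::size_t>(size));
            const std::uint64_t run_mask =
                ((size == 64) ? ~std::uint64_t{0} : ((std::uint64_t{1} << size) - 1)) << offset;
            changed_bits &= ~run_mask;  // clear the run we just reported
        }
    }

    int main() {
        // Pages 2..4 and 7 changed: expect runs (2,3) and (7,1).
        IteratePageRuns(0b1001'1100, [](std::size_t off, std::size_t size) {
            std::printf("run at page %zu, %zu pages\n", off, size);
        });
    }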
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 58ce0d8c2..fb2060ca4 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -5,10 +5,10 @@ | |||
| 5 | #include "common/microprofile.h" | 5 | #include "common/microprofile.h" |
| 6 | #include "common/settings.h" | 6 | #include "common/settings.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/memory.h" | ||
| 9 | #include "video_core/dma_pusher.h" | 8 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 11 | #include "video_core/guest_memory.h" | ||
| 12 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 13 | 13 | ||
| 14 | namespace Tegra { | 14 | namespace Tegra { |
| @@ -85,15 +85,15 @@ bool DmaPusher::Step() { | |||
| 85 | } | 85 | } |
| 86 | } | 86 | } |
| 87 | const auto safe_process = [&] { | 87 | const auto safe_process = [&] { |
| 88 | Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | 88 | Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, |
| 89 | Core::Memory::GuestMemoryFlags::SafeRead> | 89 | Tegra::Memory::GuestMemoryFlags::SafeRead> |
| 90 | headers(memory_manager, dma_state.dma_get, command_list_header.size, | 90 | headers(memory_manager, dma_state.dma_get, command_list_header.size, |
| 91 | &command_headers); | 91 | &command_headers); |
| 92 | ProcessCommands(headers); | 92 | ProcessCommands(headers); |
| 93 | }; | 93 | }; |
| 94 | const auto unsafe_process = [&] { | 94 | const auto unsafe_process = [&] { |
| 95 | Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | 95 | Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, |
| 96 | Core::Memory::GuestMemoryFlags::UnsafeRead> | 96 | Tegra::Memory::GuestMemoryFlags::UnsafeRead> |
| 97 | headers(memory_manager, dma_state.dma_get, command_list_header.size, | 97 | headers(memory_manager, dma_state.dma_get, command_list_header.size, |
| 98 | &command_headers); | 98 | &command_headers); |
| 99 | ProcessCommands(headers); | 99 | ProcessCommands(headers); |
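Both lambdas above construct the same GpuGuestMemory view and differ only in the SafeRead/UnsafeRead flag; the wrapper's value is that ProcessCommands receives a span either way, whether or not the guest range is contiguously mapped. A hypothetical standalone helper illustrating that span-or-copy idea (ReadGuestSpan and the GetPointer-returns-null-on-miss behavior are assumptions for illustration, not the real API):

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    template <typename MemoryManager, typename T>
    std::span<const T> ReadGuestSpan(MemoryManager& memory, std::uint64_t gpu_addr,
                                     std::size_t count, std::vector<T>* scratch) {
        if (const T* direct = reinterpret_cast<const T*>(memory.GetPointer(gpu_addr))) {
            return {direct, count};  // zero-copy fast path on a contiguous mapping
        }
        scratch->resize(count);
        memory.ReadBlock(gpu_addr, scratch->data(), count * sizeof(T));  // safe slow path
        return {scratch->data(), count};
    }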
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index bc64d4486..e5cc04ec4 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -5,8 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include "common/algorithm.h" | 6 | #include "common/algorithm.h" |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "core/memory.h" | ||
| 9 | #include "video_core/engines/engine_upload.h" | 8 | #include "video_core/engines/engine_upload.h" |
| 9 | #include "video_core/guest_memory.h" | ||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/textures/decoders.h" | 12 | #include "video_core/textures/decoders.h" |
| @@ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 68 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | 68 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 69 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | 69 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); |
| 70 | 70 | ||
| 71 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 71 | Tegra::Memory::GpuGuestMemoryScoped<u8, |
| 72 | Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||
| 72 | tmp(memory_manager, address, dst_size, &tmp_buffer); | 73 | tmp(memory_manager, address, dst_size, &tmp_buffer); |
| 73 | 74 | ||
| 74 | Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, | 75 | Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, |
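GpuGuestMemoryScoped used here is the write-back variant: the buffer is populated on construction and flushed to guest memory when the object leaves scope, so SwizzleSubrect can operate on plain host memory in between. A simplified stand-in showing the RAII shape, including the SetAddressAndSize retargeting hook the DMA engine uses in the next file (the real type also honors its GuestMemoryFlags template parameter, which this sketch ignores):

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    template <typename MemoryManager>
    class ScopedGuestSpan {
    public:
        ScopedGuestSpan(MemoryManager& memory, std::uint64_t addr, std::size_t size,
                        std::vector<std::uint8_t>* scratch)
            : memory_{memory}, addr_{addr}, size_{size}, buffer_{scratch} {
            buffer_->resize(size_);
            memory_.ReadBlock(addr_, buffer_->data(), size_);  // populate on entry
        }
        ~ScopedGuestSpan() {
            memory_.WriteBlock(addr_, buffer_->data(), size_);  // flush on exit
        }
        // Retarget the flush, mirroring SetAddressAndSize in MaxwellDMA::Launch.
        void SetAddressAndSize(std::uint64_t addr, std::size_t size) {
            addr_ = addr;
            size_ = size;
        }
        operator std::span<std::uint8_t>() { return {buffer_->data(), size_}; }

    private:
        MemoryManager& memory_;
        std::uint64_t addr_;
        std::size_t size_;
        std::vector<std::uint8_t>* buffer_;
    };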
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 95ba4f76c..a94e1f043 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/core_timing.h" | 11 | #include "core/core_timing.h" |
| 12 | #include "core/memory.h" | ||
| 13 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 14 | #include "video_core/engines/draw_manager.h" | 13 | #include "video_core/engines/draw_manager.h" |
| 15 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 56fbff306..2ebd21fc5 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -8,9 +8,9 @@ | |||
| 8 | #include "common/polyfill_ranges.h" | 8 | #include "common/polyfill_ranges.h" |
| 9 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/memory.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 13 | #include "video_core/guest_memory.h" | ||
| 14 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/renderer_base.h" | 15 | #include "video_core/renderer_base.h" |
| 16 | #include "video_core/textures/decoders.h" | 16 | #include "video_core/textures/decoders.h" |
| @@ -133,8 +133,8 @@ void MaxwellDMA::Launch() { | |||
| 133 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 133 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 134 | read_buffer.resize_destructive(16); | 134 | read_buffer.resize_destructive(16); |
| 135 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 135 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 136 | Core::Memory::GpuGuestMemoryScoped< | 136 | Tegra::Memory::GpuGuestMemoryScoped< |
| 137 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 137 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 138 | tmp_write_buffer(memory_manager, | 138 | tmp_write_buffer(memory_manager, |
| 139 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 139 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 140 | 16, &read_buffer); | 140 | 16, &read_buffer); |
| @@ -146,16 +146,16 @@ void MaxwellDMA::Launch() { | |||
| 146 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 146 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 147 | read_buffer.resize_destructive(16); | 147 | read_buffer.resize_destructive(16); |
| 148 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 148 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 149 | Core::Memory::GpuGuestMemoryScoped< | 149 | Tegra::Memory::GpuGuestMemoryScoped< |
| 150 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 150 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 151 | tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); | 151 | tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); |
| 152 | tmp_write_buffer.SetAddressAndSize( | 152 | tmp_write_buffer.SetAddressAndSize( |
| 153 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); | 153 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); |
| 154 | } | 154 | } |
| 155 | } else { | 155 | } else { |
| 156 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 156 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 157 | Core::Memory::GpuGuestMemoryScoped< | 157 | Tegra::Memory::GpuGuestMemoryScoped< |
| 158 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 158 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 159 | tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, | 159 | tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, |
| 160 | &read_buffer); | 160 | &read_buffer); |
| 161 | tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); | 161 | tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); |
| @@ -226,9 +226,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 226 | 226 | ||
| 227 | const size_t dst_size = dst_operand.pitch * regs.line_count; | 227 | const size_t dst_size = dst_operand.pitch * regs.line_count; |
| 228 | 228 | ||
| 229 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 229 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 230 | memory_manager, src_operand.address, src_size, &read_buffer); | 230 | memory_manager, src_operand.address, src_size, &read_buffer); |
| 231 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | 231 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |
| 232 | tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | 232 | tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); |
| 233 | 233 | ||
| 234 | UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | 234 | UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, |
| @@ -290,9 +290,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 290 | 290 | ||
| 291 | GPUVAddr src_addr = regs.offset_in; | 291 | GPUVAddr src_addr = regs.offset_in; |
| 292 | GPUVAddr dst_addr = regs.offset_out; | 292 | GPUVAddr dst_addr = regs.offset_out; |
| 293 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 293 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 294 | memory_manager, src_addr, src_size, &read_buffer); | 294 | memory_manager, src_addr, src_size, &read_buffer); |
| 295 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | 295 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |
| 296 | tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); | 296 | tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); |
| 297 | 297 | ||
| 298 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 298 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| @@ -344,9 +344,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 344 | 344 | ||
| 345 | intermediate_buffer.resize_destructive(mid_buffer_size); | 345 | intermediate_buffer.resize_destructive(mid_buffer_size); |
| 346 | 346 | ||
| 347 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 347 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 348 | memory_manager, regs.offset_in, src_size, &read_buffer); | 348 | memory_manager, regs.offset_in, src_size, &read_buffer); |
| 349 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 349 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 350 | tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | 350 | tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); |
| 351 | 351 | ||
| 352 | UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, | 352 | UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, |
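The 16-byte block-linear loops above lean on that retargeting: each scoped buffer is constructed against the source address and then pointed at the destination, so the destructor performs the actual copy. A usage sketch, generic over any wrapper with those two hooks (such as the ScopedGuestSpan stand-in sketched after the engine_upload.cpp hunk):

    #include <cstdint>
    #include <vector>

    template <typename GuestMem, typename MemoryManager>
    void CopyChunk(MemoryManager& memory_manager, std::uint64_t offset_in,
                   std::uint64_t offset_out, std::vector<std::uint8_t>* scratch) {
        GuestMem tmp(memory_manager, offset_in, 16, scratch);  // reads 16 source bytes
        tmp.SetAddressAndSize(offset_out, 16);  // destructor now writes them to the destination
    }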
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index 67ce9134b..4bc079024 100644 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/scratch_buffer.h" | 8 | #include "common/scratch_buffer.h" |
| 9 | #include "video_core/engines/sw_blitter/blitter.h" | 9 | #include "video_core/engines/sw_blitter/blitter.h" |
| 10 | #include "video_core/engines/sw_blitter/converter.h" | 10 | #include "video_core/engines/sw_blitter/converter.h" |
| 11 | #include "video_core/guest_memory.h" | ||
| 11 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 12 | #include "video_core/surface.h" | 13 | #include "video_core/surface.h" |
| 13 | #include "video_core/textures/decoders.h" | 14 | #include "video_core/textures/decoders.h" |
| @@ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 160 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | 161 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); |
| 161 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | 162 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); |
| 162 | 163 | ||
| 163 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | 164 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( |
| 164 | memory_manager, src.Address(), src_size, &impl->tmp_buffer); | 165 | memory_manager, src.Address(), src_size, &impl->tmp_buffer); |
| 165 | 166 | ||
| 166 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | 167 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; |
| @@ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 220 | } | 221 | } |
| 221 | 222 | ||
| 222 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | 223 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); |
| 223 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> | 224 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite> |
| 224 | tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); | 225 | tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); |
| 225 | 226 | ||
| 226 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | 227 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { |
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 5f3bffcab..856f4bd52 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h | |||
| @@ -14,7 +14,7 @@ namespace Tegra { | |||
| 14 | * Struct describing framebuffer configuration | 14 | * Struct describing framebuffer configuration |
| 15 | */ | 15 | */ |
| 16 | struct FramebufferConfig { | 16 | struct FramebufferConfig { |
| 17 | VAddr address{}; | 17 | DAddr address{}; |
| 18 | u32 offset{}; | 18 | u32 offset{}; |
| 19 | u32 width{}; | 19 | u32 width{}; |
| 20 | u32 height{}; | 20 | u32 height{}; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 11549d448..609704b33 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -85,7 +85,8 @@ struct GPU::Impl { | |||
| 85 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | 85 | void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { |
| 86 | renderer = std::move(renderer_); | 86 | renderer = std::move(renderer_); |
| 87 | rasterizer = renderer->ReadRasterizer(); | 87 | rasterizer = renderer->ReadRasterizer(); |
| 88 | host1x.MemoryManager().BindRasterizer(rasterizer); | 88 | host1x.MemoryManager().BindInterface(rasterizer); |
| 89 | host1x.GMMU().BindRasterizer(rasterizer); | ||
| 89 | } | 90 | } |
| 90 | 91 | ||
| 91 | /// Flush all current written commands into the host GPU for execution. | 92 | /// Flush all current written commands into the host GPU for execution. |
| @@ -95,8 +96,8 @@ struct GPU::Impl { | |||
| 95 | 96 | ||
| 96 | /// Synchronizes CPU writes with Host GPU memory. | 97 | /// Synchronizes CPU writes with Host GPU memory. |
| 97 | void InvalidateGPUCache() { | 98 | void InvalidateGPUCache() { |
| 98 | std::function<void(VAddr, size_t)> callback_writes( | 99 | std::function<void(PAddr, size_t)> callback_writes( |
| 99 | [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | 100 | [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); |
| 100 | system.GatherGPUDirtyMemory(callback_writes); | 101 | system.GatherGPUDirtyMemory(callback_writes); |
| 101 | } | 102 | } |
| 102 | 103 | ||
| @@ -279,11 +280,11 @@ struct GPU::Impl { | |||
| 279 | } | 280 | } |
| 280 | 281 | ||
| 281 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 282 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 282 | void FlushRegion(VAddr addr, u64 size) { | 283 | void FlushRegion(DAddr addr, u64 size) { |
| 283 | gpu_thread.FlushRegion(addr, size); | 284 | gpu_thread.FlushRegion(addr, size); |
| 284 | } | 285 | } |
| 285 | 286 | ||
| 286 | VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { | 287 | VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { |
| 287 | auto raster_area = rasterizer->GetFlushArea(addr, size); | 288 | auto raster_area = rasterizer->GetFlushArea(addr, size); |
| 288 | if (raster_area.preemtive) { | 289 | if (raster_area.preemtive) { |
| 289 | return raster_area; | 290 | return raster_area; |
| @@ -299,16 +300,16 @@ struct GPU::Impl { | |||
| 299 | } | 300 | } |
| 300 | 301 | ||
| 301 | /// Notify rasterizer that any caches of the specified region should be invalidated | 302 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 302 | void InvalidateRegion(VAddr addr, u64 size) { | 303 | void InvalidateRegion(DAddr addr, u64 size) { |
| 303 | gpu_thread.InvalidateRegion(addr, size); | 304 | gpu_thread.InvalidateRegion(addr, size); |
| 304 | } | 305 | } |
| 305 | 306 | ||
| 306 | bool OnCPUWrite(VAddr addr, u64 size) { | 307 | bool OnCPUWrite(DAddr addr, u64 size) { |
| 307 | return rasterizer->OnCPUWrite(addr, size); | 308 | return rasterizer->OnCPUWrite(addr, size); |
| 308 | } | 309 | } |
| 309 | 310 | ||
| 310 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 311 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 311 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { | 312 | void FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 312 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 313 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 313 | } | 314 | } |
| 314 | 315 | ||
| @@ -437,7 +438,7 @@ void GPU::OnCommandListEnd() { | |||
| 437 | impl->OnCommandListEnd(); | 438 | impl->OnCommandListEnd(); |
| 438 | } | 439 | } |
| 439 | 440 | ||
| 440 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | 441 | u64 GPU::RequestFlush(DAddr addr, std::size_t size) { |
| 441 | return impl->RequestSyncOperation( | 442 | return impl->RequestSyncOperation( |
| 442 | [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); | 443 | [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); |
| 443 | } | 444 | } |
| @@ -557,23 +558,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 557 | impl->SwapBuffers(framebuffer); | 558 | impl->SwapBuffers(framebuffer); |
| 558 | } | 559 | } |
| 559 | 560 | ||
| 560 | VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { | 561 | VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { |
| 561 | return impl->OnCPURead(addr, size); | 562 | return impl->OnCPURead(addr, size); |
| 562 | } | 563 | } |
| 563 | 564 | ||
| 564 | void GPU::FlushRegion(VAddr addr, u64 size) { | 565 | void GPU::FlushRegion(DAddr addr, u64 size) { |
| 565 | impl->FlushRegion(addr, size); | 566 | impl->FlushRegion(addr, size); |
| 566 | } | 567 | } |
| 567 | 568 | ||
| 568 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | 569 | void GPU::InvalidateRegion(DAddr addr, u64 size) { |
| 569 | impl->InvalidateRegion(addr, size); | 570 | impl->InvalidateRegion(addr, size); |
| 570 | } | 571 | } |
| 571 | 572 | ||
| 572 | bool GPU::OnCPUWrite(VAddr addr, u64 size) { | 573 | bool GPU::OnCPUWrite(DAddr addr, u64 size) { |
| 573 | return impl->OnCPUWrite(addr, size); | 574 | return impl->OnCPUWrite(addr, size); |
| 574 | } | 575 | } |
| 575 | 576 | ||
| 576 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 577 | void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 577 | impl->FlushAndInvalidateRegion(addr, size); | 578 | impl->FlushAndInvalidateRegion(addr, size); |
| 578 | } | 579 | } |
| 579 | 580 | ||
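RequestFlush now takes a DAddr but keeps its shape: wrap the flush in a closure, queue it for the GPU thread via RequestSyncOperation, and return a fence id the CPU side can wait on (CurrentSyncRequestFence in the header). A minimal sketch of that fence-returning request queue, with the real implementation's condition-variable signalling reduced to essentials:

    #include <cstdint>
    #include <functional>
    #include <mutex>
    #include <utility>
    #include <vector>

    class SyncRequestQueue {
    public:
        // Queue a closure for the GPU thread; the returned fence id lets the
        // caller poll or wait until the operation has run.
        std::uint64_t Request(std::function<void()> action) {
            std::scoped_lock lk{mutex};
            pending.emplace_back(++last_fence, std::move(action));
            return last_fence;
        }

        // Drain on the GPU thread; returns the highest completed fence.
        std::uint64_t RunAll() {
            std::vector<std::pair<std::uint64_t, std::function<void()>>> batch;
            {
                std::scoped_lock lk{mutex};
                batch.swap(pending);
            }
            std::uint64_t done = 0;
            for (auto& [fence, fn] : batch) {
                fn();
                done = fence;
            }
            return done;
        }

    private:
        std::mutex mutex;
        std::vector<std::pair<std::uint64_t, std::function<void()>>> pending;
        std::uint64_t last_fence = 0;
    };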
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ba2838b89..b3c1d15bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -158,7 +158,7 @@ public: | |||
| 158 | void InitAddressSpace(Tegra::MemoryManager& memory_manager); | 158 | void InitAddressSpace(Tegra::MemoryManager& memory_manager); |
| 159 | 159 | ||
| 160 | /// Request a host GPU memory flush from the CPU. | 160 | /// Request a host GPU memory flush from the CPU. |
| 161 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 161 | [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size); |
| 162 | 162 | ||
| 163 | /// Obtains current flush request fence id. | 163 | /// Obtains current flush request fence id. |
| 164 | [[nodiscard]] u64 CurrentSyncRequestFence() const; | 164 | [[nodiscard]] u64 CurrentSyncRequestFence() const; |
| @@ -242,20 +242,20 @@ public: | |||
| 242 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 242 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 243 | 243 | ||
| 244 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 244 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 245 | [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); | 245 | [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size); |
| 246 | 246 | ||
| 247 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 247 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 248 | void FlushRegion(VAddr addr, u64 size); | 248 | void FlushRegion(DAddr addr, u64 size); |
| 249 | 249 | ||
| 250 | /// Notify rasterizer that any caches of the specified region should be invalidated | 250 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 251 | void InvalidateRegion(VAddr addr, u64 size); | 251 | void InvalidateRegion(DAddr addr, u64 size); |
| 252 | 252 | ||
| 253 | /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is | 253 | /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is |
| 254 | /// sensible, false otherwise | 254 | /// sensible, false otherwise |
| 255 | bool OnCPUWrite(VAddr addr, u64 size); | 255 | bool OnCPUWrite(DAddr addr, u64 size); |
| 256 | 256 | ||
| 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 258 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 258 | void FlushAndInvalidateRegion(DAddr addr, u64 size); |
| 259 | 259 | ||
| 260 | private: | 260 | private: |
| 261 | struct Impl; | 261 | struct Impl; |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2f0f9f593..788d4f61e 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 82 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | 82 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 85 | void ThreadManager::FlushRegion(DAddr addr, u64 size) { |
| 86 | if (!is_async) { | 86 | if (!is_async) { |
| 87 | // Always flush with synchronous GPU mode | 87 | // Always flush with synchronous GPU mode |
| 88 | PushCommand(FlushRegionCommand(addr, size)); | 88 | PushCommand(FlushRegionCommand(addr, size)); |
| @@ -101,11 +101,11 @@ void ThreadManager::TickGPU() { | |||
| 101 | PushCommand(GPUTickCommand()); | 101 | PushCommand(GPUTickCommand()); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { |
| 105 | rasterizer->OnCacheInvalidation(addr, size); | 105 | rasterizer->OnCacheInvalidation(addr, size); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 108 | void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 110 | rasterizer->OnCacheInvalidation(addr, size); | 110 | rasterizer->OnCacheInvalidation(addr, size); |
| 111 | } | 111 | } |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 43940bd6d..2de25e9ef 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -54,26 +54,26 @@ struct SwapBuffersCommand final { | |||
| 54 | 54 | ||
| 55 | /// Command to signal to the GPU thread to flush a region | 55 | /// Command to signal to the GPU thread to flush a region |
| 56 | struct FlushRegionCommand final { | 56 | struct FlushRegionCommand final { |
| 57 | explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | 57 | explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |
| 58 | 58 | ||
| 59 | VAddr addr; | 59 | DAddr addr; |
| 60 | u64 size; | 60 | u64 size; |
| 61 | }; | 61 | }; |
| 62 | 62 | ||
| 63 | /// Command to signal to the GPU thread to invalidate a region | 63 | /// Command to signal to the GPU thread to invalidate a region |
| 64 | struct InvalidateRegionCommand final { | 64 | struct InvalidateRegionCommand final { |
| 65 | explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | 65 | explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |
| 66 | 66 | ||
| 67 | VAddr addr; | 67 | DAddr addr; |
| 68 | u64 size; | 68 | u64 size; |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | /// Command to signal to the GPU thread to flush and invalidate a region | 71 | /// Command to signal to the GPU thread to flush and invalidate a region |
| 72 | struct FlushAndInvalidateRegionCommand final { | 72 | struct FlushAndInvalidateRegionCommand final { |
| 73 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) | 73 | explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_) |
| 74 | : addr{addr_}, size{size_} {} | 74 | : addr{addr_}, size{size_} {} |
| 75 | 75 | ||
| 76 | VAddr addr; | 76 | DAddr addr; |
| 77 | u64 size; | 77 | u64 size; |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| @@ -122,13 +122,13 @@ public: | |||
| 122 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 122 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 123 | 123 | ||
| 124 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 124 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 125 | void FlushRegion(VAddr addr, u64 size); | 125 | void FlushRegion(DAddr addr, u64 size); |
| 126 | 126 | ||
| 127 | /// Notify rasterizer that any caches of the specified region should be invalidated | 127 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 128 | void InvalidateRegion(VAddr addr, u64 size); | 128 | void InvalidateRegion(DAddr addr, u64 size); |
| 129 | 129 | ||
| 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 131 | void FlushAndInvalidateRegion(DAddr addr, u64 size); |
| 132 | 132 | ||
| 133 | void TickGPU(); | 133 | void TickGPU(); |
| 134 | 134 | ||
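Each of these command structs is one alternative of the GPU thread's queued payload, and the worker dispatches on the concrete type. A sketch of that tagged-command pattern with std::variant (that the real CommandData is a variant over these structs is an assumption suggested by their uniform POD shape):

    #include <cstdint>
    #include <type_traits>
    #include <variant>

    using DAddr = std::uint64_t;

    struct FlushRegionCommand {
        DAddr addr;
        std::uint64_t size;
    };
    struct InvalidateRegionCommand {
        DAddr addr;
        std::uint64_t size;
    };

    using CommandData = std::variant<FlushRegionCommand, InvalidateRegionCommand>;

    template <typename Rasterizer>
    void Dispatch(Rasterizer& rasterizer, const CommandData& command) {
        std::visit([&](const auto& data) {
            using T = std::decay_t<decltype(data)>;
            if constexpr (std::is_same_v<T, FlushRegionCommand>) {
                rasterizer.FlushRegion(data.addr, data.size);
            } else {
                rasterizer.OnCacheInvalidation(data.addr, data.size);
            }
        }, command);
    }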
diff --git a/src/video_core/guest_memory.h b/src/video_core/guest_memory.h new file mode 100644 index 000000000..8b6213172 --- /dev/null +++ b/src/video_core/guest_memory.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <iterator> | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/scratch_buffer.h" | ||
| 13 | #include "core/guest_memory.h" | ||
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | |||
| 16 | namespace Tegra::Memory { | ||
| 17 | |||
| 18 | using GuestMemoryFlags = Core::Memory::GuestMemoryFlags; | ||
| 19 | |||
| 20 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 21 | using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||
| 22 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 23 | using DeviceGuestMemoryScoped = | ||
| 24 | Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||
| 25 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 26 | using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||
| 27 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 28 | using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||
| 29 | |||
| 30 | } // namespace Tegra::Memory | ||
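The new header is pure type plumbing: the same Core::Memory::GuestMemory machinery is aliased once over the GPU MMU (Tegra::MemoryManager, GPUVAddr keys) and once over the device space (MaxwellDeviceMemoryManager, DAddr keys), selected by the first template argument. A usage sketch against these aliases, mirroring the call sites earlier in the diff (the ScratchBuffer element type and the span-like behavior are read off those call sites, not verified against the header):

    #include "video_core/guest_memory.h"

    void ReadBytesExample(Tegra::MemoryManager& gmmu, GPUVAddr addr,
                          Common::ScratchBuffer<u8>& scratch) {
        Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead>
            bytes(gmmu, addr, 64, &scratch);
        // `bytes` behaves as a std::span<const u8> over the 64 guest bytes,
        // copied through `scratch` only if the range is not contiguously mapped.
    }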
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 309a7f1d5..994591c8d 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -32,13 +32,12 @@ H264::~H264() = default; | |||
| 32 | std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, | 32 | std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, |
| 33 | size_t* out_configuration_size, bool is_first_frame) { | 33 | size_t* out_configuration_size, bool is_first_frame) { |
| 34 | H264DecoderContext context; | 34 | H264DecoderContext context; |
| 35 | host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, | 35 | host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); |
| 36 | sizeof(H264DecoderContext)); | ||
| 37 | 36 | ||
| 38 | const s64 frame_number = context.h264_parameter_set.frame_number.Value(); | 37 | const s64 frame_number = context.h264_parameter_set.frame_number.Value(); |
| 39 | if (!is_first_frame && frame_number != 0) { | 38 | if (!is_first_frame && frame_number != 0) { |
| 40 | frame.resize_destructive(context.stream_len); | 39 | frame.resize_destructive(context.stream_len); |
| 41 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); | 40 | host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); |
| 42 | *out_configuration_size = 0; | 41 | *out_configuration_size = 0; |
| 43 | return frame; | 42 | return frame; |
| 44 | } | 43 | } |
| @@ -159,8 +158,8 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters | |||
| 159 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); | 158 | std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); |
| 160 | 159 | ||
| 161 | *out_configuration_size = encoded_header.size(); | 160 | *out_configuration_size = encoded_header.size(); |
| 162 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, | 161 | host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(), |
| 163 | frame.data() + encoded_header.size(), context.stream_len); | 162 | context.stream_len); |
| 164 | 163 | ||
| 165 | return frame; | 164 | return frame; |
| 166 | } | 165 | } |
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index ee6392ff9..be97e3b00 100644 --- a/src/video_core/host1x/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp | |||
| @@ -14,7 +14,7 @@ VP8::~VP8() = default; | |||
| 14 | 14 | ||
| 15 | std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { | 15 | std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 16 | VP8PictureInfo info; | 16 | VP8PictureInfo info; |
| 17 | host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); | 17 | host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); |
| 18 | 18 | ||
| 19 | const bool is_key_frame = info.key_frame == 1u; | 19 | const bool is_key_frame = info.key_frame == 1u; |
| 20 | const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); | 20 | const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); |
| @@ -45,7 +45,7 @@ std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& | |||
| 45 | frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); | 45 | frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); |
| 46 | } | 46 | } |
| 47 | const u64 bitstream_offset = state.frame_bitstream_offset; | 47 | const u64 bitstream_offset = state.frame_bitstream_offset; |
| 48 | host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); | 48 | host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); |
| 49 | 49 | ||
| 50 | return frame; | 50 | return frame; |
| 51 | } | 51 | } |
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index 306c3d0e8..65d6fb2d5 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp | |||
| @@ -358,7 +358,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ | |||
| 358 | 358 | ||
| 359 | Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { | 359 | Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { |
| 360 | PictureInfo picture_info; | 360 | PictureInfo picture_info; |
| 361 | host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); | 361 | host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); |
| 362 | Vp9PictureInfo vp9_info = picture_info.Convert(); | 362 | Vp9PictureInfo vp9_info = picture_info.Convert(); |
| 363 | 363 | ||
| 364 | InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); | 364 | InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); |
| @@ -373,7 +373,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& | |||
| 373 | 373 | ||
| 374 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { | 374 | void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { |
| 375 | EntropyProbs entropy; | 375 | EntropyProbs entropy; |
| 376 | host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); | 376 | host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); |
| 377 | entropy.Convert(dst); | 377 | entropy.Convert(dst); |
| 378 | } | 378 | } |
| 379 | 379 | ||
| @@ -383,9 +383,8 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters | |||
| 383 | // gpu.SyncGuestHost(); epic, why? | 383 | // gpu.SyncGuestHost(); epic, why? |
| 384 | current_frame.info = GetVp9PictureInfo(state); | 384 | current_frame.info = GetVp9PictureInfo(state); |
| 385 | current_frame.bit_stream.resize(current_frame.info.bitstream_size); | 385 | current_frame.bit_stream.resize(current_frame.info.bitstream_size); |
| 386 | host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, | 386 | host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(), |
| 387 | current_frame.bit_stream.data(), | 387 | current_frame.info.bitstream_size); |
| 388 | current_frame.info.bitstream_size); | ||
| 389 | } | 388 | } |
| 390 | if (!next_frame.bit_stream.empty()) { | 389 | if (!next_frame.bit_stream.empty()) { |
| 391 | Vp9FrameContainer temp{ | 390 | Vp9FrameContainer temp{ |
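The codec changes are mechanical renames from MemoryManager() to GMMU(), but they repeat one pattern worth naming: ReadBlock of a trivially copyable struct at a register-supplied offset. A hypothetical typed helper that captures it (ReadStruct is illustrative, not part of the diff):

    #include <cstdint>
    #include <type_traits>

    template <typename T, typename MemoryManager>
    T ReadStruct(MemoryManager& gmmu, std::uint64_t offset) {
        static_assert(std::is_trivially_copyable_v<T>,
                      "ReadBlock is a raw byte copy; the target must be trivially copyable");
        T value{};
        gmmu.ReadBlock(offset, &value, sizeof(T));
        return value;
    }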
diff --git a/src/video_core/host1x/gpu_device_memory_manager.cpp b/src/video_core/host1x/gpu_device_memory_manager.cpp new file mode 100644 index 000000000..668c2f08b --- /dev/null +++ b/src/video_core/host1x/gpu_device_memory_manager.cpp | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "core/device_memory_manager.inc" | ||
| 5 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 6 | #include "video_core/rasterizer_interface.h" | ||
| 7 | |||
| 8 | namespace Tegra { | ||
| 9 | |||
| 10 | struct MaxwellDeviceMethods { | ||
| 11 | static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address, | ||
| 12 | size_t size, bool caching) { | ||
| 13 | interface->RasterizerMarkRegionCached(address, size, caching); | ||
| 14 | } | ||
| 15 | }; | ||
| 16 | |||
| 17 | } // namespace Tegra | ||
| 18 | |||
| 19 | template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; | ||
| 20 | template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; | ||
| 21 | |||
| 22 | template const u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr) const; | ||
| 23 | template u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr); | ||
| 24 | |||
| 25 | template u8 Tegra::MaxwellDeviceMemoryManager::Read<u8>(DAddr addr) const; | ||
| 26 | template u16 Tegra::MaxwellDeviceMemoryManager::Read<u16>(DAddr addr) const; | ||
| 27 | template u32 Tegra::MaxwellDeviceMemoryManager::Read<u32>(DAddr addr) const; | ||
| 28 | template u64 Tegra::MaxwellDeviceMemoryManager::Read<u64>(DAddr addr) const; | ||
| 29 | template void Tegra::MaxwellDeviceMemoryManager::Write<u8>(DAddr addr, u8 data); | ||
| 30 | template void Tegra::MaxwellDeviceMemoryManager::Write<u16>(DAddr addr, u16 data); | ||
| 31 | template void Tegra::MaxwellDeviceMemoryManager::Write<u32>(DAddr addr, u32 data); | ||
| 32 | template void Tegra::MaxwellDeviceMemoryManager::Write<u64>(DAddr addr, u64 data); \ No newline at end of file | ||
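This new .cpp is the instantiation point for the DeviceMemoryManager template: it includes device_memory_manager.inc (the definitions) and explicitly instantiates the manager, its allocator, and the needed Read/Write/GetPointer specializations for the Maxwell traits, so no other translation unit has to compile the template body. A toy single-file sketch of that header/.inc/instantiation split:

    // widget_manager.h -- declaration only, cheap to include everywhere
    template <typename Traits>
    class WidgetManager {
    public:
        void Touch(typename Traits::AddrType addr);
    };

    // widget_manager.inc -- definitions, included only by instantiating TUs
    template <typename Traits>
    void WidgetManager<Traits>::Touch(typename Traits::AddrType addr) { /* ... */ }

    // maxwell_widgets.cpp -- the one place the template is compiled
    struct MaxwellTraits {
        using AddrType = unsigned long long;
    };
    template class WidgetManager<MaxwellTraits>;  // single explicit instantiation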
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h new file mode 100644 index 000000000..a9f249991 --- /dev/null +++ b/src/video_core/host1x/gpu_device_memory_manager.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "core/device_memory_manager.h" | ||
| 7 | |||
| 8 | namespace VideoCore { | ||
| 9 | class RasterizerInterface; | ||
| 10 | } | ||
| 11 | |||
| 12 | namespace Tegra { | ||
| 13 | |||
| 14 | struct MaxwellDeviceMethods; | ||
| 15 | |||
| 16 | struct MaxwellDeviceTraits { | ||
| 17 | static constexpr size_t device_virtual_bits = 34; | ||
| 18 | using DeviceInterface = typename VideoCore::RasterizerInterface; | ||
| 19 | using DeviceMethods = MaxwellDeviceMethods; | ||
| 20 | }; | ||
| 21 | |||
| 22 | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; | ||
| 23 | |||
| 24 | } // namespace Tegra \ No newline at end of file | ||
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index 7c317a85d..c4c7a5883 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp | |||
| @@ -9,9 +9,12 @@ namespace Tegra { | |||
| 9 | namespace Host1x { | 9 | namespace Host1x { |
| 10 | 10 | ||
| 11 | Host1x::Host1x(Core::System& system_) | 11 | Host1x::Host1x(Core::System& system_) |
| 12 | : system{system_}, syncpoint_manager{}, memory_manager{system, 32, 12}, | 12 | : system{system_}, syncpoint_manager{}, |
| 13 | memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, | ||
| 13 | allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} | 14 | allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} |
| 14 | 15 | ||
| 16 | Host1x::~Host1x() = default; | ||
| 17 | |||
| 15 | } // namespace Host1x | 18 | } // namespace Host1x |
| 16 | 19 | ||
| 17 | } // namespace Tegra | 20 | } // namespace Tegra |
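Host1x gains a destructor declared in the header and defaulted in the .cpp. The usual motive for that pattern is to anchor destruction where member types are complete, e.g. a std::unique_ptr to a type the header only forward-declares; whether that is the exact motive for Host1x is an assumption. A minimal single-file sketch of the pattern:

    // host.h
    #include <memory>
    class Widget;  // incomplete in the header
    class Host {
    public:
        Host();
        ~Host();  // declared only; cannot be defaulted here, Widget is incomplete
    private:
        std::unique_ptr<Widget> widget;
    };

    // host.cpp
    class Widget {};
    Host::Host() : widget{std::make_unique<Widget>()} {}
    Host::~Host() = default;  // Widget is complete here, so ~unique_ptr instantiates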
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 57082ae54..d72d97b7b 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/common_types.h" | 6 | #include "common/common_types.h" |
| 7 | 7 | ||
| 8 | #include "common/address_space.h" | 8 | #include "common/address_space.h" |
| 9 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 9 | #include "video_core/host1x/syncpoint_manager.h" | 10 | #include "video_core/host1x/syncpoint_manager.h" |
| 10 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 11 | 12 | ||
| @@ -20,6 +21,7 @@ namespace Host1x { | |||
| 20 | class Host1x { | 21 | class Host1x { |
| 21 | public: | 22 | public: |
| 22 | explicit Host1x(Core::System& system); | 23 | explicit Host1x(Core::System& system); |
| 24 | ~Host1x(); | ||
| 23 | 25 | ||
| 24 | SyncpointManager& GetSyncpointManager() { | 26 | SyncpointManager& GetSyncpointManager() { |
| 25 | return syncpoint_manager; | 27 | return syncpoint_manager; |
| @@ -29,14 +31,22 @@ public: | |||
| 29 | return syncpoint_manager; | 31 | return syncpoint_manager; |
| 30 | } | 32 | } |
| 31 | 33 | ||
| 32 | Tegra::MemoryManager& MemoryManager() { | 34 | Tegra::MaxwellDeviceMemoryManager& MemoryManager() { |
| 33 | return memory_manager; | 35 | return memory_manager; |
| 34 | } | 36 | } |
| 35 | 37 | ||
| 36 | const Tegra::MemoryManager& MemoryManager() const { | 38 | const Tegra::MaxwellDeviceMemoryManager& MemoryManager() const { |
| 37 | return memory_manager; | 39 | return memory_manager; |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 42 | Tegra::MemoryManager& GMMU() { | ||
| 43 | return gmmu_manager; | ||
| 44 | } | ||
| 45 | |||
| 46 | const Tegra::MemoryManager& GMMU() const { | ||
| 47 | return gmmu_manager; | ||
| 48 | } | ||
| 49 | |||
| 40 | Common::FlatAllocator<u32, 0, 32>& Allocator() { | 50 | Common::FlatAllocator<u32, 0, 32>& Allocator() { |
| 41 | return *allocator; | 51 | return *allocator; |
| 42 | } | 52 | } |
| @@ -48,7 +58,8 @@ public: | |||
| 48 | private: | 58 | private: |
| 49 | Core::System& system; | 59 | Core::System& system; |
| 50 | SyncpointManager syncpoint_manager; | 60 | SyncpointManager syncpoint_manager; |
| 51 | Tegra::MemoryManager memory_manager; | 61 | Tegra::MaxwellDeviceMemoryManager memory_manager; |
| 62 | Tegra::MemoryManager gmmu_manager; | ||
| 52 | std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator; | 63 | std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator; |
| 53 | }; | 64 | }; |
| 54 | 65 | ||
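
After this change Host1x exposes two distinct address spaces: `MemoryManager()` returns the shared DAddr-indexed device memory manager, while `GMMU()` returns the GPU MMU that translates GPUVAddr to DAddr (the vic.cpp hunks below switch their reads and writes to it). A usage fragment, assuming the patched host1x.h and common_types.h are in scope:

    #include <optional>

    u32 ReadThroughGmmu(Tegra::Host1x::Host1x& host1x, GPUVAddr gpu_addr) {
        // First hop: GPU MMU, GPUVAddr -> DAddr.
        const std::optional<DAddr> dev_addr = host1x.GMMU().GpuToCpuAddress(gpu_addr);
        if (!dev_addr) {
            return 0; // unmapped in the GPU address space
        }
        // Second hop: shared device memory manager, DAddr -> host memory.
        return host1x.MemoryManager().Read<u32>(*dev_addr);
    }
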
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 2a5eba415..d154746af 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -81,7 +81,7 @@ void Vic::Execute() { | |||
| 81 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); | 81 | LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); |
| 82 | return; | 82 | return; |
| 83 | } | 83 | } |
| 84 | const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; | 84 | const VicConfig config{host1x.GMMU().Read<u64>(config_struct_address + 0x20)}; |
| 85 | auto frame = nvdec_processor->GetFrame(); | 85 | auto frame = nvdec_processor->GetFrame(); |
| 86 | if (!frame) { | 86 | if (!frame) { |
| 87 | return; | 87 | return; |
| @@ -162,12 +162,12 @@ void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c | |||
| 162 | Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, | 162 | Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, |
| 163 | block_height, 0, width * 4); | 163 | block_height, 0, width * 4); |
| 164 | 164 | ||
| 165 | host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); | 165 | host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); |
| 166 | } else { | 166 | } else { |
| 167 | // send pitch linear frame | 167 | // send pitch linear frame |
| 168 | const size_t linear_size = width * height * 4; | 168 | const size_t linear_size = width * height * 4; |
| 169 | host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | 169 | host1x.GMMU().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, |
| 170 | linear_size); | 170 | linear_size); |
| 171 | } | 171 | } |
| 172 | } | 172 | } |
| 173 | 173 | ||
| @@ -193,8 +193,7 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c | |||
| 193 | const std::size_t dst = y * aligned_width; | 193 | const std::size_t dst = y * aligned_width; |
| 194 | std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); | 194 | std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); |
| 195 | } | 195 | } |
| 196 | host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), | 196 | host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size()); |
| 197 | luma_buffer.size()); | ||
| 198 | 197 | ||
| 199 | // Chroma | 198 | // Chroma |
| 200 | const std::size_t half_height = frame_height / 2; | 199 | const std::size_t half_height = frame_height / 2; |
| @@ -233,8 +232,8 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c | |||
| 233 | ASSERT(false); | 232 | ASSERT(false); |
| 234 | break; | 233 | break; |
| 235 | } | 234 | } |
| 236 | host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), | 235 | host1x.GMMU().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), |
| 237 | chroma_buffer.size()); | 236 | chroma_buffer.size()); |
| 238 | } | 237 | } |
| 239 | 238 | ||
| 240 | } // namespace Host1x | 239 | } // namespace Host1x |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d16040613..a52f8e486 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -7,25 +7,26 @@ | |||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/device_memory.h" | ||
| 11 | #include "core/hle/kernel/k_page_table.h" | 10 | #include "core/hle/kernel/k_page_table.h" |
| 12 | #include "core/hle/kernel/k_process.h" | 11 | #include "core/hle/kernel/k_process.h" |
| 12 | #include "video_core/guest_memory.h" | ||
| 13 | #include "video_core/host1x/host1x.h" | ||
| 13 | #include "video_core/invalidation_accumulator.h" | 14 | #include "video_core/invalidation_accumulator.h" |
| 14 | #include "video_core/memory_manager.h" | 15 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 16 | #include "video_core/renderer_base.h" | 17 | #include "video_core/renderer_base.h" |
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | using Core::Memory::GuestMemoryFlags; | 20 | using Tegra::Memory::GuestMemoryFlags; |
| 20 | 21 | ||
| 21 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | 22 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; |
| 22 | 23 | ||
| 23 | MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | 24 | MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, |
| 24 | u64 page_bits_) | 25 | u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_) |
| 25 | : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, | 26 | : system{system_}, memory{memory_}, address_space_bits{address_space_bits_}, |
| 26 | address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, | 27 | page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{}, |
| 27 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | 28 | page_table{address_space_bits, address_space_bits + page_bits - 38, |
| 28 | page_bits != big_page_bits ? page_bits : 0}, | 29 | page_bits != big_page_bits ? page_bits : 0}, |
| 29 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( | 30 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
| 30 | 1, std::memory_order_acq_rel)}, | 31 | 1, std::memory_order_acq_rel)}, |
| 31 | accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { | 32 | accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { |
| @@ -42,11 +43,16 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 42 | big_page_table_mask = big_page_table_size - 1; | 43 | big_page_table_mask = big_page_table_size - 1; |
| 43 | 44 | ||
| 44 | big_entries.resize(big_page_table_size / 32, 0); | 45 | big_entries.resize(big_page_table_size / 32, 0); |
| 45 | big_page_table_cpu.resize(big_page_table_size); | 46 | big_page_table_dev.resize(big_page_table_size); |
| 46 | big_page_continuous.resize(big_page_table_size / continuous_bits, 0); | 47 | big_page_continuous.resize(big_page_table_size / continuous_bits, 0); |
| 47 | entries.resize(page_table_size / 32, 0); | 48 | entries.resize(page_table_size / 32, 0); |
| 48 | } | 49 | } |
| 49 | 50 | ||
| 51 | MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | ||
| 52 | u64 page_bits_) | ||
| 53 | : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_, | ||
| 54 | page_bits_) {} | ||
| 55 | |||
| 50 | MemoryManager::~MemoryManager() = default; | 56 | MemoryManager::~MemoryManager() = default; |
| 51 | 57 | ||
| 52 | template <bool is_big_page> | 58 | template <bool is_big_page> |
| @@ -100,7 +106,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu | |||
| 100 | } | 106 | } |
| 101 | 107 | ||
| 102 | template <MemoryManager::EntryType entry_type> | 108 | template <MemoryManager::EntryType entry_type> |
| 103 | GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 109 | GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 104 | PTEKind kind) { | 110 | PTEKind kind) { |
| 105 | [[maybe_unused]] u64 remaining_size{size}; | 111 | [[maybe_unused]] u64 remaining_size{size}; |
| 106 | if constexpr (entry_type == EntryType::Mapped) { | 112 | if constexpr (entry_type == EntryType::Mapped) { |
| @@ -114,9 +120,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 114 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); | 120 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); |
| 115 | } | 121 | } |
| 116 | if constexpr (entry_type == EntryType::Mapped) { | 122 | if constexpr (entry_type == EntryType::Mapped) { |
| 117 | const VAddr current_cpu_addr = cpu_addr + offset; | 123 | const DAddr current_dev_addr = dev_addr + offset; |
| 118 | const auto index = PageEntryIndex<false>(current_gpu_addr); | 124 | const auto index = PageEntryIndex<false>(current_gpu_addr); |
| 119 | const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | 125 | const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); |
| 120 | page_table[index] = sub_value; | 126 | page_table[index] = sub_value; |
| 121 | } | 127 | } |
| 122 | remaining_size -= page_size; | 128 | remaining_size -= page_size; |
| @@ -126,7 +132,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 126 | } | 132 | } |
| 127 | 133 | ||
| 128 | template <MemoryManager::EntryType entry_type> | 134 | template <MemoryManager::EntryType entry_type> |
| 129 | GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, | 135 | GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, |
| 130 | size_t size, PTEKind kind) { | 136 | size_t size, PTEKind kind) { |
| 131 | [[maybe_unused]] u64 remaining_size{size}; | 137 | [[maybe_unused]] u64 remaining_size{size}; |
| 132 | for (u64 offset{}; offset < size; offset += big_page_size) { | 138 | for (u64 offset{}; offset < size; offset += big_page_size) { |
| @@ -137,20 +143,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 137 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); | 143 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); |
| 138 | } | 144 | } |
| 139 | if constexpr (entry_type == EntryType::Mapped) { | 145 | if constexpr (entry_type == EntryType::Mapped) { |
| 140 | const VAddr current_cpu_addr = cpu_addr + offset; | 146 | const DAddr current_dev_addr = dev_addr + offset; |
| 141 | const auto index = PageEntryIndex<true>(current_gpu_addr); | 147 | const auto index = PageEntryIndex<true>(current_gpu_addr); |
| 142 | const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | 148 | const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); |
| 143 | big_page_table_cpu[index] = sub_value; | 149 | big_page_table_dev[index] = sub_value; |
| 144 | const bool is_continuous = ([&] { | 150 | const bool is_continuous = ([&] { |
| 145 | uintptr_t base_ptr{ | 151 | uintptr_t base_ptr{ |
| 146 | reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; | 152 | reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))}; |
| 147 | if (base_ptr == 0) { | 153 | if (base_ptr == 0) { |
| 148 | return false; | 154 | return false; |
| 149 | } | 155 | } |
| 150 | for (VAddr start_cpu = current_cpu_addr + page_size; | 156 | for (DAddr start_cpu = current_dev_addr + page_size; |
| 151 | start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { | 157 | start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) { |
| 152 | base_ptr += page_size; | 158 | base_ptr += page_size; |
| 153 | auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); | 159 | auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu)); |
| 154 | if (next_ptr == 0 || base_ptr != next_ptr) { | 160 | if (next_ptr == 0 || base_ptr != next_ptr) { |
| 155 | return false; | 161 | return false; |
| 156 | } | 162 | } |
| @@ -172,12 +178,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | |||
| 172 | rasterizer = rasterizer_; | 178 | rasterizer = rasterizer_; |
| 173 | } | 179 | } |
| 174 | 180 | ||
| 175 | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, | 181 | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind, |
| 176 | bool is_big_pages) { | 182 | bool is_big_pages) { |
| 177 | if (is_big_pages) [[likely]] { | 183 | if (is_big_pages) [[likely]] { |
| 178 | return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | 184 | return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); |
| 179 | } | 185 | } |
| 180 | return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | 186 | return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); |
| 181 | } | 187 | } |
| 182 | 188 | ||
| 183 | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { | 189 | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { |
| @@ -202,7 +208,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 202 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 208 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| 203 | } | 209 | } |
| 204 | 210 | ||
| 205 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | 211 | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { |
| 206 | if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { | 212 | if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { |
| 207 | return std::nullopt; | 213 | return std::nullopt; |
| 208 | } | 214 | } |
| @@ -211,17 +217,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | |||
| 211 | return std::nullopt; | 217 | return std::nullopt; |
| 212 | } | 218 | } |
| 213 | 219 | ||
| 214 | const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) | 220 | const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) |
| 215 | << cpu_page_bits; | 221 | << cpu_page_bits; |
| 216 | return cpu_addr_base + (gpu_addr & page_mask); | 222 | return dev_addr_base + (gpu_addr & page_mask); |
| 217 | } | 223 | } |
| 218 | 224 | ||
| 219 | const VAddr cpu_addr_base = | 225 | const DAddr dev_addr_base = |
| 220 | static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; | 226 | static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; |
| 221 | return cpu_addr_base + (gpu_addr & big_page_mask); | 227 | return dev_addr_base + (gpu_addr & big_page_mask); |
| 222 | } | 228 | } |
| 223 | 229 | ||
| 224 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | 230 | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { |
| 225 | size_t page_index{addr >> page_bits}; | 231 | size_t page_index{addr >> page_bits}; |
| 226 | const size_t page_last{(addr + size + page_size - 1) >> page_bits}; | 232 | const size_t page_last{(addr + size + page_size - 1) >> page_bits}; |
| 227 | while (page_index < page_last) { | 233 | while (page_index < page_last) { |
| @@ -274,7 +280,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { | |||
| 274 | return {}; | 280 | return {}; |
| 275 | } | 281 | } |
| 276 | 282 | ||
| 277 | return memory.GetPointer(*address); | 283 | return memory.GetPointer<u8>(*address); |
| 278 | } | 284 | } |
| 279 | 285 | ||
| 280 | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | 286 | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { |
| @@ -283,7 +289,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | |||
| 283 | return {}; | 289 | return {}; |
| 284 | } | 290 | } |
| 285 | 291 | ||
| 286 | return memory.GetPointer(*address); | 292 | return memory.GetPointer<u8>(*address); |
| 287 | } | 293 | } |
| 288 | 294 | ||
| 289 | #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. | 295 | #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. |
| @@ -367,25 +373,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
| 367 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 373 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 368 | }; | 374 | }; |
| 369 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 375 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 370 | const VAddr cpu_addr_base = | 376 | const DAddr dev_addr_base = |
| 371 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 377 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 372 | if constexpr (is_safe) { | 378 | if constexpr (is_safe) { |
| 373 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 379 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 374 | } | 380 | } |
| 375 | u8* physical = memory.GetPointer(cpu_addr_base); | 381 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 376 | std::memcpy(dest_buffer, physical, copy_amount); | 382 | std::memcpy(dest_buffer, physical, copy_amount); |
| 377 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 383 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 378 | }; | 384 | }; |
| 379 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 385 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 380 | const VAddr cpu_addr_base = | 386 | const DAddr dev_addr_base = |
| 381 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 387 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 382 | if constexpr (is_safe) { | 388 | if constexpr (is_safe) { |
| 383 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 389 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 384 | } | 390 | } |
| 385 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { | 391 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { |
| 386 | memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); | 392 | memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount); |
| 387 | } else { | 393 | } else { |
| 388 | u8* physical = memory.GetPointer(cpu_addr_base); | 394 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 389 | std::memcpy(dest_buffer, physical, copy_amount); | 395 | std::memcpy(dest_buffer, physical, copy_amount); |
| 390 | } | 396 | } |
| 391 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 397 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| @@ -416,25 +422,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe | |||
| 416 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 422 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| 417 | }; | 423 | }; |
| 418 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 424 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 419 | const VAddr cpu_addr_base = | 425 | const DAddr dev_addr_base = |
| 420 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 426 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 421 | if constexpr (is_safe) { | 427 | if constexpr (is_safe) { |
| 422 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 428 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 423 | } | 429 | } |
| 424 | u8* physical = memory.GetPointer(cpu_addr_base); | 430 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 425 | std::memcpy(physical, src_buffer, copy_amount); | 431 | std::memcpy(physical, src_buffer, copy_amount); |
| 426 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 432 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| 427 | }; | 433 | }; |
| 428 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 434 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 429 | const VAddr cpu_addr_base = | 435 | const DAddr dev_addr_base = |
| 430 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 436 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 431 | if constexpr (is_safe) { | 437 | if constexpr (is_safe) { |
| 432 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 438 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 433 | } | 439 | } |
| 434 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { | 440 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { |
| 435 | memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); | 441 | memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount); |
| 436 | } else { | 442 | } else { |
| 437 | u8* physical = memory.GetPointer(cpu_addr_base); | 443 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 438 | std::memcpy(physical, src_buffer, copy_amount); | 444 | std::memcpy(physical, src_buffer, copy_amount); |
| 439 | } | 445 | } |
| 440 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 446 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| @@ -470,14 +476,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | |||
| 470 | [[maybe_unused]] std::size_t copy_amount) {}; | 476 | [[maybe_unused]] std::size_t copy_amount) {}; |
| 471 | 477 | ||
| 472 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 478 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 473 | const VAddr cpu_addr_base = | 479 | const DAddr dev_addr_base = |
| 474 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 480 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 475 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 481 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 476 | }; | 482 | }; |
| 477 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 483 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 478 | const VAddr cpu_addr_base = | 484 | const DAddr dev_addr_base = |
| 479 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 485 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 480 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 486 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 481 | }; | 487 | }; |
| 482 | auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, | 488 | auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, |
| 483 | std::size_t copy_amount) { | 489 | std::size_t copy_amount) { |
| @@ -495,15 +501,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
| 495 | [[maybe_unused]] std::size_t copy_amount) { return false; }; | 501 | [[maybe_unused]] std::size_t copy_amount) { return false; }; |
| 496 | 502 | ||
| 497 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 503 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 498 | const VAddr cpu_addr_base = | 504 | const DAddr dev_addr_base = |
| 499 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 505 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 500 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | 506 | result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); |
| 501 | return result; | 507 | return result; |
| 502 | }; | 508 | }; |
| 503 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 509 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 504 | const VAddr cpu_addr_base = | 510 | const DAddr dev_addr_base = |
| 505 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 511 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 506 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | 512 | result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); |
| 507 | return result; | 513 | return result; |
| 508 | }; | 514 | }; |
| 509 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 515 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -517,7 +523,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
| 517 | } | 523 | } |
| 518 | 524 | ||
| 519 | size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | 525 | size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { |
| 520 | std::optional<VAddr> old_page_addr{}; | 526 | std::optional<DAddr> old_page_addr{}; |
| 521 | size_t range_so_far = 0; | 527 | size_t range_so_far = 0; |
| 522 | bool result{false}; | 528 | bool result{false}; |
| 523 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | 529 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |
| @@ -526,24 +532,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | |||
| 526 | return true; | 532 | return true; |
| 527 | }; | 533 | }; |
| 528 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 534 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 529 | const VAddr cpu_addr_base = | 535 | const DAddr dev_addr_base = |
| 530 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 536 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 531 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 537 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 532 | result = true; | 538 | result = true; |
| 533 | return true; | 539 | return true; |
| 534 | } | 540 | } |
| 535 | range_so_far += copy_amount; | 541 | range_so_far += copy_amount; |
| 536 | old_page_addr = {cpu_addr_base + copy_amount}; | 542 | old_page_addr = {dev_addr_base + copy_amount}; |
| 537 | return false; | 543 | return false; |
| 538 | }; | 544 | }; |
| 539 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 545 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 540 | const VAddr cpu_addr_base = | 546 | const DAddr dev_addr_base = |
| 541 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 547 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 542 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 548 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 543 | return true; | 549 | return true; |
| 544 | } | 550 | } |
| 545 | range_so_far += copy_amount; | 551 | range_so_far += copy_amount; |
| 546 | old_page_addr = {cpu_addr_base + copy_amount}; | 552 | old_page_addr = {dev_addr_base + copy_amount}; |
| 547 | return false; | 553 | return false; |
| 548 | }; | 554 | }; |
| 549 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 555 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -568,14 +574,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 568 | [[maybe_unused]] std::size_t copy_amount) {}; | 574 | [[maybe_unused]] std::size_t copy_amount) {}; |
| 569 | 575 | ||
| 570 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 576 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 571 | const VAddr cpu_addr_base = | 577 | const DAddr dev_addr_base = |
| 572 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 578 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 573 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 579 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 574 | }; | 580 | }; |
| 575 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 581 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 576 | const VAddr cpu_addr_base = | 582 | const DAddr dev_addr_base = |
| 577 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 583 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 578 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 584 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 579 | }; | 585 | }; |
| 580 | auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, | 586 | auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, |
| 581 | std::size_t copy_amount) { | 587 | std::size_t copy_amount) { |
| @@ -587,7 +593,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 587 | 593 | ||
| 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | 594 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, |
| 589 | VideoCommon::CacheType which) { | 595 | VideoCommon::CacheType which) { |
| 590 | Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | 596 | Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( |
| 591 | *this, gpu_src_addr, size); | 597 | *this, gpu_src_addr, size); |
| 592 | data.SetAddressAndSize(gpu_dest_addr, size); | 598 | data.SetAddressAndSize(gpu_dest_addr, size); |
| 593 | FlushRegion(gpu_dest_addr, size, which); | 599 | FlushRegion(gpu_dest_addr, size, which); |
| @@ -600,18 +606,18 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | |||
| 600 | const std::size_t page{(page_index & big_page_mask) + size}; | 606 | const std::size_t page{(page_index & big_page_mask) + size}; |
| 601 | return page <= big_page_size; | 607 | return page <= big_page_size; |
| 602 | } | 608 | } |
| 603 | const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size}; | 609 | const std::size_t page{(gpu_addr & Core::DEVICE_PAGEMASK) + size}; |
| 604 | return page <= Core::Memory::YUZU_PAGESIZE; | 610 | return page <= Core::DEVICE_PAGESIZE; |
| 605 | } | 611 | } |
| 606 | if (GetEntry<false>(gpu_addr) != EntryType::Mapped) { | 612 | if (GetEntry<false>(gpu_addr) != EntryType::Mapped) { |
| 607 | return false; | 613 | return false; |
| 608 | } | 614 | } |
| 609 | const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size}; | 615 | const std::size_t page{(gpu_addr & Core::DEVICE_PAGEMASK) + size}; |
| 610 | return page <= Core::Memory::YUZU_PAGESIZE; | 616 | return page <= Core::DEVICE_PAGESIZE; |
| 611 | } | 617 | } |
| 612 | 618 | ||
| 613 | bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { | 619 | bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { |
| 614 | std::optional<VAddr> old_page_addr{}; | 620 | std::optional<DAddr> old_page_addr{}; |
| 615 | bool result{true}; | 621 | bool result{true}; |
| 616 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | 622 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |
| 617 | std::size_t copy_amount) { | 623 | std::size_t copy_amount) { |
| @@ -619,23 +625,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const | |||
| 619 | return true; | 625 | return true; |
| 620 | }; | 626 | }; |
| 621 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 627 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 622 | const VAddr cpu_addr_base = | 628 | const DAddr dev_addr_base = |
| 623 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 629 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 624 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 630 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 625 | result = false; | 631 | result = false; |
| 626 | return true; | 632 | return true; |
| 627 | } | 633 | } |
| 628 | old_page_addr = {cpu_addr_base + copy_amount}; | 634 | old_page_addr = {dev_addr_base + copy_amount}; |
| 629 | return false; | 635 | return false; |
| 630 | }; | 636 | }; |
| 631 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 637 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 632 | const VAddr cpu_addr_base = | 638 | const DAddr dev_addr_base = |
| 633 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 639 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 634 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 640 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 635 | result = false; | 641 | result = false; |
| 636 | return true; | 642 | return true; |
| 637 | } | 643 | } |
| 638 | old_page_addr = {cpu_addr_base + copy_amount}; | 644 | old_page_addr = {dev_addr_base + copy_amount}; |
| 639 | return false; | 645 | return false; |
| 640 | }; | 646 | }; |
| 641 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 647 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -678,11 +684,11 @@ template <bool is_gpu_address> | |||
| 678 | void MemoryManager::GetSubmappedRangeImpl( | 684 | void MemoryManager::GetSubmappedRangeImpl( |
| 679 | GPUVAddr gpu_addr, std::size_t size, | 685 | GPUVAddr gpu_addr, std::size_t size, |
| 680 | boost::container::small_vector< | 686 | boost::container::small_vector< |
| 681 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) | 687 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result) |
| 682 | const { | 688 | const { |
| 683 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | 689 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>> |
| 684 | last_segment{}; | 690 | last_segment{}; |
| 685 | std::optional<VAddr> old_page_addr{}; | 691 | std::optional<DAddr> old_page_addr{}; |
| 686 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | 692 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
| 687 | [[maybe_unused]] std::size_t offset, | 693 | [[maybe_unused]] std::size_t offset, |
| 688 | [[maybe_unused]] std::size_t copy_amount) { | 694 | [[maybe_unused]] std::size_t copy_amount) { |
| @@ -694,20 +700,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
| 694 | const auto extend_size_big = [this, &split, &old_page_addr, | 700 | const auto extend_size_big = [this, &split, &old_page_addr, |
| 695 | &last_segment](std::size_t page_index, std::size_t offset, | 701 | &last_segment](std::size_t page_index, std::size_t offset, |
| 696 | std::size_t copy_amount) { | 702 | std::size_t copy_amount) { |
| 697 | const VAddr cpu_addr_base = | 703 | const DAddr dev_addr_base = |
| 698 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 704 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 699 | if (old_page_addr) { | 705 | if (old_page_addr) { |
| 700 | if (*old_page_addr != cpu_addr_base) { | 706 | if (*old_page_addr != dev_addr_base) { |
| 701 | split(0, 0, 0); | 707 | split(0, 0, 0); |
| 702 | } | 708 | } |
| 703 | } | 709 | } |
| 704 | old_page_addr = {cpu_addr_base + copy_amount}; | 710 | old_page_addr = {dev_addr_base + copy_amount}; |
| 705 | if (!last_segment) { | 711 | if (!last_segment) { |
| 706 | if constexpr (is_gpu_address) { | 712 | if constexpr (is_gpu_address) { |
| 707 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | 713 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
| 708 | last_segment = {new_base_addr, copy_amount}; | 714 | last_segment = {new_base_addr, copy_amount}; |
| 709 | } else { | 715 | } else { |
| 710 | last_segment = {cpu_addr_base, copy_amount}; | 716 | last_segment = {dev_addr_base, copy_amount}; |
| 711 | } | 717 | } |
| 712 | } else { | 718 | } else { |
| 713 | last_segment->second += copy_amount; | 719 | last_segment->second += copy_amount; |
| @@ -716,20 +722,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
| 716 | const auto extend_size_short = [this, &split, &old_page_addr, | 722 | const auto extend_size_short = [this, &split, &old_page_addr, |
| 717 | &last_segment](std::size_t page_index, std::size_t offset, | 723 | &last_segment](std::size_t page_index, std::size_t offset, |
| 718 | std::size_t copy_amount) { | 724 | std::size_t copy_amount) { |
| 719 | const VAddr cpu_addr_base = | 725 | const DAddr dev_addr_base = |
| 720 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 726 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 721 | if (old_page_addr) { | 727 | if (old_page_addr) { |
| 722 | if (*old_page_addr != cpu_addr_base) { | 728 | if (*old_page_addr != dev_addr_base) { |
| 723 | split(0, 0, 0); | 729 | split(0, 0, 0); |
| 724 | } | 730 | } |
| 725 | } | 731 | } |
| 726 | old_page_addr = {cpu_addr_base + copy_amount}; | 732 | old_page_addr = {dev_addr_base + copy_amount}; |
| 727 | if (!last_segment) { | 733 | if (!last_segment) { |
| 728 | if constexpr (is_gpu_address) { | 734 | if constexpr (is_gpu_address) { |
| 729 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | 735 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
| 730 | last_segment = {new_base_addr, copy_amount}; | 736 | last_segment = {new_base_addr, copy_amount}; |
| 731 | } else { | 737 | } else { |
| 732 | last_segment = {cpu_addr_base, copy_amount}; | 738 | last_segment = {dev_addr_base, copy_amount}; |
| 733 | } | 739 | } |
| 734 | } else { | 740 | } else { |
| 735 | last_segment->second += copy_amount; | 741 | last_segment->second += copy_amount; |
| @@ -756,9 +762,12 @@ void MemoryManager::FlushCaching() { | |||
| 756 | } | 762 | } |
| 757 | 763 | ||
| 758 | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | 764 | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { |
| 759 | auto cpu_addr = GpuToCpuAddress(src_addr); | 765 | if (!IsContinuousRange(src_addr, size)) { |
| 760 | if (cpu_addr) { | 766 | return nullptr; |
| 761 | return memory.GetSpan(*cpu_addr, size); | 767 | } |
| 768 | auto dev_addr = GpuToCpuAddress(src_addr); | ||
| 769 | if (dev_addr) { | ||
| 770 | return memory.GetSpan(*dev_addr, size); | ||
| 762 | } | 771 | } |
| 763 | return nullptr; | 772 | return nullptr; |
| 764 | } | 773 | } |
| @@ -767,9 +776,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | |||
| 767 | if (!IsContinuousRange(src_addr, size)) { | 776 | if (!IsContinuousRange(src_addr, size)) { |
| 768 | return nullptr; | 777 | return nullptr; |
| 769 | } | 778 | } |
| 770 | auto cpu_addr = GpuToCpuAddress(src_addr); | 779 | auto dev_addr = GpuToCpuAddress(src_addr); |
| 771 | if (cpu_addr) { | 780 | if (dev_addr) { |
| 772 | return memory.GetSpan(*cpu_addr, size); | 781 | return memory.GetSpan(*dev_addr, size); |
| 773 | } | 782 | } |
| 774 | return nullptr; | 783 | return nullptr; |
| 775 | } | 784 | } |
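
The translation math in the hunks above is compact enough to check by hand: `Map()` stores each device page frame number as a 32-bit `sub_value` (`dev_addr >> cpu_page_bits`), and `GpuToCpuAddress()` rebuilds the base with a left shift and re-adds the in-page offset taken from the GPU address. A self-contained sketch with illustrative constants (the emulator's `cpu_page_bits` is not necessarily 12):

    #include <cassert>
    #include <cstdint>

    using DAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    constexpr std::uint64_t cpu_page_bits = 12; // illustrative
    constexpr std::uint64_t page_mask = (1ULL << cpu_page_bits) - 1;

    int main() {
        const DAddr dev_addr = 0x12345678ULL;
        // Map(): keep only the device page frame number in a 32-bit entry.
        const std::uint32_t sub_value = static_cast<std::uint32_t>(dev_addr >> cpu_page_bits);
        // GpuToCpuAddress(): shift the frame back, re-add the GPU in-page offset.
        const GPUVAddr gpu_addr = 0xABCD5678ULL; // same 0x678 offset by construction
        const DAddr rebuilt =
            (static_cast<DAddr>(sub_value) << cpu_page_bits) + (gpu_addr & page_mask);
        assert(rebuilt == dev_addr);
        return 0;
    }
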
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 9b311b9e5..c5255f36c 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -15,8 +15,8 @@ | |||
| 15 | #include "common/range_map.h" | 15 | #include "common/range_map.h" |
| 16 | #include "common/scratch_buffer.h" | 16 | #include "common/scratch_buffer.h" |
| 17 | #include "common/virtual_buffer.h" | 17 | #include "common/virtual_buffer.h" |
| 18 | #include "core/memory.h" | ||
| 19 | #include "video_core/cache_types.h" | 18 | #include "video_core/cache_types.h" |
| 19 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 20 | #include "video_core/pte_kind.h" | 20 | #include "video_core/pte_kind.h" |
| 21 | 21 | ||
| 22 | namespace VideoCore { | 22 | namespace VideoCore { |
| @@ -28,10 +28,6 @@ class InvalidationAccumulator; | |||
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | namespace Core { | 30 | namespace Core { |
| 31 | class DeviceMemory; | ||
| 32 | namespace Memory { | ||
| 33 | class Memory; | ||
| 34 | } // namespace Memory | ||
| 35 | class System; | 31 | class System; |
| 36 | } // namespace Core | 32 | } // namespace Core |
| 37 | 33 | ||
| @@ -41,6 +37,9 @@ class MemoryManager final { | |||
| 41 | public: | 37 | public: |
| 42 | explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, | 38 | explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, |
| 43 | u64 big_page_bits_ = 16, u64 page_bits_ = 12); | 39 | u64 big_page_bits_ = 16, u64 page_bits_ = 12); |
| 40 | explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, | ||
| 41 | u64 address_space_bits_ = 40, u64 big_page_bits_ = 16, | ||
| 42 | u64 page_bits_ = 12); | ||
| 44 | ~MemoryManager(); | 43 | ~MemoryManager(); |
| 45 | 44 | ||
| 46 | size_t GetID() const { | 45 | size_t GetID() const { |
| @@ -50,9 +49,9 @@ public: | |||
| 50 | /// Binds a renderer to the memory manager. | 49 | /// Binds a renderer to the memory manager. |
| 51 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | 50 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 52 | 51 | ||
| 53 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | 52 | [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const; |
| 54 | 53 | ||
| 55 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | 54 | [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; |
| 56 | 55 | ||
| 57 | template <typename T> | 56 | template <typename T> |
| 58 | [[nodiscard]] T Read(GPUVAddr addr) const; | 57 | [[nodiscard]] T Read(GPUVAddr addr) const; |
| @@ -69,7 +68,7 @@ public: | |||
| 69 | if (!address) { | 68 | if (!address) { |
| 70 | return {}; | 69 | return {}; |
| 71 | } | 70 | } |
| 72 | return memory.GetPointer(*address); | 71 | return memory.GetPointer<T>(*address); |
| 73 | } | 72 | } |
| 74 | 73 | ||
| 75 | template <typename T> | 74 | template <typename T> |
| @@ -110,7 +109,7 @@ public: | |||
| 110 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; | 109 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 111 | 110 | ||
| 112 | /** | 111 | /** |
| 113 | * Checks if a gpu region is mapped by a single range of cpu addresses. | 112 | * Checks if a gpu region is mapped by a single range of device addresses. |
| 114 | */ | 113 | */ |
| 115 | [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; | 114 | [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 116 | 115 | ||
| @@ -120,14 +119,14 @@ public: | |||
| 120 | [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; | 119 | [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 121 | 120 | ||
| 122 | /** | 121 | /** |
| 123 | * Returns a vector with all the subranges of cpu addresses mapped beneath. | 122 | * Returns a vector with all the subranges of device addresses mapped beneath. |
| 124 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty | 123 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty |
| 125 | * vector will be returned; | 124 | * vector will be returned; |
| 126 | */ | 125 | */ |
| 127 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( | 126 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( |
| 128 | GPUVAddr gpu_addr, std::size_t size) const; | 127 | GPUVAddr gpu_addr, std::size_t size) const; |
| 129 | 128 | ||
| 130 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | 129 | GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, |
| 131 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | 130 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |
| 132 | GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); | 131 | GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); |
| 133 | void Unmap(GPUVAddr gpu_addr, std::size_t size); | 132 | void Unmap(GPUVAddr gpu_addr, std::size_t size); |
| @@ -186,12 +185,11 @@ private: | |||
| 186 | void GetSubmappedRangeImpl( | 185 | void GetSubmappedRangeImpl( |
| 187 | GPUVAddr gpu_addr, std::size_t size, | 186 | GPUVAddr gpu_addr, std::size_t size, |
| 188 | boost::container::small_vector< | 187 | boost::container::small_vector< |
| 189 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& | 188 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& |
| 190 | result) const; | 189 | result) const; |
| 191 | 190 | ||
| 192 | Core::System& system; | 191 | Core::System& system; |
| 193 | Core::Memory::Memory& memory; | 192 | MaxwellDeviceMemoryManager& memory; |
| 194 | Core::DeviceMemory& device_memory; | ||
| 195 | 193 | ||
| 196 | const u64 address_space_bits; | 194 | const u64 address_space_bits; |
| 197 | const u64 page_bits; | 195 | const u64 page_bits; |
| @@ -218,11 +216,11 @@ private: | |||
| 218 | std::vector<u64> big_entries; | 216 | std::vector<u64> big_entries; |
| 219 | 217 | ||
| 220 | template <EntryType entry_type> | 218 | template <EntryType entry_type> |
| 221 | GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 219 | GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 222 | PTEKind kind); | 220 | PTEKind kind); |
| 223 | 221 | ||
| 224 | template <EntryType entry_type> | 222 | template <EntryType entry_type> |
| 225 | GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 223 | GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 226 | PTEKind kind); | 224 | PTEKind kind); |
| 227 | 225 | ||
| 228 | template <bool is_big_page> | 226 | template <bool is_big_page> |
| @@ -233,11 +231,11 @@ private: | |||
| 233 | 231 | ||
| 234 | Common::MultiLevelPageTable<u32> page_table; | 232 | Common::MultiLevelPageTable<u32> page_table; |
| 235 | Common::RangeMap<GPUVAddr, PTEKind> kind_map; | 233 | Common::RangeMap<GPUVAddr, PTEKind> kind_map; |
| 236 | Common::VirtualBuffer<u32> big_page_table_cpu; | 234 | Common::VirtualBuffer<u32> big_page_table_dev; |
| 237 | 235 | ||
| 238 | std::vector<u64> big_page_continuous; | 236 | std::vector<u64> big_page_continuous; |
| 239 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; | 237 | boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{}; |
| 240 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; | 238 | boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{}; |
| 241 | 239 | ||
| 242 | mutable std::mutex guard; | 240 | mutable std::mutex guard; |
| 243 | 241 | ||
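
`GetSpan()` in this header only hands out a raw pointer when the range is continuous in device memory, and the .cpp above decides that per big page by walking its small pages and checking that their host pointers are adjacent. A sketch of that probe under stated assumptions: `GetHostPointer` stands in for `MaxwellDeviceMemoryManager::GetPointer<u8>`, and the page sizes are illustrative:

    #include <cstdint>

    constexpr std::uint64_t page_size = 0x1000;      // small page, illustrative
    constexpr std::uint64_t big_page_size = 0x10000; // big page, illustrative

    // Stand-in for MaxwellDeviceMemoryManager::GetPointer<u8>.
    const std::uint8_t* GetHostPointer(std::uint64_t dev_addr);

    bool IsBigPageContinuousSketch(std::uint64_t base_dev_addr) {
        auto base = reinterpret_cast<std::uintptr_t>(GetHostPointer(base_dev_addr));
        if (base == 0) {
            return false; // first page is not backed at all
        }
        for (std::uint64_t addr = base_dev_addr + page_size;
             addr < base_dev_addr + big_page_size; addr += page_size) {
            base += page_size;
            const auto next = reinterpret_cast<std::uintptr_t>(GetHostPointer(addr));
            if (next == 0 || next != base) {
                return false; // hole, or backing pages are not adjacent
            }
        }
        return true; // safe to service the big page with a single memcpy
    }
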
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index a64404ce4..4861b123a 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -18,9 +18,9 @@ | |||
| 18 | 18 | ||
| 19 | #include "common/assert.h" | 19 | #include "common/assert.h" |
| 20 | #include "common/settings.h" | 20 | #include "common/settings.h" |
| 21 | #include "core/memory.h" | ||
| 22 | #include "video_core/control/channel_state_cache.h" | 21 | #include "video_core/control/channel_state_cache.h" |
| 23 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 24 | #include "video_core/memory_manager.h" | 24 | #include "video_core/memory_manager.h" |
| 25 | #include "video_core/rasterizer_interface.h" | 25 | #include "video_core/rasterizer_interface.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | 26 | #include "video_core/texture_cache/slot_vector.h" |
| @@ -102,18 +102,19 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo | |||
| 102 | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 102 | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 103 | public: | 103 | public: |
| 104 | explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | 104 | explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, |
| 105 | Core::Memory::Memory& cpu_memory_) | 105 | Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 106 | : rasterizer{rasterizer_}, | 106 | : rasterizer{rasterizer_}, |
| 107 | // Use reinterpret_cast instead of static_cast as workaround for | 107 | // Use reinterpret_cast instead of static_cast as workaround for |
| 108 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | 108 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |
| 109 | cpu_memory{cpu_memory_}, streams{{ | 109 | device_memory{device_memory_}, |
| 110 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 110 | streams{{ |
| 111 | VideoCore::QueryType::SamplesPassed}}, | 111 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 112 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 112 | VideoCore::QueryType::SamplesPassed}}, |
| 113 | VideoCore::QueryType::PrimitivesGenerated}}, | 113 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 114 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 114 | VideoCore::QueryType::PrimitivesGenerated}}, |
| 115 | VideoCore::QueryType::TfbPrimitivesWritten}}, | 115 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 116 | }} { | 116 | VideoCore::QueryType::TfbPrimitivesWritten}}, |
| 117 | }} { | ||
| 117 | (void)slot_async_jobs.insert(); // Null value | 118 | (void)slot_async_jobs.insert(); // Null value |
| 118 | } | 119 | } |
| 119 | 120 | ||
| @@ -322,13 +323,14 @@ private: | |||
| 322 | local_lock.unlock(); | 323 | local_lock.unlock(); |
| 323 | if (timestamp) { | 324 | if (timestamp) { |
| 324 | u64 timestamp_value = *timestamp; | 325 | u64 timestamp_value = *timestamp; |
| 325 | cpu_memory.WriteBlockUnsafe(address + sizeof(u64), ×tamp_value, sizeof(u64)); | 326 | device_memory.WriteBlockUnsafe(address + sizeof(u64), ×tamp_value, |
| 326 | cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | 327 | sizeof(u64)); |
| 328 | device_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||
| 327 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | 329 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, |
| 328 | VideoCommon::CacheType::NoQueryCache); | 330 | VideoCommon::CacheType::NoQueryCache); |
| 329 | } else { | 331 | } else { |
| 330 | u32 small_value = static_cast<u32>(value); | 332 | u32 small_value = static_cast<u32>(value); |
| 331 | cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | 333 | device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); |
| 332 | rasterizer.InvalidateRegion(address, sizeof(u32), | 334 | rasterizer.InvalidateRegion(address, sizeof(u32), |
| 333 | VideoCommon::CacheType::NoQueryCache); | 335 | VideoCommon::CacheType::NoQueryCache); |
| 334 | } | 336 | } |
| @@ -342,7 +344,7 @@ private: | |||
| 342 | SlotVector<AsyncJob> slot_async_jobs; | 344 | SlotVector<AsyncJob> slot_async_jobs; |
| 343 | 345 | ||
| 344 | VideoCore::RasterizerInterface& rasterizer; | 346 | VideoCore::RasterizerInterface& rasterizer; |
| 345 | Core::Memory::Memory& cpu_memory; | 347 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 346 | 348 | ||
| 347 | mutable std::recursive_mutex mutex; | 349 | mutable std::recursive_mutex mutex; |
| 348 | 350 | ||
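
The flush path above writes query results in the guest's expected layout, now through the device memory manager: the 64-bit counter value at the query address and, for timestamped queries, the 64-bit timestamp at offset 8 (the code writes the timestamp block first, then the value, before invalidating the 16-byte region); short queries truncate to a single 32-bit value. Illustrative layout only:

    #include <cstdint>

    // Long (timestamped) queries occupy 16 bytes of guest memory.
    struct LongQueryResult {
        std::uint64_t value;     // written second, at the query address
        std::uint64_t timestamp; // written first, at address + 8
    };
    static_assert(sizeof(LongQueryResult) == 16, "guest layout is two u64s");

    // Short queries store a single truncated 32-bit counter.
    using ShortQueryResult = std::uint32_t;
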
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 1d786b3a7..aca6a6447 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | |||
| 23 | 23 | ||
| 24 | class QueryBase { | 24 | class QueryBase { |
| 25 | public: | 25 | public: |
| 26 | VAddr guest_address{}; | 26 | DAddr guest_address{}; |
| 27 | QueryFlagBits flags{}; | 27 | QueryFlagBits flags{}; |
| 28 | u64 value{}; | 28 | u64 value{}; |
| 29 | 29 | ||
| @@ -32,7 +32,7 @@ protected: | |||
| 32 | QueryBase() = default; | 32 | QueryBase() = default; |
| 33 | 33 | ||
| 34 | // Parameterized constructor | 34 | // Parameterized constructor |
| 35 | QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | 35 | QueryBase(DAddr address, QueryFlagBits flags_, u64 value_) |
| 36 | : guest_address(address), flags(flags_), value{value_} {} | 36 | : guest_address(address), flags(flags_), value{value_} {} |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 94f0c4466..08b779055 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -15,9 +15,9 @@ | |||
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/scope_exit.h" | 16 | #include "common/scope_exit.h" |
| 17 | #include "common/settings.h" | 17 | #include "common/settings.h" |
| 18 | #include "core/memory.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/gpu.h" | 19 | #include "video_core/gpu.h" |
| 20 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 21 | #include "video_core/memory_manager.h" | 21 | #include "video_core/memory_manager.h" |
| 22 | #include "video_core/query_cache/bank_base.h" | 22 | #include "video_core/query_cache/bank_base.h" |
| 23 | #include "video_core/query_cache/query_base.h" | 23 | #include "video_core/query_cache/query_base.h" |
| @@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 113 | using RuntimeType = typename Traits::RuntimeType; | 113 | using RuntimeType = typename Traits::RuntimeType; |
| 114 | 114 | ||
| 115 | QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | 115 | QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, |
| 116 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) | 116 | Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_, |
| 117 | Tegra::GPU& gpu_) | ||
| 117 | : owner{owner_}, rasterizer{rasterizer_}, | 118 | : owner{owner_}, rasterizer{rasterizer_}, |
| 118 | cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { | 119 | device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} { |
| 119 | streamer_mask = 0; | 120 | streamer_mask = 0; |
| 120 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | 121 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |
| 121 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | 122 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |
| @@ -158,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 158 | 159 | ||
| 159 | QueryCacheBase<Traits>* owner; | 160 | QueryCacheBase<Traits>* owner; |
| 160 | VideoCore::RasterizerInterface& rasterizer; | 161 | VideoCore::RasterizerInterface& rasterizer; |
| 161 | Core::Memory::Memory& cpu_memory; | 162 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 162 | RuntimeType& runtime; | 163 | RuntimeType& runtime; |
| 163 | Tegra::GPU& gpu; | 164 | Tegra::GPU& gpu; |
| 164 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | 165 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |
| @@ -171,10 +172,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 171 | template <typename Traits> | 172 | template <typename Traits> |
| 172 | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | 173 | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, |
| 173 | VideoCore::RasterizerInterface& rasterizer_, | 174 | VideoCore::RasterizerInterface& rasterizer_, |
| 174 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) | 175 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 176 | RuntimeType& runtime_) | ||
| 175 | : cached_queries{} { | 177 | : cached_queries{} { |
| 176 | impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | 178 | impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( |
| 177 | this, rasterizer_, cpu_memory_, runtime_, gpu_); | 179 | this, rasterizer_, device_memory_, runtime_, gpu_); |
| 178 | } | 180 | } |
| 179 | 181 | ||
| 180 | template <typename Traits> | 182 | template <typename Traits> |
| @@ -240,7 +242,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 240 | if (!cpu_addr_opt) [[unlikely]] { | 242 | if (!cpu_addr_opt) [[unlikely]] { |
| 241 | return; | 243 | return; |
| 242 | } | 244 | } |
| 243 | VAddr cpu_addr = *cpu_addr_opt; | 245 | DAddr cpu_addr = *cpu_addr_opt; |
| 244 | const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | 246 | const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); |
| 245 | auto* query = streamer->GetQuery(new_query_id); | 247 | auto* query = streamer->GetQuery(new_query_id); |
| 246 | if (is_fence) { | 248 | if (is_fence) { |
| @@ -250,13 +252,12 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 250 | query_location.stream_id.Assign(static_cast<u32>(streamer_id)); | 252 | query_location.stream_id.Assign(static_cast<u32>(streamer_id)); |
| 251 | query_location.query_id.Assign(static_cast<u32>(new_query_id)); | 253 | query_location.query_id.Assign(static_cast<u32>(new_query_id)); |
| 252 | const auto gen_caching_indexing = [](VAddr cur_addr) { | 254 | const auto gen_caching_indexing = [](VAddr cur_addr) { |
| 253 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | 255 | return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS, |
| 254 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | 256 | static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK)); |
| 255 | }; | 257 | }; |
| 256 | u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | 258 | u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr); |
| 257 | u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | 259 | u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8); |
| 258 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | 260 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
| 259 | |||
| 260 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | 261 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |
| 261 | pointer, pointer_timestamp] { | 262 | pointer, pointer_timestamp] { |
| 262 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | 263 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |
| @@ -323,8 +324,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 323 | template <typename Traits> | 324 | template <typename Traits> |
| 324 | void QueryCacheBase<Traits>::UnregisterPending() { | 325 | void QueryCacheBase<Traits>::UnregisterPending() { |
| 325 | const auto gen_caching_indexing = [](VAddr cur_addr) { | 326 | const auto gen_caching_indexing = [](VAddr cur_addr) { |
| 326 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | 327 | return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS, |
| 327 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | 328 | static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK)); |
| 328 | }; | 329 | }; |
| 329 | std::scoped_lock lock(cache_mutex); | 330 | std::scoped_lock lock(cache_mutex); |
| 330 | for (QueryLocation loc : impl->pending_unregister) { | 331 | for (QueryLocation loc : impl->pending_unregister) { |
| @@ -388,7 +389,7 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | |||
| 388 | } | 389 | } |
| 389 | VAddr cpu_addr = *cpu_addr_opt; | 390 | VAddr cpu_addr = *cpu_addr_opt; |
| 390 | std::scoped_lock lock(cache_mutex); | 391 | std::scoped_lock lock(cache_mutex); |
| 391 | auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); | 392 | auto it1 = cached_queries.find(cpu_addr >> Core::DEVICE_PAGEBITS); |
| 392 | if (it1 == cached_queries.end()) { | 393 | if (it1 == cached_queries.end()) { |
| 393 | return VideoCommon::LookupData{ | 394 | return VideoCommon::LookupData{ |
| 394 | .address = cpu_addr, | 395 | .address = cpu_addr, |
| @@ -396,10 +397,10 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | |||
| 396 | }; | 397 | }; |
| 397 | } | 398 | } |
| 398 | auto& sub_container = it1->second; | 399 | auto& sub_container = it1->second; |
| 399 | auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); | 400 | auto it_current = sub_container.find(cpu_addr & Core::DEVICE_PAGEMASK); |
| 400 | 401 | ||
| 401 | if (it_current == sub_container.end()) { | 402 | if (it_current == sub_container.end()) { |
| 402 | auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); | 403 | auto it_current_2 = sub_container.find((cpu_addr & Core::DEVICE_PAGEMASK) + 4); |
| 403 | if (it_current_2 == sub_container.end()) { | 404 | if (it_current_2 == sub_container.end()) { |
| 404 | return VideoCommon::LookupData{ | 405 | return VideoCommon::LookupData{ |
| 405 | .address = cpu_addr, | 406 | .address = cpu_addr, |
| @@ -559,7 +560,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo | |||
| 559 | } | 560 | } |
| 560 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | 561 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |
| 561 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | 562 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |
| 562 | auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); | 563 | auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address); |
| 563 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | 564 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
| 564 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | 565 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |
| 565 | return false; | 566 | return false; |
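The `gen_caching_indexing` lambda this file now bases on `Core::DEVICE_PAGEBITS` splits a device address into a page bucket and an intra-page offset. A self-contained illustration, assuming 12-bit (4 KiB) device pages, which is an assumption for the sketch rather than a value taken from this commit:

```cpp
#include <cstdint>
#include <utility>

// Assumed page geometry; the real constants live in core device-memory headers.
constexpr std::uint64_t DEVICE_PAGEBITS = 12; // 4 KiB pages
constexpr std::uint64_t DEVICE_PAGEMASK = (1ULL << DEVICE_PAGEBITS) - 1;

// Mirrors gen_caching_indexing: page index in the first member,
// byte offset within that page in the second.
constexpr std::pair<std::uint64_t, std::uint32_t> CachingIndex(std::uint64_t addr) {
    return {addr >> DEVICE_PAGEBITS,
            static_cast<std::uint32_t>(addr & DEVICE_PAGEMASK)};
}

static_assert(CachingIndex(0x1234).first == 0x1);   // bucket
static_assert(CachingIndex(0x1234).second == 0x234); // offset in page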
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h index 07be421c6..c12fb75ef 100644 --- a/src/video_core/query_cache/query_cache_base.h +++ b/src/video_core/query_cache/query_cache_base.h | |||
| @@ -13,15 +13,11 @@ | |||
| 13 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 14 | #include "common/bit_field.h" | 14 | #include "common/bit_field.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "core/memory.h" | ||
| 17 | #include "video_core/control/channel_state_cache.h" | 16 | #include "video_core/control/channel_state_cache.h" |
| 17 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 18 | #include "video_core/query_cache/query_base.h" | 18 | #include "video_core/query_cache/query_base.h" |
| 19 | #include "video_core/query_cache/types.h" | 19 | #include "video_core/query_cache/types.h" |
| 20 | 20 | ||
| 21 | namespace Core::Memory { | ||
| 22 | class Memory; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace VideoCore { | 21 | namespace VideoCore { |
| 26 | class RasterizerInterface; | 22 | class RasterizerInterface; |
| 27 | } | 23 | } |
| @@ -53,7 +49,8 @@ public: | |||
| 53 | }; | 49 | }; |
| 54 | 50 | ||
| 55 | explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | 51 | explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, |
| 56 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); | 52 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 53 | RuntimeType& runtime_); | ||
| 57 | 54 | ||
| 58 | ~QueryCacheBase(); | 55 | ~QueryCacheBase(); |
| 59 | 56 | ||
| @@ -125,10 +122,10 @@ protected: | |||
| 125 | const u64 addr_begin = addr; | 122 | const u64 addr_begin = addr; |
| 126 | const u64 addr_end = addr_begin + size; | 123 | const u64 addr_end = addr_begin + size; |
| 127 | 124 | ||
| 128 | const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; | 125 | const u64 page_end = addr_end >> Core::DEVICE_PAGEBITS; |
| 129 | std::scoped_lock lock(cache_mutex); | 126 | std::scoped_lock lock(cache_mutex); |
| 130 | for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { | 127 | for (u64 page = addr_begin >> Core::DEVICE_PAGEBITS; page <= page_end; ++page) { |
| 131 | const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; | 128 | const u64 page_start = page << Core::DEVICE_PAGEBITS; |
| 132 | const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { | 129 | const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { |
| 133 | const u64 cache_begin = page_start + query_location; | 130 | const u64 cache_begin = page_start + query_location; |
| 134 | const u64 cache_end = cache_begin + sizeof(u32); | 131 | const u64 cache_end = cache_begin + sizeof(u32); |
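The page loop in this hunk visits every device page overlapping `[addr, addr + size)`, then filters entries inside each page with a half-open interval-overlap test (the `in_range` lambda). The same two pieces in isolation, again assuming 4 KiB pages:

```cpp
#include <cstdint>
#include <vector>

constexpr std::uint64_t PAGE_BITS = 12; // assumed, for illustration only

// Collect page-aligned start addresses of all pages touched by a range,
// matching the `page <= page_end` loop in the hunk above.
std::vector<std::uint64_t> PagesTouching(std::uint64_t addr, std::uint64_t size) {
    std::vector<std::uint64_t> pages;
    const std::uint64_t page_end = (addr + size) >> PAGE_BITS;
    for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
        pages.push_back(page << PAGE_BITS);
    }
    return pages;
}

// Equivalent of the in_range lambda: a cached u32 starting at cache_begin
// intersects the half-open range [addr_begin, addr_end).
bool InRange(std::uint64_t cache_begin, std::uint64_t addr_begin, std::uint64_t addr_end) {
    const std::uint64_t cache_end = cache_begin + sizeof(std::uint32_t);
    return cache_begin < addr_end && addr_begin < cache_end;
}
```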
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp deleted file mode 100644 index f200a650f..000000000 --- a/src/video_core/rasterizer_accelerated.cpp +++ /dev/null | |||
| @@ -1,72 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <atomic> | ||
| 5 | |||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/div_ceil.h" | ||
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/rasterizer_accelerated.h" | ||
| 11 | |||
| 12 | namespace VideoCore { | ||
| 13 | |||
| 14 | using namespace Core::Memory; | ||
| 15 | |||
| 16 | RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) | ||
| 17 | : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} | ||
| 18 | |||
| 19 | RasterizerAccelerated::~RasterizerAccelerated() = default; | ||
| 20 | |||
| 21 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 22 | u64 uncache_begin = 0; | ||
| 23 | u64 cache_begin = 0; | ||
| 24 | u64 uncache_bytes = 0; | ||
| 25 | u64 cache_bytes = 0; | ||
| 26 | |||
| 27 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 28 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | ||
| 29 | for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { | ||
| 30 | std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); | ||
| 31 | |||
| 32 | if (delta > 0) { | ||
| 33 | ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); | ||
| 34 | } else if (delta < 0) { | ||
| 35 | ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||
| 36 | } else { | ||
| 37 | ASSERT_MSG(false, "Delta must be non-zero!"); | ||
| 38 | } | ||
| 39 | |||
| 40 | // Adds or subtracts 1, as count is an unsigned 16-bit value | ||
| 41 | count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | ||
| 42 | |||
| 43 | // Assume delta is either -1 or 1 | ||
| 44 | if (count.load(std::memory_order::relaxed) == 0) { | ||
| 45 | if (uncache_bytes == 0) { | ||
| 46 | uncache_begin = page; | ||
| 47 | } | ||
| 48 | uncache_bytes += YUZU_PAGESIZE; | ||
| 49 | } else if (uncache_bytes > 0) { | ||
| 50 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, | ||
| 51 | false); | ||
| 52 | uncache_bytes = 0; | ||
| 53 | } | ||
| 54 | if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | ||
| 55 | if (cache_bytes == 0) { | ||
| 56 | cache_begin = page; | ||
| 57 | } | ||
| 58 | cache_bytes += YUZU_PAGESIZE; | ||
| 59 | } else if (cache_bytes > 0) { | ||
| 60 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||
| 61 | cache_bytes = 0; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | if (uncache_bytes > 0) { | ||
| 65 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); | ||
| 66 | } | ||
| 67 | if (cache_bytes > 0) { | ||
| 68 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace VideoCore | ||
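The deleted `UpdatePagesCachedCount` above (removed as this commit introduces a device memory manager) reference-counts each page and coalesces consecutive 0-to-1 and 1-to-0 transitions into single mark-region calls instead of one call per page. A simplified, single-threaded sketch of that coalescing; `PageCounter` and `MarkRegion` are stand-ins, and because `delta` has a single sign per call, one run buffer suffices where the original tracked cache and uncache runs separately:

```cpp
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

constexpr std::uint64_t PAGE_BITS = 12; // assumed page geometry for the sketch
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

// Stand-in for Memory::RasterizerMarkRegionCached.
using MarkRegion = std::function<void(std::uint64_t addr, std::uint64_t size, bool cached)>;

struct PageCounter {
    explicit PageCounter(std::size_t num_pages) : counts(num_pages, 0) {}

    // delta is +1 when a cache object starts covering the range, -1 when it stops.
    void UpdatePagesCachedCount(std::uint64_t addr, std::uint64_t size, int delta,
                                const MarkRegion& mark) {
        std::uint64_t run_begin = 0;
        std::uint64_t run_bytes = 0;
        const bool caching = delta > 0;
        const std::uint64_t page_end = (addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page != page_end; ++page) {
            auto& count = counts.at(page);
            count = static_cast<std::uint16_t>(count + delta);
            // A page changes state only on the 0 <-> 1 transition.
            const bool transition = caching ? count == 1 : count == 0;
            if (transition) {
                if (run_bytes == 0) {
                    run_begin = page;
                }
                run_bytes += PAGE_SIZE; // extend the current contiguous run
            } else if (run_bytes > 0) {
                mark(run_begin << PAGE_BITS, run_bytes, caching); // flush the run
                run_bytes = 0;
            }
        }
        if (run_bytes > 0) {
            mark(run_begin << PAGE_BITS, run_bytes, caching);
        }
    }

    std::vector<std::uint16_t> counts; // one reference count per page
};
```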
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h deleted file mode 100644 index e6c0ea87a..000000000 --- a/src/video_core/rasterizer_accelerated.h +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <atomic> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/rasterizer_interface.h" | ||
| 11 | |||
| 12 | namespace Core::Memory { | ||
| 13 | class Memory; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCore { | ||
| 17 | |||
| 18 | /// Implements the parts of RasterizerInterface shared by GPU-accelerated rasterizers. | ||
| 19 | class RasterizerAccelerated : public RasterizerInterface { | ||
| 20 | public: | ||
| 21 | explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); | ||
| 22 | ~RasterizerAccelerated() override; | ||
| 23 | |||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | ||
| 25 | |||
| 26 | private: | ||
| 27 | class CacheEntry final { | ||
| 28 | public: | ||
| 29 | CacheEntry() = default; | ||
| 30 | |||
| 31 | std::atomic_uint16_t& Count(std::size_t page) { | ||
| 32 | return values[page & 3]; | ||
| 33 | } | ||
| 34 | |||
| 35 | const std::atomic_uint16_t& Count(std::size_t page) const { | ||
| 36 | return values[page & 3]; | ||
| 37 | } | ||
| 38 | |||
| 39 | private: | ||
| 40 | std::array<std::atomic_uint16_t, 4> values{}; | ||
| 41 | }; | ||
| 42 | static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); | ||
| 43 | |||
| 44 | using CachedPages = std::array<CacheEntry, 0x2000000>; | ||
| 45 | std::unique_ptr<CachedPages> cached_pages; | ||
| 46 | Core::Memory::Memory& cpu_memory; | ||
| 47 | }; | ||
| 48 | |||
| 49 | } // namespace VideoCore | ||
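The deleted header's `CacheEntry` packs four 16-bit atomic counters into a single 8-byte slot, so `page >> 2` selects the entry and `page & 3` the lane within it (0x2000000 entries times 4 lanes times 4 KiB pages spans a 39-bit address space). The layout and indexing in isolation, with a tiny table instead of the full-size array:

```cpp
#include <array>
#include <atomic>
#include <cstddef>
#include <memory>

// Four per-page counters per 8-byte entry; page & 3 picks the counter,
// page >> 2 picks the entry, as in the deleted header above.
class CacheEntry final {
public:
    std::atomic_uint16_t& Count(std::size_t page) {
        return values[page & 3];
    }

private:
    std::array<std::atomic_uint16_t, 4> values{};
};
static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");

int main() {
    // A 16-entry table is enough to demonstrate the two-level indexing.
    auto table = std::make_unique<std::array<CacheEntry, 16>>();
    const std::size_t page = 7;
    table->at(page >> 2).Count(page).fetch_add(1, std::memory_order_release);
    return table->at(page >> 2).Count(page).load(std::memory_order_relaxed) == 1 ? 0 : 1;
}
```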
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 49224ca85..8fa4e4d9a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -86,35 +86,35 @@ public: | |||
| 86 | virtual void FlushAll() = 0; | 86 | virtual void FlushAll() = 0; |
| 87 | 87 | ||
| 88 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 88 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 89 | virtual void FlushRegion(VAddr addr, u64 size, | 89 | virtual void FlushRegion(DAddr addr, u64 size, |
| 90 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 90 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 91 | 91 | ||
| 92 | /// Check if the specified memory area requires flushing to CPU Memory. | 92 | /// Check if the specified memory area requires flushing to CPU Memory. |
| 93 | virtual bool MustFlushRegion(VAddr addr, u64 size, | 93 | virtual bool MustFlushRegion(DAddr addr, u64 size, |
| 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 95 | 95 | ||
| 96 | virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; | 96 | virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0; |
| 97 | 97 | ||
| 98 | /// Notify rasterizer that any caches of the specified region should be invalidated | 98 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 99 | virtual void InvalidateRegion(VAddr addr, u64 size, | 99 | virtual void InvalidateRegion(DAddr addr, u64 size, |
| 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 101 | 101 | ||
| 102 | virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | 102 | virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { |
| 103 | for (const auto& [cpu_addr, size] : sequences) { | 103 | for (const auto& [cpu_addr, size] : sequences) { |
| 104 | InvalidateRegion(cpu_addr, size); | 104 | InvalidateRegion(cpu_addr, size); |
| 105 | } | 105 | } |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest | 108 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest |
| 109 | virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; | 109 | virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0; |
| 110 | 110 | ||
| 111 | virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | 111 | virtual bool OnCPUWrite(PAddr addr, u64 size) = 0; |
| 112 | 112 | ||
| 113 | /// Sync memory between guest and host. | 113 | /// Sync memory between guest and host. |
| 114 | virtual void InvalidateGPUCache() = 0; | 114 | virtual void InvalidateGPUCache() = 0; |
| 115 | 115 | ||
| 116 | /// Unmap memory range | 116 | /// Unmap memory range |
| 117 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; | 117 | virtual void UnmapMemory(DAddr addr, u64 size) = 0; |
| 118 | 118 | ||
| 119 | /// Remap GPU memory range. This means underneath backing memory changed | 119 | /// Remap GPU memory range. This means underneath backing memory changed |
| 120 | virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; | 120 | virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; |
| @@ -122,7 +122,7 @@ public: | |||
| 122 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 122 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 123 | /// and invalidated | 123 | /// and invalidated |
| 124 | virtual void FlushAndInvalidateRegion( | 124 | virtual void FlushAndInvalidateRegion( |
| 125 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 125 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 126 | 126 | ||
| 127 | /// Notify the host renderer to wait for previous primitive and compute operations. | 127 | /// Notify the host renderer to wait for previous primitive and compute operations. |
| 128 | virtual void WaitForIdle() = 0; | 128 | virtual void WaitForIdle() = 0; |
| @@ -157,13 +157,10 @@ public: | |||
| 157 | 157 | ||
| 158 | /// Attempt to use a faster method to display the framebuffer to screen | 158 | /// Attempt to use a faster method to display the framebuffer to screen |
| 159 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | 159 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 160 | VAddr framebuffer_addr, u32 pixel_stride) { | 160 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 161 | return false; | 161 | return false; |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | /// Increase/decrease the number of objects in pages touching the specified region | ||
| 165 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | ||
| 166 | |||
| 167 | /// Initialize disk cached resources for the game being emulated | 164 | /// Initialize disk cached resources for the game being emulated |
| 168 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 165 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 169 | const DiskResourceLoadCallback& callback) {} | 166 | const DiskResourceLoadCallback& callback) {} |
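One detail worth noticing in this interface hunk: `InnerInvalidation` keeps a non-pure default that simply forwards each (address, size) pair to `InvalidateRegion`, so backends may override it with a genuinely batched path. Stripped down to that pattern, with `RasterizerBase` as an illustrative name:

```cpp
#include <cstdint>
#include <span>
#include <utility>

using DAddr = std::uint64_t;

class RasterizerBase {
public:
    virtual ~RasterizerBase() = default;
    virtual void InvalidateRegion(DAddr addr, std::uint64_t size) = 0;

    // Default: no batching, one InvalidateRegion call per sequence entry.
    // Backends override this when they can invalidate the whole span at once.
    virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
        for (const auto& [addr, size] : sequences) {
            InvalidateRegion(addr, size);
        }
    }
};
```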
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 4f1d5b548..abfabb65b 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/alignment.h" | 4 | #include "common/alignment.h" |
| 5 | #include "core/memory.h" | ||
| 6 | #include "video_core/control/channel_state.h" | 5 | #include "video_core/control/channel_state.h" |
| 7 | #include "video_core/host1x/host1x.h" | 6 | #include "video_core/host1x/host1x.h" |
| 8 | #include "video_core/memory_manager.h" | 7 | #include "video_core/memory_manager.h" |
| @@ -19,8 +18,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | |||
| 19 | return true; | 18 | return true; |
| 20 | } | 19 | } |
| 21 | 20 | ||
| 22 | RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) | 21 | RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {} |
| 23 | : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {} | ||
| 24 | RasterizerNull::~RasterizerNull() = default; | 22 | RasterizerNull::~RasterizerNull() = default; |
| 25 | 23 | ||
| 26 | void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | 24 | void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} |
| @@ -45,25 +43,25 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr | |||
| 45 | u32 size) {} | 43 | u32 size) {} |
| 46 | void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} | 44 | void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} |
| 47 | void RasterizerNull::FlushAll() {} | 45 | void RasterizerNull::FlushAll() {} |
| 48 | void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 46 | void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 49 | bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { | 47 | bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) { |
| 50 | return false; | 48 | return false; |
| 51 | } | 49 | } |
| 52 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 50 | void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 53 | bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | 51 | bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) { |
| 54 | return false; | 52 | return false; |
| 55 | } | 53 | } |
| 56 | void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | 54 | void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {} |
| 57 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | 55 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) { |
| 58 | VideoCore::RasterizerDownloadArea new_area{ | 56 | VideoCore::RasterizerDownloadArea new_area{ |
| 59 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 57 | .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE), |
| 60 | .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | 58 | .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE), |
| 61 | .preemtive = true, | 59 | .preemtive = true, |
| 62 | }; | 60 | }; |
| 63 | return new_area; | 61 | return new_area; |
| 64 | } | 62 | } |
| 65 | void RasterizerNull::InvalidateGPUCache() {} | 63 | void RasterizerNull::InvalidateGPUCache() {} |
| 66 | void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} | 64 | void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {} |
| 67 | void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} | 65 | void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} |
| 68 | void RasterizerNull::SignalFence(std::function<void()>&& func) { | 66 | void RasterizerNull::SignalFence(std::function<void()>&& func) { |
| 69 | func(); | 67 | func(); |
| @@ -78,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||
| 78 | } | 76 | } |
| 79 | void RasterizerNull::SignalReference() {} | 77 | void RasterizerNull::SignalReference() {} |
| 80 | void RasterizerNull::ReleaseFences(bool) {} | 78 | void RasterizerNull::ReleaseFences(bool) {} |
| 81 | void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 79 | void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 82 | void RasterizerNull::WaitForIdle() {} | 80 | void RasterizerNull::WaitForIdle() {} |
| 83 | void RasterizerNull::FragmentBarrier() {} | 81 | void RasterizerNull::FragmentBarrier() {} |
| 84 | void RasterizerNull::TiledCacheBarrier() {} | 82 | void RasterizerNull::TiledCacheBarrier() {} |
| @@ -95,7 +93,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac | |||
| 95 | void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 93 | void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 96 | std::span<const u8> memory) {} | 94 | std::span<const u8> memory) {} |
| 97 | bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 95 | bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 98 | VAddr framebuffer_addr, u32 pixel_stride) { | 96 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 99 | return true; | 97 | return true; |
| 100 | } | 98 | } |
| 101 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 99 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
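`GetFlushArea` widens a request to whole device pages: `AlignDown` on the start address, `AlignUp` past the end. Equivalent bit arithmetic, assuming a power-of-two 4 KiB page (the real helpers are `Common::AlignDown`/`AlignUp`); the struct below is a stand-in and spells `preemptive` conventionally, unlike the codebase's `preemtive` field:

```cpp
#include <cstdint>

constexpr std::uint64_t PAGE_SIZE = 4096; // assumed; must be a power of two

constexpr std::uint64_t AlignDown(std::uint64_t v) {
    return v & ~(PAGE_SIZE - 1);
}
constexpr std::uint64_t AlignUp(std::uint64_t v) {
    return (v + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

struct DownloadArea {
    std::uint64_t start_address;
    std::uint64_t end_address;
    bool preemptive;
};

constexpr DownloadArea GetFlushArea(std::uint64_t addr, std::uint64_t size) {
    return {AlignDown(addr), AlignUp(addr + size), true};
}

static_assert(GetFlushArea(0x1010, 0x20).start_address == 0x1000);
static_assert(GetFlushArea(0x1010, 0x20).end_address == 0x2000);
```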
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 23001eeb8..a5789604f 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include "common/common_types.h" | 6 | #include "common/common_types.h" |
| 7 | #include "video_core/control/channel_state_cache.h" | 7 | #include "video_core/control/channel_state_cache.h" |
| 8 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 9 | #include "video_core/rasterizer_accelerated.h" | ||
| 10 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 11 | 10 | ||
| 12 | namespace Core { | 11 | namespace Core { |
| @@ -32,10 +31,10 @@ public: | |||
| 32 | } | 31 | } |
| 33 | }; | 32 | }; |
| 34 | 33 | ||
| 35 | class RasterizerNull final : public VideoCore::RasterizerAccelerated, | 34 | class RasterizerNull final : public VideoCore::RasterizerInterface, |
| 36 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 35 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 37 | public: | 36 | public: |
| 38 | explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); | 37 | explicit RasterizerNull(Tegra::GPU& gpu); |
| 39 | ~RasterizerNull() override; | 38 | ~RasterizerNull() override; |
| 40 | 39 | ||
| 41 | void Draw(bool is_indexed, u32 instance_count) override; | 40 | void Draw(bool is_indexed, u32 instance_count) override; |
| @@ -48,17 +47,17 @@ public: | |||
| 48 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 47 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 49 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 48 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 50 | void FlushAll() override; | 49 | void FlushAll() override; |
| 51 | void FlushRegion(VAddr addr, u64 size, | 50 | void FlushRegion(DAddr addr, u64 size, |
| 52 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 51 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 53 | bool MustFlushRegion(VAddr addr, u64 size, | 52 | bool MustFlushRegion(DAddr addr, u64 size, |
| 54 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 55 | void InvalidateRegion(VAddr addr, u64 size, | 54 | void InvalidateRegion(DAddr addr, u64 size, |
| 56 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 57 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 56 | void OnCacheInvalidation(DAddr addr, u64 size) override; |
| 58 | bool OnCPUWrite(VAddr addr, u64 size) override; | 57 | bool OnCPUWrite(DAddr addr, u64 size) override; |
| 59 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 58 | VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; |
| 60 | void InvalidateGPUCache() override; | 59 | void InvalidateGPUCache() override; |
| 61 | void UnmapMemory(VAddr addr, u64 size) override; | 60 | void UnmapMemory(DAddr addr, u64 size) override; |
| 62 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 61 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 63 | void SignalFence(std::function<void()>&& func) override; | 62 | void SignalFence(std::function<void()>&& func) override; |
| 64 | void SyncOperation(std::function<void()>&& func) override; | 63 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -66,7 +65,7 @@ public: | |||
| 66 | void SignalReference() override; | 65 | void SignalReference() override; |
| 67 | void ReleaseFences(bool force) override; | 66 | void ReleaseFences(bool force) override; |
| 68 | void FlushAndInvalidateRegion( | 67 | void FlushAndInvalidateRegion( |
| 69 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 68 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 70 | void WaitForIdle() override; | 69 | void WaitForIdle() override; |
| 71 | void FragmentBarrier() override; | 70 | void FragmentBarrier() override; |
| 72 | void TiledCacheBarrier() override; | 71 | void TiledCacheBarrier() override; |
| @@ -78,7 +77,7 @@ public: | |||
| 78 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 77 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 79 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 78 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 80 | std::span<const u8> memory) override; | 79 | std::span<const u8> memory) override; |
| 81 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 80 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 82 | u32 pixel_stride) override; | 81 | u32 pixel_stride) override; |
| 83 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 82 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 84 | const VideoCore::DiskResourceLoadCallback& callback) override; | 83 | const VideoCore::DiskResourceLoadCallback& callback) override; |
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index be92cc2f4..078feb925 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp | |||
| @@ -7,10 +7,9 @@ | |||
| 7 | 7 | ||
| 8 | namespace Null { | 8 | namespace Null { |
| 9 | 9 | ||
| 10 | RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | 10 | RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 11 | Tegra::GPU& gpu, | ||
| 12 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 11 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 13 | : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} | 12 | : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {} |
| 14 | 13 | ||
| 15 | RendererNull::~RendererNull() = default; | 14 | RendererNull::~RendererNull() = default; |
| 16 | 15 | ||
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 967ff5645..9531b43f6 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h | |||
| @@ -13,8 +13,7 @@ namespace Null { | |||
| 13 | 13 | ||
| 14 | class RendererNull final : public VideoCore::RendererBase { | 14 | class RendererNull final : public VideoCore::RendererBase { |
| 15 | public: | 15 | public: |
| 16 | explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | 16 | explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 17 | Tegra::GPU& gpu, | ||
| 18 | std::unique_ptr<Core::Frontend::GraphicsContext> context); | 17 | std::unique_ptr<Core::Frontend::GraphicsContext> context); |
| 19 | ~RendererNull() override; | 18 | ~RendererNull() override; |
| 20 | 19 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 517ac14dd..ade72e1f9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{ | |||
| 47 | } // Anonymous namespace | 47 | } // Anonymous namespace |
| 48 | 48 | ||
| 49 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | 49 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 50 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | 50 | : VideoCommon::BufferBase(null_params) {} |
| 51 | 51 | ||
| 52 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 52 | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) |
| 53 | VAddr cpu_addr_, u64 size_bytes_) | 53 | : VideoCommon::BufferBase(cpu_addr_, size_bytes_) { |
| 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 55 | buffer.Create(); | 54 | buffer.Create(); |
| 56 | if (runtime.device.HasDebuggingToolAttached()) { | 55 | if (runtime.device.HasDebuggingToolAttached()) { |
| 57 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | 56 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 2c18de166..af34c272b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache_base.h" | 11 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 12 | #include "video_core/buffer_cache/memory_tracker_base.h" | 12 | #include "video_core/buffer_cache/memory_tracker_base.h" |
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 16 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | 15 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" |
| @@ -19,10 +18,9 @@ namespace OpenGL { | |||
| 19 | 18 | ||
| 20 | class BufferCacheRuntime; | 19 | class BufferCacheRuntime; |
| 21 | 20 | ||
| 22 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | 21 | class Buffer : public VideoCommon::BufferBase { |
| 23 | public: | 22 | public: |
| 24 | explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, | 23 | explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr, u64 size_bytes); |
| 25 | u64 size_bytes); | ||
| 26 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | 24 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); |
| 27 | 25 | ||
| 28 | void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; | 26 | void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; |
| @@ -244,7 +242,7 @@ struct BufferCacheParams { | |||
| 244 | using Runtime = OpenGL::BufferCacheRuntime; | 242 | using Runtime = OpenGL::BufferCacheRuntime; |
| 245 | using Buffer = OpenGL::Buffer; | 243 | using Buffer = OpenGL::Buffer; |
| 246 | using Async_Buffer = OpenGL::StagingBufferMap; | 244 | using Async_Buffer = OpenGL::StagingBufferMap; |
| 247 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | 245 | using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; |
| 248 | 246 | ||
| 249 | static constexpr bool IS_OPENGL = true; | 247 | static constexpr bool IS_OPENGL = true; |
| 250 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | 248 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |
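`BufferCacheParams` is a traits bundle consumed by the templated buffer cache, and this hunk retargets its `MemoryTracker` from the rasterizer interface to the device memory manager. A toy version of that traits pattern, with every type reduced to an illustrative stand-in:

```cpp
// Toy stand-ins; the real types are OpenGL::BufferCacheRuntime,
// Tegra::MaxwellDeviceMemoryManager, VideoCommon::MemoryTrackerBase, etc.
struct DeviceMemoryManager {};

template <class Tracker>
struct MemoryTrackerBase {
    explicit MemoryTrackerBase(Tracker& t) : tracker{t} {}
    Tracker& tracker;
};

struct BufferCacheParams {
    using MemoryTracker = MemoryTrackerBase<DeviceMemoryManager>;
    static constexpr bool IS_OPENGL = true;
};

template <class P>
struct BufferCache {
    explicit BufferCache(DeviceMemoryManager& dm) : tracker{dm} {}
    typename P::MemoryTracker tracker;
};

int main() {
    DeviceMemoryManager dm;
    BufferCache<BufferCacheParams> cache{dm}; // tracker now keyed to device memory
    return 0;
}
```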
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index fef7360ed..2147d587f 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -35,8 +35,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 35 | 35 | ||
| 36 | } // Anonymous namespace | 36 | } // Anonymous namespace |
| 37 | 37 | ||
| 38 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | 38 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, |
| 39 | : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { | 39 | Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 40 | : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} { | ||
| 40 | EnableCounters(); | 41 | EnableCounters(); |
| 41 | } | 42 | } |
| 42 | 43 | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 0721e0b3d..38118f355 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 11 | #include "video_core/query_cache.h" | 12 | #include "video_core/query_cache.h" |
| 12 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -28,7 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | |||
| 28 | class QueryCache final | 29 | class QueryCache final |
| 29 | : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | 30 | : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 30 | public: | 31 | public: |
| 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | 32 | explicit QueryCache(RasterizerOpenGL& rasterizer_, |
| 33 | Tegra::MaxwellDeviceMemoryManager& device_memory_); | ||
| 32 | ~QueryCache(); | 34 | ~QueryCache(); |
| 33 | 35 | ||
| 34 | OGLQuery AllocateQuery(VideoCore::QueryType type); | 36 | OGLQuery AllocateQuery(VideoCore::QueryType type); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7a5fad735..d5354ef2d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy | |||
| 70 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 71 | 71 | ||
| 72 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 72 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 73 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 73 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 74 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 74 | const Device& device_, ScreenInfo& screen_info_, |
| 75 | StateTracker& state_tracker_) | 75 | ProgramManager& program_manager_, StateTracker& state_tracker_) |
| 76 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), | 76 | : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), |
| 77 | program_manager(program_manager_), state_tracker(state_tracker_), | 77 | program_manager(program_manager_), state_tracker(state_tracker_), |
| 78 | texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), | 78 | texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), |
| 79 | texture_cache(texture_cache_runtime, *this), | 79 | texture_cache(texture_cache_runtime, device_memory_), |
| 80 | buffer_cache_runtime(device, staging_buffer_pool), | 80 | buffer_cache_runtime(device, staging_buffer_pool), |
| 81 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 81 | buffer_cache(device_memory_, buffer_cache_runtime), |
| 82 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, | 82 | shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache, |
| 83 | state_tracker, gpu.ShaderNotify()), | 83 | program_manager, state_tracker, gpu.ShaderNotify()), |
| 84 | query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), | 84 | query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache), |
| 85 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 85 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 86 | blit_image(program_manager_) {} | 86 | blit_image(program_manager_) {} |
| 87 | 87 | ||
| @@ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { | |||
| 475 | 475 | ||
| 476 | void RasterizerOpenGL::FlushAll() {} | 476 | void RasterizerOpenGL::FlushAll() {} |
| 477 | 477 | ||
| 478 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 478 | void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 479 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 479 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 480 | if (addr == 0 || size == 0) { | 480 | if (addr == 0 || size == 0) { |
| 481 | return; | 481 | return; |
| @@ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
| 493 | } | 493 | } |
| 494 | } | 494 | } |
| 495 | 495 | ||
| 496 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 496 | bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 497 | if ((True(which & VideoCommon::CacheType::BufferCache))) { | 497 | if ((True(which & VideoCommon::CacheType::BufferCache))) { |
| 498 | std::scoped_lock lock{buffer_cache.mutex}; | 498 | std::scoped_lock lock{buffer_cache.mutex}; |
| 499 | if (buffer_cache.IsRegionGpuModified(addr, size)) { | 499 | if (buffer_cache.IsRegionGpuModified(addr, size)) { |
| @@ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
| 510 | return false; | 510 | return false; |
| 511 | } | 511 | } |
| 512 | 512 | ||
| 513 | VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { | 513 | VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) { |
| 514 | { | 514 | { |
| 515 | std::scoped_lock lock{texture_cache.mutex}; | 515 | std::scoped_lock lock{texture_cache.mutex}; |
| 516 | auto area = texture_cache.GetFlushArea(addr, size); | 516 | auto area = texture_cache.GetFlushArea(addr, size); |
| @@ -526,14 +526,14 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 | |||
| 526 | } | 526 | } |
| 527 | } | 527 | } |
| 528 | VideoCore::RasterizerDownloadArea new_area{ | 528 | VideoCore::RasterizerDownloadArea new_area{ |
| 529 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 529 | .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE), |
| 530 | .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | 530 | .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE), |
| 531 | .preemtive = true, | 531 | .preemtive = true, |
| 532 | }; | 532 | }; |
| 533 | return new_area; | 533 | return new_area; |
| 534 | } | 534 | } |
| 535 | 535 | ||
| 536 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 536 | void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 537 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 537 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 538 | if (addr == 0 || size == 0) { | 538 | if (addr == 0 || size == 0) { |
| 539 | return; | 539 | return; |
| @@ -554,7 +554,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 554 | } | 554 | } |
| 555 | } | 555 | } |
| 556 | 556 | ||
| 557 | bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | 557 | bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) { |
| 558 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 558 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 559 | if (addr == 0 || size == 0) { | 559 | if (addr == 0 || size == 0) { |
| 560 | return false; | 560 | return false; |
| @@ -576,8 +576,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 576 | return false; | 576 | return false; |
| 577 | } | 577 | } |
| 578 | 578 | ||
| 579 | void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | 579 | void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) { |
| 580 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 580 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 581 | |||
| 581 | if (addr == 0 || size == 0) { | 582 | if (addr == 0 || size == 0) { |
| 582 | return; | 583 | return; |
| 583 | } | 584 | } |
| @@ -596,7 +597,7 @@ void RasterizerOpenGL::InvalidateGPUCache() { | |||
| 596 | gpu.InvalidateGPUCache(); | 597 | gpu.InvalidateGPUCache(); |
| 597 | } | 598 | } |
| 598 | 599 | ||
| 599 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 600 | void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) { |
| 600 | { | 601 | { |
| 601 | std::scoped_lock lock{texture_cache.mutex}; | 602 | std::scoped_lock lock{texture_cache.mutex}; |
| 602 | texture_cache.UnmapMemory(addr, size); | 603 | texture_cache.UnmapMemory(addr, size); |
| @@ -635,7 +636,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) { | |||
| 635 | fence_manager.WaitPendingFences(force); | 636 | fence_manager.WaitPendingFences(force); |
| 636 | } | 637 | } |
| 637 | 638 | ||
| 638 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | 639 | void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size, |
| 639 | VideoCommon::CacheType which) { | 640 | VideoCommon::CacheType which) { |
| 640 | if (Settings::IsGPULevelExtreme()) { | 641 | if (Settings::IsGPULevelExtreme()) { |
| 641 | FlushRegion(addr, size, which); | 642 | FlushRegion(addr, size, which); |
| @@ -739,7 +740,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| 739 | } | 740 | } |
| 740 | 741 | ||
| 741 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 742 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 742 | VAddr framebuffer_addr, u32 pixel_stride) { | 743 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 743 | if (framebuffer_addr == 0) { | 744 | if (framebuffer_addr == 0) { |
| 744 | return false; | 745 | return false; |
| 745 | } | 746 | } |
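The constructor rewiring above threads `device_memory_` through every cache in the member-initializer list. One detail worth keeping in mind when editing such lists: members initialize in declaration order, not initializer-list order, so a reference member must be declared before the caches that consume it. A toy sketch of that constraint; all names here are illustrative, not the emulator's types:

```cpp
#include <cstdio>

struct DeviceMemoryManager {};

struct TextureCache {
    explicit TextureCache(DeviceMemoryManager&) { std::puts("texture cache up"); }
};

struct Rasterizer {
    explicit Rasterizer(DeviceMemoryManager& device_memory_)
        : device_memory{device_memory_}, texture_cache{device_memory_} {}

    DeviceMemoryManager& device_memory; // declared first, initialized first
    TextureCache texture_cache;         // safe: device_memory is already bound
};

int main() {
    DeviceMemoryManager dm;
    Rasterizer r{dm};
}
```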
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ce3460938..34aa73526 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "video_core/control/channel_state_cache.h" | 15 | #include "video_core/control/channel_state_cache.h" |
| 16 | #include "video_core/engines/maxwell_dma.h" | 16 | #include "video_core/engines/maxwell_dma.h" |
| 17 | #include "video_core/rasterizer_accelerated.h" | ||
| 18 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 19 | #include "video_core/renderer_opengl/blit_image.h" | 18 | #include "video_core/renderer_opengl/blit_image.h" |
| 20 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 19 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| @@ -72,13 +71,13 @@ private: | |||
| 72 | TextureCache& texture_cache; | 71 | TextureCache& texture_cache; |
| 73 | }; | 72 | }; |
| 74 | 73 | ||
| 75 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, | 74 | class RasterizerOpenGL : public VideoCore::RasterizerInterface, |
| 76 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 75 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 77 | public: | 76 | public: |
| 78 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 77 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 79 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 78 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 80 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 79 | const Device& device_, ScreenInfo& screen_info_, |
| 81 | StateTracker& state_tracker_); | 80 | ProgramManager& program_manager_, StateTracker& state_tracker_); |
| 82 | ~RasterizerOpenGL() override; | 81 | ~RasterizerOpenGL() override; |
| 83 | 82 | ||
| 84 | void Draw(bool is_indexed, u32 instance_count) override; | 83 | void Draw(bool is_indexed, u32 instance_count) override; |
| @@ -92,17 +91,17 @@ public: | |||
| 92 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 91 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 93 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 92 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 94 | void FlushAll() override; | 93 | void FlushAll() override; |
| 95 | void FlushRegion(VAddr addr, u64 size, | 94 | void FlushRegion(DAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 95 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 97 | bool MustFlushRegion(VAddr addr, u64 size, | 96 | bool MustFlushRegion(DAddr addr, u64 size, |
| 98 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 99 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 98 | VideoCore::RasterizerDownloadArea GetFlushArea(PAddr addr, u64 size) override; |
| 100 | void InvalidateRegion(VAddr addr, u64 size, | 99 | void InvalidateRegion(DAddr addr, u64 size, |
| 101 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 102 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 101 | void OnCacheInvalidation(PAddr addr, u64 size) override; |
| 103 | bool OnCPUWrite(VAddr addr, u64 size) override; | 102 | bool OnCPUWrite(PAddr addr, u64 size) override; |
| 104 | void InvalidateGPUCache() override; | 103 | void InvalidateGPUCache() override; |
| 105 | void UnmapMemory(VAddr addr, u64 size) override; | 104 | void UnmapMemory(DAddr addr, u64 size) override; |
| 106 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 107 | void SignalFence(std::function<void()>&& func) override; | 106 | void SignalFence(std::function<void()>&& func) override; |
| 108 | void SyncOperation(std::function<void()>&& func) override; | 107 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -110,7 +109,7 @@ public: | |||
| 110 | void SignalReference() override; | 109 | void SignalReference() override; |
| 111 | void ReleaseFences(bool force = true) override; | 110 | void ReleaseFences(bool force = true) override; |
| 112 | void FlushAndInvalidateRegion( | 111 | void FlushAndInvalidateRegion( |
| 113 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 112 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 114 | void WaitForIdle() override; | 113 | void WaitForIdle() override; |
| 115 | void FragmentBarrier() override; | 114 | void FragmentBarrier() override; |
| 116 | void TiledCacheBarrier() override; | 115 | void TiledCacheBarrier() override; |
| @@ -123,7 +122,7 @@ public: | |||
| 123 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 122 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 124 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 123 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 125 | std::span<const u8> memory) override; | 124 | std::span<const u8> memory) override; |
| 126 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 125 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 127 | u32 pixel_stride) override; | 126 | u32 pixel_stride) override; |
| 128 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 127 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 129 | const VideoCore::DiskResourceLoadCallback& callback) override; | 128 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| @@ -235,6 +234,7 @@ private: | |||
| 235 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | 234 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); |
| 236 | 235 | ||
| 237 | Tegra::GPU& gpu; | 236 | Tegra::GPU& gpu; |
| 237 | Tegra::MaxwellDeviceMemoryManager& device_memory; | ||
| 238 | 238 | ||
| 239 | const Device& device; | 239 | const Device& device; |
| 240 | ScreenInfo& screen_info; | 240 | ScreenInfo& screen_info; |
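
Note on the gl_rasterizer.h hunks above: per the common_types.h change earlier in this commit, DAddr is a new u64 alias for the device-specific virtual address space, distinct from the guest VAddr, and the flush/invalidate interface is re-keyed to it. A minimal compilable sketch (stand-in names, not the project's headers) of what the alias split does and does not buy:

    #include <cstdint>
    #include <cstdio>

    using VAddr = std::uint64_t; // guest CPU virtual address (unchanged meaning)
    using DAddr = std::uint64_t; // device-visible virtual address (new in this commit)

    // After this change, cache maintenance is keyed by device addresses.
    void FlushRegion(DAddr addr, std::uint64_t size) {
        std::printf("flush device region [0x%llx, 0x%llx)\n",
                    static_cast<unsigned long long>(addr),
                    static_cast<unsigned long long>(addr + size));
    }

    int main() {
        const VAddr guest_addr = 0x80000000;
        // Both aliases are plain u64, so this compiles even though it mixes
        // address spaces; the rename documents intent, it does not enforce it.
        FlushRegion(guest_addr, 0x1000);
        return 0;
    }

Because the compiler cannot catch a VAddr passed where a DAddr is expected, the call sites are converted by hand throughout this commit, which is why the signature churn above is so extensive.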
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 30df41b7d..50462cdde 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs | |||
| 168 | } | 168 | } |
| 169 | } // Anonymous namespace | 169 | } // Anonymous namespace |
| 170 | 170 | ||
| 171 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 171 | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 172 | const Device& device_, TextureCache& texture_cache_, | 172 | Core::Frontend::EmuWindow& emu_window_, const Device& device_, |
| 173 | BufferCache& buffer_cache_, ProgramManager& program_manager_, | 173 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 174 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) | 174 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 175 | : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, | 175 | VideoCore::ShaderNotify& shader_notify_) |
| 176 | : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_}, | ||
| 176 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, | 177 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, |
| 177 | state_tracker{state_tracker_}, shader_notify{shader_notify_}, | 178 | state_tracker{state_tracker_}, shader_notify{shader_notify_}, |
| 178 | use_asynchronous_shaders{device.UseAsynchronousShaders()}, | 179 | use_asynchronous_shaders{device.UseAsynchronousShaders()}, |
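
The constructor rewrite above shows the recurring dependency swap in this commit: caches that used to reach guest memory through the rasterizer now hold the device memory manager directly. A hedged sketch of the shape of that change, with illustrative stand-in types rather than yuzu's real classes:

    #include <cstdint>

    // Stand-in for Tegra::MaxwellDeviceMemoryManager: the only capability the
    // caches need from it here is resolving a device address to a host pointer.
    struct DeviceMemoryManager {
        template <typename T>
        T* GetPointer(std::uint64_t /*daddr*/) {
            return nullptr; // the real manager walks its page tables
        }
    };

    // Before: explicit ShaderCache(RasterizerOpenGL& rasterizer_, ...);
    // After:  explicit ShaderCache(DeviceMemoryManager& device_memory_, ...);
    class ShaderCacheBase {
    public:
        explicit ShaderCacheBase(DeviceMemoryManager& device_memory_)
            : device_memory{device_memory_} {}

    protected:
        DeviceMemoryManager& device_memory;
    };

    int main() {
        DeviceMemoryManager dmm;
        ShaderCacheBase cache{dmm};
        return 0;
    }
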
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6b9732fca..5ac413529 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | class MemoryManager; | 19 | class MemoryManager; |
| 20 | } | 20 | } // namespace Tegra |
| 21 | 21 | ||
| 22 | namespace OpenGL { | 22 | namespace OpenGL { |
| 23 | 23 | ||
| @@ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | |||
| 28 | 28 | ||
| 29 | class ShaderCache : public VideoCommon::ShaderCache { | 29 | class ShaderCache : public VideoCommon::ShaderCache { |
| 30 | public: | 30 | public: |
| 31 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 31 | explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 32 | const Device& device_, TextureCache& texture_cache_, | 32 | Core::Frontend::EmuWindow& emu_window_, const Device& device_, |
| 33 | BufferCache& buffer_cache_, ProgramManager& program_manager_, | 33 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 34 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); | 34 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 35 | VideoCore::ShaderNotify& shader_notify_); | ||
| 35 | ~ShaderCache(); | 36 | ~ShaderCache(); |
| 36 | 37 | ||
| 37 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 38 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2933718b6..b75376fdb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "common/telemetry.h" | 15 | #include "common/telemetry.h" |
| 16 | #include "core/core_timing.h" | 16 | #include "core/core_timing.h" |
| 17 | #include "core/frontend/emu_window.h" | 17 | #include "core/frontend/emu_window.h" |
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/telemetry_session.h" | 18 | #include "core/telemetry_session.h" |
| 20 | #include "video_core/host_shaders/ffx_a_h.h" | 19 | #include "video_core/host_shaders/ffx_a_h.h" |
| 21 | #include "video_core/host_shaders/ffx_fsr1_h.h" | 20 | #include "video_core/host_shaders/ffx_fsr1_h.h" |
| @@ -144,12 +143,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 144 | 143 | ||
| 145 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 144 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| 146 | Core::Frontend::EmuWindow& emu_window_, | 145 | Core::Frontend::EmuWindow& emu_window_, |
| 147 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 146 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 148 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 147 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 149 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | 148 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |
| 150 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, | 149 | emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, |
| 151 | state_tracker{}, program_manager{device}, | 150 | state_tracker{}, program_manager{device}, |
| 152 | rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | 151 | rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, |
| 152 | state_tracker) { | ||
| 153 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | 153 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 154 | glEnable(GL_DEBUG_OUTPUT); | 154 | glEnable(GL_DEBUG_OUTPUT); |
| 155 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | 155 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); |
| @@ -242,7 +242,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 242 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; | 242 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 243 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( | 243 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( |
| 244 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | 244 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; |
| 245 | const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | 245 | const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; |
| 246 | const std::span<const u8> input_data(host_ptr, size_in_bytes); | 246 | const std::span<const u8> input_data(host_ptr, size_in_bytes); |
| 247 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | 247 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, |
| 248 | framebuffer.width, framebuffer.height, 1, block_height_log2, | 248 | framebuffer.width, framebuffer.height, 1, block_height_log2, |
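
The LoadFBToScreenInfo hunk above replaces cpu_memory.GetPointer(addr) with the templated device_memory.GetPointer<u8>(addr). A minimal self-contained sketch of that lookup pattern, assuming a single flat backing allocation (the real MaxwellDeviceMemoryManager translates through its own page tables first):

    #include <cstdint>
    #include <vector>

    // Stand-in device memory with one flat mapping; illustrative only.
    class FakeDeviceMemory {
    public:
        explicit FakeDeviceMemory(std::size_t size) : backing(size) {}

        template <typename T>
        T* GetPointer(std::uint64_t daddr) {
            if (daddr + sizeof(T) > backing.size()) {
                return nullptr; // unmapped device address
            }
            return reinterpret_cast<T*>(backing.data() + daddr);
        }

    private:
        std::vector<std::uint8_t> backing;
    };

    int main() {
        FakeDeviceMemory device_memory(0x10000);
        const std::uint64_t framebuffer_addr = 0x1000;
        // Mirrors host_ptr = device_memory.GetPointer<u8>(framebuffer_addr)
        const std::uint8_t* host_ptr =
            device_memory.GetPointer<std::uint8_t>(framebuffer_addr);
        return host_ptr != nullptr ? 0 : 1;
    }
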
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b70607635..18699610a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase { | |||
| 61 | public: | 61 | public: |
| 62 | explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 62 | explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| 63 | Core::Frontend::EmuWindow& emu_window_, | 63 | Core::Frontend::EmuWindow& emu_window_, |
| 64 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 64 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 65 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 65 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 66 | ~RendererOpenGL() override; | 66 | ~RendererOpenGL() override; |
| 67 | 67 | ||
| @@ -101,7 +101,7 @@ private: | |||
| 101 | 101 | ||
| 102 | Core::TelemetrySession& telemetry_session; | 102 | Core::TelemetrySession& telemetry_session; |
| 103 | Core::Frontend::EmuWindow& emu_window; | 103 | Core::Frontend::EmuWindow& emu_window; |
| 104 | Core::Memory::Memory& cpu_memory; | 104 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 105 | Tegra::GPU& gpu; | 105 | Tegra::GPU& gpu; |
| 106 | 106 | ||
| 107 | Device device; | 107 | Device device; |
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 71c783709..850c34a3a 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include "shader_recompiler/shader_info.h" | 12 | #include "shader_recompiler/shader_info.h" |
| 13 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 13 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 14 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 14 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 15 | #include "video_core/texture_cache/texture_cache.h" | ||
| 16 | #include "video_core/texture_cache/types.h" | 15 | #include "video_core/texture_cache/types.h" |
| 17 | #include "video_core/vulkan_common/vulkan_device.h" | 16 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | 17 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 100b70918..1631276c6 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl | |||
| 82 | 82 | ||
| 83 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | 83 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, |
| 84 | Core::Frontend::EmuWindow& emu_window, | 84 | Core::Frontend::EmuWindow& emu_window, |
| 85 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 85 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 86 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | 86 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |
| 87 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | 87 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), |
| 88 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | 88 | device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())), |
| 89 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 89 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 90 | Settings::values.renderer_debug.GetValue())), | 90 | Settings::values.renderer_debug.GetValue())), |
| 91 | debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) | 91 | debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) |
| @@ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 97 | render_window.GetFramebufferLayout().height), | 97 | render_window.GetFramebufferLayout().height), |
| 98 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, | 98 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, |
| 99 | surface), | 99 | surface), |
| 100 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, | 100 | blit_screen(device_memory, render_window, device, memory_allocator, swapchain, |
| 101 | scheduler, screen_info), | 101 | present_manager, scheduler, screen_info), |
| 102 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | 102 | rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, |
| 103 | state_tracker, scheduler) { | 103 | state_tracker, scheduler) { |
| 104 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | 104 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { |
| 105 | turbo_mode.emplace(instance, dld); | 105 | turbo_mode.emplace(instance, dld); |
| @@ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 128 | screen_info.width = framebuffer->width; | 128 | screen_info.width = framebuffer->width; |
| 129 | screen_info.height = framebuffer->height; | 129 | screen_info.height = framebuffer->height; |
| 130 | 130 | ||
| 131 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 131 | const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 132 | const bool use_accelerated = | 132 | const bool use_accelerated = |
| 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 134 | RenderScreenshot(*framebuffer, use_accelerated); | 134 | RenderScreenshot(*framebuffer, use_accelerated); |
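
The SwapBuffers hunk above retypes the framebuffer address as a DAddr; the arithmetic itself is unchanged u64 math. A small sketch with a stand-in FramebufferConfig carrying only the fields used here:

    #include <cstdint>

    using DAddr = std::uint64_t;

    // Minimal stand-in for Tegra::FramebufferConfig.
    struct FramebufferConfig {
        DAddr address;
        std::uint32_t offset;
    };

    // The address handed to AccelerateDisplay: base plus byte offset,
    // now interpreted in the device address space.
    DAddr FramebufferDeviceAddress(const FramebufferConfig& fb) {
        return fb.address + fb.offset;
    }

    int main() {
        const FramebufferConfig fb{0x20000000, 0x100};
        return FramebufferDeviceAddress(fb) == 0x20000100 ? 0 : 1;
    }
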
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 14e257cf7..11c52287a 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -7,12 +7,12 @@ | |||
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <variant> | 8 | #include <variant> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 11 | |||
| 12 | #include "common/dynamic_library.h" | 10 | #include "common/dynamic_library.h" |
| 11 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 13 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 14 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 13 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 15 | #include "video_core/renderer_vulkan/vk_present_manager.h" | 14 | #include "video_core/renderer_vulkan/vk_present_manager.h" |
| 15 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 17 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 17 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 18 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 18 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| @@ -42,7 +42,7 @@ class RendererVulkan final : public VideoCore::RendererBase { | |||
| 42 | public: | 42 | public: |
| 43 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | 43 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |
| 44 | Core::Frontend::EmuWindow& emu_window, | 44 | Core::Frontend::EmuWindow& emu_window, |
| 45 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 45 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 46 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 46 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 47 | ~RendererVulkan() override; | 47 | ~RendererVulkan() override; |
| 48 | 48 | ||
| @@ -62,7 +62,7 @@ private: | |||
| 62 | void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); | 62 | void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); |
| 63 | 63 | ||
| 64 | Core::TelemetrySession& telemetry_session; | 64 | Core::TelemetrySession& telemetry_session; |
| 65 | Core::Memory::Memory& cpu_memory; | 65 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 66 | Tegra::GPU& gpu; | 66 | Tegra::GPU& gpu; |
| 67 | 67 | ||
| 68 | std::shared_ptr<Common::DynamicLibrary> library; | 68 | std::shared_ptr<Common::DynamicLibrary> library; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 60432f5ad..610f27c84 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 15 | #include "core/core.h" | 15 | #include "core/core.h" |
| 16 | #include "core/frontend/emu_window.h" | 16 | #include "core/frontend/emu_window.h" |
| 17 | #include "core/memory.h" | ||
| 18 | #include "video_core/gpu.h" | 17 | #include "video_core/gpu.h" |
| 18 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 19 | #include "video_core/host_shaders/fxaa_frag_spv.h" | 19 | #include "video_core/host_shaders/fxaa_frag_spv.h" |
| 20 | #include "video_core/host_shaders/fxaa_vert_spv.h" | 20 | #include "video_core/host_shaders/fxaa_vert_spv.h" |
| 21 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" | 21 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" |
| @@ -121,11 +121,12 @@ struct BlitScreen::BufferData { | |||
| 121 | // Unaligned image data goes here | 121 | // Unaligned image data goes here |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
| 124 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | 124 | BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 125 | const Device& device_, MemoryAllocator& memory_allocator_, | 125 | Core::Frontend::EmuWindow& render_window_, const Device& device_, |
| 126 | Swapchain& swapchain_, PresentManager& present_manager_, | 126 | MemoryAllocator& memory_allocator_, Swapchain& swapchain_, |
| 127 | Scheduler& scheduler_, const ScreenInfo& screen_info_) | 127 | PresentManager& present_manager_, Scheduler& scheduler_, |
| 128 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | 128 | const ScreenInfo& screen_info_) |
| 129 | : device_memory{device_memory_}, render_window{render_window_}, device{device_}, | ||
| 129 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, | 130 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, |
| 130 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 131 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { |
| 131 | resource_ticks.resize(image_count); | 132 | resource_ticks.resize(image_count); |
| @@ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 219 | if (!use_accelerated) { | 220 | if (!use_accelerated) { |
| 220 | const u64 image_offset = GetRawImageOffset(framebuffer); | 221 | const u64 image_offset = GetRawImageOffset(framebuffer); |
| 221 | 222 | ||
| 222 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 223 | const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 223 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 224 | const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); |
| 224 | 225 | ||
| 225 | // TODO(Rodrigo): Read this from HLE | 226 | // TODO(Rodrigo): Read this from HLE |
| 226 | constexpr u32 block_height_log2 = 4; | 227 | constexpr u32 block_height_log2 = 4; |
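
In the non-accelerated path above, BlitScreen resolves the guest framebuffer through the device memory manager and wraps the raw pointer in a bounded span before unswizzling. A sketch of that wrapping step, using a local vector in place of the resolved guest pointer:

    #include <cstdint>
    #include <span>
    #include <vector>

    int main() {
        // Pretend guest framebuffer (the real one comes from
        // device_memory.GetPointer<u8>(framebuffer.address + framebuffer.offset)).
        std::vector<std::uint8_t> guest(64 * 64 * 4);
        const std::uint8_t* host_ptr = guest.data();
        // The blit path hands the unswizzle routine a span, not a bare pointer,
        // so the size computed from the framebuffer geometry travels with it.
        const std::span<const std::uint8_t> input_data(host_ptr, guest.size());
        return input_data.size() == guest.size() ? 0 : 1;
    }
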
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 78b32416d..3eff76009 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | 7 | ||
| 8 | #include "core/frontend/framebuffer_layout.h" | 8 | #include "core/frontend/framebuffer_layout.h" |
| 9 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 9 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 10 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 12 | ||
| @@ -13,10 +14,6 @@ namespace Core { | |||
| 13 | class System; | 14 | class System; |
| 14 | } | 15 | } |
| 15 | 16 | ||
| 16 | namespace Core::Memory { | ||
| 17 | class Memory; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace Core::Frontend { | 17 | namespace Core::Frontend { |
| 21 | class EmuWindow; | 18 | class EmuWindow; |
| 22 | } | 19 | } |
| @@ -56,8 +53,9 @@ struct ScreenInfo { | |||
| 56 | 53 | ||
| 57 | class BlitScreen { | 54 | class BlitScreen { |
| 58 | public: | 55 | public: |
| 59 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, | 56 | explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, |
| 60 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | 57 | Core::Frontend::EmuWindow& render_window, const Device& device, |
| 58 | MemoryAllocator& memory_manager, Swapchain& swapchain, | ||
| 61 | PresentManager& present_manager, Scheduler& scheduler, | 59 | PresentManager& present_manager, Scheduler& scheduler, |
| 62 | const ScreenInfo& screen_info); | 60 | const ScreenInfo& screen_info); |
| 63 | ~BlitScreen(); | 61 | ~BlitScreen(); |
| @@ -109,7 +107,7 @@ private: | |||
| 109 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | 107 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |
| 110 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; | 108 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; |
| 111 | 109 | ||
| 112 | Core::Memory::Memory& cpu_memory; | 110 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 113 | Core::Frontend::EmuWindow& render_window; | 111 | Core::Frontend::EmuWindow& render_window; |
| 114 | const Device& device; | 112 | const Device& device; |
| 115 | MemoryAllocator& memory_allocator; | 113 | MemoryAllocator& memory_allocator; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 3c61799fa..31001d142 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| 79 | } // Anonymous namespace | 79 | } // Anonymous namespace |
| 80 | 80 | ||
| 81 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) | 81 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) |
| 82 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { | 82 | : VideoCommon::BufferBase(null_params), tracker{4096} { |
| 83 | if (runtime.device.HasNullDescriptor()) { | 83 | if (runtime.device.HasNullDescriptor()) { |
| 84 | return; | 84 | return; |
| 85 | } | 85 | } |
| @@ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p | |||
| 88 | is_null = true; | 88 | is_null = true; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 91 | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) |
| 92 | VAddr cpu_addr_, u64 size_bytes_) | 92 | : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device}, |
| 93 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | 93 | buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} { |
| 94 | device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, | ||
| 95 | tracker{SizeBytes()} { | ||
| 96 | if (runtime.device.HasDebuggingToolAttached()) { | 94 | if (runtime.device.HasDebuggingToolAttached()) { |
| 97 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 95 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 98 | } | 96 | } |
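
The vk_buffer_cache.cpp hunks above drop the template parameter from the buffer base class: BufferBase<VideoCore::RasterizerInterface> becomes plain BufferBase, since write tracking now lives behind the device memory manager rather than a per-backend rasterizer type. A stand-in sketch of the resulting shape:

    #include <cstdint>

    using DAddr = std::uint64_t;

    // Was: template <class RasterizerInterface> class BufferBase.
    // Illustrative only; yuzu's real base carries far more state.
    class BufferBase {
    public:
        BufferBase(DAddr cpu_addr_, std::uint64_t size_bytes_)
            : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}

        DAddr CpuAddr() const { return cpu_addr; }
        std::uint64_t SizeBytes() const { return size_bytes; }

    private:
        DAddr cpu_addr{};
        std::uint64_t size_bytes{};
    };

    int main() {
        const BufferBase buffer{0x1000, 0x2000};
        return buffer.SizeBytes() == 0x2000 ? 0 : 1;
    }
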
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index dc300d7cb..e273f4988 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -23,11 +23,10 @@ struct HostVertexBinding; | |||
| 23 | 23 | ||
| 24 | class BufferCacheRuntime; | 24 | class BufferCacheRuntime; |
| 25 | 25 | ||
| 26 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | 26 | class Buffer : public VideoCommon::BufferBase { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | 28 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); |
| 29 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 29 | explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_); |
| 30 | VAddr cpu_addr_, u64 size_bytes_); | ||
| 31 | 30 | ||
| 32 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | 31 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); |
| 33 | 32 | ||
| @@ -173,7 +172,7 @@ struct BufferCacheParams { | |||
| 173 | using Runtime = Vulkan::BufferCacheRuntime; | 172 | using Runtime = Vulkan::BufferCacheRuntime; |
| 174 | using Buffer = Vulkan::Buffer; | 173 | using Buffer = Vulkan::Buffer; |
| 175 | using Async_Buffer = Vulkan::StagingBufferRef; | 174 | using Async_Buffer = Vulkan::StagingBufferRef; |
| 176 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | 175 | using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; |
| 177 | 176 | ||
| 178 | static constexpr bool IS_OPENGL = false; | 177 | static constexpr bool IS_OPENGL = false; |
| 179 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | 178 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |
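
BufferCacheParams above re-points the MemoryTrackerBase policy parameter from the rasterizer interface to Tegra::MaxwellDeviceMemoryManager, i.e. page-residency bookkeeping is now delegated to whatever type the tracker is instantiated over. A hedged sketch of that policy-template idea; the member names here (UpdatePagesCachedCount and friends) are hypothetical stand-ins:

    #include <cstdint>

    template <class DeviceTracker>
    class MemoryTrackerBase {
    public:
        explicit MemoryTrackerBase(DeviceTracker& tracker_) : tracker{tracker_} {}

        void MarkRegionAsCpuModified(std::uint64_t addr, std::uint64_t size) {
            tracker.UpdatePagesCachedCount(addr, size, 1); // hypothetical call
        }

    private:
        DeviceTracker& tracker;
    };

    struct FakeDeviceMemoryManager {
        int cached_pages = 0;
        void UpdatePagesCachedCount(std::uint64_t, std::uint64_t, int delta) {
            cached_pages += delta;
        }
    };

    int main() {
        FakeDeviceMemoryManager dmm;
        MemoryTrackerBase<FakeDeviceMemoryManager> tracker{dmm};
        tracker.MarkRegionAsCpuModified(0x1000, 0x100);
        return dmm.cached_pages == 1 ? 0 : 1;
    }
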
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f2fd2670f..ec6b3a4b0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 19 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 21 | #include "video_core/shader_notify.h" | 21 | #include "video_core/shader_notify.h" |
| 22 | #include "video_core/texture_cache/texture_cache.h" | ||
| 22 | #include "video_core/vulkan_common/vulkan_device.h" | 23 | #include "video_core/vulkan_common/vulkan_device.h" |
| 23 | 24 | ||
| 24 | #if defined(_MSC_VER) && defined(NDEBUG) | 25 | #if defined(_MSC_VER) && defined(NDEBUG) |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1841198d..1e1821b10 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 30 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 31 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 31 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 33 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 35 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 34 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 36 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 35 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| @@ -299,12 +298,13 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | |||
| 299 | return std::memcmp(&rhs, this, Size()) == 0; | 298 | return std::memcmp(&rhs, this, Size()) == 0; |
| 300 | } | 299 | } |
| 301 | 300 | ||
| 302 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, | 301 | PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 303 | Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | 302 | const Device& device_, Scheduler& scheduler_, |
| 303 | DescriptorPool& descriptor_pool_, | ||
| 304 | GuestDescriptorQueue& guest_descriptor_queue_, | 304 | GuestDescriptorQueue& guest_descriptor_queue_, |
| 305 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | 305 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 306 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) | 306 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |
| 307 | : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, | 307 | : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_}, |
| 308 | descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, | 308 | descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, |
| 309 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, | 309 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, |
| 310 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, | 310 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e323ea0fd..797700128 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "shader_recompiler/object_pool.h" | 20 | #include "shader_recompiler/object_pool.h" |
| 21 | #include "shader_recompiler/profile.h" | 21 | #include "shader_recompiler/profile.h" |
| 22 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 24 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 26 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| @@ -79,7 +80,6 @@ class ComputePipeline; | |||
| 79 | class DescriptorPool; | 80 | class DescriptorPool; |
| 80 | class Device; | 81 | class Device; |
| 81 | class PipelineStatistics; | 82 | class PipelineStatistics; |
| 82 | class RasterizerVulkan; | ||
| 83 | class RenderPassCache; | 83 | class RenderPassCache; |
| 84 | class Scheduler; | 84 | class Scheduler; |
| 85 | 85 | ||
| @@ -99,8 +99,8 @@ struct ShaderPools { | |||
| 99 | 99 | ||
| 100 | class PipelineCache : public VideoCommon::ShaderCache { | 100 | class PipelineCache : public VideoCommon::ShaderCache { |
| 101 | public: | 101 | public: |
| 102 | explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, | 102 | explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device, |
| 103 | DescriptorPool& descriptor_pool, | 103 | Scheduler& scheduler, DescriptorPool& descriptor_pool, |
| 104 | GuestDescriptorQueue& guest_descriptor_queue, | 104 | GuestDescriptorQueue& guest_descriptor_queue, |
| 105 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | 105 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
| 106 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); | 106 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ad4caf688..7cbc9c73c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -13,9 +13,10 @@ | |||
| 13 | 13 | ||
| 14 | #include "common/bit_util.h" | 14 | #include "common/bit_util.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "core/memory.h" | ||
| 17 | #include "video_core/engines/draw_manager.h" | 16 | #include "video_core/engines/draw_manager.h" |
| 17 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 18 | #include "video_core/query_cache/query_cache.h" | 18 | #include "video_core/query_cache/query_cache.h" |
| 19 | #include "video_core/rasterizer_interface.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 20 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 21 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| @@ -102,7 +103,7 @@ private: | |||
| 102 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | 103 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; |
| 103 | 104 | ||
| 104 | struct HostSyncValues { | 105 | struct HostSyncValues { |
| 105 | VAddr address; | 106 | DAddr address; |
| 106 | size_t size; | 107 | size_t size; |
| 107 | size_t offset; | 108 | size_t offset; |
| 108 | 109 | ||
| @@ -317,7 +318,7 @@ public: | |||
| 317 | pending_sync.clear(); | 318 | pending_sync.clear(); |
| 318 | } | 319 | } |
| 319 | 320 | ||
| 320 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 321 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 321 | [[maybe_unused]] std::optional<u32> subreport) override { | 322 | [[maybe_unused]] std::optional<u32> subreport) override { |
| 322 | PauseCounter(); | 323 | PauseCounter(); |
| 323 | auto index = BuildQuery(); | 324 | auto index = BuildQuery(); |
| @@ -738,7 +739,7 @@ public: | |||
| 738 | pending_sync.clear(); | 739 | pending_sync.clear(); |
| 739 | } | 740 | } |
| 740 | 741 | ||
| 741 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 742 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 742 | std::optional<u32> subreport_) override { | 743 | std::optional<u32> subreport_) override { |
| 743 | auto index = BuildQuery(); | 744 | auto index = BuildQuery(); |
| 744 | auto* new_query = GetQuery(index); | 745 | auto* new_query = GetQuery(index); |
| @@ -769,9 +770,9 @@ public: | |||
| 769 | return index; | 770 | return index; |
| 770 | } | 771 | } |
| 771 | 772 | ||
| 772 | std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { | 773 | std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) { |
| 773 | if (last_queries[stream] != 0) { | 774 | if (last_queries[stream] != 0) { |
| 774 | std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); | 775 | std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); |
| 775 | return result; | 776 | return result; |
| 776 | } | 777 | } |
| 777 | return std::nullopt; | 778 | return std::nullopt; |
| @@ -974,7 +975,7 @@ private: | |||
| 974 | size_t buffers_count{}; | 975 | size_t buffers_count{}; |
| 975 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | 976 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; |
| 976 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | 977 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; |
| 977 | std::array<VAddr, NUM_STREAMS> last_queries; | 978 | std::array<DAddr, NUM_STREAMS> last_queries; |
| 978 | std::array<size_t, NUM_STREAMS> last_queries_stride; | 979 | std::array<size_t, NUM_STREAMS> last_queries_stride; |
| 979 | Maxwell3D::Regs::PrimitiveTopology out_topology; | 980 | Maxwell3D::Regs::PrimitiveTopology out_topology; |
| 980 | u64 streams_mask; | 981 | u64 streams_mask; |
| @@ -987,7 +988,7 @@ public: | |||
| 987 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} | 988 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} |
| 988 | 989 | ||
| 989 | // Parameterized constructor | 990 | // Parameterized constructor |
| 990 | PrimitivesQueryBase(bool has_timestamp, VAddr address) | 991 | PrimitivesQueryBase(bool has_timestamp, DAddr address) |
| 991 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { | 992 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { |
| 992 | if (has_timestamp) { | 993 | if (has_timestamp) { |
| 993 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; | 994 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; |
| @@ -995,7 +996,7 @@ public: | |||
| 995 | } | 996 | } |
| 996 | 997 | ||
| 997 | u64 stride{}; | 998 | u64 stride{}; |
| 998 | VAddr dependant_address{}; | 999 | DAddr dependant_address{}; |
| 999 | Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; | 1000 | Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; |
| 1000 | size_t dependant_index{}; | 1001 | size_t dependant_index{}; |
| 1001 | bool dependant_manage{}; | 1002 | bool dependant_manage{}; |
| @@ -1005,15 +1006,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive | |||
| 1005 | public: | 1006 | public: |
| 1006 | explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, | 1007 | explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, |
| 1007 | TFBCounterStreamer& tfb_streamer_, | 1008 | TFBCounterStreamer& tfb_streamer_, |
| 1008 | Core::Memory::Memory& cpu_memory_) | 1009 | Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 1009 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, | 1010 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, |
| 1010 | tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { | 1011 | tfb_streamer{tfb_streamer_}, device_memory{device_memory_} { |
| 1011 | MakeDependent(&tfb_streamer); | 1012 | MakeDependent(&tfb_streamer); |
| 1012 | } | 1013 | } |
| 1013 | 1014 | ||
| 1014 | ~PrimitivesSucceededStreamer() = default; | 1015 | ~PrimitivesSucceededStreamer() = default; |
| 1015 | 1016 | ||
| 1016 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 1017 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 1017 | std::optional<u32> subreport_) override { | 1018 | std::optional<u32> subreport_) override { |
| 1018 | auto index = BuildQuery(); | 1019 | auto index = BuildQuery(); |
| 1019 | auto* new_query = GetQuery(index); | 1020 | auto* new_query = GetQuery(index); |
| @@ -1063,6 +1064,8 @@ public: | |||
| 1063 | } | 1064 | } |
| 1064 | }); | 1065 | }); |
| 1065 | } | 1066 | } |
| 1067 | auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address); | ||
| 1068 | ASSERT(ptr != nullptr); | ||
| 1066 | 1069 | ||
| 1067 | new_query->dependant_manage = must_manage_dependance; | 1070 | new_query->dependant_manage = must_manage_dependance; |
| 1068 | pending_flush_queries.push_back(index); | 1071 | pending_flush_queries.push_back(index); |
| @@ -1100,7 +1103,7 @@ public: | |||
| 1100 | num_vertices = dependant_query->value / query->stride; | 1103 | num_vertices = dependant_query->value / query->stride; |
| 1101 | tfb_streamer.Free(query->dependant_index); | 1104 | tfb_streamer.Free(query->dependant_index); |
| 1102 | } else { | 1105 | } else { |
| 1103 | u8* pointer = cpu_memory.GetPointer(query->dependant_address); | 1106 | u8* pointer = device_memory.GetPointer<u8>(query->dependant_address); |
| 1104 | u32 result; | 1107 | u32 result; |
| 1105 | std::memcpy(&result, pointer, sizeof(u32)); | 1108 | std::memcpy(&result, pointer, sizeof(u32)); |
| 1106 | num_vertices = static_cast<u64>(result) / query->stride; | 1109 | num_vertices = static_cast<u64>(result) / query->stride; |
| @@ -1137,7 +1140,7 @@ public: | |||
| 1137 | private: | 1140 | private: |
| 1138 | QueryCacheRuntime& runtime; | 1141 | QueryCacheRuntime& runtime; |
| 1139 | TFBCounterStreamer& tfb_streamer; | 1142 | TFBCounterStreamer& tfb_streamer; |
| 1140 | Core::Memory::Memory& cpu_memory; | 1143 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 1141 | 1144 | ||
| 1142 | // syncing queue | 1145 | // syncing queue |
| 1143 | std::vector<size_t> pending_sync; | 1146 | std::vector<size_t> pending_sync; |
| @@ -1152,12 +1155,13 @@ private: | |||
| 1152 | 1155 | ||
| 1153 | struct QueryCacheRuntimeImpl { | 1156 | struct QueryCacheRuntimeImpl { |
| 1154 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | 1157 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, |
| 1155 | Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, | 1158 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 1156 | const Device& device_, const MemoryAllocator& memory_allocator_, | 1159 | Vulkan::BufferCache& buffer_cache_, const Device& device_, |
| 1157 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | 1160 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, |
| 1161 | StagingBufferPool& staging_pool_, | ||
| 1158 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | 1162 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
| 1159 | DescriptorPool& descriptor_pool) | 1163 | DescriptorPool& descriptor_pool) |
| 1160 | : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, | 1164 | : rasterizer{rasterizer_}, device_memory{device_memory_}, |
| 1161 | buffer_cache{buffer_cache_}, device{device_}, | 1165 | buffer_cache{buffer_cache_}, device{device_}, |
| 1162 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | 1166 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, |
| 1163 | guest_streamer(0, runtime), | 1167 | guest_streamer(0, runtime), |
| @@ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1168 | scheduler, memory_allocator, staging_pool), | 1172 | scheduler, memory_allocator, staging_pool), |
| 1169 | primitives_succeeded_streamer( | 1173 | primitives_succeeded_streamer( |
| 1170 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, | 1174 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, |
| 1171 | cpu_memory_), | 1175 | device_memory_), |
| 1172 | primitives_needed_minus_succeeded_streamer( | 1176 | primitives_needed_minus_succeeded_streamer( |
| 1173 | static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), | 1177 | static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), |
| 1174 | hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { | 1178 | hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { |
| @@ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1195 | } | 1199 | } |
| 1196 | 1200 | ||
| 1197 | VideoCore::RasterizerInterface* rasterizer; | 1201 | VideoCore::RasterizerInterface* rasterizer; |
| 1198 | Core::Memory::Memory& cpu_memory; | 1202 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 1199 | Vulkan::BufferCache& buffer_cache; | 1203 | Vulkan::BufferCache& buffer_cache; |
| 1200 | 1204 | ||
| 1201 | const Device& device; | 1205 | const Device& device; |
| @@ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1210 | PrimitivesSucceededStreamer primitives_succeeded_streamer; | 1214 | PrimitivesSucceededStreamer primitives_succeeded_streamer; |
| 1211 | VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; | 1215 | VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; |
| 1212 | 1216 | ||
| 1213 | std::vector<std::pair<VAddr, VAddr>> little_cache; | 1217 | std::vector<std::pair<DAddr, DAddr>> little_cache; |
| 1214 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | 1218 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| 1215 | std::vector<size_t> redirect_cache; | 1219 | std::vector<size_t> redirect_cache; |
| 1216 | std::vector<std::vector<VkBufferCopy>> copies_setup; | 1220 | std::vector<std::vector<VkBufferCopy>> copies_setup; |
| @@ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl { | |||
| 1229 | }; | 1233 | }; |
| 1230 | 1234 | ||
| 1231 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | 1235 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 1232 | Core::Memory::Memory& cpu_memory_, | 1236 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 1233 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | 1237 | Vulkan::BufferCache& buffer_cache_, const Device& device_, |
| 1234 | const MemoryAllocator& memory_allocator_, | 1238 | const MemoryAllocator& memory_allocator_, |
| 1235 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | 1239 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 1236 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | 1240 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
| 1237 | DescriptorPool& descriptor_pool) { | 1241 | DescriptorPool& descriptor_pool) { |
| 1238 | impl = std::make_unique<QueryCacheRuntimeImpl>( | 1242 | impl = std::make_unique<QueryCacheRuntimeImpl>( |
| 1239 | *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | 1243 | *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, |
| 1240 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | 1244 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); |
| 1241 | } | 1245 | } |
| 1242 | 1246 | ||
| @@ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo | |||
| 1309 | ResumeHostConditionalRendering(); | 1313 | ResumeHostConditionalRendering(); |
| 1310 | } | 1314 | } |
| 1311 | 1315 | ||
| 1312 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { | 1316 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { |
| 1313 | VkBuffer to_resolve; | 1317 | VkBuffer to_resolve; |
| 1314 | u32 to_resolve_offset; | 1318 | u32 to_resolve_offset; |
| 1315 | { | 1319 | { |
| @@ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1350 | return false; | 1354 | return false; |
| 1351 | } | 1355 | } |
| 1352 | 1356 | ||
| 1353 | const auto check_in_bc = [&](VAddr address) { | 1357 | const auto check_in_bc = [&](DAddr address) { |
| 1354 | return impl->buffer_cache.IsRegionGpuModified(address, 8); | 1358 | return impl->buffer_cache.IsRegionGpuModified(address, 8); |
| 1355 | }; | 1359 | }; |
| 1356 | const auto check_value = [&](VAddr address) { | 1360 | const auto check_value = [&](DAddr address) { |
| 1357 | u8* ptr = impl->cpu_memory.GetPointer(address); | 1361 | u8* ptr = impl->device_memory.GetPointer<u8>(address); |
| 1358 | u64 value{}; | 1362 | u64 value{}; |
| 1359 | std::memcpy(&value, ptr, sizeof(value)); | 1363 | std::memcpy(&value, ptr, sizeof(value)); |
| 1360 | return value == 0; | 1364 | return value == 0; |
| @@ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba | |||
| 1477 | for (auto& sync_val : values) { | 1481 | for (auto& sync_val : values) { |
| 1478 | total_size += sync_val.size; | 1482 | total_size += sync_val.size; |
| 1479 | bool found = false; | 1483 | bool found = false; |
| 1480 | VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | 1484 | DAddr base = Common::AlignDown(sync_val.address, Core::DEVICE_PAGESIZE); |
| 1481 | VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | 1485 | DAddr base_end = base + Core::DEVICE_PAGESIZE; |
| 1482 | for (size_t i = 0; i < impl->little_cache.size(); i++) { | 1486 | for (size_t i = 0; i < impl->little_cache.size(); i++) { |
| 1483 | const auto set_found = [&] { | 1487 | const auto set_found = [&] { |
| 1484 | impl->redirect_cache.push_back(i); | 1488 | impl->redirect_cache.push_back(i); |
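
The SyncValues hunk above rebuckets sync addresses by Core::DEVICE_PAGESIZE instead of the guest page size. For a power-of-two page size, AlignDown is a single mask; a compilable sketch with an assumed 4 KiB page (the actual constant lives in the new device memory manager headers):

    #include <cstdint>

    using DAddr = std::uint64_t;

    constexpr std::uint64_t DEVICE_PAGESIZE = 0x1000; // assumed 4 KiB

    constexpr DAddr AlignDown(DAddr addr, std::uint64_t page_size) {
        return addr & ~(page_size - 1); // valid because page_size is a power of two
    }

    int main() {
        const DAddr base = AlignDown(0x12345678, DEVICE_PAGESIZE);
        const DAddr base_end = base + DEVICE_PAGESIZE;
        return (base == 0x12345000 && base_end == 0x12346000) ? 0 : 1;
    }
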
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index e9a1ea169..f6151123e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl; | |||
| 27 | class QueryCacheRuntime { | 27 | class QueryCacheRuntime { |
| 28 | public: | 28 | public: |
| 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 30 | Core::Memory::Memory& cpu_memory_, | 30 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, |
| 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, |
| 33 | StagingBufferPool& staging_pool_, | 33 | StagingBufferPool& staging_pool_, |
| @@ -61,7 +61,7 @@ public: | |||
| 61 | 61 | ||
| 62 | private: | 62 | private: |
| 63 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | 63 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |
| 64 | void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); | 64 | void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal); |
| 65 | friend struct QueryCacheRuntimeImpl; | 65 | friend struct QueryCacheRuntimeImpl; |
| 66 | std::unique_ptr<QueryCacheRuntimeImpl> impl; | 66 | std::unique_ptr<QueryCacheRuntimeImpl> impl; |
| 67 | }; | 67 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 241fc34be..5bf41b81f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "video_core/engines/draw_manager.h" | 18 | #include "video_core/engines/draw_manager.h" |
| 19 | #include "video_core/engines/kepler_compute.h" | 19 | #include "video_core/engines/kepler_compute.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 21 | #include "video_core/renderer_vulkan/blit_image.h" | 22 | #include "video_core/renderer_vulkan/blit_image.h" |
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 23 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| @@ -163,10 +164,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | |||
| 163 | } // Anonymous namespace | 164 | } // Anonymous namespace |
| 164 | 165 | ||
| 165 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 166 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 166 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | 167 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 167 | const Device& device_, MemoryAllocator& memory_allocator_, | 168 | ScreenInfo& screen_info_, const Device& device_, |
| 168 | StateTracker& state_tracker_, Scheduler& scheduler_) | 169 | MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, |
| 169 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, | 170 | Scheduler& scheduler_) |
| 171 | : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, | ||
| 170 | memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, | 172 | memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 171 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 173 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 172 | guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), | 174 | guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), |
| @@ -174,14 +176,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 174 | texture_cache_runtime{ | 176 | texture_cache_runtime{ |
| 175 | device, scheduler, memory_allocator, staging_pool, | 177 | device, scheduler, memory_allocator, staging_pool, |
| 176 | blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue}, | 178 | blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue}, |
| 177 | texture_cache(texture_cache_runtime, *this), | 179 | texture_cache(texture_cache_runtime, device_memory), |
| 178 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 180 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 179 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | 181 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |
| 180 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 182 | buffer_cache(device_memory, buffer_cache_runtime), |
| 181 | query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | 183 | query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler, |
| 182 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), | 184 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), |
| 183 | query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | 185 | query_cache(gpu, *this, device_memory, query_cache_runtime), |
| 184 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | 186 | pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue, |
| 185 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | 187 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 186 | accelerate_dma(buffer_cache, texture_cache, scheduler), | 188 | accelerate_dma(buffer_cache, texture_cache, scheduler), |
| 187 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 189 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| @@ -508,7 +510,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in | |||
| 508 | 510 | ||
| 509 | void RasterizerVulkan::FlushAll() {} | 511 | void RasterizerVulkan::FlushAll() {} |
| 510 | 512 | ||
| 511 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 513 | void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 512 | if (addr == 0 || size == 0) { | 514 | if (addr == 0 || size == 0) { |
| 513 | return; | 515 | return; |
| 514 | } | 516 | } |
| @@ -525,7 +527,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
| 525 | } | 527 | } |
| 526 | } | 528 | } |
| 527 | 529 | ||
| 528 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 530 | bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 529 | if ((True(which & VideoCommon::CacheType::BufferCache))) { | 531 | if ((True(which & VideoCommon::CacheType::BufferCache))) { |
| 530 | std::scoped_lock lock{buffer_cache.mutex}; | 532 | std::scoped_lock lock{buffer_cache.mutex}; |
| 531 | if (buffer_cache.IsRegionGpuModified(addr, size)) { | 533 | if (buffer_cache.IsRegionGpuModified(addr, size)) { |
| @@ -542,7 +544,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
| 542 | return false; | 544 | return false; |
| 543 | } | 545 | } |
| 544 | 546 | ||
| 545 | VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) { | 547 | VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) { |
| 546 | { | 548 | { |
| 547 | std::scoped_lock lock{texture_cache.mutex}; | 549 | std::scoped_lock lock{texture_cache.mutex}; |
| 548 | auto area = texture_cache.GetFlushArea(addr, size); | 550 | auto area = texture_cache.GetFlushArea(addr, size); |
| @@ -551,14 +553,14 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 | |||
| 551 | } | 553 | } |
| 552 | } | 554 | } |
| 553 | VideoCore::RasterizerDownloadArea new_area{ | 555 | VideoCore::RasterizerDownloadArea new_area{ |
| 554 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 556 | .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE), |
| 555 | .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | 557 | .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE), |
| 556 | .preemtive = true, | 558 | .preemtive = true, |
| 557 | }; | 559 | }; |
| 558 | return new_area; | 560 | return new_area; |
| 559 | } | 561 | } |
| 560 | 562 | ||
| 561 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 563 | void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 562 | if (addr == 0 || size == 0) { | 564 | if (addr == 0 || size == 0) { |
| 563 | return; | 565 | return; |
| 564 | } | 566 | } |
| @@ -578,7 +580,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 578 | } | 580 | } |
| 579 | } | 581 | } |
| 580 | 582 | ||
| 581 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | 583 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { |
| 582 | { | 584 | { |
| 583 | std::scoped_lock lock{texture_cache.mutex}; | 585 | std::scoped_lock lock{texture_cache.mutex}; |
| 584 | for (const auto& [addr, size] : sequences) { | 586 | for (const auto& [addr, size] : sequences) { |
| @@ -599,7 +601,7 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||
| 599 | } | 601 | } |
| 600 | } | 602 | } |
| 601 | 603 | ||
| 602 | bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 604 | bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) { |
| 603 | if (addr == 0 || size == 0) { | 605 | if (addr == 0 || size == 0) { |
| 604 | return false; | 606 | return false; |
| 605 | } | 607 | } |
| @@ -620,7 +622,7 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 620 | return false; | 622 | return false; |
| 621 | } | 623 | } |
| 622 | 624 | ||
| 623 | void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | 625 | void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) { |
| 624 | if (addr == 0 || size == 0) { | 626 | if (addr == 0 || size == 0) { |
| 625 | return; | 627 | return; |
| 626 | } | 628 | } |
| @@ -640,7 +642,7 @@ void RasterizerVulkan::InvalidateGPUCache() { | |||
| 640 | gpu.InvalidateGPUCache(); | 642 | gpu.InvalidateGPUCache(); |
| 641 | } | 643 | } |
| 642 | 644 | ||
| 643 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 645 | void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) { |
| 644 | { | 646 | { |
| 645 | std::scoped_lock lock{texture_cache.mutex}; | 647 | std::scoped_lock lock{texture_cache.mutex}; |
| 646 | texture_cache.UnmapMemory(addr, size); | 648 | texture_cache.UnmapMemory(addr, size); |
| @@ -679,7 +681,7 @@ void RasterizerVulkan::ReleaseFences(bool force) { | |||
| 679 | fence_manager.WaitPendingFences(force); | 681 | fence_manager.WaitPendingFences(force); |
| 680 | } | 682 | } |
| 681 | 683 | ||
| 682 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | 684 | void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size, |
| 683 | VideoCommon::CacheType which) { | 685 | VideoCommon::CacheType which) { |
| 684 | if (Settings::IsGPULevelExtreme()) { | 686 | if (Settings::IsGPULevelExtreme()) { |
| 685 | FlushRegion(addr, size, which); | 687 | FlushRegion(addr, size, which); |
| @@ -782,7 +784,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| 782 | } | 784 | } |
| 783 | 785 | ||
| 784 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 786 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 785 | VAddr framebuffer_addr, u32 pixel_stride) { | 787 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 786 | if (!framebuffer_addr) { | 788 | if (!framebuffer_addr) { |
| 787 | return false; | 789 | return false; |
| 788 | } | 790 | } |
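Taken together, the vk_rasterizer.cpp hunks are one mechanical migration: every cache-maintenance entry point (FlushRegion, MustFlushRegion, GetFlushArea, InvalidateRegion, InnerInvalidation, OnCPUWrite, OnCacheInvalidation, UnmapMemory, FlushAndInvalidateRegion, AccelerateDisplay) is re-keyed from guest CPU addresses (VAddr) to device addresses (DAddr), and the constructor trades Core::Memory::Memory for Tegra::MaxwellDeviceMemoryManager. The stand-alone sketch below illustrates the address-space distinction; the TranslateToDevice helper and its base offset are invented for illustration and are not part of this diff.

```cpp
#include <cstdint>
#include <iostream>

using VAddr = std::uint64_t; // guest CPU virtual address space
using DAddr = std::uint64_t; // device (DMA) virtual address space

// Hypothetical translation step, invented for this sketch: with the new
// device memory manager, guest ranges are mapped into a device address
// space first, and the GPU-side caches are keyed by DAddr.
DAddr TranslateToDevice(VAddr cpu_addr) {
    constexpr DAddr kDeviceBase = 0x1000'0000ULL; // assumed mapping offset
    return kDeviceBase + cpu_addr;
}

// Mirrors the early-out shape of RasterizerVulkan::FlushRegion after the
// change; the parameter type is the only thing that differs from before.
void FlushRegion(DAddr addr, std::uint64_t size) {
    if (addr == 0 || size == 0) {
        return;
    }
    std::cout << "flush daddr=0x" << std::hex << addr << " size=0x" << size << '\n';
}

int main() {
    FlushRegion(TranslateToDevice(0x8000), 0x1000);
}
```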
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ad069556c..881ee0993 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -7,14 +7,13 @@ | |||
| 7 | 7 | ||
| 8 | #include <boost/container/static_vector.hpp> | 8 | #include <boost/container/static_vector.hpp> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 11 | |||
| 12 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 13 | #include "video_core/control/channel_state_cache.h" | 11 | #include "video_core/control/channel_state_cache.h" |
| 14 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 15 | #include "video_core/rasterizer_accelerated.h" | 13 | #include "video_core/host1x/gpu_device_memory_manager.h" |
| 16 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 17 | #include "video_core/renderer_vulkan/blit_image.h" | 15 | #include "video_core/renderer_vulkan/blit_image.h" |
| 16 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 17 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 19 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 18 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 20 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 19 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| @@ -34,10 +33,14 @@ namespace Core::Frontend { | |||
| 34 | class EmuWindow; | 33 | class EmuWindow; |
| 35 | } | 34 | } |
| 36 | 35 | ||
| 37 | namespace Tegra::Engines { | 36 | namespace Tegra { |
| 37 | |||
| 38 | namespace Engines { | ||
| 38 | class Maxwell3D; | 39 | class Maxwell3D; |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | } // namespace Tegra | ||
| 43 | |||
| 41 | namespace Vulkan { | 44 | namespace Vulkan { |
| 42 | 45 | ||
| 43 | struct ScreenInfo; | 46 | struct ScreenInfo; |
| @@ -70,13 +73,14 @@ private: | |||
| 70 | Scheduler& scheduler; | 73 | Scheduler& scheduler; |
| 71 | }; | 74 | }; |
| 72 | 75 | ||
| 73 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | 76 | class RasterizerVulkan final : public VideoCore::RasterizerInterface, |
| 74 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 77 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 75 | public: | 78 | public: |
| 76 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 79 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 77 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | 80 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 78 | const Device& device_, MemoryAllocator& memory_allocator_, | 81 | ScreenInfo& screen_info_, const Device& device_, |
| 79 | StateTracker& state_tracker_, Scheduler& scheduler_); | 82 | MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, |
| 83 | Scheduler& scheduler_); | ||
| 80 | ~RasterizerVulkan() override; | 84 | ~RasterizerVulkan() override; |
| 81 | 85 | ||
| 82 | void Draw(bool is_indexed, u32 instance_count) override; | 86 | void Draw(bool is_indexed, u32 instance_count) override; |
| @@ -90,18 +94,18 @@ public: | |||
| 90 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 94 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 91 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 95 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 92 | void FlushAll() override; | 96 | void FlushAll() override; |
| 93 | void FlushRegion(VAddr addr, u64 size, | 97 | void FlushRegion(DAddr addr, u64 size, |
| 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 98 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 95 | bool MustFlushRegion(VAddr addr, u64 size, | 99 | bool MustFlushRegion(DAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 97 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 101 | VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; |
| 98 | void InvalidateRegion(VAddr addr, u64 size, | 102 | void InvalidateRegion(DAddr addr, u64 size, |
| 99 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 103 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 100 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | 104 | void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override; |
| 101 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 105 | void OnCacheInvalidation(DAddr addr, u64 size) override; |
| 102 | bool OnCPUWrite(VAddr addr, u64 size) override; | 106 | bool OnCPUWrite(DAddr addr, u64 size) override; |
| 103 | void InvalidateGPUCache() override; | 107 | void InvalidateGPUCache() override; |
| 104 | void UnmapMemory(VAddr addr, u64 size) override; | 108 | void UnmapMemory(DAddr addr, u64 size) override; |
| 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 109 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 106 | void SignalFence(std::function<void()>&& func) override; | 110 | void SignalFence(std::function<void()>&& func) override; |
| 107 | void SyncOperation(std::function<void()>&& func) override; | 111 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -109,7 +113,7 @@ public: | |||
| 109 | void SignalReference() override; | 113 | void SignalReference() override; |
| 110 | void ReleaseFences(bool force = true) override; | 114 | void ReleaseFences(bool force = true) override; |
| 111 | void FlushAndInvalidateRegion( | 115 | void FlushAndInvalidateRegion( |
| 112 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 116 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 113 | void WaitForIdle() override; | 117 | void WaitForIdle() override; |
| 114 | void FragmentBarrier() override; | 118 | void FragmentBarrier() override; |
| 115 | void TiledCacheBarrier() override; | 119 | void TiledCacheBarrier() override; |
| @@ -122,7 +126,7 @@ public: | |||
| 122 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 126 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 123 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 127 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 124 | std::span<const u8> memory) override; | 128 | std::span<const u8> memory) override; |
| 125 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 129 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 126 | u32 pixel_stride) override; | 130 | u32 pixel_stride) override; |
| 127 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 131 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 128 | const VideoCore::DiskResourceLoadCallback& callback) override; | 132 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| @@ -176,6 +180,7 @@ private: | |||
| 176 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | 180 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); |
| 177 | 181 | ||
| 178 | Tegra::GPU& gpu; | 182 | Tegra::GPU& gpu; |
| 183 | Tegra::MaxwellDeviceMemoryManager& device_memory; | ||
| 179 | 184 | ||
| 180 | ScreenInfo& screen_info; | 185 | ScreenInfo& screen_info; |
| 181 | const Device& device; | 186 | const Device& device; |
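The header makes the structural half of the change explicit: RasterizerVulkan now implements VideoCore::RasterizerInterface directly rather than deriving from VideoCore::RasterizerAccelerated, since the cached-page bookkeeping that base class provided moves into the device memory manager (visible in the shader_cache.cpp hunks below, where rasterizer.UpdatePagesCachedCount becomes device_memory.UpdatePagesCachedCount). A reduced sketch of the new shape, with the yuzu classes replaced by stand-in types:

```cpp
#include <cstdint>

using DAddr = std::uint64_t;

// Stand-in for VideoCore::RasterizerInterface (heavily reduced).
struct RasterizerInterface {
    virtual ~RasterizerInterface() = default;
    virtual void FlushRegion(DAddr addr, std::uint64_t size) = 0;
};

// Stand-in for Tegra::MaxwellDeviceMemoryManager: the cached-page
// accounting the old base class owned lives here now.
class DeviceMemoryManager {
public:
    void UpdatePagesCachedCount(DAddr, std::uint64_t size, int delta) {
        cached_bytes += delta * static_cast<std::int64_t>(size);
    }

private:
    std::int64_t cached_bytes = 0;
};

// The rasterizer keeps only a reference and implements the interface
// directly, matching the new class declaration in the diff.
class RasterizerVulkan final : public RasterizerInterface {
public:
    explicit RasterizerVulkan(DeviceMemoryManager& device_memory_)
        : device_memory{device_memory_} {}
    void FlushRegion(DAddr, std::uint64_t) override {}

private:
    DeviceMemoryManager& device_memory;
};

int main() {
    DeviceMemoryManager mm;
    RasterizerVulkan rasterizer{mm};
    rasterizer.FlushRegion(0x1000, 0x100);
}
```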
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index e81cd031b..2af32c8f2 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 13 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| 15 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 15 | #include "video_core/memory_manager.h" | 16 | #include "video_core/memory_manager.h" |
| 16 | #include "video_core/shader_cache.h" | 17 | #include "video_core/shader_cache.h" |
| 17 | #include "video_core/shader_environment.h" | 18 | #include "video_core/shader_environment.h" |
| @@ -34,7 +35,8 @@ void ShaderCache::SyncGuestHost() { | |||
| 34 | RemovePendingShaders(); | 35 | RemovePendingShaders(); |
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | 38 | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 39 | : device_memory{device_memory_} {} | ||
| 38 | 40 | ||
| 39 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | 41 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { |
| 40 | auto& dirty{maxwell3d->dirty.flags}; | 42 | auto& dirty{maxwell3d->dirty.flags}; |
| @@ -132,7 +134,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t | |||
| 132 | 134 | ||
| 133 | storage.push_back(std::move(data)); | 135 | storage.push_back(std::move(data)); |
| 134 | 136 | ||
| 135 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | 137 | device_memory.UpdatePagesCachedCount(addr, size, 1); |
| 136 | } | 138 | } |
| 137 | 139 | ||
| 138 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | 140 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { |
| @@ -209,7 +211,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { | |||
| 209 | 211 | ||
| 210 | const VAddr addr = entry->addr_start; | 212 | const VAddr addr = entry->addr_start; |
| 211 | const size_t size = entry->addr_end - addr; | 213 | const size_t size = entry->addr_end - addr; |
| 212 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | 214 | device_memory.UpdatePagesCachedCount(addr, size, -1); |
| 213 | } | 215 | } |
| 214 | 216 | ||
| 215 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | 217 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { |
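The shader cache keeps the same symmetric accounting as before, only through the new owner: Register() pins a shader's guest range with a +1 delta and UnmarkMemory() releases it with -1. A minimal sketch of such a counter follows, assuming a 4 KiB page size and a hash-map store (neither the real page size nor the real data structure is shown in this diff):

```cpp
#include <cassert>
#include <cstdint>
#include <unordered_map>

using VAddr = std::uint64_t;
constexpr std::uint64_t PAGE_BITS = 12; // assumed 4 KiB pages

class DeviceMemoryManager {
public:
    void UpdatePagesCachedCount(VAddr addr, std::uint64_t size, int delta) {
        const VAddr first = addr >> PAGE_BITS;
        const VAddr last = (addr + size - 1) >> PAGE_BITS;
        for (VAddr page = first; page <= last; ++page) {
            counts[page] += delta;
            assert(counts[page] >= 0 && "unbalanced pin/unpin");
        }
    }

private:
    std::unordered_map<VAddr, int> counts;
};

int main() {
    DeviceMemoryManager mm;
    // Register(): pin the shader's guest range so writes are trapped.
    mm.UpdatePagesCachedCount(0x10000, 0x2000, 1);
    // UnmarkMemory(): the matching unpin when the shader is invalidated.
    mm.UpdatePagesCachedCount(0x10000, 0x2000, -1);
}
```

The assert makes the invariant visible: every +1 from Register() must eventually be matched by a -1 from UnmarkMemory(), otherwise protection on those pages would leak.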
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index a76896620..fd9bf2562 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/polyfill_ranges.h" | 15 | #include "common/polyfill_ranges.h" |
| 16 | #include "video_core/control/channel_state_cache.h" | 16 | #include "video_core/control/channel_state_cache.h" |
| 17 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 17 | #include "video_core/rasterizer_interface.h" | 18 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/shader_environment.h" | 19 | #include "video_core/shader_environment.h" |
| 19 | 20 | ||
| @@ -77,7 +78,7 @@ protected: | |||
| 77 | } | 78 | } |
| 78 | }; | 79 | }; |
| 79 | 80 | ||
| 80 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); | 81 | explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory); |
| 81 | 82 | ||
| 82 | /// @brief Update the hashes and information of shader stages | 83 | /// @brief Update the hashes and information of shader stages |
| 83 | /// @param unique_hashes Shader hashes to store into when a stage is enabled | 84 | /// @param unique_hashes Shader hashes to store into when a stage is enabled |
| @@ -145,7 +146,7 @@ private: | |||
| 145 | /// @brief Create a new shader entry and register it | 146 | /// @brief Create a new shader entry and register it |
| 146 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); | 147 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); |
| 147 | 148 | ||
| 148 | VideoCore::RasterizerInterface& rasterizer; | 149 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 149 | 150 | ||
| 150 | mutable std::mutex lookup_mutex; | 151 | mutable std::mutex lookup_mutex; |
| 151 | std::mutex invalidation_mutex; | 152 | std::mutex invalidation_mutex; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..7398ed2ec 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -8,10 +8,11 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| 11 | #include "core/memory.h" | ||
| 12 | #include "video_core/control/channel_state.h" | 11 | #include "video_core/control/channel_state.h" |
| 13 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 14 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/guest_memory.h" | ||
| 15 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 15 | #include "video_core/texture_cache/image_view_base.h" | 16 | #include "video_core/texture_cache/image_view_base.h" |
| 16 | #include "video_core/texture_cache/samples_helper.h" | 17 | #include "video_core/texture_cache/samples_helper.h" |
| 17 | #include "video_core/texture_cache/texture_cache_base.h" | 18 | #include "video_core/texture_cache/texture_cache_base.h" |
| @@ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 27 | using namespace Common::Literals; | 28 | using namespace Common::Literals; |
| 28 | 29 | ||
| 29 | template <class P> | 30 | template <class P> |
| 30 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) | 31 | TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 31 | : runtime{runtime_}, rasterizer{rasterizer_} { | 32 | : runtime{runtime_}, device_memory{device_memory_} { |
| 32 | // Configure null sampler | 33 | // Configure null sampler |
| 33 | TSCEntry sampler_descriptor{}; | 34 | TSCEntry sampler_descriptor{}; |
| 34 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | 35 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); |
| @@ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 49 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 50 | 51 | ||
| 51 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 52 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | 53 | const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 53 | const s64 min_spacing_expected = device_memory - 1_GiB; | 54 | const s64 min_spacing_expected = device_local_memory - 1_GiB; |
| 54 | const s64 min_spacing_critical = device_memory - 512_MiB; | 55 | const s64 min_spacing_critical = device_local_memory - 512_MiB; |
| 55 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | 56 | const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); |
| 56 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | 57 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 57 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | 58 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 58 | expected_memory = static_cast<u64>( | 59 | expected_memory = static_cast<u64>( |
| 59 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | 60 | std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), |
| 60 | DEFAULT_EXPECTED_MEMORY)); | 61 | DEFAULT_EXPECTED_MEMORY)); |
| 61 | critical_memory = static_cast<u64>( | 62 | critical_memory = static_cast<u64>( |
| 62 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | 63 | std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), |
| 63 | DEFAULT_CRITICAL_MEMORY)); | 64 | DEFAULT_CRITICAL_MEMORY)); |
| 64 | minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); | 65 | minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2); |
| 65 | } else { | 66 | } else { |
| 66 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 67 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 67 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 68 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| @@ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | |||
| 513 | } | 514 | } |
| 514 | 515 | ||
| 515 | template <class P> | 516 | template <class P> |
| 516 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | 517 | void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) { |
| 517 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | 518 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |
| 518 | if (True(image.flags & ImageFlagBits::CpuModified)) { | 519 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 519 | return; | 520 | return; |
| @@ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 526 | } | 527 | } |
| 527 | 528 | ||
| 528 | template <class P> | 529 | template <class P> |
| 529 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 530 | void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { |
| 530 | boost::container::small_vector<ImageId, 16> images; | 531 | boost::container::small_vector<ImageId, 16> images; |
| 531 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 532 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 532 | if (!image.IsSafeDownload()) { | 533 | if (!image.IsSafeDownload()) { |
| @@ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 553 | } | 554 | } |
| 554 | 555 | ||
| 555 | template <class P> | 556 | template <class P> |
| 556 | std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, | 557 | std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr, |
| 557 | u64 size) { | 558 | u64 size) { |
| 558 | std::optional<VideoCore::RasterizerDownloadArea> area{}; | 559 | std::optional<VideoCore::RasterizerDownloadArea> area{}; |
| 559 | ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { | 560 | ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { |
| @@ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 579 | } | 580 | } |
| 580 | 581 | ||
| 581 | template <class P> | 582 | template <class P> |
| 582 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 583 | void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) { |
| 583 | boost::container::small_vector<ImageId, 16> deleted_images; | 584 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 584 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 585 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 585 | for (const ImageId id : deleted_images) { | 586 | for (const ImageId id : deleted_images) { |
| @@ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 713 | 714 | ||
| 714 | template <class P> | 715 | template <class P> |
| 715 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( | 716 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( |
| 716 | const Tegra::FramebufferConfig& config, VAddr cpu_addr) { | 717 | const Tegra::FramebufferConfig& config, DAddr cpu_addr) { |
| 717 | // TODO: Properly implement this | 718 | // TODO: Properly implement this |
| 718 | const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); | 719 | const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); |
| 719 | if (it == page_table.end()) { | 720 | if (it == page_table.end()) { |
| @@ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep | |||
| 940 | } | 941 | } |
| 941 | 942 | ||
| 942 | template <class P> | 943 | template <class P> |
| 943 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 944 | bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { |
| 944 | bool is_modified = false; | 945 | bool is_modified = false; |
| 945 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | 946 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| 946 | if (False(image.flags & ImageFlagBits::GpuModified)) { | 947 | if (False(image.flags & ImageFlagBits::GpuModified)) { |
| @@ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 1059 | return; | 1060 | return; |
| 1060 | } | 1061 | } |
| 1061 | 1062 | ||
| 1062 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | 1063 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1063 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | 1064 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1064 | 1065 | ||
| 1065 | if (True(image.flags & ImageFlagBits::Converted)) { | 1066 | if (True(image.flags & ImageFlagBits::Converted)) { |
| @@ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | |||
| 1124 | template <class P> | 1125 | template <class P> |
| 1125 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1126 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1126 | RelaxedOptions options) { | 1127 | RelaxedOptions options) { |
| 1127 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1128 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1128 | if (!cpu_addr) { | 1129 | if (!cpu_addr) { |
| 1129 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | 1130 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 1130 | if (!cpu_addr) { | 1131 | if (!cpu_addr) { |
| @@ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | |||
| 1265 | 1266 | ||
| 1266 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; | 1267 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; |
| 1267 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | 1268 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1268 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | 1269 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1269 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | 1270 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1270 | 1271 | ||
| 1271 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | 1272 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, |
| @@ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) { | |||
| 1339 | template <class P> | 1340 | template <class P> |
| 1340 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1341 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1341 | RelaxedOptions options) { | 1342 | RelaxedOptions options) { |
| 1342 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1343 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1343 | if (!cpu_addr) { | 1344 | if (!cpu_addr) { |
| 1344 | const auto size = CalculateGuestSizeInBytes(info); | 1345 | const auto size = CalculateGuestSizeInBytes(info); |
| 1345 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); | 1346 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); |
| 1346 | if (!cpu_addr) { | 1347 | if (!cpu_addr) { |
| 1347 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | 1348 | const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; |
| 1348 | virtual_invalid_space += Common::AlignUp(size, 32); | 1349 | virtual_invalid_space += Common::AlignUp(size, 32); |
| 1349 | cpu_addr = std::optional<VAddr>(fake_addr); | 1350 | cpu_addr = std::optional<DAddr>(fake_addr); |
| 1350 | } | 1351 | } |
| 1351 | } | 1352 | } |
| 1352 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | 1353 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); |
| @@ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1362 | } | 1363 | } |
| 1363 | 1364 | ||
| 1364 | template <class P> | 1365 | template <class P> |
| 1365 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | 1366 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { |
| 1366 | ImageInfo new_info = info; | 1367 | ImageInfo new_info = info; |
| 1367 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | 1368 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); |
| 1368 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | 1369 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| @@ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag | |||
| 1650 | 1651 | ||
| 1651 | template <class P> | 1652 | template <class P> |
| 1652 | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | 1653 | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { |
| 1653 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1654 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1654 | if (!cpu_addr) { | 1655 | if (!cpu_addr) { |
| 1655 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | 1656 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 1656 | if (!cpu_addr) { | 1657 | if (!cpu_addr) { |
| @@ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd | |||
| 1780 | 1781 | ||
| 1781 | template <class P> | 1782 | template <class P> |
| 1782 | template <typename Func> | 1783 | template <typename Func> |
| 1783 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | 1784 | void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) { |
| 1784 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | 1785 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; |
| 1785 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | 1786 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; |
| 1786 | boost::container::small_vector<ImageId, 32> images; | 1787 | boost::container::small_vector<ImageId, 32> images; |
| @@ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, | |||
| 1924 | template <class P> | 1925 | template <class P> |
| 1925 | template <typename Func> | 1926 | template <typename Func> |
| 1926 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | 1927 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { |
| 1927 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | 1928 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type; |
| 1928 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | 1929 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; |
| 1929 | const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | 1930 | const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); |
| 1930 | for (const auto& [gpu_addr, size] : segments) { | 1931 | for (const auto& [gpu_addr, size] : segments) { |
| 1931 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1932 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1932 | ASSERT(cpu_addr); | 1933 | ASSERT(cpu_addr); |
| 1933 | if constexpr (RETURNS_BOOL) { | 1934 | if constexpr (RETURNS_BOOL) { |
| 1934 | if (func(gpu_addr, *cpu_addr, size)) { | 1935 | if (func(gpu_addr, *cpu_addr, size)) { |
| @@ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1980 | } | 1981 | } |
| 1981 | boost::container::small_vector<ImageViewId, 16> sparse_maps; | 1982 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1982 | ForEachSparseSegment( | 1983 | ForEachSparseSegment( |
| 1983 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1984 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { |
| 1984 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1985 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| 1985 | ForEachCPUPage(cpu_addr, size, | 1986 | ForEachCPUPage(cpu_addr, size, |
| 1986 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | 1987 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); |
| @@ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 2048 | auto& sparse_maps = it->second; | 2049 | auto& sparse_maps = it->second; |
| 2049 | for (auto& map_view_id : sparse_maps) { | 2050 | for (auto& map_view_id : sparse_maps) { |
| 2050 | const auto& map_range = slot_map_views[map_view_id]; | 2051 | const auto& map_range = slot_map_views[map_view_id]; |
| 2051 | const VAddr cpu_addr = map_range.cpu_addr; | 2052 | const DAddr cpu_addr = map_range.cpu_addr; |
| 2052 | const std::size_t size = map_range.size; | 2053 | const std::size_t size = map_range.size; |
| 2053 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | 2054 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { |
| 2054 | const auto page_it = page_table.find(page); | 2055 | const auto page_it = page_table.find(page); |
| @@ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2080 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 2081 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 2081 | image.flags |= ImageFlagBits::Tracked; | 2082 | image.flags |= ImageFlagBits::Tracked; |
| 2082 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2083 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2083 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 2084 | device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 2084 | return; | 2085 | return; |
| 2085 | } | 2086 | } |
| 2086 | if (True(image.flags & ImageFlagBits::Registered)) { | 2087 | if (True(image.flags & ImageFlagBits::Registered)) { |
| @@ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2089 | auto& sparse_maps = it->second; | 2090 | auto& sparse_maps = it->second; |
| 2090 | for (auto& map_view_id : sparse_maps) { | 2091 | for (auto& map_view_id : sparse_maps) { |
| 2091 | const auto& map = slot_map_views[map_view_id]; | 2092 | const auto& map = slot_map_views[map_view_id]; |
| 2092 | const VAddr cpu_addr = map.cpu_addr; | 2093 | const DAddr cpu_addr = map.cpu_addr; |
| 2093 | const std::size_t size = map.size; | 2094 | const std::size_t size = map.size; |
| 2094 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2095 | device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 2095 | } | 2096 | } |
| 2096 | return; | 2097 | return; |
| 2097 | } | 2098 | } |
| 2098 | ForEachSparseSegment(image, | 2099 | ForEachSparseSegment(image, |
| 2099 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 2100 | [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { |
| 2100 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2101 | device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 2101 | }); | 2102 | }); |
| 2102 | } | 2103 | } |
| 2103 | 2104 | ||
| @@ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2106 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | 2107 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 2107 | image.flags &= ~ImageFlagBits::Tracked; | 2108 | image.flags &= ~ImageFlagBits::Tracked; |
| 2108 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2109 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2109 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | 2110 | device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); |
| 2110 | return; | 2111 | return; |
| 2111 | } | 2112 | } |
| 2112 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | 2113 | ASSERT(True(image.flags & ImageFlagBits::Registered)); |
| @@ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2115 | auto& sparse_maps = it->second; | 2116 | auto& sparse_maps = it->second; |
| 2116 | for (auto& map_view_id : sparse_maps) { | 2117 | for (auto& map_view_id : sparse_maps) { |
| 2117 | const auto& map = slot_map_views[map_view_id]; | 2118 | const auto& map = slot_map_views[map_view_id]; |
| 2118 | const VAddr cpu_addr = map.cpu_addr; | 2119 | const DAddr cpu_addr = map.cpu_addr; |
| 2119 | const std::size_t size = map.size; | 2120 | const std::size_t size = map.size; |
| 2120 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 2121 | device_memory.UpdatePagesCachedCount(cpu_addr, size, -1); |
| 2121 | } | 2122 | } |
| 2122 | } | 2123 | } |
| 2123 | 2124 | ||
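Beyond the VAddr-to-DAddr renames, the texture_cache.h hunks move the guest-memory wrappers from Core::Memory to Tegra::Memory: GpuGuestMemory<u8, UnsafeRead> gives UploadImageContents a typed, span-like view over a guest range. The sketch below captures only the plausible fallback shape of such a view, either borrowing a direct host pointer or copying into a caller-provided scratch buffer; MemoryManager, GetDirectPointer, and ReadBlock here are simplified stand-ins, not yuzu's real signatures:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

struct MemoryManager {
    // Returns nullptr when the range is not contiguously host-mapped.
    const std::uint8_t* GetDirectPointer(std::uint64_t) const {
        return nullptr; // force the copy path in this sketch
    }
    void ReadBlock(std::uint64_t, void* dest, std::size_t size) const {
        std::memset(dest, 0, size); // stand-in for a real guest read
    }
};

template <typename T>
class GuestSpan {
public:
    GuestSpan(const MemoryManager& mm, std::uint64_t addr, std::size_t count,
              std::vector<T>* scratch) {
        if (const auto* ptr = mm.GetDirectPointer(addr)) {
            // Fast path: alias the host mapping directly, no copy.
            view = {reinterpret_cast<const T*>(ptr), count};
        } else {
            // Slow path: materialize the range into the scratch buffer.
            scratch->resize(count);
            mm.ReadBlock(addr, scratch->data(), count * sizeof(T));
            view = {scratch->data(), count};
        }
    }
    std::span<const T> get() const { return view; }

private:
    std::span<const T> view;
};

int main() {
    MemoryManager mm;
    std::vector<std::uint8_t> scratch;
    GuestSpan<std::uint8_t> swizzle_data(mm, 0x1000, 64, &scratch);
    return swizzle_data.get().size() == 64 ? 0 : 1;
}
```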
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 6caf75b46..8699d40d4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -36,9 +36,11 @@ | |||
| 36 | #include "video_core/texture_cache/types.h" | 36 | #include "video_core/texture_cache/types.h" |
| 37 | #include "video_core/textures/texture.h" | 37 | #include "video_core/textures/texture.h" |
| 38 | 38 | ||
| 39 | namespace Tegra::Control { | 39 | namespace Tegra { |
| 40 | namespace Control { | ||
| 40 | struct ChannelState; | 41 | struct ChannelState; |
| 41 | } | 42 | } |
| 43 | } // namespace Tegra | ||
| 42 | 44 | ||
| 43 | namespace VideoCommon { | 45 | namespace VideoCommon { |
| 44 | 46 | ||
| @@ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
| 126 | }; | 128 | }; |
| 127 | 129 | ||
| 128 | public: | 130 | public: |
| 129 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); | 131 | explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&); |
| 130 | 132 | ||
| 131 | /// Notify the cache that a new frame has been queued | 133 | /// Notify the cache that a new frame has been queued |
| 132 | void TickFrame(); | 134 | void TickFrame(); |
| @@ -190,15 +192,15 @@ public: | |||
| 190 | Framebuffer* GetFramebuffer(); | 192 | Framebuffer* GetFramebuffer(); |
| 191 | 193 | ||
| 192 | /// Mark images in a range as modified from the CPU | 194 | /// Mark images in a range as modified from the CPU |
| 193 | void WriteMemory(VAddr cpu_addr, size_t size); | 195 | void WriteMemory(DAddr cpu_addr, size_t size); |
| 194 | 196 | ||
| 195 | /// Download contents of host images to guest memory in a region | 197 | /// Download contents of host images to guest memory in a region |
| 196 | void DownloadMemory(VAddr cpu_addr, size_t size); | 198 | void DownloadMemory(DAddr cpu_addr, size_t size); |
| 197 | 199 | ||
| 198 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 200 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size); |
| 199 | 201 | ||
| 200 | /// Remove images in a region | 202 | /// Remove images in a region |
| 201 | void UnmapMemory(VAddr cpu_addr, size_t size); | 203 | void UnmapMemory(DAddr cpu_addr, size_t size); |
| 202 | 204 | ||
| 203 | /// Remove images in a region | 205 | /// Remove images in a region |
| 204 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | 206 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); |
| @@ -210,7 +212,7 @@ public: | |||
| 210 | 212 | ||
| 211 | /// Try to find a cached image view in the given CPU address | 213 | /// Try to find a cached image view in the given CPU address |
| 212 | [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, | 214 | [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, |
| 213 | VAddr cpu_addr); | 215 | DAddr cpu_addr); |
| 214 | 216 | ||
| 215 | /// Return true when there are uncommitted images to be downloaded | 217 | /// Return true when there are uncommitted images to be downloaded |
| 216 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | 218 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| @@ -235,7 +237,7 @@ public: | |||
| 235 | GPUVAddr address = 0, size_t size = 0); | 237 | GPUVAddr address = 0, size_t size = 0); |
| 236 | 238 | ||
| 237 | /// Return true when a CPU region is modified from the GPU | 239 | /// Return true when a CPU region is modified from the GPU |
| 238 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 240 | [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); |
| 239 | 241 | ||
| 240 | [[nodiscard]] bool IsRescaling() const noexcept; | 242 | [[nodiscard]] bool IsRescaling() const noexcept; |
| 241 | 243 | ||
| @@ -252,7 +254,7 @@ public: | |||
| 252 | private: | 254 | private: |
| 253 | /// Iterate over all page indices in a range | 255 | /// Iterate over all page indices in a range |
| 254 | template <typename Func> | 256 | template <typename Func> |
| 255 | static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | 257 | static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) { |
| 256 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | 258 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; |
| 257 | const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; | 259 | const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; |
| 258 | for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { | 260 | for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { |
| @@ -326,7 +328,7 @@ private: | |||
| 326 | 328 | ||
| 327 | /// Create a new image and join perfectly matching existing images | 329 | /// Create a new image and join perfectly matching existing images |
| 328 | /// Remove joined images from the cache | 330 | /// Remove joined images from the cache |
| 329 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 331 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr); |
| 330 | 332 | ||
| 331 | [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); | 333 | [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); |
| 332 | 334 | ||
| @@ -349,7 +351,7 @@ private: | |||
| 349 | 351 | ||
| 350 | /// Iterates over all the images in a region calling func | 352 | /// Iterates over all the images in a region calling func |
| 351 | template <typename Func> | 353 | template <typename Func> |
| 352 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | 354 | void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func); |
| 353 | 355 | ||
| 354 | template <typename Func> | 356 | template <typename Func> |
| 355 | void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | 357 | void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); |
| @@ -421,7 +423,7 @@ private: | |||
| 421 | 423 | ||
| 422 | Runtime& runtime; | 424 | Runtime& runtime; |
| 423 | 425 | ||
| 424 | VideoCore::RasterizerInterface& rasterizer; | 426 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 425 | std::deque<TextureCacheGPUMap> gpu_page_table_storage; | 427 | std::deque<TextureCacheGPUMap> gpu_page_table_storage; |
| 426 | 428 | ||
| 427 | RenderTargets render_targets; | 429 | RenderTargets render_targets; |
| @@ -432,7 +434,7 @@ private: | |||
| 432 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 434 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 433 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; | 435 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 434 | 436 | ||
| 435 | VAddr virtual_invalid_space{}; | 437 | DAddr virtual_invalid_space{}; |
| 436 | 438 | ||
| 437 | bool has_deleted_images = false; | 439 | bool has_deleted_images = false; |
| 438 | bool is_rescaling = false; | 440 | bool is_rescaling = false; |
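ForEachCPUPage (now taking a DAddr) is the usual inclusive page walk: shift the first and last byte of the range down by the page bits and visit every index in between. A self-contained version of the same loop, with 4 KiB pages assumed as a stand-in for YUZU_PAGEBITS:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using DAddr = std::uint64_t;
constexpr std::uint64_t YUZU_PAGEBITS = 12; // assumption for this sketch

template <typename Func>
void ForEachPage(DAddr addr, std::size_t size, Func&& func) {
    // (addr + size - 1) is the last byte of the range, so the loop is
    // inclusive of the final, possibly partial, page.
    const std::uint64_t page_end = (addr + size - 1) >> YUZU_PAGEBITS;
    for (std::uint64_t page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
        func(page);
    }
}

int main() {
    std::vector<std::uint64_t> touched;
    // A 0x2000-byte range starting mid-page touches pages 4, 5 and 6.
    ForEachPage(0x4800, 0x2000, [&](std::uint64_t page) { touched.push_back(page); });
    return touched.size() == 3 ? 0 : 1;
}
```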
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index fcf70068e..1a6f0d1ad 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -20,9 +20,9 @@ | |||
| 20 | #include "common/div_ceil.h" | 20 | #include "common/div_ceil.h" |
| 21 | #include "common/scratch_buffer.h" | 21 | #include "common/scratch_buffer.h" |
| 22 | #include "common/settings.h" | 22 | #include "common/settings.h" |
| 23 | #include "core/memory.h" | ||
| 24 | #include "video_core/compatible_formats.h" | 23 | #include "video_core/compatible_formats.h" |
| 25 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/guest_memory.h" | ||
| 26 | #include "video_core/memory_manager.h" | 26 | #include "video_core/memory_manager.h" |
| 27 | #include "video_core/surface.h" | 27 | #include "video_core/surface.h" |
| 28 | #include "video_core/texture_cache/decode_bc.h" | 28 | #include "video_core/texture_cache/decode_bc.h" |
| @@ -552,7 +552,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 552 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 552 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 553 | const std::span<const u8> src = input.subspan(host_offset); | 553 | const std::span<const u8> src = input.subspan(host_offset); |
| 554 | { | 554 | { |
| 555 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | 555 | Tegra::Memory::GpuGuestMemoryScoped<u8, |
| 556 | Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||
| 556 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); | 557 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); |
| 557 | 558 | ||
| 558 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 559 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
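SwizzleBlockLinearImage uses the scoped read-write variant, GpuGuestMemoryScoped<u8, UnsafeReadWrite>: a view populated from guest memory on construction that, being writable, is flushed back when it leaves scope. The RAII sketch below shows that shape under those assumptions; the stand-in MemoryManager and its ReadBlock/WriteBlock calls are invented for the example:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct MemoryManager {
    std::vector<std::uint8_t> backing = std::vector<std::uint8_t>(0x1000);
    void ReadBlock(std::uint64_t addr, void* dest, std::size_t size) const {
        std::memcpy(dest, backing.data() + addr, size);
    }
    void WriteBlock(std::uint64_t addr, const void* src, std::size_t size) {
        std::memcpy(backing.data() + addr, src, size);
    }
};

class ScopedGuestBuffer {
public:
    ScopedGuestBuffer(MemoryManager& mm_, std::uint64_t addr_, std::size_t size_)
        : mm{mm_}, addr{addr_}, data(size_) {
        mm.ReadBlock(addr, data.data(), data.size()); // read on construction
    }
    ~ScopedGuestBuffer() {
        mm.WriteBlock(addr, data.data(), data.size()); // write back on scope exit
    }
    std::uint8_t* begin() { return data.data(); }
    std::size_t size() const { return data.size(); }

private:
    MemoryManager& mm;
    std::uint64_t addr;
    std::vector<std::uint8_t> data;
};

int main() {
    MemoryManager mm;
    {
        ScopedGuestBuffer dst(mm, 0x100, 16);
        dst.begin()[0] = 0xAB; // mutate through the view (e.g. swizzling)
    } // destructor flushes the swizzled bytes back to guest memory
    return mm.backing[0x100] == 0xAB ? 0 : 1;
}
```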
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index b42d48416..0efb7b49d 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/settings.h" | 7 | #include "common/settings.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 10 | #include "video_core/host1x/host1x.h" | ||
| 9 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 10 | #include "video_core/renderer_null/renderer_null.h" | 12 | #include "video_core/renderer_null/renderer_null.h" |
| 11 | #include "video_core/renderer_opengl/renderer_opengl.h" | 13 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| @@ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | |||
| 18 | Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | 20 | Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 19 | std::unique_ptr<Core::Frontend::GraphicsContext> context) { | 21 | std::unique_ptr<Core::Frontend::GraphicsContext> context) { |
| 20 | auto& telemetry_session = system.TelemetrySession(); | 22 | auto& telemetry_session = system.TelemetrySession(); |
| 21 | auto& cpu_memory = system.ApplicationMemory(); | 23 | auto& device_memory = system.Host1x().MemoryManager(); |
| 22 | 24 | ||
| 23 | switch (Settings::values.renderer_backend.GetValue()) { | 25 | switch (Settings::values.renderer_backend.GetValue()) { |
| 24 | case Settings::RendererBackend::OpenGL: | 26 | case Settings::RendererBackend::OpenGL: |
| 25 | return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, | 27 | return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, |
| 26 | gpu, std::move(context)); | 28 | device_memory, gpu, std::move(context)); |
| 27 | case Settings::RendererBackend::Vulkan: | 29 | case Settings::RendererBackend::Vulkan: |
| 28 | return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, | 30 | return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, |
| 29 | gpu, std::move(context)); | 31 | device_memory, gpu, std::move(context)); |
| 30 | case Settings::RendererBackend::Null: | 32 | case Settings::RendererBackend::Null: |
| 31 | return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, | 33 | return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context)); |
| 32 | std::move(context)); | ||
| 33 | default: | 34 | default: |
| 34 | return nullptr; | 35 | return nullptr; |
| 35 | } | 36 | } |
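Finally, CreateRenderer resolves its memory dependency from Host1x (system.Host1x().MemoryManager()) instead of system.ApplicationMemory(), hands the device memory manager to the OpenGL and Vulkan renderers, and drops the memory argument from the Null renderer entirely. A compressed sketch of the resulting factory, with the yuzu types reduced to stand-ins and the other constructor arguments omitted:

```cpp
#include <memory>

struct DeviceMemoryManager {}; // stand-in for Tegra::MaxwellDeviceMemoryManager
struct RendererBase {
    virtual ~RendererBase() = default;
};

struct RendererVulkan : RendererBase {
    explicit RendererVulkan(DeviceMemoryManager&) {}
};
struct RendererNull : RendererBase {}; // no longer needs a memory reference

enum class Backend { Vulkan, Null };

std::unique_ptr<RendererBase> CreateRenderer(Backend backend,
                                             DeviceMemoryManager& device_memory) {
    switch (backend) {
    case Backend::Vulkan:
        return std::make_unique<RendererVulkan>(device_memory);
    case Backend::Null:
        return std::make_unique<RendererNull>();
    }
    return nullptr; // unknown backend, as in the diff's default case
}

int main() {
    DeviceMemoryManager device_memory; // obtained from Host1x in yuzu
    auto renderer = CreateRenderer(Backend::Vulkan, device_memory);
    return renderer != nullptr ? 0 : 1;
}
```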