| author | 2023-12-25 07:32:16 +0100 |
|---|---|
| committer | 2024-01-18 21:12:30 -0500 |
| commit | 0a2536a0df1f4aea406f2132d3edda0430acc9d1 (patch) |
| tree | c0ad53890581c9c7e180c5ccb3b66e3c63e3ba64 |
| parent | SMMU: Implement backing CPU page protect/unprotect (diff) |
SMMU: Initial adaptation to video_core.
79 files changed, 1262 insertions, 1263 deletions
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp index 3c214ec00..d9fc8c3e0 100644 --- a/src/audio_core/device/device_session.cpp +++ b/src/audio_core/device/device_session.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| 11 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 12 | #include "core/guest_memory.h" | ||
| 12 | 13 | ||
| 13 | #include "core/hle/kernel/k_process.h" | 14 | #include "core/hle/kernel/k_process.h" |
| 14 | 15 | ||
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index 911dae3c1..77a33a87a 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/logging/log.h" | 10 | #include "common/logging/log.h" |
| 11 | #include "common/scratch_buffer.h" | 11 | #include "common/scratch_buffer.h" |
| 12 | #include "core/memory.h" | 12 | #include "core/memory.h" |
| 13 | #include "core/guest_memory.h" | ||
| 13 | 14 | ||
| 14 | namespace AudioCore::Renderer { | 15 | namespace AudioCore::Renderer { |
| 15 | 16 | ||
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 293d9647b..ca54eb6c6 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -37,6 +37,8 @@ add_library(core STATIC | |||
| 37 | debugger/gdbstub_arch.h | 37 | debugger/gdbstub_arch.h |
| 38 | debugger/gdbstub.cpp | 38 | debugger/gdbstub.cpp |
| 39 | debugger/gdbstub.h | 39 | debugger/gdbstub.h |
| 40 | device_memory_manager.h | ||
| 41 | device_memory_manager.inc | ||
| 40 | device_memory.cpp | 42 | device_memory.cpp |
| 41 | device_memory.h | 43 | device_memory.h |
| 42 | file_sys/fssystem/fs_i_storage.h | 44 | file_sys/fssystem/fs_i_storage.h |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 461eea9c8..04e1f13ff 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -651,7 +651,7 @@ size_t System::GetCurrentHostThreadID() const { | |||
| 651 | return impl->kernel.GetCurrentHostThreadID(); | 651 | return impl->kernel.GetCurrentHostThreadID(); |
| 652 | } | 652 | } |
| 653 | 653 | ||
| 654 | void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | 654 | void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) { |
| 655 | return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); | 655 | return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); |
| 656 | } | 656 | } |
| 657 | 657 | ||
diff --git a/src/core/core.h b/src/core/core.h index ba5add0dc..20ec2ffff 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -224,7 +224,7 @@ public: | |||
| 224 | /// Prepare the core emulation for a reschedule | 224 | /// Prepare the core emulation for a reschedule |
| 225 | void PrepareReschedule(u32 core_index); | 225 | void PrepareReschedule(u32 core_index); |
| 226 | 226 | ||
| 227 | void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); | 227 | void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback); |
| 228 | 228 | ||
| 229 | [[nodiscard]] size_t GetCurrentHostThreadID() const; | 229 | [[nodiscard]] size_t GetCurrentHostThreadID() const; |
| 230 | 230 | ||
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 71b95016c..1a63cbd09 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h | |||
| @@ -3,10 +3,11 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <deque> | ||
| 7 | #include <memory> | ||
| 8 | #include <array> | 6 | #include <array> |
| 9 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <deque> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 10 | 11 | ||
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "common/virtual_buffer.h" | 13 | #include "common/virtual_buffer.h" |
| @@ -48,26 +49,54 @@ public: | |||
| 48 | template <typename T> | 49 | template <typename T> |
| 49 | const T* GetPointer(DAddr address) const; | 50 | const T* GetPointer(DAddr address) const; |
| 50 | 51 | ||
| 52 | DAddr GetAddressFromPAddr(PAddr address) const { | ||
| 53 | DAddr subbits = static_cast<DAddr>(address & page_mask); | ||
| 54 | return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits; | ||
| 55 | } | ||
| 56 | |||
| 57 | PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const { | ||
| 58 | PAddr subbits = static_cast<PAddr>(address & page_mask); | ||
| 59 | auto paddr = compressed_physical_ptr[(address >> page_bits)]; | ||
| 60 | if (paddr == 0) { | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits; | ||
| 64 | } | ||
| 65 | |||
| 51 | template <typename T> | 66 | template <typename T> |
| 52 | void Write(DAddr address, T value); | 67 | void Write(DAddr address, T value); |
| 53 | 68 | ||
| 54 | template <typename T> | 69 | template <typename T> |
| 55 | T Read(DAddr address) const; | 70 | T Read(DAddr address) const; |
| 56 | 71 | ||
| 72 | const u8* GetSpan(const DAddr src_addr, const std::size_t size) const { | ||
| 73 | return nullptr; | ||
| 74 | } | ||
| 75 | |||
| 76 | u8* GetSpan(const DAddr src_addr, const std::size_t size) { | ||
| 77 | return nullptr; | ||
| 78 | } | ||
| 79 | |||
| 57 | void ReadBlock(DAddr address, void* dest_pointer, size_t size); | 80 | void ReadBlock(DAddr address, void* dest_pointer, size_t size); |
| 58 | void WriteBlock(DAddr address, void* src_pointer, size_t size); | 81 | void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size); |
| 82 | void WriteBlock(DAddr address, const void* src_pointer, size_t size); | ||
| 83 | void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size); | ||
| 59 | 84 | ||
| 60 | size_t RegisterProcess(Memory::Memory* memory); | 85 | size_t RegisterProcess(Memory::Memory* memory); |
| 61 | void UnregisterProcess(size_t id); | 86 | void UnregisterProcess(size_t id); |
| 62 | 87 | ||
| 63 | void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); | 88 | void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); |
| 64 | 89 | ||
| 90 | static constexpr size_t AS_BITS = Traits::device_virtual_bits; | ||
| 91 | |||
| 65 | private: | 92 | private: |
| 66 | static constexpr bool supports_pinning = Traits::supports_pinning; | 93 | static constexpr bool supports_pinning = Traits::supports_pinning; |
| 67 | static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; | 94 | static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; |
| 68 | static constexpr size_t device_as_size = 1ULL << device_virtual_bits; | 95 | static constexpr size_t device_as_size = 1ULL << device_virtual_bits; |
| 69 | static constexpr size_t physical_max_bits = 33; | 96 | static constexpr size_t physical_max_bits = 33; |
| 70 | static constexpr size_t page_bits = 12; | 97 | static constexpr size_t page_bits = 12; |
| 98 | static constexpr size_t page_size = 1ULL << page_bits; | ||
| 99 | static constexpr size_t page_mask = page_size - 1ULL; | ||
| 71 | static constexpr u32 physical_address_base = 1U << page_bits; | 100 | static constexpr u32 physical_address_base = 1U << page_bits; |
| 72 | 101 | ||
| 73 | template <typename T> | 102 | template <typename T> |
| @@ -136,11 +165,15 @@ private: | |||
| 136 | private: | 165 | private: |
| 137 | std::array<std::atomic_uint16_t, subentries> values{}; | 166 | std::array<std::atomic_uint16_t, subentries> values{}; |
| 138 | }; | 167 | }; |
| 139 | static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); | 168 | static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), |
| 169 | "CounterEntry should be 8 bytes!"); | ||
| 140 | 170 | ||
| 141 | static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; | 171 | static constexpr size_t num_counter_entries = |
| 172 | (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||
| 142 | using CachedPages = std::array<CounterEntry, num_counter_entries>; | 173 | using CachedPages = std::array<CounterEntry, num_counter_entries>; |
| 143 | std::unique_ptr<CachedPages> cached_pages; | 174 | std::unique_ptr<CachedPages> cached_pages; |
| 175 | std::mutex counter_guard; | ||
| 176 | std::mutex mapping_guard; | ||
| 144 | }; | 177 | }; |
| 145 | 178 | ||
| 146 | } // namespace Core \ No newline at end of file | 179 | } // namespace Core \ No newline at end of file |
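The new DAddr/PAddr helpers above lean on two flat page tables indexed by 4 KiB page: `compressed_physical_ptr` stores the physical page number plus one (so 0 can mean "unmapped"), while `compressed_device_addr` stores the device page directly. A minimal standalone sketch of that translation scheme follows; the struct name and fixed table layout are illustrative, since the real class derives its sizes from `Traits`.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using DAddr = std::uint64_t;
using PAddr = std::uint64_t;

constexpr std::size_t page_bits = 12;                 // 4 KiB pages, as in the patch
constexpr std::size_t page_size = 1ULL << page_bits;
constexpr std::size_t page_mask = page_size - 1ULL;

struct CompressedTables {
    // Device page -> physical page + 1; 0 marks an unmapped page.
    std::vector<std::uint32_t> compressed_physical_ptr;
    // Physical page -> device page (stored directly).
    std::vector<std::uint32_t> compressed_device_addr;

    // Mirrors GetPhysicalRawAddressFromDAddr: unmapped pages translate to 0.
    PAddr ToPhysical(DAddr address) const {
        const PAddr subbits = static_cast<PAddr>(address & page_mask);
        const std::uint32_t paddr = compressed_physical_ptr[address >> page_bits];
        if (paddr == 0) {
            return 0;
        }
        return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
    }

    // Mirrors GetAddressFromPAddr: assumes the physical page is actually mapped.
    DAddr ToDevice(PAddr address) const {
        const DAddr subbits = static_cast<DAddr>(address & page_mask);
        return (static_cast<DAddr>(compressed_device_addr[address >> page_bits]) << page_bits) +
               subbits;
    }
};
```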
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 77410f72f..8c5f82d31 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc | |||
| @@ -105,7 +105,8 @@ template <typename Traits> | |||
| 105 | DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) | 105 | DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) |
| 106 | : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, | 106 | : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, |
| 107 | interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), | 107 | interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), |
| 108 | compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { | 108 | compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)), |
| 109 | cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { | ||
| 109 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | 110 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); |
| 110 | cached_pages = std::make_unique<CachedPages>(); | 111 | cached_pages = std::make_unique<CachedPages>(); |
| 111 | } | 112 | } |
| @@ -144,10 +145,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | |||
| 144 | Core::Memory::Memory* process_memory = registered_processes[process_id]; | 145 | Core::Memory::Memory* process_memory = registered_processes[process_id]; |
| 145 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | 146 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |
| 146 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | 147 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |
| 147 | std::atomic_thread_fence(std::memory_order_acquire); | 148 | std::scoped_lock lk(mapping_guard); |
| 148 | for (size_t i = 0; i < num_pages; i++) { | 149 | for (size_t i = 0; i < num_pages; i++) { |
| 149 | const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; | 150 | const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; |
| 150 | auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); | 151 | auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)); |
| 151 | if (ptr == nullptr) [[unlikely]] { | 152 | if (ptr == nullptr) [[unlikely]] { |
| 152 | compressed_physical_ptr[start_page_d + i] = 0; | 153 | compressed_physical_ptr[start_page_d + i] = 0; |
| 153 | continue; | 154 | continue; |
| @@ -157,14 +158,14 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | |||
| 157 | compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); | 158 | compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); |
| 158 | InsertCPUBacking(start_page_d + i, new_vaddress, process_id); | 159 | InsertCPUBacking(start_page_d + i, new_vaddress, process_id); |
| 159 | } | 160 | } |
| 160 | std::atomic_thread_fence(std::memory_order_release); | ||
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | template <typename Traits> | 163 | template <typename Traits> |
| 164 | void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | 164 | void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { |
| 165 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | 165 | size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |
| 166 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | 166 | size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |
| 167 | std::atomic_thread_fence(std::memory_order_acquire); | 167 | interface->InvalidateRegion(address, size); |
| 168 | std::scoped_lock lk(mapping_guard); | ||
| 168 | for (size_t i = 0; i < num_pages; i++) { | 169 | for (size_t i = 0; i < num_pages; i++) { |
| 169 | auto phys_addr = compressed_physical_ptr[start_page_d + i]; | 170 | auto phys_addr = compressed_physical_ptr[start_page_d + i]; |
| 170 | compressed_physical_ptr[start_page_d + i] = 0; | 171 | compressed_physical_ptr[start_page_d + i] = 0; |
| @@ -173,7 +174,6 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | |||
| 173 | compressed_device_addr[phys_addr - 1] = 0; | 174 | compressed_device_addr[phys_addr - 1] = 0; |
| 174 | } | 175 | } |
| 175 | } | 176 | } |
| 176 | std::atomic_thread_fence(std::memory_order_release); | ||
| 177 | } | 177 | } |
| 178 | 178 | ||
| 179 | template <typename Traits> | 179 | template <typename Traits> |
| @@ -256,6 +256,45 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto o | |||
| 256 | 256 | ||
| 257 | template <typename Traits> | 257 | template <typename Traits> |
| 258 | void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { | 258 | void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { |
| 259 | interface->FlushRegion(address, size); | ||
| 260 | WalkBlock( | ||
| 261 | address, size, | ||
| 262 | [&](size_t copy_amount, DAddr current_vaddr) { | ||
| 263 | LOG_ERROR( | ||
| 264 | HW_Memory, | ||
| 265 | "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||
| 266 | current_vaddr, address, size); | ||
| 267 | std::memset(dest_pointer, 0, copy_amount); | ||
| 268 | }, | ||
| 269 | [&](size_t copy_amount, const u8* const src_ptr) { | ||
| 270 | std::memcpy(dest_pointer, src_ptr, copy_amount); | ||
| 271 | }, | ||
| 272 | [&](const std::size_t copy_amount) { | ||
| 273 | dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount; | ||
| 274 | }); | ||
| 275 | } | ||
| 276 | |||
| 277 | template <typename Traits> | ||
| 278 | void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) { | ||
| 279 | WalkBlock( | ||
| 280 | address, size, | ||
| 281 | [&](size_t copy_amount, DAddr current_vaddr) { | ||
| 282 | LOG_ERROR( | ||
| 283 | HW_Memory, | ||
| 284 | "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||
| 285 | current_vaddr, address, size); | ||
| 286 | }, | ||
| 287 | [&](size_t copy_amount, u8* const dst_ptr) { | ||
| 288 | std::memcpy(dst_ptr, src_pointer, copy_amount); | ||
| 289 | }, | ||
| 290 | [&](const std::size_t copy_amount) { | ||
| 291 | src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; | ||
| 292 | }); | ||
| 293 | interface->InvalidateRegion(address, size); | ||
| 294 | } | ||
| 295 | |||
| 296 | template <typename Traits> | ||
| 297 | void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) { | ||
| 259 | WalkBlock( | 298 | WalkBlock( |
| 260 | address, size, | 299 | address, size, |
| 261 | [&](size_t copy_amount, DAddr current_vaddr) { | 300 | [&](size_t copy_amount, DAddr current_vaddr) { |
| @@ -274,7 +313,8 @@ void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, s | |||
| 274 | } | 313 | } |
| 275 | 314 | ||
| 276 | template <typename Traits> | 315 | template <typename Traits> |
| 277 | void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) { | 316 | void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer, |
| 317 | size_t size) { | ||
| 278 | WalkBlock( | 318 | WalkBlock( |
| 279 | address, size, | 319 | address, size, |
| 280 | [&](size_t copy_amount, DAddr current_vaddr) { | 320 | [&](size_t copy_amount, DAddr current_vaddr) { |
| @@ -287,7 +327,7 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, s | |||
| 287 | std::memcpy(dst_ptr, src_pointer, copy_amount); | 327 | std::memcpy(dst_ptr, src_pointer, copy_amount); |
| 288 | }, | 328 | }, |
| 289 | [&](const std::size_t copy_amount) { | 329 | [&](const std::size_t copy_amount) { |
| 290 | src_pointer = static_cast<u8*>(src_pointer) + copy_amount; | 330 | src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; |
| 291 | }); | 331 | }); |
| 292 | } | 332 | } |
| 293 | 333 | ||
| @@ -313,6 +353,18 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) { | |||
| 313 | 353 | ||
| 314 | template <typename Traits> | 354 | template <typename Traits> |
| 315 | void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | 355 | void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { |
| 356 | bool locked = false; | ||
| 357 | auto lock = [&] { | ||
| 358 | if (!locked) { | ||
| 359 | counter_guard.lock(); | ||
| 360 | locked = true; | ||
| 361 | } | ||
| 362 | }; | ||
| 363 | SCOPE_EXIT({ | ||
| 364 | if (locked) { | ||
| 365 | counter_guard.unlock(); | ||
| 366 | } | ||
| 367 | }); | ||
| 316 | u64 uncache_begin = 0; | 368 | u64 uncache_begin = 0; |
| 317 | u64 cache_begin = 0; | 369 | u64 cache_begin = 0; |
| 318 | u64 uncache_bytes = 0; | 370 | u64 uncache_bytes = 0; |
| @@ -347,6 +399,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
| 347 | } | 399 | } |
| 348 | uncache_bytes += Memory::YUZU_PAGESIZE; | 400 | uncache_bytes += Memory::YUZU_PAGESIZE; |
| 349 | } else if (uncache_bytes > 0) { | 401 | } else if (uncache_bytes > 0) { |
| 402 | lock(); | ||
| 350 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, | 403 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, |
| 351 | uncache_bytes, false); | 404 | uncache_bytes, false); |
| 352 | uncache_bytes = 0; | 405 | uncache_bytes = 0; |
| @@ -357,6 +410,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
| 357 | } | 410 | } |
| 358 | cache_bytes += Memory::YUZU_PAGESIZE; | 411 | cache_bytes += Memory::YUZU_PAGESIZE; |
| 359 | } else if (cache_bytes > 0) { | 412 | } else if (cache_bytes > 0) { |
| 413 | lock(); | ||
| 360 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | 414 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, |
| 361 | true); | 415 | true); |
| 362 | cache_bytes = 0; | 416 | cache_bytes = 0; |
| @@ -364,10 +418,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
| 364 | vpage++; | 418 | vpage++; |
| 365 | } | 419 | } |
| 366 | if (uncache_bytes > 0) { | 420 | if (uncache_bytes > 0) { |
| 421 | lock(); | ||
| 367 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | 422 | MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, |
| 368 | false); | 423 | false); |
| 369 | } | 424 | } |
| 370 | if (cache_bytes > 0) { | 425 | if (cache_bytes > 0) { |
| 426 | lock(); | ||
| 371 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | 427 | MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, |
| 372 | true); | 428 | true); |
| 373 | } | 429 | } |
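A note on the coherency contract implied by the new block accessors in this file: the safe `ReadBlock` flushes GPU caches for the range before copying out, the safe `WriteBlock` invalidates them after copying in, and the new `*Unsafe` variants skip both steps for callers that already know the range is coherent. The hunk also replaces the acquire/release fences around Map/Unmap with the `mapping_guard` mutex and takes `counter_guard` only lazily in `UpdatePagesCachedCount`. A hedged sketch of just the flush/invalidate ordering; `DeviceInterface` and the copy helpers are stand-ins, not the real video_core interface:

```cpp
#include <cstddef>
#include <cstdint>

using DAddr = std::uint64_t;

// Stand-in for the rasterizer/video_core interface the manager talks to.
struct DeviceInterface {
    void FlushRegion(DAddr, std::size_t) {}      // write GPU-dirty data back to memory
    void InvalidateRegion(DAddr, std::size_t) {} // make the GPU refetch the range
};

struct BlockAccess {
    DeviceInterface* interface = nullptr;

    void ReadBlock(DAddr addr, void* dest, std::size_t size) {
        interface->FlushRegion(addr, size); // memory must be current before copying out
        CopyOut(addr, dest, size);
    }

    void WriteBlock(DAddr addr, const void* src, std::size_t size) {
        CopyIn(addr, src, size);
        interface->InvalidateRegion(addr, size); // stale GPU copies are dropped afterwards
    }

    // The Unsafe variants only walk and copy; no cache maintenance.
    void ReadBlockUnsafe(DAddr addr, void* dest, std::size_t size) { CopyOut(addr, dest, size); }
    void WriteBlockUnsafe(DAddr addr, const void* src, std::size_t size) { CopyIn(addr, src, size); }

private:
    void CopyOut(DAddr, void*, std::size_t) {} // placeholder for the WalkBlock copy loop
    void CopyIn(DAddr, const void*, std::size_t) {}
};
```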
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h index 9687531e8..f1abf4f83 100644 --- a/src/core/gpu_dirty_memory_manager.h +++ b/src/core/gpu_dirty_memory_manager.h | |||
| @@ -23,7 +23,7 @@ public: | |||
| 23 | 23 | ||
| 24 | ~GPUDirtyMemoryManager() = default; | 24 | ~GPUDirtyMemoryManager() = default; |
| 25 | 25 | ||
| 26 | void Collect(VAddr address, size_t size) { | 26 | void Collect(PAddr address, size_t size) { |
| 27 | TransformAddress t = BuildTransform(address, size); | 27 | TransformAddress t = BuildTransform(address, size); |
| 28 | TransformAddress tmp, original; | 28 | TransformAddress tmp, original; |
| 29 | do { | 29 | do { |
| @@ -47,7 +47,7 @@ public: | |||
| 47 | std::memory_order_relaxed)); | 47 | std::memory_order_relaxed)); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | void Gather(std::function<void(VAddr, size_t)>& callback) { | 50 | void Gather(std::function<void(PAddr, size_t)>& callback) { |
| 51 | { | 51 | { |
| 52 | std::scoped_lock lk(guard); | 52 | std::scoped_lock lk(guard); |
| 53 | TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); | 53 | TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); |
| @@ -65,7 +65,7 @@ public: | |||
| 65 | mask = mask >> empty_bits; | 65 | mask = mask >> empty_bits; |
| 66 | 66 | ||
| 67 | const size_t continuous_bits = std::countr_one(mask); | 67 | const size_t continuous_bits = std::countr_one(mask); |
| 68 | callback((static_cast<VAddr>(transform.address) << page_bits) + offset, | 68 | callback((static_cast<PAddr>(transform.address) << page_bits) + offset, |
| 69 | continuous_bits << align_bits); | 69 | continuous_bits << align_bits); |
| 70 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | 70 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |
| 71 | offset += continuous_bits << align_bits; | 71 | offset += continuous_bits << align_bits; |
| @@ -89,7 +89,7 @@ private: | |||
| 89 | constexpr static size_t align_mask = align_size - 1; | 89 | constexpr static size_t align_mask = align_size - 1; |
| 90 | constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; | 90 | constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; |
| 91 | 91 | ||
| 92 | bool IsValid(VAddr address) { | 92 | bool IsValid(PAddr address) { |
| 93 | return address < (1ULL << 39); | 93 | return address < (1ULL << 39); |
| 94 | } | 94 | } |
| 95 | 95 | ||
| @@ -103,7 +103,7 @@ private: | |||
| 103 | return mask; | 103 | return mask; |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | TransformAddress BuildTransform(VAddr address, size_t size) { | 106 | TransformAddress BuildTransform(PAddr address, size_t size) { |
| 107 | const size_t minor_address = address & page_mask; | 107 | const size_t minor_address = address & page_mask; |
| 108 | const size_t minor_bit = minor_address >> align_bits; | 108 | const size_t minor_bit = minor_address >> align_bits; |
| 109 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | 109 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h new file mode 100644 index 000000000..0b349cc17 --- /dev/null +++ b/src/core/guest_memory.h | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <iterator> | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/scratch_buffer.h" | ||
| 13 | #include "core/memory.h" | ||
| 14 | |||
| 15 | namespace Core::Memory { | ||
| 16 | |||
| 17 | enum GuestMemoryFlags : u32 { | ||
| 18 | Read = 1 << 0, | ||
| 19 | Write = 1 << 1, | ||
| 20 | Safe = 1 << 2, | ||
| 21 | Cached = 1 << 3, | ||
| 22 | |||
| 23 | SafeRead = Read | Safe, | ||
| 24 | SafeWrite = Write | Safe, | ||
| 25 | SafeReadWrite = SafeRead | SafeWrite, | ||
| 26 | SafeReadCachedWrite = SafeReadWrite | Cached, | ||
| 27 | |||
| 28 | UnsafeRead = Read, | ||
| 29 | UnsafeWrite = Write, | ||
| 30 | UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||
| 31 | UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||
| 32 | }; | ||
| 33 | |||
| 34 | namespace { | ||
| 35 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 36 | class GuestMemory { | ||
| 37 | using iterator = T*; | ||
| 38 | using const_iterator = const T*; | ||
| 39 | using value_type = T; | ||
| 40 | using element_type = T; | ||
| 41 | using iterator_category = std::contiguous_iterator_tag; | ||
| 42 | |||
| 43 | public: | ||
| 44 | GuestMemory() = delete; | ||
| 45 | explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||
| 46 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 47 | : m_memory{memory}, m_addr{addr}, m_size{size} { | ||
| 48 | static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||
| 49 | if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||
| 50 | Read(addr, size, backup); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | ~GuestMemory() = default; | ||
| 55 | |||
| 56 | T* data() noexcept { | ||
| 57 | return m_data_span.data(); | ||
| 58 | } | ||
| 59 | |||
| 60 | const T* data() const noexcept { | ||
| 61 | return m_data_span.data(); | ||
| 62 | } | ||
| 63 | |||
| 64 | size_t size() const noexcept { | ||
| 65 | return m_size; | ||
| 66 | } | ||
| 67 | |||
| 68 | size_t size_bytes() const noexcept { | ||
| 69 | return this->size() * sizeof(T); | ||
| 70 | } | ||
| 71 | |||
| 72 | [[nodiscard]] T* begin() noexcept { | ||
| 73 | return this->data(); | ||
| 74 | } | ||
| 75 | |||
| 76 | [[nodiscard]] const T* begin() const noexcept { | ||
| 77 | return this->data(); | ||
| 78 | } | ||
| 79 | |||
| 80 | [[nodiscard]] T* end() noexcept { | ||
| 81 | return this->data() + this->size(); | ||
| 82 | } | ||
| 83 | |||
| 84 | [[nodiscard]] const T* end() const noexcept { | ||
| 85 | return this->data() + this->size(); | ||
| 86 | } | ||
| 87 | |||
| 88 | T& operator[](size_t index) noexcept { | ||
| 89 | return m_data_span[index]; | ||
| 90 | } | ||
| 91 | |||
| 92 | const T& operator[](size_t index) const noexcept { | ||
| 93 | return m_data_span[index]; | ||
| 94 | } | ||
| 95 | |||
| 96 | void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||
| 97 | m_addr = addr; | ||
| 98 | m_size = size; | ||
| 99 | m_addr_changed = true; | ||
| 100 | } | ||
| 101 | |||
| 102 | std::span<T> Read(u64 addr, std::size_t size, | ||
| 103 | Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||
| 104 | m_addr = addr; | ||
| 105 | m_size = size; | ||
| 106 | if (m_size == 0) { | ||
| 107 | m_is_data_copy = true; | ||
| 108 | return {}; | ||
| 109 | } | ||
| 110 | |||
| 111 | if (this->TrySetSpan()) { | ||
| 112 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 113 | m_memory.FlushRegion(m_addr, this->size_bytes()); | ||
| 114 | } | ||
| 115 | } else { | ||
| 116 | if (backup) { | ||
| 117 | backup->resize_destructive(this->size()); | ||
| 118 | m_data_span = *backup; | ||
| 119 | } else { | ||
| 120 | m_data_copy.resize(this->size()); | ||
| 121 | m_data_span = std::span(m_data_copy); | ||
| 122 | } | ||
| 123 | m_is_data_copy = true; | ||
| 124 | m_span_valid = true; | ||
| 125 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 126 | m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||
| 127 | } else { | ||
| 128 | m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | return m_data_span; | ||
| 132 | } | ||
| 133 | |||
| 134 | void Write(std::span<T> write_data) noexcept { | ||
| 135 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 136 | m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||
| 137 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 138 | m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||
| 139 | } else { | ||
| 140 | m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | bool TrySetSpan() noexcept { | ||
| 145 | if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||
| 146 | m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||
| 147 | m_span_valid = true; | ||
| 148 | return true; | ||
| 149 | } | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | |||
| 153 | protected: | ||
| 154 | bool IsDataCopy() const noexcept { | ||
| 155 | return m_is_data_copy; | ||
| 156 | } | ||
| 157 | |||
| 158 | bool AddressChanged() const noexcept { | ||
| 159 | return m_addr_changed; | ||
| 160 | } | ||
| 161 | |||
| 162 | M& m_memory; | ||
| 163 | u64 m_addr{}; | ||
| 164 | size_t m_size{}; | ||
| 165 | std::span<T> m_data_span{}; | ||
| 166 | std::vector<T> m_data_copy{}; | ||
| 167 | bool m_span_valid{false}; | ||
| 168 | bool m_is_data_copy{false}; | ||
| 169 | bool m_addr_changed{false}; | ||
| 170 | }; | ||
| 171 | |||
| 172 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 173 | class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||
| 174 | public: | ||
| 175 | GuestMemoryScoped() = delete; | ||
| 176 | explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||
| 177 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 178 | : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||
| 179 | if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||
| 180 | if (!this->TrySetSpan()) { | ||
| 181 | if (backup) { | ||
| 182 | this->m_data_span = *backup; | ||
| 183 | this->m_span_valid = true; | ||
| 184 | this->m_is_data_copy = true; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | ~GuestMemoryScoped() { | ||
| 191 | if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||
| 192 | if (this->size() == 0) [[unlikely]] { | ||
| 193 | return; | ||
| 194 | } | ||
| 195 | |||
| 196 | if (this->AddressChanged() || this->IsDataCopy()) { | ||
| 197 | ASSERT(this->m_span_valid); | ||
| 198 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 199 | this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||
| 200 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 201 | this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||
| 202 | } else { | ||
| 203 | this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||
| 204 | } | ||
| 205 | } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || (FLAGS & GuestMemoryFlags::Cached)) { | ||
| 206 | this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||
| 207 | } | ||
| 208 | } | ||
| 209 | } | ||
| 210 | }; | ||
| 211 | } // namespace | ||
| 212 | |||
| 213 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 214 | using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>; | ||
| 215 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 216 | using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>; | ||
| 217 | |||
| 218 | } // namespace Core::Memory | ||
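For context, a hedged usage sketch of the two helpers this new header exports. `CpuGuestMemory` reads the range at construction, either as a zero-copy span or into the optional backup buffer/internal copy; `CpuGuestMemoryScoped` additionally writes the data back when it leaves scope if the `Write` flag is set. The surrounding function and addresses below are invented for illustration:

```cpp
#include "common/common_types.h"
#include "core/guest_memory.h"
#include "core/memory.h"

void Example(Core::Memory::Memory& memory, u64 guest_addr) {
    namespace CM = Core::Memory;

    // Read-only view of 0x100 bytes; Safe flushes GPU caches over the range first.
    CM::CpuGuestMemory<u8, CM::GuestMemoryFlags::SafeRead> src(memory, guest_addr, 0x100);
    const u8 first_byte = src[0];

    {
        // Read-modify-write: the destructor writes the data back (or invalidates
        // the region if the guest memory was accessed in place).
        CM::CpuGuestMemoryScoped<u32, CM::GuestMemoryFlags::SafeReadWrite> words(
            memory, guest_addr, 0x100 / sizeof(u32));
        words[0] = first_byte;
    }
}
```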
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index 3f38ceb03..9f6274c7d 100644 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp | |||
| @@ -22,19 +22,7 @@ | |||
| 22 | #include "core/hle/service/hle_ipc.h" | 22 | #include "core/hle/service/hle_ipc.h" |
| 23 | #include "core/hle/service/ipc_helpers.h" | 23 | #include "core/hle/service/ipc_helpers.h" |
| 24 | #include "core/memory.h" | 24 | #include "core/memory.h" |
| 25 | 25 | #include "core/guest_memory.h" | |
| 26 | namespace { | ||
| 27 | static thread_local std::array read_buffer_data_a{ | ||
| 28 | Common::ScratchBuffer<u8>(), | ||
| 29 | Common::ScratchBuffer<u8>(), | ||
| 30 | Common::ScratchBuffer<u8>(), | ||
| 31 | }; | ||
| 32 | static thread_local std::array read_buffer_data_x{ | ||
| 33 | Common::ScratchBuffer<u8>(), | ||
| 34 | Common::ScratchBuffer<u8>(), | ||
| 35 | Common::ScratchBuffer<u8>(), | ||
| 36 | }; | ||
| 37 | } // Anonymous namespace | ||
| 38 | 26 | ||
| 39 | namespace Service { | 27 | namespace Service { |
| 40 | 28 | ||
| @@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons | |||
| 343 | } | 331 | } |
| 344 | 332 | ||
| 345 | std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { | 333 | std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { |
| 346 | static thread_local std::array read_buffer_a{ | 334 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 347 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 348 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 349 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 350 | }; | ||
| 351 | 335 | ||
| 352 | ASSERT_OR_EXECUTE_MSG( | 336 | ASSERT_OR_EXECUTE_MSG( |
| 353 | BufferDescriptorA().size() > buffer_index, { return {}; }, | 337 | BufferDescriptorA().size() > buffer_index, { return {}; }, |
| 354 | "BufferDescriptorA invalid buffer_index {}", buffer_index); | 338 | "BufferDescriptorA invalid buffer_index {}", buffer_index); |
| 355 | auto& read_buffer = read_buffer_a[buffer_index]; | 339 | return gm.Read(BufferDescriptorA()[buffer_index].Address(), |
| 356 | return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | 340 | BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); |
| 357 | BufferDescriptorA()[buffer_index].Size(), | ||
| 358 | &read_buffer_data_a[buffer_index]); | ||
| 359 | } | 341 | } |
| 360 | 342 | ||
| 361 | std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { | 343 | std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { |
| 362 | static thread_local std::array read_buffer_x{ | 344 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 363 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 364 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 365 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 366 | }; | ||
| 367 | 345 | ||
| 368 | ASSERT_OR_EXECUTE_MSG( | 346 | ASSERT_OR_EXECUTE_MSG( |
| 369 | BufferDescriptorX().size() > buffer_index, { return {}; }, | 347 | BufferDescriptorX().size() > buffer_index, { return {}; }, |
| 370 | "BufferDescriptorX invalid buffer_index {}", buffer_index); | 348 | "BufferDescriptorX invalid buffer_index {}", buffer_index); |
| 371 | auto& read_buffer = read_buffer_x[buffer_index]; | 349 | return gm.Read(BufferDescriptorX()[buffer_index].Address(), |
| 372 | return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | 350 | BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); |
| 373 | BufferDescriptorX()[buffer_index].Size(), | ||
| 374 | &read_buffer_data_x[buffer_index]); | ||
| 375 | } | 351 | } |
| 376 | 352 | ||
| 377 | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | 353 | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { |
| 378 | static thread_local std::array read_buffer_a{ | 354 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); |
| 379 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 380 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 381 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 382 | }; | ||
| 383 | static thread_local std::array read_buffer_x{ | ||
| 384 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 385 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 386 | Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||
| 387 | }; | ||
| 388 | 355 | ||
| 389 | const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && | 356 | const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && |
| 390 | BufferDescriptorA()[buffer_index].Size()}; | 357 | BufferDescriptorA()[buffer_index].Size()}; |
| @@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons | |||
| 401 | ASSERT_OR_EXECUTE_MSG( | 368 | ASSERT_OR_EXECUTE_MSG( |
| 402 | BufferDescriptorA().size() > buffer_index, { return {}; }, | 369 | BufferDescriptorA().size() > buffer_index, { return {}; }, |
| 403 | "BufferDescriptorA invalid buffer_index {}", buffer_index); | 370 | "BufferDescriptorA invalid buffer_index {}", buffer_index); |
| 404 | auto& read_buffer = read_buffer_a[buffer_index]; | 371 | return gm.Read(BufferDescriptorA()[buffer_index].Address(), |
| 405 | return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | 372 | BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); |
| 406 | BufferDescriptorA()[buffer_index].Size(), | ||
| 407 | &read_buffer_data_a[buffer_index]); | ||
| 408 | } else { | 373 | } else { |
| 409 | ASSERT_OR_EXECUTE_MSG( | 374 | ASSERT_OR_EXECUTE_MSG( |
| 410 | BufferDescriptorX().size() > buffer_index, { return {}; }, | 375 | BufferDescriptorX().size() > buffer_index, { return {}; }, |
| 411 | "BufferDescriptorX invalid buffer_index {}", buffer_index); | 376 | "BufferDescriptorX invalid buffer_index {}", buffer_index); |
| 412 | auto& read_buffer = read_buffer_x[buffer_index]; | 377 | return gm.Read(BufferDescriptorX()[buffer_index].Address(), |
| 413 | return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | 378 | BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); |
| 414 | BufferDescriptorX()[buffer_index].Size(), | ||
| 415 | &read_buffer_data_x[buffer_index]); | ||
| 416 | } | 379 | } |
| 417 | } | 380 | } |
| 418 | 381 | ||
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h index d550a11b7..8329d7265 100644 --- a/src/core/hle/service/hle_ipc.h +++ b/src/core/hle/service/hle_ipc.h | |||
| @@ -19,8 +19,6 @@ | |||
| 19 | #include "core/hle/ipc.h" | 19 | #include "core/hle/ipc.h" |
| 20 | #include "core/hle/kernel/k_handle_table.h" | 20 | #include "core/hle/kernel/k_handle_table.h" |
| 21 | #include "core/hle/kernel/svc_common.h" | 21 | #include "core/hle/kernel/svc_common.h" |
| 22 | #include "core/hle/kernel/k_auto_object.h" | ||
| 23 | #include "core/hle/kernel/k_handle_table.h" | ||
| 24 | 22 | ||
| 25 | union Result; | 23 | union Result; |
| 26 | 24 | ||
| @@ -377,10 +375,6 @@ public: | |||
| 377 | return nullptr; | 375 | return nullptr; |
| 378 | } | 376 | } |
| 379 | 377 | ||
| 380 | Kernel::KScopedAutoObject<Kernel::KAutoObject> GetObjectFromHandle(u32 handle) { | ||
| 381 | return GetClientHandleTable().GetObjectForIpc(handle, thread); | ||
| 382 | } | ||
| 383 | |||
| 384 | [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { | 378 | [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { |
| 385 | return manager.lock(); | 379 | return manager.lock(); |
| 386 | } | 380 | } |
| @@ -432,6 +426,9 @@ private: | |||
| 432 | 426 | ||
| 433 | Kernel::KernelCore& kernel; | 427 | Kernel::KernelCore& kernel; |
| 434 | Core::Memory::Memory& memory; | 428 | Core::Memory::Memory& memory; |
| 429 | |||
| 430 | mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{}; | ||
| 431 | mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{}; | ||
| 435 | }; | 432 | }; |
| 436 | 433 | ||
| 437 | } // namespace Service | 434 | } // namespace Service |
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp index fd6c9aa0c..7879c6f04 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.cpp +++ b/src/core/hle/service/nvdrv/core/nvmap.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors | 2 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors |
| 3 | // SPDX-License-Identifier: GPL-3.0-or-later | 3 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 4 | 4 | ||
| 5 | #include <functional> | ||
| 6 | |||
| 5 | #include "common/alignment.h" | 7 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| @@ -18,6 +20,7 @@ NvMap::Handle::Handle(u64 size_, Id id_) | |||
| 18 | } | 20 | } |
| 19 | 21 | ||
| 20 | NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { | 22 | NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { |
| 23 | std::scoped_lock lock(mutex); | ||
| 21 | // Handles cannot be allocated twice | 24 | // Handles cannot be allocated twice |
| 22 | if (allocated) { | 25 | if (allocated) { |
| 23 | return NvResult::AccessDenied; | 26 | return NvResult::AccessDenied; |
| @@ -78,11 +81,9 @@ void NvMap::UnmapHandle(Handle& handle_description) { | |||
| 78 | 81 | ||
| 79 | // Free and unmap the handle from the SMMU | 82 | // Free and unmap the handle from the SMMU |
| 80 | auto& smmu = host1x.MemoryManager(); | 83 | auto& smmu = host1x.MemoryManager(); |
| 81 | smmu.Unmap(static_cast<DAddr>(handle_description.pin_virt_address), | 84 | smmu.Unmap(handle_description.d_address, handle_description.aligned_size); |
| 82 | handle_description.aligned_size); | 85 | smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size)); |
| 83 | smmu.Free(handle_description.pin_virt_address, | 86 | handle_description.d_address = 0; |
| 84 | static_cast<size_t>(handle_description.aligned_size)); | ||
| 85 | handle_description.pin_virt_address = 0; | ||
| 86 | } | 87 | } |
| 87 | 88 | ||
| 88 | bool NvMap::TryRemoveHandle(const Handle& handle_description) { | 89 | bool NvMap::TryRemoveHandle(const Handle& handle_description) { |
| @@ -123,41 +124,16 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) { | |||
| 123 | } | 124 | } |
| 124 | } | 125 | } |
| 125 | 126 | ||
| 126 | VAddr NvMap::GetHandleAddress(Handle::Id handle) { | 127 | DAddr NvMap::GetHandleAddress(Handle::Id handle) { |
| 127 | std::scoped_lock lock(handles_lock); | 128 | std::scoped_lock lock(handles_lock); |
| 128 | try { | 129 | try { |
| 129 | return handles.at(handle)->address; | 130 | return handles.at(handle)->d_address; |
| 130 | } catch (std::out_of_range&) { | 131 | } catch (std::out_of_range&) { |
| 131 | return 0; | 132 | return 0; |
| 132 | } | 133 | } |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | NvResult NvMap::AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id) { | 136 | DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_area_pin) { |
| 136 | auto handle_description{GetHandle(handle)}; | ||
| 137 | if (!handle_description) [[unlikely]] { | ||
| 138 | return NvResult::BadParameter; | ||
| 139 | } | ||
| 140 | |||
| 141 | if (handle_description->allocated) [[unlikely]] { | ||
| 142 | return NvResult::InsufficientMemory; | ||
| 143 | } | ||
| 144 | |||
| 145 | std::scoped_lock lock(handle_description->mutex); | ||
| 146 | NvResult result = handle_description->Alloc(pFlags, pAlign, pKind, pAddress); | ||
| 147 | if (result != NvResult::Success) { | ||
| 148 | return result; | ||
| 149 | } | ||
| 150 | auto& smmu = host1x.MemoryManager(); | ||
| 151 | size_t total_size = static_cast<size_t>(handle_description->aligned_size); | ||
| 152 | handle_description->d_address = smmu.Allocate(total_size); | ||
| 153 | if (handle_description->d_address == 0) { | ||
| 154 | return NvResult::InsufficientMemory; | ||
| 155 | } | ||
| 156 | smmu.Map(handle_description->d_address, handle_description->address, total_size, session_id); | ||
| 157 | return NvResult::Success; | ||
| 158 | } | ||
| 159 | |||
| 160 | u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { | ||
| 161 | auto handle_description{GetHandle(handle)}; | 137 | auto handle_description{GetHandle(handle)}; |
| 162 | if (!handle_description) [[unlikely]] { | 138 | if (!handle_description) [[unlikely]] { |
| 163 | return 0; | 139 | return 0; |
| @@ -176,35 +152,38 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { | |||
| 176 | handle_description->unmap_queue_entry.reset(); | 152 | handle_description->unmap_queue_entry.reset(); |
| 177 | 153 | ||
| 178 | handle_description->pins++; | 154 | handle_description->pins++; |
| 179 | return handle_description->pin_virt_address; | 155 | return handle_description->d_address; |
| 180 | } | 156 | } |
| 181 | } | 157 | } |
| 182 | 158 | ||
| 159 | using namespace std::placeholders; | ||
| 183 | // If not then allocate some space and map it | 160 | // If not then allocate some space and map it |
| 184 | DAddr address{}; | 161 | DAddr address{}; |
| 185 | auto& smmu = host1x.MemoryManager(); | 162 | auto& smmu = host1x.MemoryManager(); |
| 186 | while ((address = smmu.AllocatePinned( | 163 | auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); |
| 187 | static_cast<size_t>(handle_description->aligned_size))) == 0) { | 164 | //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); |
| 165 | while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) { | ||
| 188 | // Free handles until the allocation succeeds | 166 | // Free handles until the allocation succeeds |
| 189 | std::scoped_lock queueLock(unmap_queue_lock); | 167 | std::scoped_lock queueLock(unmap_queue_lock); |
| 190 | if (auto freeHandleDesc{unmap_queue.front()}) { | 168 | if (auto freeHandleDesc{unmap_queue.front()}) { |
| 191 | // Handles in the unmap queue are guaranteed not to be pinned so don't bother | 169 | // Handles in the unmap queue are guaranteed not to be pinned so don't bother |
| 192 | // checking if they are before unmapping | 170 | // checking if they are before unmapping |
| 193 | std::scoped_lock freeLock(freeHandleDesc->mutex); | 171 | std::scoped_lock freeLock(freeHandleDesc->mutex); |
| 194 | if (handle_description->pin_virt_address) | 172 | if (handle_description->d_address) |
| 195 | UnmapHandle(*freeHandleDesc); | 173 | UnmapHandle(*freeHandleDesc); |
| 196 | } else { | 174 | } else { |
| 197 | LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | 175 | LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); |
| 198 | } | 176 | } |
| 199 | } | 177 | } |
| 200 | 178 | ||
| 179 | handle_description->d_address = address; | ||
| 180 | |||
| 201 | smmu.Map(address, handle_description->address, handle_description->aligned_size, | 181 | smmu.Map(address, handle_description->address, handle_description->aligned_size, |
| 202 | session_id); | 182 | session_id); |
| 203 | handle_description->pin_virt_address = static_cast<u32>(address); | ||
| 204 | } | 183 | } |
| 205 | 184 | ||
| 206 | handle_description->pins++; | 185 | handle_description->pins++; |
| 207 | return handle_description->pin_virt_address; | 186 | return handle_description->d_address; |
| 208 | } | 187 | } |
| 209 | 188 | ||
| 210 | void NvMap::UnpinHandle(Handle::Id handle) { | 189 | void NvMap::UnpinHandle(Handle::Id handle) { |
| @@ -255,15 +234,10 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna | |||
| 255 | LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); | 234 | LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); |
| 256 | } else if (handle_description->dupes == 0) { | 235 | } else if (handle_description->dupes == 0) { |
| 257 | // Force unmap the handle | 236 | // Force unmap the handle |
| 258 | if (handle_description->pin_virt_address) { | 237 | if (handle_description->d_address) { |
| 259 | std::scoped_lock queueLock(unmap_queue_lock); | 238 | std::scoped_lock queueLock(unmap_queue_lock); |
| 260 | UnmapHandle(*handle_description); | 239 | UnmapHandle(*handle_description); |
| 261 | } | 240 | } |
| 262 | if (handle_description->allocated) { | ||
| 263 | auto& smmu = host1x.MemoryManager(); | ||
| 264 | smmu.Free(handle_description->d_address, handle_description->aligned_size); | ||
| 265 | smmu.Unmap(handle_description->d_address, handle_description->aligned_size); | ||
| 266 | } | ||
| 267 | 241 | ||
| 268 | handle_description->pins = 0; | 242 | handle_description->pins = 0; |
| 269 | } | 243 | } |
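With `pin_virt_address` gone, the pin API now speaks full device addresses. A hedged sketch of the intended call pattern; the handle id, session id, and surrounding service code are placeholders, with namespaces as used elsewhere in the tree:

```cpp
#include "core/hle/service/nvdrv/core/nvmap.h"

void UseBuffer(Service::Nvidia::NvCore::NvMap& nvmap, u32 handle_id, size_t session_id) {
    // PinHandle maps the handle into the SMMU (refcounted) and now returns the
    // full DAddr rather than a truncated 32-bit pin_virt_address.
    const DAddr daddr = nvmap.PinHandle(handle_id, session_id, /*low_area_pin=*/false);
    if (daddr == 0) {
        return; // invalid handle
    }

    // ... hand `daddr` to the GPU / host1x ...

    // Each PinHandle must be balanced; the mapping is only released once the
    // pin count drops to zero and the handle lands on the unmap queue.
    nvmap.UnpinHandle(handle_id);
}
```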
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h index 7c3110d91..e9e9e8b5b 100644 --- a/src/core/hle/service/nvdrv/core/nvmap.h +++ b/src/core/hle/service/nvdrv/core/nvmap.h | |||
| @@ -48,7 +48,7 @@ public: | |||
| 48 | using Id = u32; | 48 | using Id = u32; |
| 49 | Id id; //!< A globally unique identifier for this handle | 49 | Id id; //!< A globally unique identifier for this handle |
| 50 | 50 | ||
| 51 | s32 pins{}; | 51 | s64 pins{}; |
| 52 | u32 pin_virt_address{}; | 52 | u32 pin_virt_address{}; |
| 53 | std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; | 53 | std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; |
| 54 | 54 | ||
| @@ -63,15 +63,14 @@ public: | |||
| 63 | 63 | ||
| 64 | VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to, | 64 | VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to, |
| 65 | //!< this can also be in the nvdrv tmem | 65 | //!< this can also be in the nvdrv tmem |
| 66 | DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to, | ||
| 67 | //!< this can also be in the nvdrv tmem | ||
| 68 | bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC | 66 | bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC |
| 69 | //!< call | 67 | //!< call |
| 70 | 68 | ||
| 71 | u8 kind{}; //!< Used for memory compression | 69 | u8 kind{}; //!< Used for memory compression |
| 72 | bool allocated{}; //!< If the handle has been allocated with `Alloc` | 70 | bool allocated{}; //!< If the handle has been allocated with `Alloc` |
| 73 | 71 | ||
| 74 | u64 dma_map_addr{}; //! remove me after implementing pinning. | 72 | DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to, |
| 73 | //!< this can also be in the nvdrv tmem | ||
| 75 | 74 | ||
| 76 | Handle(u64 size, Id id); | 75 | Handle(u64 size, Id id); |
| 77 | 76 | ||
| @@ -119,15 +118,7 @@ public: | |||
| 119 | 118 | ||
| 120 | std::shared_ptr<Handle> GetHandle(Handle::Id handle); | 119 | std::shared_ptr<Handle> GetHandle(Handle::Id handle); |
| 121 | 120 | ||
| 122 | VAddr GetHandleAddress(Handle::Id handle); | 121 | DAddr GetHandleAddress(Handle::Id handle); |
| 123 | |||
| 124 | /** | ||
| 125 | * @brief Maps a handle into the SMMU address space | ||
| 126 | * @note This operation is refcounted, the number of calls to this must eventually match the | ||
| 127 | * number of calls to `UnpinHandle` | ||
| 128 | * @return The SMMU virtual address that the handle has been mapped to | ||
| 129 | */ | ||
| 130 | u32 PinHandle(Handle::Id handle, size_t session_id); | ||
| 131 | 122 | ||
| 132 | /** | 123 | /** |
| 133 | * @brief Maps a handle into the SMMU address space | 124 | * @brief Maps a handle into the SMMU address space |
| @@ -135,7 +126,7 @@ public: | |||
| 135 | * number of calls to `UnpinHandle` | 126 | * number of calls to `UnpinHandle` |
| 136 | * @return The SMMU virtual address that the handle has been mapped to | 127 | * @return The SMMU virtual address that the handle has been mapped to |
| 137 | */ | 128 | */ |
| 138 | NvResult AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id); | 129 | DAddr PinHandle(Handle::Id handle, size_t session_id, bool low_area_pin); |
| 139 | 130 | ||
| 140 | /** | 131 | /** |
| 141 | * @brief When this has been called an equal number of times to `PinHandle` for the supplied | 132 | * @brief When this has been called an equal number of times to `PinHandle` for the supplied |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 0ff41c6b2..f1404b9da 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -42,7 +42,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
| 42 | u32 height, u32 stride, android::BufferTransformFlags transform, | 42 | u32 height, u32 stride, android::BufferTransformFlags transform, |
| 43 | const Common::Rectangle<int>& crop_rect, | 43 | const Common::Rectangle<int>& crop_rect, |
| 44 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | 44 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { |
| 45 | const VAddr addr = nvmap.GetHandleAddress(buffer_handle); | 45 | const DAddr addr = nvmap.GetHandleAddress(buffer_handle); |
| 46 | LOG_TRACE(Service, | 46 | LOG_TRACE(Service, |
| 47 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | 47 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", |
| 48 | addr, offset, width, height, stride, format); | 48 | addr, offset, width, height, stride, format); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index c92a7b2f6..8bc10eac2 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -40,15 +40,15 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i | |||
| 40 | case 0x3: | 40 | case 0x3: |
| 41 | return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); | 41 | return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); |
| 42 | case 0x5: | 42 | case 0x5: |
| 43 | return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output); | 43 | return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output, fd); |
| 44 | case 0x6: | 44 | case 0x6: |
| 45 | return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output); | 45 | return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output, fd); |
| 46 | case 0x8: | 46 | case 0x8: |
| 47 | return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); | 47 | return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); |
| 48 | case 0x9: | 48 | case 0x9: |
| 49 | return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); | 49 | return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); |
| 50 | case 0x14: | 50 | case 0x14: |
| 51 | return WrapVariable(this, &nvhost_as_gpu::Remap, input, output); | 51 | return WrapVariable(this, &nvhost_as_gpu::Remap, input, output, fd); |
| 52 | default: | 52 | default: |
| 53 | break; | 53 | break; |
| 54 | } | 54 | } |
| @@ -86,8 +86,15 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i | |||
| 86 | return NvResult::NotImplemented; | 86 | return NvResult::NotImplemented; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {} | 89 | void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) { |
| 90 | void nvhost_as_gpu::OnClose(DeviceFD fd) {} | 90 | sessions[fd] = session_id; |
| 91 | } | ||
| 92 | void nvhost_as_gpu::OnClose(DeviceFD fd) { | ||
| 93 | auto it = sessions.find(fd); | ||
| 94 | if (it != sessions.end()) { | ||
| 95 | sessions.erase(it); | ||
| 96 | } | ||
| 97 | } | ||
| 91 | 98 | ||
| 92 | NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { | 99 | NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { |
| 93 | LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); | 100 | LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); |
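The address-space device now remembers which memory session each open file descriptor belongs to, so later ioctls (Remap, MapBufferEx, UnmapBuffer) can pin nvmap handles against the correct guest process. A minimal standalone sketch of that open/close bookkeeping, with hypothetical names rather than yuzu's API:

    #include <cstddef>
    #include <optional>
    #include <unordered_map>

    using DeviceFD = int;

    // Sketch of the per-fd session registry nvhost_as_gpu now keeps: OnOpen
    // records the session id, OnClose forgets it, handlers look it up by fd.
    class SessionRegistry {
    public:
        void OnOpen(std::size_t session_id, DeviceFD fd) { sessions[fd] = session_id; }
        void OnClose(DeviceFD fd) { sessions.erase(fd); }

        std::optional<std::size_t> Find(DeviceFD fd) const {
            const auto it = sessions.find(fd);
            if (it == sessions.end()) {
                return std::nullopt;
            }
            return it->second;
        }

    private:
        std::unordered_map<DeviceFD, std::size_t> sessions;
    };

    int main() {
        SessionRegistry registry;
        registry.OnOpen(/*session_id=*/3, /*fd=*/7);
        const bool found = registry.Find(7).has_value(); // true while the fd is open
        registry.OnClose(7);
        return (found && !registry.Find(7).has_value()) ? 0 : 1;
    }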
| @@ -206,6 +213,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { | |||
| 206 | static_cast<u32>(aligned_size >> page_size_bits)); | 213 | static_cast<u32>(aligned_size >> page_size_bits)); |
| 207 | } | 214 | } |
| 208 | 215 | ||
| 216 | nvmap.UnpinHandle(mapping->handle); | ||
| 217 | |||
| 209 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state | 218 | // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state |
| 210 | // Only FreeSpace can unmap them fully | 219 | // Only FreeSpace can unmap them fully |
| 211 | if (mapping->sparse_alloc) { | 220 | if (mapping->sparse_alloc) { |
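FreeMappingLocked now releases the pin that was taken when the handle was mapped. Pinning in the nvmap core appears to be reference counted: the first pin backs the handle with device address space, later pins only bump the count, and the space is reclaimed once unpins balance the pins. A standalone toy version of that counting follows (PinTable and all values are illustrative, not the real NvCore::NvMap interface):

    #include <cstdint>
    #include <unordered_map>

    using DAddr = std::uint64_t;
    using HandleId = std::uint32_t;

    // Toy pin table: PinHandle returns a stable device address while the pin
    // count is non-zero; UnpinHandle frees the backing once it drops to zero.
    class PinTable {
    public:
        DAddr PinHandle(HandleId id) {
            auto& entry = pins[id];
            if (entry.count++ == 0) {
                entry.address = next_free; // pretend allocation in device space
                next_free += 0x10000;      // one 64 KiB "big page" per handle here
            }
            return entry.address;
        }

        void UnpinHandle(HandleId id) {
            auto it = pins.find(id);
            if (it != pins.end() && --it->second.count == 0) {
                pins.erase(it);            // device address space may be reused
            }
        }

    private:
        struct Entry {
            DAddr address{};
            std::uint32_t count{};
        };
        std::unordered_map<HandleId, Entry> pins;
        DAddr next_free{0x1000'0000};
    };

    int main() {
        PinTable table;
        const DAddr first = table.PinHandle(42);
        const DAddr second = table.PinHandle(42); // same address while still pinned
        table.UnpinHandle(42);
        table.UnpinHandle(42);                    // now fully released
        return first == second ? 0 : 1;
    }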
| @@ -259,7 +268,7 @@ NvResult nvhost_as_gpu::FreeSpace(IoctlFreeSpace& params) { | |||
| 259 | return NvResult::Success; | 268 | return NvResult::Success; |
| 260 | } | 269 | } |
| 261 | 270 | ||
| 262 | NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | 271 | NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) { |
| 263 | LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); | 272 | LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); |
| 264 | 273 | ||
| 265 | if (!vm.initialised) { | 274 | if (!vm.initialised) { |
| @@ -293,19 +302,19 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | |||
| 293 | return NvResult::BadValue; | 302 | return NvResult::BadValue; |
| 294 | } | 303 | } |
| 295 | 304 | ||
| 296 | VAddr cpu_address{static_cast<VAddr>( | 305 | DAddr base = nvmap.PinHandle(entry.handle, sessions[fd], false); |
| 297 | handle->address + | 306 | DAddr device_address{static_cast<DAddr>( |
| 298 | (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; | 307 | base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; |
| 299 | 308 | ||
| 300 | gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), | 309 | gmmu->Map(virtual_address, device_address, size, |
| 301 | use_big_pages); | 310 | static_cast<Tegra::PTEKind>(entry.kind), use_big_pages); |
| 302 | } | 311 | } |
| 303 | } | 312 | } |
| 304 | 313 | ||
| 305 | return NvResult::Success; | 314 | return NvResult::Success; |
| 306 | } | 315 | } |
| 307 | 316 | ||
| 308 | NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | 317 | NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) { |
| 309 | LOG_DEBUG(Service_NVDRV, | 318 | LOG_DEBUG(Service_NVDRV, |
| 310 | "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" | 319 | "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" |
| 311 | ", offset={}", | 320 | ", offset={}", |
| @@ -331,9 +340,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 331 | } | 340 | } |
| 332 | 341 | ||
| 333 | u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; | 342 | u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; |
| 334 | VAddr cpu_address{mapping->ptr + params.buffer_offset}; | 343 | VAddr device_address{mapping->ptr + params.buffer_offset}; |
| 335 | 344 | ||
| 336 | gmmu->Map(gpu_address, cpu_address, params.mapping_size, | 345 | gmmu->Map(gpu_address, device_address, params.mapping_size, |
| 337 | static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); | 346 | static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); |
| 338 | 347 | ||
| 339 | return NvResult::Success; | 348 | return NvResult::Success; |
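After this change, gmmu->Map associates GPU virtual addresses with device addresses obtained from PinHandle instead of guest CPU addresses, so a GPU access resolves in two steps: GPU VA to DAddr through the channel's MMU, then DAddr to host memory through the device memory manager. A minimal standalone illustration of the first stage, with a hypothetical flat page-table layout rather than yuzu's multi-level one:

    #include <cstdint>
    #include <optional>
    #include <unordered_map>

    using GPUVAddr = std::uint64_t;
    using DAddr = std::uint64_t;

    constexpr std::uint64_t PAGE_SIZE = 0x1000;

    // Stage 1: the per-channel GPU MMU, mapping GPU pages to device pages.
    class GpuMmu {
    public:
        void Map(GPUVAddr gpu_addr, DAddr device_addr, std::uint64_t size) {
            for (std::uint64_t off = 0; off < size; off += PAGE_SIZE) {
                table[(gpu_addr + off) / PAGE_SIZE] = (device_addr + off) / PAGE_SIZE;
            }
        }
        std::optional<DAddr> GpuToDevice(GPUVAddr gpu_addr) const {
            const auto it = table.find(gpu_addr / PAGE_SIZE);
            if (it == table.end()) {
                return std::nullopt;
            }
            return it->second * PAGE_SIZE + gpu_addr % PAGE_SIZE;
        }

    private:
        std::unordered_map<std::uint64_t, std::uint64_t> table;
    };

    int main() {
        GpuMmu gmmu;
        // Same shape as nvhost_as_gpu: the handle is pinned to a DAddr first,
        // then the GPU VA range is pointed at that device range.
        const DAddr pinned = 0x2000'0000;
        gmmu.Map(/*gpu_addr=*/0x1'0000'0000, pinned, /*size=*/2 * PAGE_SIZE);
        const auto resolved = gmmu.GpuToDevice(0x1'0000'0123);
        return (resolved && *resolved == pinned + 0x123) ? 0 : 1;
    }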
| @@ -349,7 +358,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 349 | return NvResult::BadValue; | 358 | return NvResult::BadValue; |
| 350 | } | 359 | } |
| 351 | 360 | ||
| 352 | VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; | 361 | DAddr device_address{static_cast<DAddr>(nvmap.PinHandle(params.handle, sessions[fd], false) + |
| 362 | params.buffer_offset)}; | ||
| 353 | u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; | 363 | u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; |
| 354 | 364 | ||
| 355 | bool big_page{[&]() { | 365 | bool big_page{[&]() { |
| @@ -373,15 +383,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 373 | } | 383 | } |
| 374 | 384 | ||
| 375 | const bool use_big_pages = alloc->second.big_pages && big_page; | 385 | const bool use_big_pages = alloc->second.big_pages && big_page; |
| 376 | gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), | 386 | gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind), |
| 377 | use_big_pages); | 387 | use_big_pages); |
| 378 | 388 | ||
| 379 | auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, | 389 | auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, |
| 380 | use_big_pages, alloc->second.sparse)}; | 390 | true, use_big_pages, alloc->second.sparse)}; |
| 381 | alloc->second.mappings.push_back(mapping); | 391 | alloc->second.mappings.push_back(mapping); |
| 382 | mapping_map[params.offset] = mapping; | 392 | mapping_map[params.offset] = mapping; |
| 383 | } else { | 393 | } else { |
| 384 | |||
| 385 | auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | 394 | auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; |
| 386 | u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; | 395 | u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; |
| 387 | u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | 396 | u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; |
| @@ -394,18 +403,18 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
| 394 | return NvResult::InsufficientMemory; | 403 | return NvResult::InsufficientMemory; |
| 395 | } | 404 | } |
| 396 | 405 | ||
| 397 | gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), | 406 | gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size), |
| 398 | static_cast<Tegra::PTEKind>(params.kind), big_page); | 407 | static_cast<Tegra::PTEKind>(params.kind), big_page); |
| 399 | 408 | ||
| 400 | auto mapping{ | 409 | auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, |
| 401 | std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; | 410 | false, big_page, false)}; |
| 402 | mapping_map[params.offset] = mapping; | 411 | mapping_map[params.offset] = mapping; |
| 403 | } | 412 | } |
| 404 | 413 | ||
| 405 | return NvResult::Success; | 414 | return NvResult::Success; |
| 406 | } | 415 | } |
| 407 | 416 | ||
| 408 | NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | 417 | NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) { |
| 409 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); | 418 | LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); |
| 410 | 419 | ||
| 411 | std::scoped_lock lock(mutex); | 420 | std::scoped_lock lock(mutex); |
| @@ -433,6 +442,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | |||
| 433 | gmmu->Unmap(params.offset, mapping->size); | 442 | gmmu->Unmap(params.offset, mapping->size); |
| 434 | } | 443 | } |
| 435 | 444 | ||
| 445 | nvmap.UnpinHandle(mapping->handle); | ||
| 446 | |||
| 436 | mapping_map.erase(params.offset); | 447 | mapping_map.erase(params.offset); |
| 437 | } catch (const std::out_of_range&) { | 448 | } catch (const std::out_of_range&) { |
| 438 | LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); | 449 | LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 0dd279f88..4b28f5078 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | |||
| @@ -141,9 +141,9 @@ private: | |||
| 141 | 141 | ||
| 142 | NvResult AllocAsEx(IoctlAllocAsEx& params); | 142 | NvResult AllocAsEx(IoctlAllocAsEx& params); |
| 143 | NvResult AllocateSpace(IoctlAllocSpace& params); | 143 | NvResult AllocateSpace(IoctlAllocSpace& params); |
| 144 | NvResult Remap(std::span<IoctlRemapEntry> params); | 144 | NvResult Remap(std::span<IoctlRemapEntry> params, DeviceFD fd); |
| 145 | NvResult MapBufferEx(IoctlMapBufferEx& params); | 145 | NvResult MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd); |
| 146 | NvResult UnmapBuffer(IoctlUnmapBuffer& params); | 146 | NvResult UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd); |
| 147 | NvResult FreeSpace(IoctlFreeSpace& params); | 147 | NvResult FreeSpace(IoctlFreeSpace& params); |
| 148 | NvResult BindChannel(IoctlBindChannel& params); | 148 | NvResult BindChannel(IoctlBindChannel& params); |
| 149 | 149 | ||
| @@ -159,16 +159,18 @@ private: | |||
| 159 | NvCore::NvMap& nvmap; | 159 | NvCore::NvMap& nvmap; |
| 160 | 160 | ||
| 161 | struct Mapping { | 161 | struct Mapping { |
| 162 | VAddr ptr; | 162 | NvCore::NvMap::Handle::Id handle; |
| 163 | DAddr ptr; | ||
| 163 | u64 offset; | 164 | u64 offset; |
| 164 | u64 size; | 165 | u64 size; |
| 165 | bool fixed; | 166 | bool fixed; |
| 166 | bool big_page; // Only valid if fixed == false | 167 | bool big_page; // Only valid if fixed == false |
| 167 | bool sparse_alloc; | 168 | bool sparse_alloc; |
| 168 | 169 | ||
| 169 | Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) | 170 | Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_, |
| 170 | : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), | 171 | bool big_page_, bool sparse_alloc_) |
| 171 | sparse_alloc(sparse_alloc_) {} | 172 | : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), |
| 173 | big_page(big_page_), sparse_alloc(sparse_alloc_) {} | ||
| 172 | }; | 174 | }; |
| 173 | 175 | ||
| 174 | struct Allocation { | 176 | struct Allocation { |
| @@ -212,9 +214,7 @@ private: | |||
| 212 | bool initialised{}; | 214 | bool initialised{}; |
| 213 | } vm; | 215 | } vm; |
| 214 | std::shared_ptr<Tegra::MemoryManager> gmmu; | 216 | std::shared_ptr<Tegra::MemoryManager> gmmu; |
| 215 | 217 | std::unordered_map<DeviceFD, size_t> sessions; | |
| 216 | // s32 channel{}; | ||
| 217 | // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; | ||
| 218 | }; | 218 | }; |
| 219 | 219 | ||
| 220 | } // namespace Service::Nvidia::Devices | 220 | } // namespace Service::Nvidia::Devices |
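The Mapping record now stores the nvmap handle id alongside the pinned device address, so UnmapBuffer and FreeMappingLocked can later pass the right handle to UnpinHandle. An RAII guard is a common alternative for keeping pin/unpin paired; here the mappings are long-lived and kept in mapping_map, so the pairing presumably stays manual. A standalone sketch of the guard idea, where NvMapLike and all names are stand-ins rather than yuzu's interface:

    #include <cstdint>
    #include <utility>

    using DAddr = std::uint64_t;
    using HandleId = std::uint32_t;

    // Stand-in for the nvmap pin interface (illustrative only).
    struct NvMapLike {
        DAddr PinHandle(HandleId) { return 0x1000; }
        void UnpinHandle(HandleId) {}
    };

    // RAII alternative: the pin is released automatically when the guard goes
    // out of scope, instead of storing the handle id and unpinning by hand.
    class ScopedPin {
    public:
        ScopedPin(NvMapLike& nvmap_, HandleId handle_)
            : nvmap{&nvmap_}, handle{handle_}, address{nvmap_.PinHandle(handle_)} {}
        ~ScopedPin() {
            if (nvmap) {
                nvmap->UnpinHandle(handle);
            }
        }
        ScopedPin(ScopedPin&& other) noexcept
            : nvmap{std::exchange(other.nvmap, nullptr)}, handle{other.handle},
              address{other.address} {}
        ScopedPin(const ScopedPin&) = delete;
        ScopedPin& operator=(const ScopedPin&) = delete;
        ScopedPin& operator=(ScopedPin&&) = delete;

        DAddr Address() const { return address; }

    private:
        NvMapLike* nvmap;
        HandleId handle;
        DAddr address;
    };

    int main() {
        NvMapLike nvmap;
        ScopedPin pin{nvmap, 42};
        return pin.Address() == 0x1000 ? 0 : 1;
    }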
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 9ab0ae4d8..78bc5f3c4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -95,6 +95,9 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
| 95 | offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); | 95 | offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); |
| 96 | 96 | ||
| 97 | auto& gpu = system.GPU(); | 97 | auto& gpu = system.GPU(); |
| 98 | //auto& device_memory = system.Host1x().MemoryManager(); | ||
| 99 | auto* session = core.GetSession(sessions[fd]); | ||
| 100 | |||
| 98 | if (gpu.UseNvdec()) { | 101 | if (gpu.UseNvdec()) { |
| 99 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | 102 | for (std::size_t i = 0; i < syncpt_increments.size(); i++) { |
| 100 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; | 103 | const SyncptIncr& syncpt_incr = syncpt_increments[i]; |
| @@ -106,7 +109,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
| 106 | const auto object = nvmap.GetHandle(cmd_buffer.memory_id); | 109 | const auto object = nvmap.GetHandle(cmd_buffer.memory_id); |
| 107 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | 110 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |
| 108 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | 111 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); |
| 109 | system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | 112 | session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), |
| 110 | cmdlist.size() * sizeof(u32)); | 113 | cmdlist.size() * sizeof(u32)); |
| 111 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); | 114 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); |
| 112 | } | 115 | } |
| @@ -136,7 +139,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { | |||
| 136 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { | 139 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { |
| 137 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); | 140 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); |
| 138 | for (size_t i = 0; i < num_entries; i++) { | 141 | for (size_t i = 0; i < num_entries; i++) { |
| 139 | entries[i].map_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd]); | 142 | DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); |
| 143 | entries[i].map_address = static_cast<u32>(pin_address); | ||
| 140 | } | 144 | } |
| 141 | 145 | ||
| 142 | return NvResult::Success; | 146 | return NvResult::Success; |
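Two details in this file: command lists are now read through the memory of the process that opened the session (session->process->GetMemory()) instead of a single application memory, and the pinned device address is truncated to 32 bits because the entry's map_address field handed back to the guest driver is a u32. The extra boolean passed to PinHandle (true here, false in nvhost_as_gpu) likely selects where in device space the pin lands, though that is outside this diff. The truncation is only safe if such pins stay inside a 4 GiB window; a small defensive sketch of that assumption (illustrative, not yuzu code):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    using DAddr = std::uint64_t;

    // The ioctl entry exposes only a 32-bit address, so a pinned DAddr must fit
    // in 32 bits. Asserting the invariant makes the assumption explicit instead
    // of silently wrapping.
    std::uint32_t ToEngineAddress(DAddr pinned) {
        assert(pinned <= std::numeric_limits<std::uint32_t>::max() &&
               "pinned multimedia buffers are expected to live below 4 GiB");
        return static_cast<std::uint32_t>(pinned);
    }

    int main() {
        return ToEngineAddress(0x1234'5678) == 0x1234'5678u ? 0 : 1;
    }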
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 2b107f009..7765ca1be 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp | |||
| @@ -123,8 +123,8 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) { | |||
| 123 | return NvResult::InsufficientMemory; | 123 | return NvResult::InsufficientMemory; |
| 124 | } | 124 | } |
| 125 | 125 | ||
| 126 | const auto result = file.AllocateHandle(params.handle, params.flags, params.align, params.kind, | 126 | const auto result = |
| 127 | params.address, sessions[fd]); | 127 | handle_description->Alloc(params.flags, params.align, params.kind, params.address); |
| 128 | if (result != NvResult::Success) { | 128 | if (result != NvResult::Success) { |
| 129 | LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); | 129 | LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); |
| 130 | return result; | 130 | return result; |
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index 492ad849a..6e4825313 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp | |||
| @@ -13,8 +13,6 @@ | |||
| 13 | #include "core/hle/service/nvdrv/nvdrv.h" | 13 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 14 | #include "core/hle/service/nvdrv/nvdrv_interface.h" | 14 | #include "core/hle/service/nvdrv/nvdrv_interface.h" |
| 15 | 15 | ||
| 16 | #pragma optimize("", off) | ||
| 17 | |||
| 18 | namespace Service::Nvidia { | 16 | namespace Service::Nvidia { |
| 19 | 17 | ||
| 20 | void NVDRV::Open(HLERequestContext& ctx) { | 18 | void NVDRV::Open(HLERequestContext& ctx) { |
| @@ -173,8 +171,8 @@ void NVDRV::Initialize(HLERequestContext& ctx) { | |||
| 173 | [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); | 171 | [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); |
| 174 | 172 | ||
| 175 | auto& container = nvdrv->GetContainer(); | 173 | auto& container = nvdrv->GetContainer(); |
| 176 | auto process = ctx.GetObjectFromHandle(process_handle); | 174 | auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle); |
| 177 | session_id = container.OpenSession(process->DynamicCast<Kernel::KProcess*>()); | 175 | session_id = container.OpenSession(process.GetPointerUnsafe()); |
| 178 | 176 | ||
| 179 | is_initialized = true; | 177 | is_initialized = true; |
| 180 | } | 178 | } |
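The Initialize path now uses the templated GetObjectFromHandle<Kernel::KProcess> accessor plus GetPointerUnsafe() instead of fetching an untyped object and DynamicCast-ing it at the call site. A standalone toy of that kind of typed handle-table lookup, with hypothetical types rather than the kernel's real handle table:

    #include <cstdint>
    #include <memory>
    #include <unordered_map>

    using Handle = std::uint32_t;

    struct KAutoObject {
        virtual ~KAutoObject() = default;
    };
    struct KProcess : KAutoObject {
        int id{7};
    };

    // Toy handle table: the templated accessor does the downcast in one place,
    // so callers get a typed pointer (or null) instead of casting everywhere.
    class HandleTable {
    public:
        Handle Add(std::shared_ptr<KAutoObject> obj) {
            const Handle h = next++;
            objects[h] = std::move(obj);
            return h;
        }

        template <typename T>
        std::shared_ptr<T> GetObjectFromHandle(Handle h) const {
            const auto it = objects.find(h);
            if (it == objects.end()) {
                return nullptr;
            }
            return std::dynamic_pointer_cast<T>(it->second);
        }

    private:
        std::unordered_map<Handle, std::shared_ptr<KAutoObject>> objects;
        Handle next{1};
    };

    int main() {
        HandleTable table;
        const Handle h = table.Add(std::make_shared<KProcess>());
        const auto process = table.GetObjectFromHandle<KProcess>(h);
        return (process && process->id == 7) ? 0 : 1;
    }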
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8176a41be..609e775ae 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -24,6 +24,8 @@ | |||
| 24 | #include "core/hle/kernel/k_process.h" | 24 | #include "core/hle/kernel/k_process.h" |
| 25 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 26 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 27 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 28 | #include "video_core/host1x/host1x.h" | ||
| 27 | #include "video_core/rasterizer_download_area.h" | 29 | #include "video_core/rasterizer_download_area.h" |
| 28 | 30 | ||
| 29 | namespace Core::Memory { | 31 | namespace Core::Memory { |
| @@ -638,15 +640,16 @@ struct Memory::Impl { | |||
| 638 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | 640 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); |
| 639 | 641 | ||
| 640 | // During boot, current_page_table might not be set yet, in which case we need not flush | 642 | // During boot, current_page_table might not be set yet, in which case we need not flush |
| 641 | if (system.IsPoweredOn()) { | 643 | /*if (system.IsPoweredOn()) { |
| 642 | auto& gpu = system.GPU(); | 644 | auto& gpu = system.GPU(); |
| 643 | for (u64 i = 0; i < size; i++) { | 645 | for (u64 i = 0; i < size; i++) { |
| 644 | const auto page = base + i; | 646 | const auto page = base + i; |
| 645 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | 647 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { |
| 648 | |||
| 646 | gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | 649 | gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); |
| 647 | } | 650 | } |
| 648 | } | 651 | } |
| 649 | } | 652 | }*/ |
| 650 | 653 | ||
| 651 | const auto end = base + size; | 654 | const auto end = base + size; |
| 652 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | 655 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| @@ -811,10 +814,15 @@ struct Memory::Impl { | |||
| 811 | return true; | 814 | return true; |
| 812 | } | 815 | } |
| 813 | 816 | ||
| 814 | void HandleRasterizerDownload(VAddr address, size_t size) { | 817 | void HandleRasterizerDownload(VAddr v_address, size_t size) { |
| 818 | const auto* p = GetPointerImpl( | ||
| 819 | v_address, []() {}, []() {}); | ||
| 820 | auto& gpu_device_memory = system.Host1x().MemoryManager(); | ||
| 821 | DAddr address = | ||
| 822 | gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p)); | ||
| 815 | const size_t core = system.GetCurrentHostThreadID(); | 823 | const size_t core = system.GetCurrentHostThreadID(); |
| 816 | auto& current_area = rasterizer_read_areas[core]; | 824 | auto& current_area = rasterizer_read_areas[core]; |
| 817 | const VAddr end_address = address + size; | 825 | const DAddr end_address = address + size; |
| 818 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 826 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 819 | [[likely]] { | 827 | [[likely]] { |
| 820 | return; | 828 | return; |
| @@ -822,7 +830,10 @@ struct Memory::Impl { | |||
| 822 | current_area = system.GPU().OnCPURead(address, size); | 830 | current_area = system.GPU().OnCPURead(address, size); |
| 823 | } | 831 | } |
| 824 | 832 | ||
| 825 | void HandleRasterizerWrite(VAddr address, size_t size) { | 833 | void HandleRasterizerWrite(VAddr v_address, size_t size) { |
| 834 | const auto* p = GetPointerImpl( | ||
| 835 | v_address, []() {}, []() {}); | ||
| 836 | PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p); | ||
| 826 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; | 837 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 827 | const size_t core = std::min(system.GetCurrentHostThreadID(), | 838 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 828 | sys_core); // calls from any other thread go to the syscore. | 839 | sys_core); // calls from any other thread go to the syscore. |
| @@ -836,7 +847,7 @@ struct Memory::Impl { | |||
| 836 | } | 847 | } |
| 837 | }); | 848 | }); |
| 838 | auto& current_area = rasterizer_write_areas[core]; | 849 | auto& current_area = rasterizer_write_areas[core]; |
| 839 | VAddr subaddress = address >> YUZU_PAGEBITS; | 850 | PAddr subaddress = address >> YUZU_PAGEBITS; |
| 840 | bool do_collection = current_area.last_address == subaddress; | 851 | bool do_collection = current_area.last_address == subaddress; |
| 841 | if (!do_collection) [[unlikely]] { | 852 | if (!do_collection) [[unlikely]] { |
| 842 | do_collection = system.GPU().OnCPUWrite(address, size); | 853 | do_collection = system.GPU().OnCPUWrite(address, size); |
| @@ -849,7 +860,7 @@ struct Memory::Impl { | |||
| 849 | } | 860 | } |
| 850 | 861 | ||
| 851 | struct GPUDirtyState { | 862 | struct GPUDirtyState { |
| 852 | VAddr last_address; | 863 | PAddr last_address; |
| 853 | }; | 864 | }; |
| 854 | 865 | ||
| 855 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | 866 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { |
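HandleRasterizerDownload and HandleRasterizerWrite now translate the faulting guest virtual address before talking to the GPU caches: the host pointer behind the VAddr is looked up, its offset inside the emulated DRAM gives the PAddr, and for downloads the device memory manager maps that PAddr to the DAddr the caches are keyed on. A standalone sketch of the pointer-to-physical step, under the simplifying assumption that emulated DRAM is one contiguous host allocation (FakeDeviceMemory and the base are illustrative, not yuzu's DeviceMemory):

    #include <cstdint>
    #include <vector>

    using PAddr = std::uint64_t;

    // A physical address is the byte offset of a host pointer from the start of
    // the backing allocation, plus the DRAM base. This mirrors the
    // GetRawPhysicalAddr step under that assumption.
    class FakeDeviceMemory {
    public:
        explicit FakeDeviceMemory(std::size_t size) : backing(size) {}

        std::uint8_t* GetPointer(PAddr addr) { return backing.data() + (addr - dram_base); }

        PAddr GetRawPhysicalAddr(const std::uint8_t* ptr) const {
            return dram_base + static_cast<PAddr>(ptr - backing.data());
        }

    private:
        static constexpr PAddr dram_base = 0x8000'0000; // arbitrary illustrative base
        std::vector<std::uint8_t> backing;
    };

    int main() {
        FakeDeviceMemory memory{0x10000};
        const std::uint8_t* p = memory.GetPointer(0x8000'1234);
        return memory.GetRawPhysicalAddr(p) == 0x8000'1234 ? 0 : 1;
    }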
diff --git a/src/core/memory.h b/src/core/memory.h index dddfaf4a4..47ca6a35a 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -498,209 +498,4 @@ private: | |||
| 498 | std::unique_ptr<Impl> impl; | 498 | std::unique_ptr<Impl> impl; |
| 499 | }; | 499 | }; |
| 500 | 500 | ||
| 501 | enum GuestMemoryFlags : u32 { | ||
| 502 | Read = 1 << 0, | ||
| 503 | Write = 1 << 1, | ||
| 504 | Safe = 1 << 2, | ||
| 505 | Cached = 1 << 3, | ||
| 506 | |||
| 507 | SafeRead = Read | Safe, | ||
| 508 | SafeWrite = Write | Safe, | ||
| 509 | SafeReadWrite = SafeRead | SafeWrite, | ||
| 510 | SafeReadCachedWrite = SafeReadWrite | Cached, | ||
| 511 | |||
| 512 | UnsafeRead = Read, | ||
| 513 | UnsafeWrite = Write, | ||
| 514 | UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||
| 515 | UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||
| 516 | }; | ||
| 517 | |||
| 518 | namespace { | ||
| 519 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 520 | class GuestMemory { | ||
| 521 | using iterator = T*; | ||
| 522 | using const_iterator = const T*; | ||
| 523 | using value_type = T; | ||
| 524 | using element_type = T; | ||
| 525 | using iterator_category = std::contiguous_iterator_tag; | ||
| 526 | |||
| 527 | public: | ||
| 528 | GuestMemory() = delete; | ||
| 529 | explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||
| 530 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 531 | : m_memory{memory}, m_addr{addr}, m_size{size} { | ||
| 532 | static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||
| 533 | if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||
| 534 | Read(addr, size, backup); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | ~GuestMemory() = default; | ||
| 539 | |||
| 540 | T* data() noexcept { | ||
| 541 | return m_data_span.data(); | ||
| 542 | } | ||
| 543 | |||
| 544 | const T* data() const noexcept { | ||
| 545 | return m_data_span.data(); | ||
| 546 | } | ||
| 547 | |||
| 548 | size_t size() const noexcept { | ||
| 549 | return m_size; | ||
| 550 | } | ||
| 551 | |||
| 552 | size_t size_bytes() const noexcept { | ||
| 553 | return this->size() * sizeof(T); | ||
| 554 | } | ||
| 555 | |||
| 556 | [[nodiscard]] T* begin() noexcept { | ||
| 557 | return this->data(); | ||
| 558 | } | ||
| 559 | |||
| 560 | [[nodiscard]] const T* begin() const noexcept { | ||
| 561 | return this->data(); | ||
| 562 | } | ||
| 563 | |||
| 564 | [[nodiscard]] T* end() noexcept { | ||
| 565 | return this->data() + this->size(); | ||
| 566 | } | ||
| 567 | |||
| 568 | [[nodiscard]] const T* end() const noexcept { | ||
| 569 | return this->data() + this->size(); | ||
| 570 | } | ||
| 571 | |||
| 572 | T& operator[](size_t index) noexcept { | ||
| 573 | return m_data_span[index]; | ||
| 574 | } | ||
| 575 | |||
| 576 | const T& operator[](size_t index) const noexcept { | ||
| 577 | return m_data_span[index]; | ||
| 578 | } | ||
| 579 | |||
| 580 | void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||
| 581 | m_addr = addr; | ||
| 582 | m_size = size; | ||
| 583 | m_addr_changed = true; | ||
| 584 | } | ||
| 585 | |||
| 586 | std::span<T> Read(u64 addr, std::size_t size, | ||
| 587 | Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||
| 588 | m_addr = addr; | ||
| 589 | m_size = size; | ||
| 590 | if (m_size == 0) { | ||
| 591 | m_is_data_copy = true; | ||
| 592 | return {}; | ||
| 593 | } | ||
| 594 | |||
| 595 | if (this->TrySetSpan()) { | ||
| 596 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 597 | m_memory.FlushRegion(m_addr, this->size_bytes()); | ||
| 598 | } | ||
| 599 | } else { | ||
| 600 | if (backup) { | ||
| 601 | backup->resize_destructive(this->size()); | ||
| 602 | m_data_span = *backup; | ||
| 603 | } else { | ||
| 604 | m_data_copy.resize(this->size()); | ||
| 605 | m_data_span = std::span(m_data_copy); | ||
| 606 | } | ||
| 607 | m_is_data_copy = true; | ||
| 608 | m_span_valid = true; | ||
| 609 | if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 610 | m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||
| 611 | } else { | ||
| 612 | m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||
| 613 | } | ||
| 614 | } | ||
| 615 | return m_data_span; | ||
| 616 | } | ||
| 617 | |||
| 618 | void Write(std::span<T> write_data) noexcept { | ||
| 619 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 620 | m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||
| 621 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 622 | m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||
| 623 | } else { | ||
| 624 | m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||
| 625 | } | ||
| 626 | } | ||
| 627 | |||
| 628 | bool TrySetSpan() noexcept { | ||
| 629 | if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||
| 630 | m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||
| 631 | m_span_valid = true; | ||
| 632 | return true; | ||
| 633 | } | ||
| 634 | return false; | ||
| 635 | } | ||
| 636 | |||
| 637 | protected: | ||
| 638 | bool IsDataCopy() const noexcept { | ||
| 639 | return m_is_data_copy; | ||
| 640 | } | ||
| 641 | |||
| 642 | bool AddressChanged() const noexcept { | ||
| 643 | return m_addr_changed; | ||
| 644 | } | ||
| 645 | |||
| 646 | M& m_memory; | ||
| 647 | u64 m_addr{}; | ||
| 648 | size_t m_size{}; | ||
| 649 | std::span<T> m_data_span{}; | ||
| 650 | std::vector<T> m_data_copy{}; | ||
| 651 | bool m_span_valid{false}; | ||
| 652 | bool m_is_data_copy{false}; | ||
| 653 | bool m_addr_changed{false}; | ||
| 654 | }; | ||
| 655 | |||
| 656 | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||
| 657 | class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||
| 658 | public: | ||
| 659 | GuestMemoryScoped() = delete; | ||
| 660 | explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||
| 661 | Common::ScratchBuffer<T>* backup = nullptr) | ||
| 662 | : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||
| 663 | if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||
| 664 | if (!this->TrySetSpan()) { | ||
| 665 | if (backup) { | ||
| 666 | this->m_data_span = *backup; | ||
| 667 | this->m_span_valid = true; | ||
| 668 | this->m_is_data_copy = true; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | } | ||
| 672 | } | ||
| 673 | |||
| 674 | ~GuestMemoryScoped() { | ||
| 675 | if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||
| 676 | if (this->size() == 0) [[unlikely]] { | ||
| 677 | return; | ||
| 678 | } | ||
| 679 | |||
| 680 | if (this->AddressChanged() || this->IsDataCopy()) { | ||
| 681 | ASSERT(this->m_span_valid); | ||
| 682 | if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||
| 683 | this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||
| 684 | } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||
| 685 | this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||
| 686 | } else { | ||
| 687 | this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||
| 688 | } | ||
| 689 | } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || | ||
| 690 | (FLAGS & GuestMemoryFlags::Cached)) { | ||
| 691 | this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||
| 692 | } | ||
| 693 | } | ||
| 694 | } | ||
| 695 | }; | ||
| 696 | } // namespace | ||
| 697 | |||
| 698 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 699 | using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; | ||
| 700 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 701 | using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; | ||
| 702 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 703 | using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||
| 704 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 705 | using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||
| 706 | } // namespace Core::Memory | 501 | } // namespace Core::Memory |
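The GuestMemory and GuestMemoryScoped helpers are not dropped by this commit; judging from the CMake and include changes earlier in the patch they move out of core/memory.h into core/guest_memory.h, with a device-memory flavour under video_core/guest_memory.h. Their core trick is: borrow a direct host span when the guest range is contiguous, otherwise stage through a copy and write it back on scope exit. A reduced standalone sketch of that pattern (C++20 for std::span; MemoryLike is a stand-in, and real flags/safety variants are omitted):

    #include <cstdint>
    #include <cstring>
    #include <span>
    #include <vector>

    // Minimal stand-in for a memory interface that may or may not be able to
    // hand out a direct host span for a guest range.
    struct MemoryLike {
        std::vector<std::uint8_t> storage = std::vector<std::uint8_t>(0x1000);
        bool contiguous = true;

        std::uint8_t* GetSpan(std::uint64_t addr, std::size_t) {
            return contiguous ? storage.data() + addr : nullptr;
        }
        void ReadBlock(std::uint64_t addr, void* dest, std::size_t size) {
            std::memcpy(dest, storage.data() + addr, size);
        }
        void WriteBlock(std::uint64_t addr, const void* src, std::size_t size) {
            std::memcpy(storage.data() + addr, src, size);
        }
    };

    // Sketch of the GuestMemoryScoped idea: use the direct span when possible,
    // otherwise stage through a copy and flush it back in the destructor.
    class ScopedGuestBytes {
    public:
        ScopedGuestBytes(MemoryLike& memory_, std::uint64_t addr_, std::size_t size_)
            : memory{memory_}, addr{addr_}, size{size_} {
            if (std::uint8_t* ptr = memory.GetSpan(addr, size)) {
                span = std::span<std::uint8_t>{ptr, size};
            } else {
                copy.resize(size);
                memory.ReadBlock(addr, copy.data(), size);
                span = copy;
            }
        }
        ~ScopedGuestBytes() {
            if (!copy.empty()) {
                memory.WriteBlock(addr, copy.data(), size); // flush staged writes
            }
        }
        std::span<std::uint8_t> data() { return span; }

    private:
        MemoryLike& memory;
        std::uint64_t addr;
        std::size_t size;
        std::span<std::uint8_t> span;
        std::vector<std::uint8_t> copy;
    };

    int main() {
        MemoryLike memory;
        {
            ScopedGuestBytes bytes{memory, 0x10, 4};
            bytes.data()[0] = 0xAB; // lands directly or via the staged copy
        }
        return memory.storage[0x10] == 0xAB ? 0 : 1;
    }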
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2dda8ebc2..5ed0ad0ed 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -95,6 +95,7 @@ add_library(video_core STATIC | |||
| 95 | gpu.h | 95 | gpu.h |
| 96 | gpu_thread.cpp | 96 | gpu_thread.cpp |
| 97 | gpu_thread.h | 97 | gpu_thread.h |
| 98 | guest_memory.h | ||
| 98 | invalidation_accumulator.h | 99 | invalidation_accumulator.h |
| 99 | memory_manager.cpp | 100 | memory_manager.cpp |
| 100 | memory_manager.h | 101 | memory_manager.h |
| @@ -107,8 +108,6 @@ add_library(video_core STATIC | |||
| 107 | query_cache/query_stream.h | 108 | query_cache/query_stream.h |
| 108 | query_cache/types.h | 109 | query_cache/types.h |
| 109 | query_cache.h | 110 | query_cache.h |
| 110 | rasterizer_accelerated.cpp | ||
| 111 | rasterizer_accelerated.h | ||
| 112 | rasterizer_interface.h | 111 | rasterizer_interface.h |
| 113 | renderer_base.cpp | 112 | renderer_base.cpp |
| 114 | renderer_base.h | 113 | renderer_base.h |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 0bb3bf8ae..40e98e395 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -33,13 +33,12 @@ struct NullBufferParams {}; | |||
| 33 | * | 33 | * |
| 34 | * The buffer size and address are forcibly aligned to CPU page boundaries. | 34 | * The buffer size and address are forcibly aligned to CPU page boundaries. |
| 35 | */ | 35 | */ |
| 36 | template <class RasterizerInterface> | ||
| 37 | class BufferBase { | 36 | class BufferBase { |
| 38 | public: | 37 | public: |
| 39 | static constexpr u64 BASE_PAGE_BITS = 16; | 38 | static constexpr u64 BASE_PAGE_BITS = 16; |
| 40 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; | 39 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |
| 41 | 40 | ||
| 42 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | 41 | explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_) |
| 43 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | 42 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} |
| 44 | 43 | ||
| 45 | explicit BufferBase(NullBufferParams) {} | 44 | explicit BufferBase(NullBufferParams) {} |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6d1fc3887..6fe2e8b93 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -8,16 +8,16 @@ | |||
| 8 | #include <numeric> | 8 | #include <numeric> |
| 9 | 9 | ||
| 10 | #include "video_core/buffer_cache/buffer_cache_base.h" | 10 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 11 | #include "video_core/guest_memory.h" | ||
| 12 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 11 | 13 | ||
| 12 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 13 | 15 | ||
| 14 | using Core::Memory::YUZU_PAGESIZE; | 16 | using Core::Memory::YUZU_PAGESIZE; |
| 15 | 17 | ||
| 16 | template <class P> | 18 | template <class P> |
| 17 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 19 | BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_) |
| 18 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) | 20 | : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} { |
| 19 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{ | ||
| 20 | rasterizer} { | ||
| 21 | // Ensure the first slot is used for the null buffer | 21 | // Ensure the first slot is used for the null buffer |
| 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 23 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| @@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 29 | return; | 29 | return; |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | 32 | const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 33 | const s64 min_spacing_expected = device_memory - 1_GiB; | 33 | const s64 min_spacing_expected = device_local_memory - 1_GiB; |
| 34 | const s64 min_spacing_critical = device_memory - 512_MiB; | 34 | const s64 min_spacing_critical = device_local_memory - 512_MiB; |
| 35 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | 35 | const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); |
| 36 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | 36 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 37 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | 37 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 38 | minimum_memory = static_cast<u64>( | 38 | minimum_memory = static_cast<u64>( |
| 39 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | 39 | std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), |
| 40 | DEFAULT_EXPECTED_MEMORY)); | 40 | DEFAULT_EXPECTED_MEMORY)); |
| 41 | critical_memory = static_cast<u64>( | 41 | critical_memory = static_cast<u64>( |
| 42 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | 42 | std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), |
| 43 | DEFAULT_CRITICAL_MEMORY)); | 43 | DEFAULT_CRITICAL_MEMORY)); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| @@ -105,71 +105,72 @@ void BufferCache<P>::TickFrame() { | |||
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | template <class P> | 107 | template <class P> |
| 108 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | 108 | void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) { |
| 109 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | 109 | if (memory_tracker.IsRegionGpuModified(device_addr, size)) { |
| 110 | const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; | 110 | const IntervalType subtract_interval{device_addr, device_addr + size}; |
| 111 | ClearDownload(subtract_interval); | 111 | ClearDownload(subtract_interval); |
| 112 | common_ranges.subtract(subtract_interval); | 112 | common_ranges.subtract(subtract_interval); |
| 113 | } | 113 | } |
| 114 | memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); | 114 | memory_tracker.MarkRegionAsCpuModified(device_addr, size); |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | template <class P> | 117 | template <class P> |
| 118 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 118 | void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) { |
| 119 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | 119 | const bool is_dirty = IsRegionRegistered(device_addr, size); |
| 120 | if (!is_dirty) { | 120 | if (!is_dirty) { |
| 121 | return; | 121 | return; |
| 122 | } | 122 | } |
| 123 | VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); | 123 | DAddr aligned_start = Common::AlignDown(device_addr, YUZU_PAGESIZE); |
| 124 | VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); | 124 | DAddr aligned_end = Common::AlignUp(device_addr + size, YUZU_PAGESIZE); |
| 125 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | 125 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { |
| 126 | WriteMemory(cpu_addr, size); | 126 | WriteMemory(device_addr, size); |
| 127 | return; | 127 | return; |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | tmp_buffer.resize_destructive(size); | 130 | tmp_buffer.resize_destructive(size); |
| 131 | cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); | 131 | device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size); |
| 132 | 132 | ||
| 133 | InlineMemoryImplementation(cpu_addr, size, tmp_buffer); | 133 | InlineMemoryImplementation(device_addr, size, tmp_buffer); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | template <class P> | 136 | template <class P> |
| 137 | bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | 137 | bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) { |
| 138 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | 138 | const bool is_dirty = IsRegionRegistered(device_addr, size); |
| 139 | if (!is_dirty) { | 139 | if (!is_dirty) { |
| 140 | return false; | 140 | return false; |
| 141 | } | 141 | } |
| 142 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | 142 | if (memory_tracker.IsRegionGpuModified(device_addr, size)) { |
| 143 | return true; | 143 | return true; |
| 144 | } | 144 | } |
| 145 | WriteMemory(cpu_addr, size); | 145 | WriteMemory(device_addr, size); |
| 146 | return false; | 146 | return false; |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | template <class P> | 149 | template <class P> |
| 150 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, | 150 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr, |
| 151 | u64 size) { | 151 | u64 size) { |
| 152 | std::optional<VideoCore::RasterizerDownloadArea> area{}; | 152 | std::optional<VideoCore::RasterizerDownloadArea> area{}; |
| 153 | area.emplace(); | 153 | area.emplace(); |
| 154 | VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE); | 154 | DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::Memory::YUZU_PAGESIZE); |
| 155 | VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 155 | DAddr device_addr_end_aligned = |
| 156 | area->start_address = cpu_addr_start_aligned; | 156 | Common::AlignUp(device_addr + size, Core::Memory::YUZU_PAGESIZE); |
| 157 | area->end_address = cpu_addr_end_aligned; | 157 | area->start_address = device_addr_start_aligned; |
| 158 | if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) { | 158 | area->end_address = device_addr_end_aligned; |
| 159 | if (memory_tracker.IsRegionPreflushable(device_addr, size)) { | ||
| 159 | area->preemtive = true; | 160 | area->preemtive = true; |
| 160 | return area; | 161 | return area; |
| 161 | }; | 162 | }; |
| 162 | area->preemtive = | 163 | area->preemtive = !IsRegionGpuModified(device_addr_start_aligned, |
| 163 | !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned); | 164 | device_addr_end_aligned - device_addr_start_aligned); |
| 164 | memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned, | 165 | memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned, |
| 165 | cpu_addr_end_aligned - cpu_addr_start_aligned); | 166 | device_addr_end_aligned - device_addr_start_aligned); |
| 166 | return area; | 167 | return area; |
| 167 | } | 168 | } |
| 168 | 169 | ||
| 169 | template <class P> | 170 | template <class P> |
| 170 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 171 | void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) { |
| 171 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 172 | ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) { |
| 172 | DownloadBufferMemory(buffer, cpu_addr, size); | 173 | DownloadBufferMemory(buffer, device_addr, size); |
| 173 | }); | 174 | }); |
| 174 | } | 175 | } |
| 175 | 176 | ||
| @@ -184,8 +185,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | |||
| 184 | 185 | ||
| 185 | template <class P> | 186 | template <class P> |
| 186 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | 187 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { |
| 187 | const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); | 188 | const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); |
| 188 | const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); | 189 | const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); |
| 189 | if (!cpu_src_address || !cpu_dest_address) { | 190 | if (!cpu_src_address || !cpu_dest_address) { |
| 190 | return false; | 191 | return false; |
| 191 | } | 192 | } |
| @@ -216,10 +217,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 216 | }}; | 217 | }}; |
| 217 | 218 | ||
| 218 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | 219 | boost::container::small_vector<IntervalType, 4> tmp_intervals; |
| 219 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | 220 | auto mirror = [&](DAddr base_address, DAddr base_address_end) { |
| 220 | const u64 size = base_address_end - base_address; | 221 | const u64 size = base_address_end - base_address; |
| 221 | const VAddr diff = base_address - *cpu_src_address; | 222 | const DAddr diff = base_address - *cpu_src_address; |
| 222 | const VAddr new_base_address = *cpu_dest_address + diff; | 223 | const DAddr new_base_address = *cpu_dest_address + diff; |
| 223 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 224 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 224 | tmp_intervals.push_back(add_interval); | 225 | tmp_intervals.push_back(add_interval); |
| 225 | uncommitted_ranges.add(add_interval); | 226 | uncommitted_ranges.add(add_interval); |
| @@ -239,15 +240,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 239 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 240 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 240 | } | 241 | } |
| 241 | 242 | ||
| 242 | Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( | 243 | Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( |
| 243 | cpu_memory, *cpu_src_address, amount, &tmp_buffer); | 244 | device_memory, *cpu_src_address, amount, &tmp_buffer); |
| 244 | tmp.SetAddressAndSize(*cpu_dest_address, amount); | 245 | tmp.SetAddressAndSize(*cpu_dest_address, amount); |
| 245 | return true; | 246 | return true; |
| 246 | } | 247 | } |
| 247 | 248 | ||
| 248 | template <class P> | 249 | template <class P> |
| 249 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | 250 | bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { |
| 250 | const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); | 251 | const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); |
| 251 | if (!cpu_dst_address) { | 252 | if (!cpu_dst_address) { |
| 252 | return false; | 253 | return false; |
| 253 | } | 254 | } |
| @@ -273,23 +274,23 @@ template <class P> | |||
| 273 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | 274 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
| 274 | ObtainBufferSynchronize sync_info, | 275 | ObtainBufferSynchronize sync_info, |
| 275 | ObtainBufferOperation post_op) { | 276 | ObtainBufferOperation post_op) { |
| 276 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 277 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 277 | if (!cpu_addr) { | 278 | if (!device_addr) { |
| 278 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | 279 | return {&slot_buffers[NULL_BUFFER_ID], 0}; |
| 279 | } | 280 | } |
| 280 | return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); | 281 | return ObtainCPUBuffer(*device_addr, size, sync_info, post_op); |
| 281 | } | 282 | } |
| 282 | 283 | ||
| 283 | template <class P> | 284 | template <class P> |
| 284 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | 285 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( |
| 285 | VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { | 286 | DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { |
| 286 | const BufferId buffer_id = FindBuffer(cpu_addr, size); | 287 | const BufferId buffer_id = FindBuffer(device_addr, size); |
| 287 | Buffer& buffer = slot_buffers[buffer_id]; | 288 | Buffer& buffer = slot_buffers[buffer_id]; |
| 288 | 289 | ||
| 289 | // synchronize op | 290 | // synchronize op |
| 290 | switch (sync_info) { | 291 | switch (sync_info) { |
| 291 | case ObtainBufferSynchronize::FullSynchronize: | 292 | case ObtainBufferSynchronize::FullSynchronize: |
| 292 | SynchronizeBuffer(buffer, cpu_addr, size); | 293 | SynchronizeBuffer(buffer, device_addr, size); |
| 293 | break; | 294 | break; |
| 294 | default: | 295 | default: |
| 295 | break; | 296 | break; |
| @@ -297,12 +298,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | |||
| 297 | 298 | ||
| 298 | switch (post_op) { | 299 | switch (post_op) { |
| 299 | case ObtainBufferOperation::MarkAsWritten: | 300 | case ObtainBufferOperation::MarkAsWritten: |
| 300 | MarkWrittenBuffer(buffer_id, cpu_addr, size); | 301 | MarkWrittenBuffer(buffer_id, device_addr, size); |
| 301 | break; | 302 | break; |
| 302 | case ObtainBufferOperation::DiscardWrite: { | 303 | case ObtainBufferOperation::DiscardWrite: { |
| 303 | VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); | 304 | DAddr device_addr_start = Common::AlignDown(device_addr, 64); |
| 304 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); | 305 | DAddr device_addr_end = Common::AlignUp(device_addr + size, 64); |
| 305 | IntervalType interval{cpu_addr_start, cpu_addr_end}; | 306 | IntervalType interval{device_addr_start, device_addr_end}; |
| 306 | ClearDownload(interval); | 307 | ClearDownload(interval); |
| 307 | common_ranges.subtract(interval); | 308 | common_ranges.subtract(interval); |
| 308 | break; | 309 | break; |
| @@ -311,15 +312,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | |||
| 311 | break; | 312 | break; |
| 312 | } | 313 | } |
| 313 | 314 | ||
| 314 | return {&buffer, buffer.Offset(cpu_addr)}; | 315 | return {&buffer, buffer.Offset(device_addr)}; |
| 315 | } | 316 | } |
| 316 | 317 | ||
| 317 | template <class P> | 318 | template <class P> |
| 318 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 319 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 319 | u32 size) { | 320 | u32 size) { |
| 320 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 321 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 321 | const Binding binding{ | 322 | const Binding binding{ |
| 322 | .cpu_addr = *cpu_addr, | 323 | .device_addr = *device_addr, |
| 323 | .size = size, | 324 | .size = size, |
| 324 | .buffer_id = BufferId{}, | 325 | .buffer_id = BufferId{}, |
| 325 | }; | 326 | }; |
| @@ -555,16 +556,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 555 | for (const IntervalSet& intervals : committed_ranges) { | 556 | for (const IntervalSet& intervals : committed_ranges) { |
| 556 | for (auto& interval : intervals) { | 557 | for (auto& interval : intervals) { |
| 557 | const std::size_t size = interval.upper() - interval.lower(); | 558 | const std::size_t size = interval.upper() - interval.lower(); |
| 558 | const VAddr cpu_addr = interval.lower(); | 559 | const DAddr device_addr = interval.lower(); |
| 559 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 560 | ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 560 | const VAddr buffer_start = buffer.CpuAddr(); | 561 | const DAddr buffer_start = buffer.CpuAddr(); |
| 561 | const VAddr buffer_end = buffer_start + buffer.SizeBytes(); | 562 | const DAddr buffer_end = buffer_start + buffer.SizeBytes(); |
| 562 | const VAddr new_start = std::max(buffer_start, cpu_addr); | 563 | const DAddr new_start = std::max(buffer_start, device_addr); |
| 563 | const VAddr new_end = std::min(buffer_end, cpu_addr + size); | 564 | const DAddr new_end = std::min(buffer_end, device_addr + size); |
| 564 | memory_tracker.ForEachDownloadRange( | 565 | memory_tracker.ForEachDownloadRange( |
| 565 | new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { | 566 | new_start, new_end - new_start, false, |
| 566 | const VAddr buffer_addr = buffer.CpuAddr(); | 567 | [&](u64 device_addr_out, u64 range_size) { |
| 567 | const auto add_download = [&](VAddr start, VAddr end) { | 568 | const DAddr buffer_addr = buffer.CpuAddr(); |
| 569 | const auto add_download = [&](DAddr start, DAddr end) { | ||
| 568 | const u64 new_offset = start - buffer_addr; | 570 | const u64 new_offset = start - buffer_addr; |
| 569 | const u64 new_size = end - start; | 571 | const u64 new_size = end - start; |
| 570 | downloads.push_back({ | 572 | downloads.push_back({ |
| @@ -582,7 +584,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 582 | largest_copy = std::max(largest_copy, new_size); | 584 | largest_copy = std::max(largest_copy, new_size); |
| 583 | }; | 585 | }; |
| 584 | 586 | ||
| 585 | ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); | 587 | ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download); |
| 586 | }); | 588 | }); |
| 587 | }); | 589 | }); |
| 588 | } | 590 | } |
| @@ -605,8 +607,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 605 | BufferCopy second_copy{copy}; | 607 | BufferCopy second_copy{copy}; |
| 606 | Buffer& buffer = slot_buffers[buffer_id]; | 608 | Buffer& buffer = slot_buffers[buffer_id]; |
| 607 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; | 609 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 608 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); | 610 | DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset); |
| 609 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; | 611 | const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size}; |
| 610 | async_downloads += std::make_pair(base_interval, 1); | 612 | async_downloads += std::make_pair(base_interval, 1); |
| 611 | buffer.MarkUsage(copy.src_offset, copy.size); | 613 | buffer.MarkUsage(copy.src_offset, copy.size); |
| 612 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); | 614 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| @@ -635,11 +637,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 635 | runtime.Finish(); | 637 | runtime.Finish(); |
| 636 | for (const auto& [copy, buffer_id] : downloads) { | 638 | for (const auto& [copy, buffer_id] : downloads) { |
| 637 | const Buffer& buffer = slot_buffers[buffer_id]; | 639 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 638 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 640 | const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; |
| 639 | // Undo the modified offset | 641 | // Undo the modified offset |
| 640 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 642 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 641 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 643 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; |
| 642 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 644 | device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size); |
| 643 | } | 645 | } |
| 644 | } else { | 646 | } else { |
| 645 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 647 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| @@ -647,8 +649,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 647 | Buffer& buffer = slot_buffers[buffer_id]; | 649 | Buffer& buffer = slot_buffers[buffer_id]; |
| 648 | buffer.ImmediateDownload(copy.src_offset, | 650 | buffer.ImmediateDownload(copy.src_offset, |
| 649 | immediate_buffer.subspan(0, copy.size)); | 651 | immediate_buffer.subspan(0, copy.size)); |
| 650 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 652 | const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; |
| 651 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 653 | device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); |
| 652 | } | 654 | } |
| 653 | } | 655 | } |
| 654 | } | 656 | } |
| @@ -681,19 +683,19 @@ void BufferCache<P>::PopAsyncBuffers() { | |||
| 681 | u8* base = async_buffer->mapped_span.data(); | 683 | u8* base = async_buffer->mapped_span.data(); |
| 682 | const size_t base_offset = async_buffer->offset; | 684 | const size_t base_offset = async_buffer->offset; |
| 683 | for (const auto& copy : downloads) { | 685 | for (const auto& copy : downloads) { |
| 684 | const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); | 686 | const DAddr device_addr = static_cast<DAddr>(copy.src_offset); |
| 685 | const u64 dst_offset = copy.dst_offset - base_offset; | 687 | const u64 dst_offset = copy.dst_offset - base_offset; |
| 686 | const u8* read_mapped_memory = base + dst_offset; | 688 | const u8* read_mapped_memory = base + dst_offset; |
| 687 | ForEachInOverlapCounter( | 689 | ForEachInOverlapCounter( |
| 688 | async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { | 690 | async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) { |
| 689 | cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], | 691 | device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr], |
| 690 | end - start); | 692 | end - start); |
| 691 | if (count == 1) { | 693 | if (count == 1) { |
| 692 | const IntervalType base_interval{start, end}; | 694 | const IntervalType base_interval{start, end}; |
| 693 | common_ranges.subtract(base_interval); | 695 | common_ranges.subtract(base_interval); |
| 694 | } | 696 | } |
| 695 | }); | 697 | }); |
| 696 | const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | 698 | const IntervalType subtract_interval{device_addr, device_addr + copy.size}; |
| 697 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | 699 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); |
| 698 | } | 700 | } |
| 699 | async_buffers_death_ring.emplace_back(*async_buffer); | 701 | async_buffers_death_ring.emplace_back(*async_buffer); |
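The async_downloads structure touched above behaves like a reference count over device address ranges: each queued download increments its range, each popped one decrements it, and ForEachInOverlapCounter visits whatever still overlaps. A toy sketch of that behaviour with boost::icl::interval_map, which the cache already uses for its interval types; the addresses are made up:

    #include <boost/icl/interval_map.hpp>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    using DAddr = std::uint64_t;
    using DownloadCounter = boost::icl::interval_map<DAddr, int>;
    using Interval = DownloadCounter::interval_type;

    int main() {
        DownloadCounter async_downloads;
        // Two pending downloads over overlapping ranges.
        async_downloads += std::make_pair(Interval::right_open(0x1000, 0x3000), 1);
        async_downloads += std::make_pair(Interval::right_open(0x2000, 0x4000), 1);
        // Completing the first download decrements its range; segments whose
        // count reaches zero vanish from the map automatically.
        async_downloads += std::make_pair(Interval::right_open(0x1000, 0x3000), -1);
        for (const auto& [interval, count] : async_downloads) {
            std::cout << std::hex << interval << " -> " << count << '\n';
        }
        return 0;
    }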
| @@ -703,15 +705,15 @@ void BufferCache<P>::PopAsyncBuffers() { | |||
| 703 | } | 705 | } |
| 704 | 706 | ||
| 705 | template <class P> | 707 | template <class P> |
| 706 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 708 | bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { |
| 707 | bool is_dirty = false; | 709 | bool is_dirty = false; |
| 708 | ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); | 710 | ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; }); |
| 709 | return is_dirty; | 711 | return is_dirty; |
| 710 | } | 712 | } |
| 711 | 713 | ||
| 712 | template <class P> | 714 | template <class P> |
| 713 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | 715 | bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) { |
| 714 | const VAddr end_addr = addr + size; | 716 | const DAddr end_addr = addr + size; |
| 715 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); | 717 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 716 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { | 718 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { |
| 717 | const BufferId buffer_id = page_table[page]; | 719 | const BufferId buffer_id = page_table[page]; |
| @@ -720,8 +722,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 720 | continue; | 722 | continue; |
| 721 | } | 723 | } |
| 722 | Buffer& buffer = slot_buffers[buffer_id]; | 724 | Buffer& buffer = slot_buffers[buffer_id]; |
| 723 | const VAddr buf_start_addr = buffer.CpuAddr(); | 725 | const DAddr buf_start_addr = buffer.CpuAddr(); |
| 724 | const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); | 726 | const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); |
| 725 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | 727 | if (buf_start_addr < end_addr && addr < buf_end_addr) { |
| 726 | return true; | 728 | return true; |
| 727 | } | 729 | } |
| @@ -731,7 +733,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 731 | } | 733 | } |
| 732 | 734 | ||
| 733 | template <class P> | 735 | template <class P> |
| 734 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 736 | bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) { |
| 735 | return memory_tracker.IsRegionCpuModified(addr, size); | 737 | return memory_tracker.IsRegionCpuModified(addr, size); |
| 736 | } | 738 | } |
| 737 | 739 | ||
| @@ -739,7 +741,7 @@ template <class P> | |||
| 739 | void BufferCache<P>::BindHostIndexBuffer() { | 741 | void BufferCache<P>::BindHostIndexBuffer() { |
| 740 | Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; | 742 | Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; |
| 741 | TouchBuffer(buffer, channel_state->index_buffer.buffer_id); | 743 | TouchBuffer(buffer, channel_state->index_buffer.buffer_id); |
| 742 | const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); | 744 | const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr); |
| 743 | const u32 size = channel_state->index_buffer.size; | 745 | const u32 size = channel_state->index_buffer.size; |
| 744 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 746 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 745 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { | 747 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| @@ -754,7 +756,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 754 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); | 756 | buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); |
| 755 | } | 757 | } |
| 756 | } else { | 758 | } else { |
| 757 | SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); | 759 | SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size); |
| 758 | } | 760 | } |
| 759 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | 761 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { |
| 760 | const u32 new_offset = | 762 | const u32 new_offset = |
| @@ -777,7 +779,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 777 | const Binding& binding = channel_state->vertex_buffers[index]; | 779 | const Binding& binding = channel_state->vertex_buffers[index]; |
| 778 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 780 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 779 | TouchBuffer(buffer, binding.buffer_id); | 781 | TouchBuffer(buffer, binding.buffer_id); |
| 780 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 782 | SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
| 781 | if (!flags[Dirty::VertexBuffer0 + index]) { | 783 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 782 | continue; | 784 | continue; |
| 783 | } | 785 | } |
| @@ -797,7 +799,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 797 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 799 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 798 | 800 | ||
| 799 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; | 801 | const u32 stride = maxwell3d->regs.vertex_streams[index].stride; |
| 800 | const u32 offset = buffer.Offset(binding.cpu_addr); | 802 | const u32 offset = buffer.Offset(binding.device_addr); |
| 801 | buffer.MarkUsage(offset, binding.size); | 803 | buffer.MarkUsage(offset, binding.size); |
| 802 | 804 | ||
| 803 | host_bindings.buffers.push_back(&buffer); | 805 | host_bindings.buffers.push_back(&buffer); |
| @@ -814,7 +816,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() { | |||
| 814 | const auto bind_buffer = [this](const Binding& binding) { | 816 | const auto bind_buffer = [this](const Binding& binding) { |
| 815 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 817 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 816 | TouchBuffer(buffer, binding.buffer_id); | 818 | TouchBuffer(buffer, binding.buffer_id); |
| 817 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 819 | SynchronizeBuffer(buffer, binding.device_addr, binding.size); |
| 818 | }; | 820 | }; |
| 819 | if (current_draw_indirect->include_count) { | 821 | if (current_draw_indirect->include_count) { |
| 820 | bind_buffer(channel_state->count_buffer_binding); | 822 | bind_buffer(channel_state->count_buffer_binding); |
| @@ -842,13 +844,13 @@ template <class P> | |||
| 842 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, | 844 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, |
| 843 | bool needs_bind) { | 845 | bool needs_bind) { |
| 844 | const Binding& binding = channel_state->uniform_buffers[stage][index]; | 846 | const Binding& binding = channel_state->uniform_buffers[stage][index]; |
| 845 | const VAddr cpu_addr = binding.cpu_addr; | 847 | const DAddr device_addr = binding.device_addr; |
| 846 | const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); | 848 | const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); |
| 847 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 849 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 848 | TouchBuffer(buffer, binding.buffer_id); | 850 | TouchBuffer(buffer, binding.buffer_id); |
| 849 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 851 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 850 | size <= channel_state->uniform_buffer_skip_cache_size && | 852 | size <= channel_state->uniform_buffer_skip_cache_size && |
| 851 | !memory_tracker.IsRegionGpuModified(cpu_addr, size); | 853 | !memory_tracker.IsRegionGpuModified(device_addr, size); |
| 852 | if (use_fast_buffer) { | 854 | if (use_fast_buffer) { |
| 853 | if constexpr (IS_OPENGL) { | 855 | if constexpr (IS_OPENGL) { |
| 854 | if (runtime.HasFastBufferSubData()) { | 856 | if (runtime.HasFastBufferSubData()) { |
| @@ -862,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 862 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; | 864 | channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; |
| 863 | runtime.BindFastUniformBuffer(stage, binding_index, size); | 865 | runtime.BindFastUniformBuffer(stage, binding_index, size); |
| 864 | } | 866 | } |
| 865 | const auto span = ImmediateBufferWithData(cpu_addr, size); | 867 | const auto span = ImmediateBufferWithData(device_addr, size); |
| 866 | runtime.PushFastUniformBuffer(stage, binding_index, span); | 868 | runtime.PushFastUniformBuffer(stage, binding_index, span); |
| 867 | return; | 869 | return; |
| 868 | } | 870 | } |
| @@ -873,11 +875,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 873 | } | 875 | } |
| 874 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan | 876 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan |
| 875 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); | 877 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); |
| 876 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 878 | device_memory.ReadBlockUnsafe(device_addr, span.data(), size); |
| 877 | return; | 879 | return; |
| 878 | } | 880 | } |
| 879 | // Classic cached path | 881 | // Classic cached path |
| 880 | const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); | 882 | const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size); |
| 881 | if (sync_cached) { | 883 | if (sync_cached) { |
| 882 | ++channel_state->uniform_cache_hits[0]; | 884 | ++channel_state->uniform_cache_hits[0]; |
| 883 | } | 885 | } |
| @@ -892,7 +894,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 892 | if (!needs_bind) { | 894 | if (!needs_bind) { |
| 893 | return; | 895 | return; |
| 894 | } | 896 | } |
| 895 | const u32 offset = buffer.Offset(cpu_addr); | 897 | const u32 offset = buffer.Offset(device_addr); |
| 896 | if constexpr (IS_OPENGL) { | 898 | if constexpr (IS_OPENGL) { |
| 897 | // Fast buffer will be unbound | 899 | // Fast buffer will be unbound |
| 898 | channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | 900 | channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); |
| @@ -920,14 +922,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 920 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 922 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 921 | TouchBuffer(buffer, binding.buffer_id); | 923 | TouchBuffer(buffer, binding.buffer_id); |
| 922 | const u32 size = binding.size; | 924 | const u32 size = binding.size; |
| 923 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 925 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 924 | 926 | ||
| 925 | const u32 offset = buffer.Offset(binding.cpu_addr); | 927 | const u32 offset = buffer.Offset(binding.device_addr); |
| 926 | buffer.MarkUsage(offset, size); | 928 | buffer.MarkUsage(offset, size); |
| 927 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; | 929 | const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; |
| 928 | 930 | ||
| 929 | if (is_written) { | 931 | if (is_written) { |
| 930 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 932 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 931 | } | 933 | } |
| 932 | 934 | ||
| 933 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | 935 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { |
| @@ -945,14 +947,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | |||
| 945 | const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; | 947 | const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; |
| 946 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 948 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 947 | const u32 size = binding.size; | 949 | const u32 size = binding.size; |
| 948 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 950 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 949 | 951 | ||
| 950 | const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; | 952 | const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; |
| 951 | if (is_written) { | 953 | if (is_written) { |
| 952 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 954 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 953 | } | 955 | } |
| 954 | 956 | ||
| 955 | const u32 offset = buffer.Offset(binding.cpu_addr); | 957 | const u32 offset = buffer.Offset(binding.device_addr); |
| 956 | const PixelFormat format = binding.format; | 958 | const PixelFormat format = binding.format; |
| 957 | buffer.MarkUsage(offset, size); | 959 | buffer.MarkUsage(offset, size); |
| 958 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 960 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| @@ -982,11 +984,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 982 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 984 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 983 | TouchBuffer(buffer, binding.buffer_id); | 985 | TouchBuffer(buffer, binding.buffer_id); |
| 984 | const u32 size = binding.size; | 986 | const u32 size = binding.size; |
| 985 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 987 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 986 | 988 | ||
| 987 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 989 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 988 | 990 | ||
| 989 | const u32 offset = buffer.Offset(binding.cpu_addr); | 991 | const u32 offset = buffer.Offset(binding.device_addr); |
| 990 | buffer.MarkUsage(offset, size); | 992 | buffer.MarkUsage(offset, size); |
| 991 | host_bindings.buffers.push_back(&buffer); | 993 | host_bindings.buffers.push_back(&buffer); |
| 992 | host_bindings.offsets.push_back(offset); | 994 | host_bindings.offsets.push_back(offset); |
| @@ -1011,9 +1013,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1011 | TouchBuffer(buffer, binding.buffer_id); | 1013 | TouchBuffer(buffer, binding.buffer_id); |
| 1012 | const u32 size = | 1014 | const u32 size = |
| 1013 | std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); | 1015 | std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); |
| 1014 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1016 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1015 | 1017 | ||
| 1016 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1018 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1017 | buffer.MarkUsage(offset, size); | 1019 | buffer.MarkUsage(offset, size); |
| 1018 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1020 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 1019 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); | 1021 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); |
| @@ -1032,15 +1034,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1032 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1034 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1033 | TouchBuffer(buffer, binding.buffer_id); | 1035 | TouchBuffer(buffer, binding.buffer_id); |
| 1034 | const u32 size = binding.size; | 1036 | const u32 size = binding.size; |
| 1035 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1037 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1036 | 1038 | ||
| 1037 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1039 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1038 | buffer.MarkUsage(offset, size); | 1040 | buffer.MarkUsage(offset, size); |
| 1039 | const bool is_written = | 1041 | const bool is_written = |
| 1040 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; | 1042 | ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; |
| 1041 | 1043 | ||
| 1042 | if (is_written) { | 1044 | if (is_written) { |
| 1043 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 1045 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 1044 | } | 1046 | } |
| 1045 | 1047 | ||
| 1046 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | 1048 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { |
| @@ -1058,15 +1060,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { | |||
| 1058 | const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; | 1060 | const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; |
| 1059 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1061 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1060 | const u32 size = binding.size; | 1062 | const u32 size = binding.size; |
| 1061 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1063 | SynchronizeBuffer(buffer, binding.device_addr, size); |
| 1062 | 1064 | ||
| 1063 | const bool is_written = | 1065 | const bool is_written = |
| 1064 | ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; | 1066 | ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; |
| 1065 | if (is_written) { | 1067 | if (is_written) { |
| 1066 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); | 1068 | MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size); |
| 1067 | } | 1069 | } |
| 1068 | 1070 | ||
| 1069 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1071 | const u32 offset = buffer.Offset(binding.device_addr); |
| 1070 | const PixelFormat format = binding.format; | 1072 | const PixelFormat format = binding.format; |
| 1071 | buffer.MarkUsage(offset, size); | 1073 | buffer.MarkUsage(offset, size); |
| 1072 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | 1074 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| @@ -1131,7 +1133,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1131 | inline_buffer_id = CreateBuffer(0, buffer_size); | 1133 | inline_buffer_id = CreateBuffer(0, buffer_size); |
| 1132 | } | 1134 | } |
| 1133 | channel_state->index_buffer = Binding{ | 1135 | channel_state->index_buffer = Binding{ |
| 1134 | .cpu_addr = 0, | 1136 | .device_addr = 0, |
| 1135 | .size = inline_index_size, | 1137 | .size = inline_index_size, |
| 1136 | .buffer_id = inline_buffer_id, | 1138 | .buffer_id = inline_buffer_id, |
| 1137 | }; | 1139 | }; |
| @@ -1140,19 +1142,19 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1140 | 1142 | ||
| 1141 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); | 1143 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); |
| 1142 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); | 1144 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); |
| 1143 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1145 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1144 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1146 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1145 | const u32 draw_size = | 1147 | const u32 draw_size = |
| 1146 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); | 1148 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); |
| 1147 | const u32 size = std::min(address_size, draw_size); | 1149 | const u32 size = std::min(address_size, draw_size); |
| 1148 | if (size == 0 || !cpu_addr) { | 1150 | if (size == 0 || !device_addr) { |
| 1149 | channel_state->index_buffer = NULL_BINDING; | 1151 | channel_state->index_buffer = NULL_BINDING; |
| 1150 | return; | 1152 | return; |
| 1151 | } | 1153 | } |
| 1152 | channel_state->index_buffer = Binding{ | 1154 | channel_state->index_buffer = Binding{ |
| 1153 | .cpu_addr = *cpu_addr, | 1155 | .device_addr = *device_addr, |
| 1154 | .size = size, | 1156 | .size = size, |
| 1155 | .buffer_id = FindBuffer(*cpu_addr, size), | 1157 | .buffer_id = FindBuffer(*device_addr, size), |
| 1156 | }; | 1158 | }; |
| 1157 | } | 1159 | } |
| 1158 | 1160 | ||
| @@ -1178,19 +1180,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1178 | const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; | 1180 | const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; |
| 1179 | const GPUVAddr gpu_addr_begin = array.Address(); | 1181 | const GPUVAddr gpu_addr_begin = array.Address(); |
| 1180 | const GPUVAddr gpu_addr_end = limit.Address() + 1; | 1182 | const GPUVAddr gpu_addr_end = limit.Address() + 1; |
| 1181 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1183 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1182 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1184 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1183 | u32 size = address_size; // TODO: Analyze stride and number of vertices | 1185 | u32 size = address_size; // TODO: Analyze stride and number of vertices |
| 1184 | if (array.enable == 0 || size == 0 || !cpu_addr) { | 1186 | if (array.enable == 0 || size == 0 || !device_addr) { |
| 1185 | channel_state->vertex_buffers[index] = NULL_BINDING; | 1187 | channel_state->vertex_buffers[index] = NULL_BINDING; |
| 1186 | return; | 1188 | return; |
| 1187 | } | 1189 | } |
| 1188 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1190 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1189 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); | 1191 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1190 | } | 1192 | } |
| 1191 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 1193 | const BufferId buffer_id = FindBuffer(*device_addr, size); |
| 1192 | channel_state->vertex_buffers[index] = Binding{ | 1194 | channel_state->vertex_buffers[index] = Binding{ |
| 1193 | .cpu_addr = *cpu_addr, | 1195 | .device_addr = *device_addr, |
| 1194 | .size = size, | 1196 | .size = size, |
| 1195 | .buffer_id = buffer_id, | 1197 | .buffer_id = buffer_id, |
| 1196 | }; | 1198 | }; |
| @@ -1199,15 +1201,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1199 | template <class P> | 1201 | template <class P> |
| 1200 | void BufferCache<P>::UpdateDrawIndirect() { | 1202 | void BufferCache<P>::UpdateDrawIndirect() { |
| 1201 | const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { | 1203 | const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { |
| 1202 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1204 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1203 | if (!cpu_addr) { | 1205 | if (!device_addr) { |
| 1204 | binding = NULL_BINDING; | 1206 | binding = NULL_BINDING; |
| 1205 | return; | 1207 | return; |
| 1206 | } | 1208 | } |
| 1207 | binding = Binding{ | 1209 | binding = Binding{ |
| 1208 | .cpu_addr = *cpu_addr, | 1210 | .device_addr = *device_addr, |
| 1209 | .size = static_cast<u32>(size), | 1211 | .size = static_cast<u32>(size), |
| 1210 | .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), | 1212 | .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)), |
| 1211 | }; | 1213 | }; |
| 1212 | }; | 1214 | }; |
| 1213 | if (current_draw_indirect->include_count) { | 1215 | if (current_draw_indirect->include_count) { |
| @@ -1231,7 +1233,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | |||
| 1231 | channel_state->dirty_uniform_buffers[stage] |= 1U << index; | 1233 | channel_state->dirty_uniform_buffers[stage] |= 1U << index; |
| 1232 | } | 1234 | } |
| 1233 | // Resolve buffer | 1235 | // Resolve buffer |
| 1234 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1236 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1235 | }); | 1237 | }); |
| 1236 | } | 1238 | } |
| 1237 | 1239 | ||
| @@ -1240,7 +1242,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { | |||
| 1240 | ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { | 1242 | ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { |
| 1241 | // Resolve buffer | 1243 | // Resolve buffer |
| 1242 | Binding& binding = channel_state->storage_buffers[stage][index]; | 1244 | Binding& binding = channel_state->storage_buffers[stage][index]; |
| 1243 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1245 | const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1244 | binding.buffer_id = buffer_id; | 1246 | binding.buffer_id = buffer_id; |
| 1245 | }); | 1247 | }); |
| 1246 | } | 1248 | } |
| @@ -1249,7 +1251,7 @@ template <class P> | |||
| 1249 | void BufferCache<P>::UpdateTextureBuffers(size_t stage) { | 1251 | void BufferCache<P>::UpdateTextureBuffers(size_t stage) { |
| 1250 | ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { | 1252 | ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { |
| 1251 | Binding& binding = channel_state->texture_buffers[stage][index]; | 1253 | Binding& binding = channel_state->texture_buffers[stage][index]; |
| 1252 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1254 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1253 | }); | 1255 | }); |
| 1254 | } | 1256 | } |
| 1255 | 1257 | ||
| @@ -1268,14 +1270,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { | |||
| 1268 | const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; | 1270 | const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; |
| 1269 | const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; | 1271 | const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; |
| 1270 | const u32 size = binding.size; | 1272 | const u32 size = binding.size; |
| 1271 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1273 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1272 | if (binding.enable == 0 || size == 0 || !cpu_addr) { | 1274 | if (binding.enable == 0 || size == 0 || !device_addr) { |
| 1273 | channel_state->transform_feedback_buffers[index] = NULL_BINDING; | 1275 | channel_state->transform_feedback_buffers[index] = NULL_BINDING; |
| 1274 | return; | 1276 | return; |
| 1275 | } | 1277 | } |
| 1276 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 1278 | const BufferId buffer_id = FindBuffer(*device_addr, size); |
| 1277 | channel_state->transform_feedback_buffers[index] = Binding{ | 1279 | channel_state->transform_feedback_buffers[index] = Binding{ |
| 1278 | .cpu_addr = *cpu_addr, | 1280 | .device_addr = *device_addr, |
| 1279 | .size = size, | 1281 | .size = size, |
| 1280 | .buffer_id = buffer_id, | 1282 | .buffer_id = buffer_id, |
| 1281 | }; | 1283 | }; |
| @@ -1289,13 +1291,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() { | |||
| 1289 | const auto& launch_desc = kepler_compute->launch_description; | 1291 | const auto& launch_desc = kepler_compute->launch_description; |
| 1290 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { | 1292 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { |
| 1291 | const auto& cbuf = launch_desc.const_buffer_config[index]; | 1293 | const auto& cbuf = launch_desc.const_buffer_config[index]; |
| 1292 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); | 1294 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); |
| 1293 | if (cpu_addr) { | 1295 | if (device_addr) { |
| 1294 | binding.cpu_addr = *cpu_addr; | 1296 | binding.device_addr = *device_addr; |
| 1295 | binding.size = cbuf.size; | 1297 | binding.size = cbuf.size; |
| 1296 | } | 1298 | } |
| 1297 | } | 1299 | } |
| 1298 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1300 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1299 | }); | 1301 | }); |
| 1300 | } | 1302 | } |
| 1301 | 1303 | ||
| @@ -1304,7 +1306,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { | |||
| 1304 | ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { | 1306 | ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { |
| 1305 | // Resolve buffer | 1307 | // Resolve buffer |
| 1306 | Binding& binding = channel_state->compute_storage_buffers[index]; | 1308 | Binding& binding = channel_state->compute_storage_buffers[index]; |
| 1307 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1309 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1308 | }); | 1310 | }); |
| 1309 | } | 1311 | } |
| 1310 | 1312 | ||
| @@ -1312,45 +1314,63 @@ template <class P> | |||
| 1312 | void BufferCache<P>::UpdateComputeTextureBuffers() { | 1314 | void BufferCache<P>::UpdateComputeTextureBuffers() { |
| 1313 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { | 1315 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { |
| 1314 | Binding& binding = channel_state->compute_texture_buffers[index]; | 1316 | Binding& binding = channel_state->compute_texture_buffers[index]; |
| 1315 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1317 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1316 | }); | 1318 | }); |
| 1317 | } | 1319 | } |
| 1318 | 1320 | ||
| 1319 | template <class P> | 1321 | template <class P> |
| 1320 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1322 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) { |
| 1321 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); | 1323 | memory_tracker.MarkRegionAsGpuModified(device_addr, size); |
| 1322 | 1324 | ||
| 1323 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1325 | const IntervalType base_interval{device_addr, device_addr + size}; |
| 1324 | common_ranges.add(base_interval); | 1326 | common_ranges.add(base_interval); |
| 1325 | uncommitted_ranges.add(base_interval); | 1327 | uncommitted_ranges.add(base_interval); |
| 1326 | } | 1328 | } |
| 1327 | 1329 | ||
| 1328 | template <class P> | 1330 | template <class P> |
| 1329 | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | 1331 | BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) { |
| 1330 | if (cpu_addr == 0) { | 1332 | if (device_addr == 0) { |
| 1331 | return NULL_BUFFER_ID; | 1333 | return NULL_BUFFER_ID; |
| 1332 | } | 1334 | } |
| 1333 | const u64 page = cpu_addr >> CACHING_PAGEBITS; | 1335 | const u64 page = device_addr >> CACHING_PAGEBITS; |
| 1334 | const BufferId buffer_id = page_table[page]; | 1336 | const BufferId buffer_id = page_table[page]; |
| 1335 | if (!buffer_id) { | 1337 | if (!buffer_id) { |
| 1336 | return CreateBuffer(cpu_addr, size); | 1338 | return CreateBuffer(device_addr, size); |
| 1337 | } | 1339 | } |
| 1338 | const Buffer& buffer = slot_buffers[buffer_id]; | 1340 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 1339 | if (buffer.IsInBounds(cpu_addr, size)) { | 1341 | if (buffer.IsInBounds(device_addr, size)) { |
| 1340 | return buffer_id; | 1342 | return buffer_id; |
| 1341 | } | 1343 | } |
| 1342 | return CreateBuffer(cpu_addr, size); | 1344 | return CreateBuffer(device_addr, size); |
| 1343 | } | 1345 | } |
| 1344 | 1346 | ||
| 1345 | template <class P> | 1347 | template <class P> |
| 1346 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1348 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr, |
| 1347 | u32 wanted_size) { | 1349 | u32 wanted_size) { |
| 1348 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | 1350 | static constexpr int STREAM_LEAP_THRESHOLD = 16; |
| 1349 | boost::container::small_vector<BufferId, 16> overlap_ids; | 1351 | boost::container::small_vector<BufferId, 16> overlap_ids; |
| 1350 | VAddr begin = cpu_addr; | 1352 | DAddr begin = device_addr; |
| 1351 | VAddr end = cpu_addr + wanted_size; | 1353 | DAddr end = device_addr + wanted_size; |
| 1352 | int stream_score = 0; | 1354 | int stream_score = 0; |
| 1353 | bool has_stream_leap = false; | 1355 | bool has_stream_leap = false; |
| 1356 | auto expand_begin = [&](DAddr add_value) { | ||
| 1357 | static constexpr DAddr min_page = CACHING_PAGESIZE + Core::Memory::YUZU_PAGESIZE; | ||
| 1358 | if (add_value > begin - min_page ) { | ||
| 1359 | begin = min_page; | ||
| 1360 | device_addr = Core::Memory::YUZU_PAGESIZE; | ||
| 1361 | return; | ||
| 1362 | } | ||
| 1363 | begin -= add_value; | ||
| 1364 | device_addr = begin - CACHING_PAGESIZE; | ||
| 1365 | }; | ||
| 1366 | auto expand_end = [&](DAddr add_value) { | ||
| 1367 | static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS; | ||
| 1368 | if (add_value > max_page - end ) { | ||
| 1369 | end = max_page; | ||
| 1370 | return; | ||
| 1371 | } | ||
| 1372 | end += add_value; | ||
| 1373 | }; | ||
| 1354 | if (begin == 0) { | 1374 | if (begin == 0) { |
| 1355 | return OverlapResult{ | 1375 | return OverlapResult{ |
| 1356 | .ids = std::move(overlap_ids), | 1376 | .ids = std::move(overlap_ids), |
| @@ -1359,9 +1379,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1359 | .has_stream_leap = has_stream_leap, | 1379 | .has_stream_leap = has_stream_leap, |
| 1360 | }; | 1380 | }; |
| 1361 | } | 1381 | } |
| 1362 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); | 1382 | for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1363 | cpu_addr += CACHING_PAGESIZE) { | 1383 | device_addr += CACHING_PAGESIZE) { |
| 1364 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; | 1384 | const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; |
| 1365 | if (!overlap_id) { | 1385 | if (!overlap_id) { |
| 1366 | continue; | 1386 | continue; |
| 1367 | } | 1387 | } |
| @@ -1371,12 +1391,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1371 | } | 1391 | } |
| 1372 | overlap_ids.push_back(overlap_id); | 1392 | overlap_ids.push_back(overlap_id); |
| 1373 | overlap.Pick(); | 1393 | overlap.Pick(); |
| 1374 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | 1394 | const DAddr overlap_device_addr = overlap.CpuAddr(); |
| 1375 | const bool expands_left = overlap_cpu_addr < begin; | 1395 | const bool expands_left = overlap_device_addr < begin; |
| 1376 | if (expands_left) { | 1396 | if (expands_left) { |
| 1377 | begin = overlap_cpu_addr; | 1397 | begin = overlap_device_addr; |
| 1378 | } | 1398 | } |
| 1379 | const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); | 1399 | const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); |
| 1380 | const bool expands_right = overlap_end > end; | 1400 | const bool expands_right = overlap_end > end; |
| 1381 | if (overlap_end > end) { | 1401 | if (overlap_end > end) { |
| 1382 | end = overlap_end; | 1402 | end = overlap_end; |
| @@ -1387,11 +1407,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1387 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1407 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1388 | has_stream_leap = true; | 1408 | has_stream_leap = true; |
| 1389 | if (expands_right) { | 1409 | if (expands_right) { |
| 1390 | begin -= CACHING_PAGESIZE * 256; | 1410 | expand_begin(CACHING_PAGESIZE * 128); |
| 1391 | cpu_addr = begin - CACHING_PAGESIZE; | ||
| 1392 | } | 1411 | } |
| 1393 | if (expands_left) { | 1412 | if (expands_left) { |
| 1394 | end += CACHING_PAGESIZE * 256; | 1413 | expand_end(CACHING_PAGESIZE * 128); |
| 1395 | } | 1414 | } |
| 1396 | } | 1415 | } |
| 1397 | } | 1416 | } |
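The new expand_begin/expand_end helpers replace the old raw `begin -= ...` / `end += ...` arithmetic so that growing a streamed buffer can no longer wrap the unsigned DAddr below the first cacheable page or run past the SMMU address space. A standalone sketch of the same saturating idea, with made-up constants in place of CACHING_PAGESIZE, YUZU_PAGESIZE and the device address-space size, and without the device_addr cursor adjustment the real lambda also performs:

    #include <cstdint>

    using DAddr = std::uint64_t;

    // Stand-in constants; the real values come from the cache and the SMMU.
    constexpr DAddr kCachingPageSize = 1ULL << 16;
    constexpr DAddr kGuestPageSize = 1ULL << 12;
    constexpr DAddr kAddressSpaceEnd = 1ULL << 34;

    // Grow [begin, end) downwards without letting the unsigned subtraction
    // wrap below the first cacheable page.
    void ExpandBegin(DAddr& begin, DAddr add_value) {
        constexpr DAddr min_page = kCachingPageSize + kGuestPageSize;
        if (add_value > begin - min_page) {
            begin = min_page;
            return;
        }
        begin -= add_value;
    }

    // Grow [begin, end) upwards without running past the device address space.
    void ExpandEnd(DAddr& end, DAddr add_value) {
        if (add_value > kAddressSpaceEnd - end) {
            end = kAddressSpaceEnd;
            return;
        }
        end += add_value;
    }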
| @@ -1424,13 +1443,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1424 | } | 1443 | } |
| 1425 | 1444 | ||
| 1426 | template <class P> | 1445 | template <class P> |
| 1427 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1446 | BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) { |
| 1428 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | 1447 | DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); |
| 1429 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | 1448 | device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); |
| 1430 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | 1449 | wanted_size = static_cast<u32>(device_addr_end - device_addr); |
| 1431 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1450 | const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); |
| 1432 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1451 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1433 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1452 | const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size); |
| 1434 | auto& new_buffer = slot_buffers[new_buffer_id]; | 1453 | auto& new_buffer = slot_buffers[new_buffer_id]; |
| 1435 | const size_t size_bytes = new_buffer.SizeBytes(); | 1454 | const size_t size_bytes = new_buffer.SizeBytes(); |
| 1436 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); | 1455 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); |
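CreateBuffer first widens the requested range to caching-page granularity before resolving overlaps, so neighbouring buffers can be merged into one allocation. A minimal sketch of that alignment step, assuming the caching page size is a power of two (kCachingPageSize is a stand-in for CACHING_PAGESIZE):

    #include <cstdint>

    using DAddr = std::uint64_t;
    constexpr DAddr kCachingPageSize = 1ULL << 16;

    struct AlignedRange {
        DAddr addr;
        std::uint32_t size;
    };

    // Expand a requested [addr, addr + size) range to caching-page granularity
    // before looking for overlapping buffers.
    AlignedRange AlignToCachingPages(DAddr addr, std::uint32_t size) {
        const DAddr end = (addr + size + kCachingPageSize - 1) & ~(kCachingPageSize - 1);
        const DAddr begin = addr & ~(kCachingPageSize - 1);
        return {begin, static_cast<std::uint32_t>(end - begin)};
    }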
| @@ -1465,10 +1484,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1465 | total_used_memory -= Common::AlignUp(size, 1024); | 1484 | total_used_memory -= Common::AlignUp(size, 1024); |
| 1466 | lru_cache.Free(buffer.getLRUID()); | 1485 | lru_cache.Free(buffer.getLRUID()); |
| 1467 | } | 1486 | } |
| 1468 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1487 | const DAddr device_addr_begin = buffer.CpuAddr(); |
| 1469 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1488 | const DAddr device_addr_end = device_addr_begin + size; |
| 1470 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; | 1489 | const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; |
| 1471 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); | 1490 | const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); |
| 1472 | for (u64 page = page_begin; page != page_end; ++page) { | 1491 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1473 | if constexpr (insert) { | 1492 | if constexpr (insert) { |
| 1474 | page_table[page] = buffer_id; | 1493 | page_table[page] = buffer_id; |
| @@ -1486,15 +1505,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1486 | } | 1505 | } |
| 1487 | 1506 | ||
| 1488 | template <class P> | 1507 | template <class P> |
| 1489 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1508 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) { |
| 1490 | boost::container::small_vector<BufferCopy, 4> copies; | 1509 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1491 | u64 total_size_bytes = 0; | 1510 | u64 total_size_bytes = 0; |
| 1492 | u64 largest_copy = 0; | 1511 | u64 largest_copy = 0; |
| 1493 | VAddr buffer_start = buffer.CpuAddr(); | 1512 | DAddr buffer_start = buffer.CpuAddr(); |
| 1494 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1513 | memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1495 | copies.push_back(BufferCopy{ | 1514 | copies.push_back(BufferCopy{ |
| 1496 | .src_offset = total_size_bytes, | 1515 | .src_offset = total_size_bytes, |
| 1497 | .dst_offset = cpu_addr_out - buffer_start, | 1516 | .dst_offset = device_addr_out - buffer_start, |
| 1498 | .size = range_size, | 1517 | .size = range_size, |
| 1499 | }); | 1518 | }); |
| 1500 | total_size_bytes += range_size; | 1519 | total_size_bytes += range_size; |
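SynchronizeBuffer walks the memory tracker's dirty ranges and packs them back-to-back into a staging allocation, recording for each range where it sits in staging (src_offset) and where it belongs inside the buffer (dst_offset). A simplified sketch of that bookkeeping; the dirty ranges are passed in as plain (device_addr, size) pairs instead of coming from memory_tracker.ForEachUploadRange:

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct BufferCopy {
        std::uint64_t src_offset; // packed position in the staging allocation
        std::uint64_t dst_offset; // position inside the GPU buffer
        std::uint64_t size;
    };

    std::vector<BufferCopy> BuildUploadCopies(
        std::uint64_t buffer_start,
        const std::vector<std::pair<std::uint64_t, std::uint64_t>>& dirty_ranges) {
        std::vector<BufferCopy> copies;
        std::uint64_t total = 0;
        for (const auto& [device_addr, range_size] : dirty_ranges) {
            copies.push_back({total, device_addr - buffer_start, range_size});
            total += range_size;
        }
        return copies;
    }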
| @@ -1526,14 +1545,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1526 | std::span<u8> immediate_buffer; | 1545 | std::span<u8> immediate_buffer; |
| 1527 | for (const BufferCopy& copy : copies) { | 1546 | for (const BufferCopy& copy : copies) { |
| 1528 | std::span<const u8> upload_span; | 1547 | std::span<const u8> upload_span; |
| 1529 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1548 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1530 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1549 | if (IsRangeGranular(device_addr, copy.size)) { |
| 1531 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1550 | upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size); |
| 1532 | } else { | 1551 | } else { |
| 1533 | if (immediate_buffer.empty()) { | 1552 | if (immediate_buffer.empty()) { |
| 1534 | immediate_buffer = ImmediateBuffer(largest_copy); | 1553 | immediate_buffer = ImmediateBuffer(largest_copy); |
| 1535 | } | 1554 | } |
| 1536 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1555 | device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); |
| 1537 | upload_span = immediate_buffer.subspan(0, copy.size); | 1556 | upload_span = immediate_buffer.subspan(0, copy.size); |
| 1538 | } | 1557 | } |
| 1539 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | 1558 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| @@ -1550,8 +1569,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1550 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1569 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1551 | for (BufferCopy& copy : copies) { | 1570 | for (BufferCopy& copy : copies) { |
| 1552 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1571 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1553 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1572 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1554 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1573 | device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size); |
| 1555 | 1574 | ||
| 1556 | // Apply the staging offset | 1575 | // Apply the staging offset |
| 1557 | copy.src_offset += upload_staging.offset; | 1576 | copy.src_offset += upload_staging.offset; |
| @@ -1562,14 +1581,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1562 | } | 1581 | } |
| 1563 | 1582 | ||
| 1564 | template <class P> | 1583 | template <class P> |
| 1565 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | 1584 | bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size, |
| 1566 | std::span<const u8> inlined_buffer) { | 1585 | std::span<const u8> inlined_buffer) { |
| 1567 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); | 1586 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); |
| 1568 | if (!is_dirty) { | 1587 | if (!is_dirty) { |
| 1569 | return false; | 1588 | return false; |
| 1570 | } | 1589 | } |
| 1571 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); | 1590 | DAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); |
| 1572 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | 1591 | DAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); |
| 1573 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | 1592 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { |
| 1574 | return false; | 1593 | return false; |
| 1575 | } | 1594 | } |
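InlineMemory only forwards an inline write into the cache when the destination is registered and, after rounding out to guest-page granularity, still overlaps GPU-modified data; everything else falls through to the normal CPU write path. A sketch of that gate, where is_registered and is_gpu_modified are hypothetical callbacks standing in for IsRegionRegistered and IsRegionGpuModified:

    #include <cstddef>
    #include <cstdint>

    using DAddr = std::uint64_t;
    constexpr DAddr kGuestPageSize = 0x1000; // stand-in for Core::Memory::YUZU_PAGESIZE

    template <typename Registered, typename GpuModified>
    bool ShouldInline(DAddr dest, std::size_t size, Registered&& is_registered,
                      GpuModified&& is_gpu_modified) {
        if (!is_registered(dest, size)) {
            return false;
        }
        const DAddr aligned_start = dest & ~(kGuestPageSize - 1);
        const DAddr aligned_end = (dest + size + kGuestPageSize - 1) & ~(kGuestPageSize - 1);
        return is_gpu_modified(aligned_start, aligned_end - aligned_start);
    }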
| @@ -1580,7 +1599,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1580 | } | 1599 | } |
| 1581 | 1600 | ||
| 1582 | template <class P> | 1601 | template <class P> |
| 1583 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 1602 | void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 1584 | std::span<const u8> inlined_buffer) { | 1603 | std::span<const u8> inlined_buffer) { |
| 1585 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | 1604 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |
| 1586 | ClearDownload(subtract_interval); | 1605 | ClearDownload(subtract_interval); |
| @@ -1612,14 +1631,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | |||
| 1612 | } | 1631 | } |
| 1613 | 1632 | ||
| 1614 | template <class P> | 1633 | template <class P> |
| 1615 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | 1634 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) { |
| 1616 | boost::container::small_vector<BufferCopy, 1> copies; | 1635 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1617 | u64 total_size_bytes = 0; | 1636 | u64 total_size_bytes = 0; |
| 1618 | u64 largest_copy = 0; | 1637 | u64 largest_copy = 0; |
| 1619 | memory_tracker.ForEachDownloadRangeAndClear( | 1638 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1620 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1639 | device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1621 | const VAddr buffer_addr = buffer.CpuAddr(); | 1640 | const DAddr buffer_addr = buffer.CpuAddr(); |
| 1622 | const auto add_download = [&](VAddr start, VAddr end) { | 1641 | const auto add_download = [&](DAddr start, DAddr end) { |
| 1623 | const u64 new_offset = start - buffer_addr; | 1642 | const u64 new_offset = start - buffer_addr; |
| 1624 | const u64 new_size = end - start; | 1643 | const u64 new_size = end - start; |
| 1625 | copies.push_back(BufferCopy{ | 1644 | copies.push_back(BufferCopy{ |
| @@ -1634,8 +1653,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1634 | largest_copy = std::max(largest_copy, new_size); | 1653 | largest_copy = std::max(largest_copy, new_size); |
| 1635 | }; | 1654 | }; |
| 1636 | 1655 | ||
| 1637 | const VAddr start_address = cpu_addr_out; | 1656 | const DAddr start_address = device_addr_out; |
| 1638 | const VAddr end_address = start_address + range_size; | 1657 | const DAddr end_address = start_address + range_size; |
| 1639 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); | 1658 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1640 | const IntervalType subtract_interval{start_address, end_address}; | 1659 | const IntervalType subtract_interval{start_address, end_address}; |
| 1641 | ClearDownload(subtract_interval); | 1660 | ClearDownload(subtract_interval); |
| @@ -1658,18 +1677,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1658 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); | 1677 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); |
| 1659 | runtime.Finish(); | 1678 | runtime.Finish(); |
| 1660 | for (const BufferCopy& copy : copies) { | 1679 | for (const BufferCopy& copy : copies) { |
| 1661 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1680 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1662 | // Undo the modified offset | 1681 | // Undo the modified offset |
| 1663 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 1682 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 1664 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | 1683 | const u8* copy_mapped_memory = mapped_memory + dst_offset; |
| 1665 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | 1684 | device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size); |
| 1666 | } | 1685 | } |
| 1667 | } else { | 1686 | } else { |
| 1668 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 1687 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| 1669 | for (const BufferCopy& copy : copies) { | 1688 | for (const BufferCopy& copy : copies) { |
| 1670 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 1689 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); |
| 1671 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1690 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1672 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | 1691 | device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size); |
| 1673 | } | 1692 | } |
| 1674 | } | 1693 | } |
| 1675 | } | 1694 | } |
| @@ -1758,20 +1777,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1758 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); | 1777 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); |
| 1759 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; | 1778 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; |
| 1760 | 1779 | ||
| 1761 | const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); | 1780 | const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); |
| 1762 | if (!aligned_cpu_addr || size == 0) { | 1781 | if (!aligned_device_addr || size == 0) { |
| 1763 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1782 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 1764 | return NULL_BINDING; | 1783 | return NULL_BINDING; |
| 1765 | } | 1784 | } |
| 1766 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1785 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1767 | ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", | 1786 | ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}", |
| 1768 | cbuf_index); | 1787 | cbuf_index); |
| 1769 | // The end address used for size calculation does not need to be aligned | 1788 | // The end address used for size calculation does not need to be aligned |
| 1770 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1789 | const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::Memory::YUZU_PAGESIZE); |
| 1771 | 1790 | ||
| 1772 | const Binding binding{ | 1791 | const Binding binding{ |
| 1773 | .cpu_addr = *aligned_cpu_addr, | 1792 | .device_addr = *aligned_device_addr, |
| 1774 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), | 1793 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr), |
| 1775 | .buffer_id = BufferId{}, | 1794 | .buffer_id = BufferId{}, |
| 1776 | }; | 1795 | }; |
| 1777 | return binding; | 1796 | return binding; |
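StorageBufferBinding aligns the SSBO's GPU address down to the device's required offset alignment and grows the size by whatever the alignment chopped off, so the bound range still covers the original bytes. A short sketch of that arithmetic, assuming a power-of-two alignment:

    #include <cstdint>

    using GPUVAddr = std::uint64_t;

    struct AlignedSsbo {
        GPUVAddr aligned_addr;
        std::uint32_t aligned_size;
    };

    // Align the address down and widen the size by the bytes cut off in front.
    AlignedSsbo AlignSsbo(GPUVAddr gpu_addr, std::uint32_t size, std::uint32_t alignment) {
        const GPUVAddr aligned_addr = gpu_addr & ~GPUVAddr{alignment - 1};
        const std::uint32_t aligned_size =
            static_cast<std::uint32_t>(gpu_addr - aligned_addr) + size;
        return {aligned_addr, aligned_size};
    }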
| @@ -1780,15 +1799,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1780 | template <class P> | 1799 | template <class P> |
| 1781 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 1800 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 1782 | PixelFormat format) { | 1801 | PixelFormat format) { |
| 1783 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1802 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1784 | TextureBufferBinding binding; | 1803 | TextureBufferBinding binding; |
| 1785 | if (!cpu_addr || size == 0) { | 1804 | if (!device_addr || size == 0) { |
| 1786 | binding.cpu_addr = 0; | 1805 | binding.device_addr = 0; |
| 1787 | binding.size = 0; | 1806 | binding.size = 0; |
| 1788 | binding.buffer_id = NULL_BUFFER_ID; | 1807 | binding.buffer_id = NULL_BUFFER_ID; |
| 1789 | binding.format = PixelFormat::Invalid; | 1808 | binding.format = PixelFormat::Invalid; |
| 1790 | } else { | 1809 | } else { |
| 1791 | binding.cpu_addr = *cpu_addr; | 1810 | binding.device_addr = *device_addr; |
| 1792 | binding.size = size; | 1811 | binding.size = size; |
| 1793 | binding.buffer_id = BufferId{}; | 1812 | binding.buffer_id = BufferId{}; |
| 1794 | binding.format = format; | 1813 | binding.format = format; |
| @@ -1797,14 +1816,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, | |||
| 1797 | } | 1816 | } |
| 1798 | 1817 | ||
| 1799 | template <class P> | 1818 | template <class P> |
| 1800 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | 1819 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) { |
| 1801 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | 1820 | u8* const base_pointer = device_memory.GetPointer<u8>(device_addr); |
| 1802 | if (IsRangeGranular(cpu_addr, size) || | 1821 | if (IsRangeGranular(device_addr, size) || |
| 1803 | base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { | 1822 | base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) { |
| 1804 | return std::span(base_pointer, size); | 1823 | return std::span(base_pointer, size); |
| 1805 | } else { | 1824 | } else { |
| 1806 | const std::span<u8> span = ImmediateBuffer(size); | 1825 | const std::span<u8> span = ImmediateBuffer(size); |
| 1807 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 1826 | device_memory.ReadBlockUnsafe(device_addr, span.data(), size); |
| 1808 | return span; | 1827 | return span; |
| 1809 | } | 1828 | } |
| 1810 | } | 1829 | } |
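ImmediateBufferWithData avoids a copy when the guest range is backed by contiguous host memory: if translating the end of the range lands exactly size bytes after the start, the span can alias guest memory directly; otherwise the data is bounced through a scratch buffer. A sketch of that decision, where translate and read_block are hypothetical stand-ins for device_memory.GetPointer<u8> and ReadBlockUnsafe:

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    template <typename Translate, typename ReadBlock>
    std::span<const std::uint8_t> ViewOrCopy(std::uint64_t device_addr, std::size_t size,
                                             Translate&& translate, ReadBlock&& read_block,
                                             std::vector<std::uint8_t>& scratch) {
        const std::uint8_t* base = translate(device_addr);
        if (base != nullptr && base + size == translate(device_addr + size)) {
            return std::span(base, size); // contiguous in host memory: zero-copy view
        }
        scratch.resize(size);
        read_block(device_addr, scratch.data(), size); // safe page-by-page read
        return std::span(scratch.data(), size);
    }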
| @@ -1828,13 +1847,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) | |||
| 1828 | template <class P> | 1847 | template <class P> |
| 1829 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { | 1848 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { |
| 1830 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; | 1849 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; |
| 1831 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); | 1850 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr)); |
| 1832 | } | 1851 | } |
| 1833 | 1852 | ||
| 1834 | template <class P> | 1853 | template <class P> |
| 1835 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { | 1854 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { |
| 1836 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; | 1855 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; |
| 1837 | return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); | 1856 | return std::make_pair(&buffer, |
| 1857 | buffer.Offset(channel_state->indirect_buffer_binding.device_addr)); | ||
| 1838 | } | 1858 | } |
| 1839 | 1859 | ||
| 1840 | } // namespace VideoCommon | 1860 | } // namespace VideoCommon |
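The ImmediateBufferWithData change above keeps the pre-existing shape of the fast path: when the requested range maps to one contiguous host region, hand back a zero-copy span; otherwise stage a copy through a scratch buffer. Below is a minimal standalone sketch of that decision, assuming a 4 KiB granularity page and a flat host buffer standing in for the device memory manager; it drops the second contiguity check the real code performs, and none of the names are yuzu's.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <span>
    #include <vector>

    using DAddr = std::uint64_t;
    constexpr DAddr kPageMask = 0xFFF; // assumption: 4 KiB granularity page

    // True when [addr, addr + size] stays inside a single granularity page.
    bool IsRangeGranular(DAddr addr, std::size_t size) {
        return (addr & ~kPageMask) == ((addr + size) & ~kPageMask);
    }

    // Returns a zero-copy view when the range is granular, otherwise stages a copy in `scratch`.
    std::span<const std::uint8_t> ImmediateView(std::span<std::uint8_t> backing, DAddr addr,
                                                std::size_t size,
                                                std::vector<std::uint8_t>& scratch) {
        std::uint8_t* const base = backing.data() + addr;
        if (IsRangeGranular(addr, size)) {
            return std::span(base, size); // one page: the host pointer is contiguous
        }
        scratch.resize(size);             // page-crossing range: read through a staging buffer
        std::memcpy(scratch.data(), base, size);
        return std::span(scratch.data(), size);
    }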
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index d6d696d8c..4074003e4 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include "common/microprofile.h" | 32 | #include "common/microprofile.h" |
| 33 | #include "common/scope_exit.h" | 33 | #include "common/scope_exit.h" |
| 34 | #include "common/settings.h" | 34 | #include "common/settings.h" |
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | 35 | #include "video_core/buffer_cache/buffer_base.h" |
| 37 | #include "video_core/control/channel_state_cache.h" | 36 | #include "video_core/control/channel_state_cache.h" |
| 38 | #include "video_core/delayed_destruction_ring.h" | 37 | #include "video_core/delayed_destruction_ring.h" |
| @@ -41,7 +40,6 @@ | |||
| 41 | #include "video_core/engines/kepler_compute.h" | 40 | #include "video_core/engines/kepler_compute.h" |
| 42 | #include "video_core/engines/maxwell_3d.h" | 41 | #include "video_core/engines/maxwell_3d.h" |
| 43 | #include "video_core/memory_manager.h" | 42 | #include "video_core/memory_manager.h" |
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | 43 | #include "video_core/surface.h" |
| 46 | #include "video_core/texture_cache/slot_vector.h" | 44 | #include "video_core/texture_cache/slot_vector.h" |
| 47 | #include "video_core/texture_cache/types.h" | 45 | #include "video_core/texture_cache/types.h" |
| @@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0}; | |||
| 94 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | 92 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); |
| 95 | 93 | ||
| 96 | struct Binding { | 94 | struct Binding { |
| 97 | VAddr cpu_addr{}; | 95 | DAddr device_addr{}; |
| 98 | u32 size{}; | 96 | u32 size{}; |
| 99 | BufferId buffer_id; | 97 | BufferId buffer_id; |
| 100 | }; | 98 | }; |
| @@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding { | |||
| 104 | }; | 102 | }; |
| 105 | 103 | ||
| 106 | static constexpr Binding NULL_BINDING{ | 104 | static constexpr Binding NULL_BINDING{ |
| 107 | .cpu_addr = 0, | 105 | .device_addr = 0, |
| 108 | .size = 0, | 106 | .size = 0, |
| 109 | .buffer_id = NULL_BUFFER_ID, | 107 | .buffer_id = NULL_BUFFER_ID, |
| 110 | }; | 108 | }; |
| @@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 204 | using Async_Buffer = typename P::Async_Buffer; | 202 | using Async_Buffer = typename P::Async_Buffer; |
| 205 | using MemoryTracker = typename P::MemoryTracker; | 203 | using MemoryTracker = typename P::MemoryTracker; |
| 206 | 204 | ||
| 207 | using IntervalCompare = std::less<VAddr>; | 205 | using IntervalCompare = std::less<DAddr>; |
| 208 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | 206 | using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; |
| 209 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | 207 | using IntervalAllocator = boost::fast_pool_allocator<DAddr>; |
| 210 | using IntervalSet = boost::icl::interval_set<VAddr>; | 208 | using IntervalSet = boost::icl::interval_set<DAddr>; |
| 211 | using IntervalType = typename IntervalSet::interval_type; | 209 | using IntervalType = typename IntervalSet::interval_type; |
| 212 | 210 | ||
| 213 | template <typename Type> | 211 | template <typename Type> |
| @@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 230 | 228 | ||
| 231 | using OverlapCombine = counter_add_functor<int>; | 229 | using OverlapCombine = counter_add_functor<int>; |
| 232 | using OverlapSection = boost::icl::inter_section<int>; | 230 | using OverlapSection = boost::icl::inter_section<int>; |
| 233 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 231 | using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; |
| 234 | 232 | ||
| 235 | struct OverlapResult { | 233 | struct OverlapResult { |
| 236 | boost::container::small_vector<BufferId, 16> ids; | 234 | boost::container::small_vector<BufferId, 16> ids; |
| 237 | VAddr begin; | 235 | DAddr begin; |
| 238 | VAddr end; | 236 | DAddr end; |
| 239 | bool has_stream_leap = false; | 237 | bool has_stream_leap = false; |
| 240 | }; | 238 | }; |
| 241 | 239 | ||
| 242 | public: | 240 | public: |
| 243 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 241 | explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); |
| 244 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 245 | 242 | ||
| 246 | void TickFrame(); | 243 | void TickFrame(); |
| 247 | 244 | ||
| 248 | void WriteMemory(VAddr cpu_addr, u64 size); | 245 | void WriteMemory(DAddr device_addr, u64 size); |
| 249 | 246 | ||
| 250 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 247 | void CachedWriteMemory(DAddr device_addr, u64 size); |
| 251 | 248 | ||
| 252 | bool OnCPUWrite(VAddr cpu_addr, u64 size); | 249 | bool OnCPUWrite(DAddr device_addr, u64 size); |
| 253 | 250 | ||
| 254 | void DownloadMemory(VAddr cpu_addr, u64 size); | 251 | void DownloadMemory(DAddr device_addr, u64 size); |
| 255 | 252 | ||
| 256 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 253 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size); |
| 257 | 254 | ||
| 258 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | 255 | bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |
| 259 | 256 | ||
| 260 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | 257 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 261 | 258 | ||
| @@ -300,7 +297,7 @@ public: | |||
| 300 | ObtainBufferSynchronize sync_info, | 297 | ObtainBufferSynchronize sync_info, |
| 301 | ObtainBufferOperation post_op); | 298 | ObtainBufferOperation post_op); |
| 302 | 299 | ||
| 303 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | 300 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size, |
| 304 | ObtainBufferSynchronize sync_info, | 301 | ObtainBufferSynchronize sync_info, |
| 305 | ObtainBufferOperation post_op); | 302 | ObtainBufferOperation post_op); |
| 306 | void FlushCachedWrites(); | 303 | void FlushCachedWrites(); |
| @@ -326,13 +323,13 @@ public: | |||
| 326 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | 323 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); |
| 327 | 324 | ||
| 328 | /// Return true when a CPU region is modified from the GPU | 325 | /// Return true when a CPU region is modified from the GPU |
| 329 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 326 | [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); |
| 330 | 327 | ||
| 331 | /// Return true when a region is registered on the cache | 328 | /// Return true when a region is registered on the cache |
| 332 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | 329 | [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size); |
| 333 | 330 | ||
| 334 | /// Return true when a CPU region is modified from the CPU | 331 | /// Return true when a CPU region is modified from the CPU |
| 335 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 332 | [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size); |
| 336 | 333 | ||
| 337 | void SetDrawIndirect( | 334 | void SetDrawIndirect( |
| 338 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | 335 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { |
| @@ -366,9 +363,9 @@ private: | |||
| 366 | } | 363 | } |
| 367 | 364 | ||
| 368 | template <typename Func> | 365 | template <typename Func> |
| 369 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | 366 | void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) { |
| 370 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | 367 | const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); |
| 371 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | 368 | for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { |
| 372 | const BufferId buffer_id = page_table[page]; | 369 | const BufferId buffer_id = page_table[page]; |
| 373 | if (!buffer_id) { | 370 | if (!buffer_id) { |
| 374 | ++page; | 371 | ++page; |
| @@ -377,15 +374,15 @@ private: | |||
| 377 | Buffer& buffer = slot_buffers[buffer_id]; | 374 | Buffer& buffer = slot_buffers[buffer_id]; |
| 378 | func(buffer_id, buffer); | 375 | func(buffer_id, buffer); |
| 379 | 376 | ||
| 380 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 377 | const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); |
| 381 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | 378 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 382 | } | 379 | } |
| 383 | } | 380 | } |
| 384 | 381 | ||
| 385 | template <typename Func> | 382 | template <typename Func> |
| 386 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | 383 | void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { |
| 387 | const VAddr start_address = cpu_addr; | 384 | const DAddr start_address = device_addr; |
| 388 | const VAddr end_address = start_address + size; | 385 | const DAddr end_address = start_address + size; |
| 389 | const IntervalType search_interval{start_address, end_address}; | 386 | const IntervalType search_interval{start_address, end_address}; |
| 390 | auto it = current_range.lower_bound(search_interval); | 387 | auto it = current_range.lower_bound(search_interval); |
| 391 | if (it == current_range.end()) { | 388 | if (it == current_range.end()) { |
| @@ -393,8 +390,8 @@ private: | |||
| 393 | } | 390 | } |
| 394 | auto end_it = current_range.upper_bound(search_interval); | 391 | auto end_it = current_range.upper_bound(search_interval); |
| 395 | for (; it != end_it; it++) { | 392 | for (; it != end_it; it++) { |
| 396 | VAddr inter_addr_end = it->upper(); | 393 | DAddr inter_addr_end = it->upper(); |
| 397 | VAddr inter_addr = it->lower(); | 394 | DAddr inter_addr = it->lower(); |
| 398 | if (inter_addr_end > end_address) { | 395 | if (inter_addr_end > end_address) { |
| 399 | inter_addr_end = end_address; | 396 | inter_addr_end = end_address; |
| 400 | } | 397 | } |
| @@ -406,10 +403,10 @@ private: | |||
| 406 | } | 403 | } |
| 407 | 404 | ||
| 408 | template <typename Func> | 405 | template <typename Func> |
| 409 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | 406 | void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, |
| 410 | Func&& func) { | 407 | Func&& func) { |
| 411 | const VAddr start_address = cpu_addr; | 408 | const DAddr start_address = device_addr; |
| 412 | const VAddr end_address = start_address + size; | 409 | const DAddr end_address = start_address + size; |
| 413 | const IntervalType search_interval{start_address, end_address}; | 410 | const IntervalType search_interval{start_address, end_address}; |
| 414 | auto it = current_range.lower_bound(search_interval); | 411 | auto it = current_range.lower_bound(search_interval); |
| 415 | if (it == current_range.end()) { | 412 | if (it == current_range.end()) { |
| @@ -418,8 +415,8 @@ private: | |||
| 418 | auto end_it = current_range.upper_bound(search_interval); | 415 | auto end_it = current_range.upper_bound(search_interval); |
| 419 | for (; it != end_it; it++) { | 416 | for (; it != end_it; it++) { |
| 420 | auto& inter = it->first; | 417 | auto& inter = it->first; |
| 421 | VAddr inter_addr_end = inter.upper(); | 418 | DAddr inter_addr_end = inter.upper(); |
| 422 | VAddr inter_addr = inter.lower(); | 419 | DAddr inter_addr = inter.lower(); |
| 423 | if (inter_addr_end > end_address) { | 420 | if (inter_addr_end > end_address) { |
| 424 | inter_addr_end = end_address; | 421 | inter_addr_end = end_address; |
| 425 | } | 422 | } |
| @@ -451,9 +448,9 @@ private: | |||
| 451 | } while (any_removals); | 448 | } while (any_removals); |
| 452 | } | 449 | } |
| 453 | 450 | ||
| 454 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | 451 | static bool IsRangeGranular(DAddr device_addr, size_t size) { |
| 455 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | 452 | return (device_addr & ~Core::Memory::YUZU_PAGEMASK) == |
| 456 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | 453 | ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK); |
| 457 | } | 454 | } |
| 458 | 455 | ||
| 459 | void RunGarbageCollector(); | 456 | void RunGarbageCollector(); |
| @@ -508,15 +505,15 @@ private: | |||
| 508 | 505 | ||
| 509 | void UpdateComputeTextureBuffers(); | 506 | void UpdateComputeTextureBuffers(); |
| 510 | 507 | ||
| 511 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | 508 | void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size); |
| 512 | 509 | ||
| 513 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | 510 | [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size); |
| 514 | 511 | ||
| 515 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | 512 | [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size); |
| 516 | 513 | ||
| 517 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | 514 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); |
| 518 | 515 | ||
| 519 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | 516 | [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size); |
| 520 | 517 | ||
| 521 | void Register(BufferId buffer_id); | 518 | void Register(BufferId buffer_id); |
| 522 | 519 | ||
| @@ -527,7 +524,7 @@ private: | |||
| 527 | 524 | ||
| 528 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | 525 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; |
| 529 | 526 | ||
| 530 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 527 | bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size); |
| 531 | 528 | ||
| 532 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 529 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 533 | std::span<BufferCopy> copies); | 530 | std::span<BufferCopy> copies); |
| @@ -539,7 +536,7 @@ private: | |||
| 539 | 536 | ||
| 540 | void DownloadBufferMemory(Buffer& buffer_id); | 537 | void DownloadBufferMemory(Buffer& buffer_id); |
| 541 | 538 | ||
| 542 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | 539 | void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size); |
| 543 | 540 | ||
| 544 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | 541 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); |
| 545 | 542 | ||
| @@ -549,7 +546,7 @@ private: | |||
| 549 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 546 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 550 | PixelFormat format); | 547 | PixelFormat format); |
| 551 | 548 | ||
| 552 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | 549 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size); |
| 553 | 550 | ||
| 554 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | 551 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); |
| 555 | 552 | ||
| @@ -557,11 +554,10 @@ private: | |||
| 557 | 554 | ||
| 558 | void ClearDownload(IntervalType subtract_interval); | 555 | void ClearDownload(IntervalType subtract_interval); |
| 559 | 556 | ||
| 560 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 557 | void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 561 | std::span<const u8> inlined_buffer); | 558 | std::span<const u8> inlined_buffer); |
| 562 | 559 | ||
| 563 | VideoCore::RasterizerInterface& rasterizer; | 560 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 564 | Core::Memory::Memory& cpu_memory; | ||
| 565 | 561 | ||
| 566 | SlotVector<Buffer> slot_buffers; | 562 | SlotVector<Buffer> slot_buffers; |
| 567 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 563 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| @@ -598,7 +594,7 @@ private: | |||
| 598 | u64 critical_memory = 0; | 594 | u64 critical_memory = 0; |
| 599 | BufferId inline_buffer_id; | 595 | BufferId inline_buffer_id; |
| 600 | 596 | ||
| 601 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 597 | std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table; |
| 602 | Common::ScratchBuffer<u8> tmp_buffer; | 598 | Common::ScratchBuffer<u8> tmp_buffer; |
| 603 | }; | 599 | }; |
| 604 | 600 | ||
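The page_table member above is sized as (address-space bits) >> CACHING_PAGEBITS entries, so narrowing the tracked space from 39 VAddr bits to 34 DAddr bits shrinks the array considerably. A quick compile-time check of the effect, assuming the 64 KiB caching page (CACHING_PAGEBITS = 16) and a 32-bit BufferId; both values are assumptions taken from elsewhere in the cache, not from this hunk.

    #include <cstdint>

    constexpr std::uint64_t caching_page_bits = 16; // assumption: 64 KiB caching page
    constexpr std::uint64_t old_entries = (std::uint64_t{1} << 39) >> caching_page_bits;
    constexpr std::uint64_t new_entries = (std::uint64_t{1} << 34) >> caching_page_bits;

    static_assert(old_entries == 8'388'608); // ~32 MiB of 32-bit BufferId slots per cache
    static_assert(new_entries == 262'144);   // ~1 MiB after the move to 34-bit device addresses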
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index 6c1c8287b..c95eed1f6 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -17,19 +17,19 @@ | |||
| 17 | 17 | ||
| 18 | namespace VideoCommon { | 18 | namespace VideoCommon { |
| 19 | 19 | ||
| 20 | template <class RasterizerInterface> | 20 | template <typename DeviceTracker> |
| 21 | class MemoryTrackerBase { | 21 | class MemoryTrackerBase { |
| 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 39; | 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 34; |
| 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; | 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; |
| 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; |
| 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; |
| 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); |
| 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; | 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; |
| 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; |
| 29 | using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | 29 | using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>; |
| 30 | 30 | ||
| 31 | public: | 31 | public: |
| 32 | MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | 32 | MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {} |
| 33 | ~MemoryTrackerBase() = default; | 33 | ~MemoryTrackerBase() = default; |
| 34 | 34 | ||
| 35 | /// Returns the inclusive CPU modified range in a begin end pair | 35 | /// Returns the inclusive CPU modified range in a begin end pair |
| @@ -74,7 +74,7 @@ public: | |||
| 74 | }); | 74 | }); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | /// Mark region as CPU modified, notifying the rasterizer about this change | 77 | /// Mark region as CPU modified, notifying the device_tracker about this change |
| 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 79 | IteratePages<true>(dirty_cpu_addr, query_size, | 79 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 80 | [](Manager* manager, u64 offset, size_t size) { | 80 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -83,7 +83,7 @@ public: | |||
| 83 | }); | 83 | }); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /// Unmark region as CPU modified, notifying the rasterizer about this change | 86 | /// Unmark region as CPU modified, notifying the device_tracker about this change |
| 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 88 | IteratePages<true>(dirty_cpu_addr, query_size, | 88 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 89 | [](Manager* manager, u64 offset, size_t size) { | 89 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -139,7 +139,7 @@ public: | |||
| 139 | }); | 139 | }); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | /// Flushes cached CPU writes, and notify the rasterizer about the deltas | 142 | /// Flushes cached CPU writes, and notify the device_tracker about the deltas |
| 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { |
| 144 | IteratePages<false>(query_cpu_addr, query_size, | 144 | IteratePages<false>(query_cpu_addr, query_size, |
| 145 | [](Manager* manager, [[maybe_unused]] u64 offset, | 145 | [](Manager* manager, [[maybe_unused]] u64 offset, |
| @@ -280,7 +280,7 @@ private: | |||
| 280 | manager_pool.emplace_back(); | 280 | manager_pool.emplace_back(); |
| 281 | auto& last_pool = manager_pool.back(); | 281 | auto& last_pool = manager_pool.back(); |
| 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { |
| 283 | new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | 283 | new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE); |
| 284 | free_managers.push_back(&last_pool[i]); | 284 | free_managers.push_back(&last_pool[i]); |
| 285 | } | 285 | } |
| 286 | return on_return(); | 286 | return on_return(); |
| @@ -293,7 +293,7 @@ private: | |||
| 293 | 293 | ||
| 294 | std::unordered_set<u32> cached_pages; | 294 | std::unordered_set<u32> cached_pages; |
| 295 | 295 | ||
| 296 | RasterizerInterface* rasterizer = nullptr; | 296 | DeviceTracker* device_tracker = nullptr; |
| 297 | }; | 297 | }; |
| 298 | 298 | ||
| 299 | } // namespace VideoCommon | 299 | } // namespace VideoCommon |
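With MAX_CPU_PAGE_BITS lowered to 34, the derived constants in MemoryTrackerBase fall out directly; the sketch below just restates that arithmetic as static_asserts using the values visible in the hunk above.

    #include <cstddef>

    constexpr std::size_t MAX_CPU_PAGE_BITS = 34;  // device address space tracked
    constexpr std::size_t HIGHER_PAGE_BITS = 22;   // each WordManager covers one higher page
    constexpr std::size_t HIGHER_PAGE_SIZE = std::size_t{1} << HIGHER_PAGE_BITS;
    constexpr std::size_t NUM_HIGH_PAGES = std::size_t{1} << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);

    static_assert(HIGHER_PAGE_SIZE == 4u << 20); // 4 MiB per manager, unchanged
    static_assert(NUM_HIGH_PAGES == 4096);       // 2^(34-22) top-level slots, down from 2^17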
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..56ab4f5f1 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -163,11 +163,11 @@ struct Words { | |||
| 163 | WordsArray<stack_words> preflushable; | 163 | WordsArray<stack_words> preflushable; |
| 164 | }; | 164 | }; |
| 165 | 165 | ||
| 166 | template <class RasterizerInterface, size_t stack_words = 1> | 166 | template <class DeviceTracker, size_t stack_words = 1> |
| 167 | class WordManager { | 167 | class WordManager { |
| 168 | public: | 168 | public: |
| 169 | explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | 169 | explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes) |
| 170 | : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | 170 | : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {} |
| 171 | 171 | ||
| 172 | explicit WordManager() = default; | 172 | explicit WordManager() = default; |
| 173 | 173 | ||
| @@ -279,7 +279,7 @@ public: | |||
| 279 | } | 279 | } |
| 280 | 280 | ||
| 281 | /** | 281 | /** |
| 282 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | 282 | * Loop over each page in the given range, turn off those bits and notify the tracker if |
| 283 | * needed. Call the given function on each turned off range. | 283 | * needed. Call the given function on each turned off range. |
| 284 | * | 284 | * |
| 285 | * @param query_cpu_range Base CPU address to loop over | 285 | * @param query_cpu_range Base CPU address to loop over |
| @@ -459,26 +459,26 @@ private: | |||
| 459 | } | 459 | } |
| 460 | 460 | ||
| 461 | /** | 461 | /** |
| 462 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | 462 | * Notify tracker about changes in the CPU tracking state of a word in the buffer |
| 463 | * | 463 | * |
| 464 | * @param word_index Index to the word to notify to the rasterizer | 464 | * @param word_index Index to the word to notify to the tracker |
| 465 | * @param current_bits Current state of the word | 465 | * @param current_bits Current state of the word |
| 466 | * @param new_bits New state of the word | 466 | * @param new_bits New state of the word |
| 467 | * | 467 | * |
| 468 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | 468 | * @tparam add_to_tracker True when the tracker should start tracking the new pages |
| 469 | */ | 469 | */ |
| 470 | template <bool add_to_rasterizer> | 470 | template <bool add_to_tracker> |
| 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |
| 472 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | 472 | u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; |
| 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { |
| 475 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | 475 | tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, |
| 476 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | 476 | size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1); |
| 477 | }); | 477 | }); |
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | VAddr cpu_addr = 0; | 480 | VAddr cpu_addr = 0; |
| 481 | RasterizerInterface* rasterizer = nullptr; | 481 | DeviceTracker* tracker = nullptr; |
| 482 | Words<stack_words> words; | 482 | Words<stack_words> words; |
| 483 | }; | 483 | }; |
| 484 | 484 | ||
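The WordManager rename only changes who receives the notification; the bit arithmetic in NotifyRasterizer is untouched. The following is a self-contained model of that step: compute which pages changed tracking state, then report each run of pages with +1 or -1. The run-iteration helper and the callback are stand-ins, not yuzu's IteratePages or UpdatePagesCachedCount.

    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <functional>

    constexpr std::uint64_t BYTES_PER_PAGE = 4096;

    // Call `func(first_page, page_count)` for every run of set bits in `mask`.
    template <typename Func>
    void ForEachBitRun(std::uint64_t mask, Func&& func) {
        while (mask != 0) {
            const int first = std::countr_zero(mask);           // start of the run
            const int length = std::countr_one(mask >> first);  // length of the run
            func(static_cast<std::size_t>(first), static_cast<std::size_t>(length));
            if (first + length >= 64) {
                break; // run reaches bit 63; nothing left above it
            }
            mask &= ~(((std::uint64_t{1} << length) - 1) << first); // clear the run
        }
    }

    // Pages whose tracking state changes are reported with +1 (start tracking) or -1 (stop).
    template <bool add_to_tracker>
    void Notify(std::uint64_t current_bits, std::uint64_t new_bits, std::uint64_t base_addr,
                const std::function<void(std::uint64_t, std::uint64_t, int)>& update_cached_count) {
        const std::uint64_t changed = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
        ForEachBitRun(changed, [&](std::size_t offset, std::size_t size) {
            update_cached_count(base_addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
                                add_to_tracker ? 1 : -1);
        });
    }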
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 58ce0d8c2..d461c5be8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -5,10 +5,10 @@ | |||
| 5 | #include "common/microprofile.h" | 5 | #include "common/microprofile.h" |
| 6 | #include "common/settings.h" | 6 | #include "common/settings.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/memory.h" | ||
| 9 | #include "video_core/dma_pusher.h" | 8 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| 11 | #include "video_core/guest_memory.h" | ||
| 12 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 13 | 13 | ||
| 14 | namespace Tegra { | 14 | namespace Tegra { |
| @@ -85,15 +85,15 @@ bool DmaPusher::Step() { | |||
| 85 | } | 85 | } |
| 86 | } | 86 | } |
| 87 | const auto safe_process = [&] { | 87 | const auto safe_process = [&] { |
| 88 | Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | 88 | Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, |
| 89 | Core::Memory::GuestMemoryFlags::SafeRead> | 89 | Tegra::Memory::GuestMemoryFlags::SafeRead> |
| 90 | headers(memory_manager, dma_state.dma_get, command_list_header.size, | 90 | headers(memory_manager, dma_state.dma_get, command_list_header.size, |
| 91 | &command_headers); | 91 | &command_headers); |
| 92 | ProcessCommands(headers); | 92 | ProcessCommands(headers); |
| 93 | }; | 93 | }; |
| 94 | const auto unsafe_process = [&] { | 94 | const auto unsafe_process = [&] { |
| 95 | Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | 95 | Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, |
| 96 | Core::Memory::GuestMemoryFlags::UnsafeRead> | 96 | Tegra::Memory::GuestMemoryFlags::UnsafeRead> |
| 97 | headers(memory_manager, dma_state.dma_get, command_list_header.size, | 97 | headers(memory_manager, dma_state.dma_get, command_list_header.size, |
| 98 | &command_headers); | 98 | &command_headers); |
| 99 | ProcessCommands(headers); | 99 | ProcessCommands(headers); |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index bc64d4486..e5cc04ec4 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -5,8 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include "common/algorithm.h" | 6 | #include "common/algorithm.h" |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "core/memory.h" | ||
| 9 | #include "video_core/engines/engine_upload.h" | 8 | #include "video_core/engines/engine_upload.h" |
| 9 | #include "video_core/guest_memory.h" | ||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/textures/decoders.h" | 12 | #include "video_core/textures/decoders.h" |
| @@ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 68 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | 68 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 69 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | 69 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); |
| 70 | 70 | ||
| 71 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 71 | Tegra::Memory::GpuGuestMemoryScoped<u8, |
| 72 | Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||
| 72 | tmp(memory_manager, address, dst_size, &tmp_buffer); | 73 | tmp(memory_manager, address, dst_size, &tmp_buffer); |
| 73 | 74 | ||
| 74 | Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, | 75 | Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 56fbff306..4bf461fb0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | 13 | #include "video_core/engines/maxwell_dma.h" |
| 14 | #include "video_core/guest_memory.h" | ||
| 14 | #include "video_core/memory_manager.h" | 15 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/renderer_base.h" | 16 | #include "video_core/renderer_base.h" |
| 16 | #include "video_core/textures/decoders.h" | 17 | #include "video_core/textures/decoders.h" |
| @@ -133,8 +134,8 @@ void MaxwellDMA::Launch() { | |||
| 133 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 134 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 134 | read_buffer.resize_destructive(16); | 135 | read_buffer.resize_destructive(16); |
| 135 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 136 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 136 | Core::Memory::GpuGuestMemoryScoped< | 137 | Tegra::Memory::GpuGuestMemoryScoped< |
| 137 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 138 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 138 | tmp_write_buffer(memory_manager, | 139 | tmp_write_buffer(memory_manager, |
| 139 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 140 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 140 | 16, &read_buffer); | 141 | 16, &read_buffer); |
| @@ -146,16 +147,16 @@ void MaxwellDMA::Launch() { | |||
| 146 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 147 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 147 | read_buffer.resize_destructive(16); | 148 | read_buffer.resize_destructive(16); |
| 148 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 149 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 149 | Core::Memory::GpuGuestMemoryScoped< | 150 | Tegra::Memory::GpuGuestMemoryScoped< |
| 150 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 151 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 151 | tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); | 152 | tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); |
| 152 | tmp_write_buffer.SetAddressAndSize( | 153 | tmp_write_buffer.SetAddressAndSize( |
| 153 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); | 154 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); |
| 154 | } | 155 | } |
| 155 | } else { | 156 | } else { |
| 156 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 157 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 157 | Core::Memory::GpuGuestMemoryScoped< | 158 | Tegra::Memory::GpuGuestMemoryScoped< |
| 158 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 159 | u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 159 | tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, | 160 | tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, |
| 160 | &read_buffer); | 161 | &read_buffer); |
| 161 | tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); | 162 | tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); |
| @@ -226,9 +227,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 226 | 227 | ||
| 227 | const size_t dst_size = dst_operand.pitch * regs.line_count; | 228 | const size_t dst_size = dst_operand.pitch * regs.line_count; |
| 228 | 229 | ||
| 229 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 230 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 230 | memory_manager, src_operand.address, src_size, &read_buffer); | 231 | memory_manager, src_operand.address, src_size, &read_buffer); |
| 231 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | 232 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |
| 232 | tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | 233 | tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); |
| 233 | 234 | ||
| 234 | UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | 235 | UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, |
| @@ -290,9 +291,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 290 | 291 | ||
| 291 | GPUVAddr src_addr = regs.offset_in; | 292 | GPUVAddr src_addr = regs.offset_in; |
| 292 | GPUVAddr dst_addr = regs.offset_out; | 293 | GPUVAddr dst_addr = regs.offset_out; |
| 293 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 294 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 294 | memory_manager, src_addr, src_size, &read_buffer); | 295 | memory_manager, src_addr, src_size, &read_buffer); |
| 295 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | 296 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |
| 296 | tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); | 297 | tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); |
| 297 | 298 | ||
| 298 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 299 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| @@ -344,9 +345,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 344 | 345 | ||
| 345 | intermediate_buffer.resize_destructive(mid_buffer_size); | 346 | intermediate_buffer.resize_destructive(mid_buffer_size); |
| 346 | 347 | ||
| 347 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | 348 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 348 | memory_manager, regs.offset_in, src_size, &read_buffer); | 349 | memory_manager, regs.offset_in, src_size, &read_buffer); |
| 349 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | 350 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 350 | tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | 351 | tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); |
| 351 | 352 | ||
| 352 | UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, | 353 | UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, |
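The Scoped guest-memory wrappers used throughout MaxwellDMA read the guest range on construction and write the staging buffer back when they go out of scope, which is what lets the swizzle routines operate on flat host memory. A simplified RAII model of that idea, using a plain byte vector as the "guest"; it has no relation to the real GuestMemoryScoped implementation beyond the construct/flush shape.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <span>
    #include <vector>

    // Stand-in for a guest memory backend: here just a flat host buffer.
    struct FakeGuest {
        std::vector<std::uint8_t> bytes;
        void Read(std::size_t addr, std::uint8_t* dst, std::size_t size) const {
            std::memcpy(dst, bytes.data() + addr, size);
        }
        void Write(std::size_t addr, const std::uint8_t* src, std::size_t size) {
            std::memcpy(bytes.data() + addr, src, size);
        }
    };

    // Reads on construction, writes back on destruction (the "scoped" pattern).
    class ScopedGuestSpan {
    public:
        ScopedGuestSpan(FakeGuest& guest, std::size_t addr, std::size_t size)
            : guest_{guest}, addr_{addr}, staging_(size) {
            guest_.Read(addr_, staging_.data(), staging_.size());
        }
        ~ScopedGuestSpan() { guest_.Write(addr_, staging_.data(), staging_.size()); }
        std::span<std::uint8_t> Span() { return staging_; } // mutate freely; flushed at scope exit

    private:
        FakeGuest& guest_;
        std::size_t addr_;
        std::vector<std::uint8_t> staging_;
    };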
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index 67ce9134b..b67589daf 100644 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "video_core/memory_manager.h" | 11 | #include "video_core/memory_manager.h" |
| 12 | #include "video_core/surface.h" | 12 | #include "video_core/surface.h" |
| 13 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 14 | #include "video_core/guest_memory.h" | ||
| 14 | 15 | ||
| 15 | namespace Tegra { | 16 | namespace Tegra { |
| 16 | class MemoryManager; | 17 | class MemoryManager; |
| @@ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 160 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | 161 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); |
| 161 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | 162 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); |
| 162 | 163 | ||
| 163 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | 164 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( |
| 164 | memory_manager, src.Address(), src_size, &impl->tmp_buffer); | 165 | memory_manager, src.Address(), src_size, &impl->tmp_buffer); |
| 165 | 166 | ||
| 166 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | 167 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; |
| @@ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 220 | } | 221 | } |
| 221 | 222 | ||
| 222 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | 223 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); |
| 223 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> | 224 | Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite> |
| 224 | tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); | 225 | tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); |
| 225 | 226 | ||
| 226 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | 227 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { |
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 5f3bffcab..856f4bd52 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h | |||
| @@ -14,7 +14,7 @@ namespace Tegra { | |||
| 14 | * Struct describing framebuffer configuration | 14 | * Struct describing framebuffer configuration |
| 15 | */ | 15 | */ |
| 16 | struct FramebufferConfig { | 16 | struct FramebufferConfig { |
| 17 | VAddr address{}; | 17 | DAddr address{}; |
| 18 | u32 offset{}; | 18 | u32 offset{}; |
| 19 | u32 width{}; | 19 | u32 width{}; |
| 20 | u32 height{}; | 20 | u32 height{}; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1e915682f..5f780507b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #include "video_core/renderer_base.h" | 34 | #include "video_core/renderer_base.h" |
| 35 | #include "video_core/shader_notify.h" | 35 | #include "video_core/shader_notify.h" |
| 36 | 36 | ||
| 37 | #pragma optimize("", off) | ||
| 38 | |||
| 37 | namespace Tegra { | 39 | namespace Tegra { |
| 38 | 40 | ||
| 39 | struct GPU::Impl { | 41 | struct GPU::Impl { |
| @@ -95,8 +97,8 @@ struct GPU::Impl { | |||
| 95 | 97 | ||
| 96 | /// Synchronizes CPU writes with Host GPU memory. | 98 | /// Synchronizes CPU writes with Host GPU memory. |
| 97 | void InvalidateGPUCache() { | 99 | void InvalidateGPUCache() { |
| 98 | std::function<void(VAddr, size_t)> callback_writes( | 100 | std::function<void(PAddr, size_t)> callback_writes( |
| 99 | [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | 101 | [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); |
| 100 | system.GatherGPUDirtyMemory(callback_writes); | 102 | system.GatherGPUDirtyMemory(callback_writes); |
| 101 | } | 103 | } |
| 102 | 104 | ||
| @@ -279,11 +281,11 @@ struct GPU::Impl { | |||
| 279 | } | 281 | } |
| 280 | 282 | ||
| 281 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 283 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 282 | void FlushRegion(VAddr addr, u64 size) { | 284 | void FlushRegion(DAddr addr, u64 size) { |
| 283 | gpu_thread.FlushRegion(addr, size); | 285 | gpu_thread.FlushRegion(addr, size); |
| 284 | } | 286 | } |
| 285 | 287 | ||
| 286 | VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { | 288 | VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { |
| 287 | auto raster_area = rasterizer->GetFlushArea(addr, size); | 289 | auto raster_area = rasterizer->GetFlushArea(addr, size); |
| 288 | if (raster_area.preemtive) { | 290 | if (raster_area.preemtive) { |
| 289 | return raster_area; | 291 | return raster_area; |
| @@ -299,16 +301,16 @@ struct GPU::Impl { | |||
| 299 | } | 301 | } |
| 300 | 302 | ||
| 301 | /// Notify rasterizer that any caches of the specified region should be invalidated | 303 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 302 | void InvalidateRegion(VAddr addr, u64 size) { | 304 | void InvalidateRegion(DAddr addr, u64 size) { |
| 303 | gpu_thread.InvalidateRegion(addr, size); | 305 | gpu_thread.InvalidateRegion(addr, size); |
| 304 | } | 306 | } |
| 305 | 307 | ||
| 306 | bool OnCPUWrite(VAddr addr, u64 size) { | 308 | bool OnCPUWrite(DAddr addr, u64 size) { |
| 307 | return rasterizer->OnCPUWrite(addr, size); | 309 | return rasterizer->OnCPUWrite(addr, size); |
| 308 | } | 310 | } |
| 309 | 311 | ||
| 310 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 312 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 311 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { | 313 | void FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 312 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 314 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 313 | } | 315 | } |
| 314 | 316 | ||
| @@ -437,7 +439,7 @@ void GPU::OnCommandListEnd() { | |||
| 437 | impl->OnCommandListEnd(); | 439 | impl->OnCommandListEnd(); |
| 438 | } | 440 | } |
| 439 | 441 | ||
| 440 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | 442 | u64 GPU::RequestFlush(DAddr addr, std::size_t size) { |
| 441 | return impl->RequestSyncOperation( | 443 | return impl->RequestSyncOperation( |
| 442 | [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); | 444 | [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); |
| 443 | } | 445 | } |
| @@ -557,23 +559,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 557 | impl->SwapBuffers(framebuffer); | 559 | impl->SwapBuffers(framebuffer); |
| 558 | } | 560 | } |
| 559 | 561 | ||
| 560 | VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { | 562 | VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { |
| 561 | return impl->OnCPURead(addr, size); | 563 | return impl->OnCPURead(addr, size); |
| 562 | } | 564 | } |
| 563 | 565 | ||
| 564 | void GPU::FlushRegion(VAddr addr, u64 size) { | 566 | void GPU::FlushRegion(DAddr addr, u64 size) { |
| 565 | impl->FlushRegion(addr, size); | 567 | impl->FlushRegion(addr, size); |
| 566 | } | 568 | } |
| 567 | 569 | ||
| 568 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | 570 | void GPU::InvalidateRegion(DAddr addr, u64 size) { |
| 569 | impl->InvalidateRegion(addr, size); | 571 | impl->InvalidateRegion(addr, size); |
| 570 | } | 572 | } |
| 571 | 573 | ||
| 572 | bool GPU::OnCPUWrite(VAddr addr, u64 size) { | 574 | bool GPU::OnCPUWrite(DAddr addr, u64 size) { |
| 573 | return impl->OnCPUWrite(addr, size); | 575 | return impl->OnCPUWrite(addr, size); |
| 574 | } | 576 | } |
| 575 | 577 | ||
| 576 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 578 | void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 577 | impl->FlushAndInvalidateRegion(addr, size); | 579 | impl->FlushAndInvalidateRegion(addr, size); |
| 578 | } | 580 | } |
| 579 | 581 | ||
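InvalidateGPUCache now gathers dirty ranges keyed by physical/device addresses and forwards them to the rasterizer through a callback. A toy version of that gather-and-forward flow is sketched below, with a plain vector standing in for the process' dirty-memory bookkeeping; DirtyTracker and Rasterizer here are illustrative types only.

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <utility>
    #include <vector>

    using PAddr = std::uint64_t;

    struct DirtyTracker {
        std::vector<std::pair<PAddr, std::size_t>> dirty; // (address, size) ranges

        // Hand every pending range to the callback, then forget it.
        void Gather(const std::function<void(PAddr, std::size_t)>& callback) {
            for (const auto& [addr, size] : dirty) {
                callback(addr, size);
            }
            dirty.clear();
        }
    };

    struct Rasterizer {
        std::vector<std::pair<PAddr, std::size_t>> invalidated;
        void OnCacheInvalidation(PAddr addr, std::size_t size) {
            invalidated.emplace_back(addr, size);
        }
    };

    int main() {
        DirtyTracker tracker;
        Rasterizer rasterizer;
        tracker.dirty = {{0x1000, 0x200}, {0x8000, 0x40}};
        std::function<void(PAddr, std::size_t)> callback(
            [&](PAddr addr, std::size_t size) { rasterizer.OnCacheInvalidation(addr, size); });
        tracker.Gather(callback);
        return rasterizer.invalidated.size() == 2 ? 0 : 1;
    }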
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ba2838b89..b3c1d15bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -158,7 +158,7 @@ public: | |||
| 158 | void InitAddressSpace(Tegra::MemoryManager& memory_manager); | 158 | void InitAddressSpace(Tegra::MemoryManager& memory_manager); |
| 159 | 159 | ||
| 160 | /// Request a host GPU memory flush from the CPU. | 160 | /// Request a host GPU memory flush from the CPU. |
| 161 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 161 | [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size); |
| 162 | 162 | ||
| 163 | /// Obtains current flush request fence id. | 163 | /// Obtains current flush request fence id. |
| 164 | [[nodiscard]] u64 CurrentSyncRequestFence() const; | 164 | [[nodiscard]] u64 CurrentSyncRequestFence() const; |
| @@ -242,20 +242,20 @@ public: | |||
| 242 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 242 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 243 | 243 | ||
| 244 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 244 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 245 | [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); | 245 | [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size); |
| 246 | 246 | ||
| 247 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 247 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 248 | void FlushRegion(VAddr addr, u64 size); | 248 | void FlushRegion(DAddr addr, u64 size); |
| 249 | 249 | ||
| 250 | /// Notify rasterizer that any caches of the specified region should be invalidated | 250 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 251 | void InvalidateRegion(VAddr addr, u64 size); | 251 | void InvalidateRegion(DAddr addr, u64 size); |
| 252 | 252 | ||
| 253 | /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is | 253 | /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is |
| 254 | /// sensible, false otherwise | 254 | /// sensible, false otherwise |
| 255 | bool OnCPUWrite(VAddr addr, u64 size); | 255 | bool OnCPUWrite(DAddr addr, u64 size); |
| 256 | 256 | ||
| 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 258 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 258 | void FlushAndInvalidateRegion(DAddr addr, u64 size); |
| 259 | 259 | ||
| 260 | private: | 260 | private: |
| 261 | struct Impl; | 261 | struct Impl; |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2f0f9f593..788d4f61e 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 82 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | 82 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 85 | void ThreadManager::FlushRegion(DAddr addr, u64 size) { |
| 86 | if (!is_async) { | 86 | if (!is_async) { |
| 87 | // Always flush with synchronous GPU mode | 87 | // Always flush with synchronous GPU mode |
| 88 | PushCommand(FlushRegionCommand(addr, size)); | 88 | PushCommand(FlushRegionCommand(addr, size)); |
| @@ -101,11 +101,11 @@ void ThreadManager::TickGPU() { | |||
| 101 | PushCommand(GPUTickCommand()); | 101 | PushCommand(GPUTickCommand()); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { |
| 105 | rasterizer->OnCacheInvalidation(addr, size); | 105 | rasterizer->OnCacheInvalidation(addr, size); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 108 | void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { |
| 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 110 | rasterizer->OnCacheInvalidation(addr, size); | 110 | rasterizer->OnCacheInvalidation(addr, size); |
| 111 | } | 111 | } |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 43940bd6d..2de25e9ef 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -54,26 +54,26 @@ struct SwapBuffersCommand final { | |||
| 54 | 54 | ||
| 55 | /// Command to signal to the GPU thread to flush a region | 55 | /// Command to signal to the GPU thread to flush a region |
| 56 | struct FlushRegionCommand final { | 56 | struct FlushRegionCommand final { |
| 57 | explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | 57 | explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |
| 58 | 58 | ||
| 59 | VAddr addr; | 59 | DAddr addr; |
| 60 | u64 size; | 60 | u64 size; |
| 61 | }; | 61 | }; |
| 62 | 62 | ||
| 63 | /// Command to signal to the GPU thread to invalidate a region | 63 | /// Command to signal to the GPU thread to invalidate a region |
| 64 | struct InvalidateRegionCommand final { | 64 | struct InvalidateRegionCommand final { |
| 65 | explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | 65 | explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |
| 66 | 66 | ||
| 67 | VAddr addr; | 67 | DAddr addr; |
| 68 | u64 size; | 68 | u64 size; |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | /// Command to signal to the GPU thread to flush and invalidate a region | 71 | /// Command to signal to the GPU thread to flush and invalidate a region |
| 72 | struct FlushAndInvalidateRegionCommand final { | 72 | struct FlushAndInvalidateRegionCommand final { |
| 73 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) | 73 | explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_) |
| 74 | : addr{addr_}, size{size_} {} | 74 | : addr{addr_}, size{size_} {} |
| 75 | 75 | ||
| 76 | VAddr addr; | 76 | DAddr addr; |
| 77 | u64 size; | 77 | u64 size; |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
| @@ -122,13 +122,13 @@ public: | |||
| 122 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 122 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 123 | 123 | ||
| 124 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 124 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 125 | void FlushRegion(VAddr addr, u64 size); | 125 | void FlushRegion(DAddr addr, u64 size); |
| 126 | 126 | ||
| 127 | /// Notify rasterizer that any caches of the specified region should be invalidated | 127 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 128 | void InvalidateRegion(VAddr addr, u64 size); | 128 | void InvalidateRegion(DAddr addr, u64 size); |
| 129 | 129 | ||
| 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 131 | void FlushAndInvalidateRegion(DAddr addr, u64 size); |
| 132 | 132 | ||
| 133 | void TickGPU(); | 133 | void TickGPU(); |
| 134 | 134 | ||
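The GPU-thread commands above are plain structs carrying a DAddr and a byte count, pushed into the worker's queue. A compact standalone sketch of that command pattern, using std::variant for the queue element as a simplification of the real CommandData:

    #include <cstdint>
    #include <queue>
    #include <variant>

    using DAddr = std::uint64_t;

    struct FlushRegionCommand {
        DAddr addr;
        std::uint64_t size;
    };
    struct InvalidateRegionCommand {
        DAddr addr;
        std::uint64_t size;
    };
    using CommandData = std::variant<FlushRegionCommand, InvalidateRegionCommand>;

    // Consumer side: dispatch on the concrete command type.
    void Dispatch(const CommandData& command) {
        std::visit([](const auto& cmd) {
            // cmd.addr / cmd.size are a device address and a byte count in both cases.
            (void)cmd;
        }, command);
    }

    int main() {
        std::queue<CommandData> queue;
        queue.push(FlushRegionCommand{0x1000, 0x200});
        queue.push(InvalidateRegionCommand{0x2000, 0x40});
        while (!queue.empty()) {
            Dispatch(queue.front());
            queue.pop();
        }
    }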
diff --git a/src/video_core/guest_memory.h b/src/video_core/guest_memory.h new file mode 100644 index 000000000..a2132f7ea --- /dev/null +++ b/src/video_core/guest_memory.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <iterator> | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/scratch_buffer.h" | ||
| 13 | #include "core/guest_memory.h" | ||
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | |||
| 16 | namespace Tegra::Memory { | ||
| 17 | |||
| 18 | using GuestMemoryFlags = Core::Memory::GuestMemoryFlags; | ||
| 19 | |||
| 20 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 21 | using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||
| 22 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 23 | using DeviceGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||
| 24 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 25 | using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||
| 26 | template <typename T, GuestMemoryFlags FLAGS> | ||
| 27 | using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||
| 28 | |||
| 29 | } // namespace Tegra::Memory | ||
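The new header is essentially a set of alias templates binding one generic guest-memory accessor to the two memory managers video_core cares about. The pattern, reduced to a self-contained example with fake managers and a fake accessor (every identifier below is illustrative, not the Core::Memory API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    enum class AccessFlags { SafeRead, UnsafeRead };

    // Generic accessor templated on the memory manager it reads through.
    template <typename MemoryManager, typename T, AccessFlags FLAGS>
    class GuestSpan {
    public:
        GuestSpan(MemoryManager& manager, std::uint64_t addr, std::size_t count) : data_(count) {
            manager.ReadBlock(addr, data_.data(), count * sizeof(T),
                              FLAGS == AccessFlags::SafeRead);
        }
        const T* data() const { return data_.data(); }
        std::size_t size() const { return data_.size(); }

    private:
        std::vector<T> data_;
    };

    struct GpuMemoryManager {
        void ReadBlock(std::uint64_t, void*, std::size_t, bool /*safe*/) {}
    };
    struct DeviceMemoryManager {
        void ReadBlock(std::uint64_t, void*, std::size_t, bool /*safe*/) {}
    };

    // The header above does exactly this kind of binding, once per manager.
    template <typename T, AccessFlags FLAGS>
    using GpuGuestSpan = GuestSpan<GpuMemoryManager, T, FLAGS>;
    template <typename T, AccessFlags FLAGS>
    using DeviceGuestSpan = GuestSpan<DeviceMemoryManager, T, FLAGS>;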
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index 2fb77605e..a406ce965 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | ||
| 5 | |||
| 4 | #include "core/device_memory_manager.h" | 6 | #include "core/device_memory_manager.h" |
| 5 | 7 | ||
| 6 | namespace VideoCore { | 8 | namespace VideoCore { |
| @@ -12,8 +14,8 @@ namespace Tegra { | |||
| 12 | struct MaxwellDeviceMethods; | 14 | struct MaxwellDeviceMethods; |
| 13 | 15 | ||
| 14 | struct MaxwellDeviceTraits { | 16 | struct MaxwellDeviceTraits { |
| 15 | static constexpr bool supports_pinning = true; | 17 | static constexpr bool supports_pinning = false; |
| 16 | static constexpr size_t device_virtual_bits = 34; | 18 | static constexpr size_t device_virtual_bits = 32; |
| 17 | using DeviceInterface = typename VideoCore::RasterizerInterface; | 19 | using DeviceInterface = typename VideoCore::RasterizerInterface; |
| 18 | using DeviceMethods = typename MaxwellDeviceMethods; | 20 | using DeviceMethods = typename MaxwellDeviceMethods; |
| 19 | }; | 21 | }; |
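Compared to the previous traits, the Maxwell device address space shrinks from 34 to 32 bits and pinning support is switched off. A back-of-the-envelope check of what 32 device virtual bits mean, assuming 4 KiB device pages (the page size is not stated in this hunk):

    constexpr size_t device_virtual_bits = 32;
    constexpr size_t page_bits = 12; // assumed 4 KiB pages
    constexpr u64 address_space_size = u64{1} << device_virtual_bits;       // 4 GiB
    constexpr u64 page_count = u64{1} << (device_virtual_bits - page_bits); // ~1M entries
    static_assert(address_space_size == 4ull * 1024 * 1024 * 1024);
    static_assert(page_count == 1ull << 20);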
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d16040613..82f7a1c3b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -7,22 +7,24 @@ | |||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/device_memory.h" | ||
| 11 | #include "core/hle/kernel/k_page_table.h" | 10 | #include "core/hle/kernel/k_page_table.h" |
| 12 | #include "core/hle/kernel/k_process.h" | 11 | #include "core/hle/kernel/k_process.h" |
| 12 | #include "video_core/guest_memory.h" | ||
| 13 | #include "video_core/host1x/host1x.h" | ||
| 13 | #include "video_core/invalidation_accumulator.h" | 14 | #include "video_core/invalidation_accumulator.h" |
| 14 | #include "video_core/memory_manager.h" | 15 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 16 | #include "video_core/renderer_base.h" | 17 | #include "video_core/renderer_base.h" |
| 17 | 18 | ||
| 19 | |||
| 18 | namespace Tegra { | 20 | namespace Tegra { |
| 19 | using Core::Memory::GuestMemoryFlags; | 21 | using Tegra::Memory::GuestMemoryFlags; |
| 20 | 22 | ||
| 21 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | 23 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; |
| 22 | 24 | ||
| 23 | MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | 25 | MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, |
| 24 | u64 page_bits_) | 26 | u64 page_bits_) |
| 25 | : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, | 27 | : system{system_}, memory{system.Host1x().MemoryManager()}, |
| 26 | address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, | 28 | address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, |
| 27 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | 29 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
| 28 | page_bits != big_page_bits ? page_bits : 0}, | 30 | page_bits != big_page_bits ? page_bits : 0}, |
| @@ -42,7 +44,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 42 | big_page_table_mask = big_page_table_size - 1; | 44 | big_page_table_mask = big_page_table_size - 1; |
| 43 | 45 | ||
| 44 | big_entries.resize(big_page_table_size / 32, 0); | 46 | big_entries.resize(big_page_table_size / 32, 0); |
| 45 | big_page_table_cpu.resize(big_page_table_size); | 47 | big_page_table_dev.resize(big_page_table_size); |
| 46 | big_page_continuous.resize(big_page_table_size / continuous_bits, 0); | 48 | big_page_continuous.resize(big_page_table_size / continuous_bits, 0); |
| 47 | entries.resize(page_table_size / 32, 0); | 49 | entries.resize(page_table_size / 32, 0); |
| 48 | } | 50 | } |
| @@ -100,7 +102,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu | |||
| 100 | } | 102 | } |
| 101 | 103 | ||
| 102 | template <MemoryManager::EntryType entry_type> | 104 | template <MemoryManager::EntryType entry_type> |
| 103 | GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 105 | GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 104 | PTEKind kind) { | 106 | PTEKind kind) { |
| 105 | [[maybe_unused]] u64 remaining_size{size}; | 107 | [[maybe_unused]] u64 remaining_size{size}; |
| 106 | if constexpr (entry_type == EntryType::Mapped) { | 108 | if constexpr (entry_type == EntryType::Mapped) { |
| @@ -114,9 +116,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 114 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); | 116 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); |
| 115 | } | 117 | } |
| 116 | if constexpr (entry_type == EntryType::Mapped) { | 118 | if constexpr (entry_type == EntryType::Mapped) { |
| 117 | const VAddr current_cpu_addr = cpu_addr + offset; | 119 | const DAddr current_dev_addr = dev_addr + offset; |
| 118 | const auto index = PageEntryIndex<false>(current_gpu_addr); | 120 | const auto index = PageEntryIndex<false>(current_gpu_addr); |
| 119 | const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | 121 | const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); |
| 120 | page_table[index] = sub_value; | 122 | page_table[index] = sub_value; |
| 121 | } | 123 | } |
| 122 | remaining_size -= page_size; | 124 | remaining_size -= page_size; |
| @@ -126,7 +128,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 126 | } | 128 | } |
| 127 | 129 | ||
| 128 | template <MemoryManager::EntryType entry_type> | 130 | template <MemoryManager::EntryType entry_type> |
| 129 | GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, | 131 | GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, |
| 130 | size_t size, PTEKind kind) { | 132 | size_t size, PTEKind kind) { |
| 131 | [[maybe_unused]] u64 remaining_size{size}; | 133 | [[maybe_unused]] u64 remaining_size{size}; |
| 132 | for (u64 offset{}; offset < size; offset += big_page_size) { | 134 | for (u64 offset{}; offset < size; offset += big_page_size) { |
| @@ -137,20 +139,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 137 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); | 139 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); |
| 138 | } | 140 | } |
| 139 | if constexpr (entry_type == EntryType::Mapped) { | 141 | if constexpr (entry_type == EntryType::Mapped) { |
| 140 | const VAddr current_cpu_addr = cpu_addr + offset; | 142 | const DAddr current_dev_addr = dev_addr + offset; |
| 141 | const auto index = PageEntryIndex<true>(current_gpu_addr); | 143 | const auto index = PageEntryIndex<true>(current_gpu_addr); |
| 142 | const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | 144 | const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); |
| 143 | big_page_table_cpu[index] = sub_value; | 145 | big_page_table_dev[index] = sub_value; |
| 144 | const bool is_continuous = ([&] { | 146 | const bool is_continuous = ([&] { |
| 145 | uintptr_t base_ptr{ | 147 | uintptr_t base_ptr{ |
| 146 | reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; | 148 | reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))}; |
| 147 | if (base_ptr == 0) { | 149 | if (base_ptr == 0) { |
| 148 | return false; | 150 | return false; |
| 149 | } | 151 | } |
| 150 | for (VAddr start_cpu = current_cpu_addr + page_size; | 152 | for (DAddr start_cpu = current_dev_addr + page_size; |
| 151 | start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { | 153 | start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) { |
| 152 | base_ptr += page_size; | 154 | base_ptr += page_size; |
| 153 | auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); | 155 | auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu)); |
| 154 | if (next_ptr == 0 || base_ptr != next_ptr) { | 156 | if (next_ptr == 0 || base_ptr != next_ptr) { |
| 155 | return false; | 157 | return false; |
| 156 | } | 158 | } |
| @@ -172,12 +174,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | |||
| 172 | rasterizer = rasterizer_; | 174 | rasterizer = rasterizer_; |
| 173 | } | 175 | } |
| 174 | 176 | ||
| 175 | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, | 177 | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind, |
| 176 | bool is_big_pages) { | 178 | bool is_big_pages) { |
| 177 | if (is_big_pages) [[likely]] { | 179 | if (is_big_pages) [[likely]] { |
| 178 | return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | 180 | return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); |
| 179 | } | 181 | } |
| 180 | return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | 182 | return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); |
| 181 | } | 183 | } |
| 182 | 184 | ||
| 183 | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { | 185 | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { |
| @@ -202,7 +204,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 202 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 204 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| 203 | } | 205 | } |
| 204 | 206 | ||
| 205 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | 207 | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { |
| 206 | if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { | 208 | if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { |
| 207 | return std::nullopt; | 209 | return std::nullopt; |
| 208 | } | 210 | } |
| @@ -211,17 +213,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | |||
| 211 | return std::nullopt; | 213 | return std::nullopt; |
| 212 | } | 214 | } |
| 213 | 215 | ||
| 214 | const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) | 216 | const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) |
| 215 | << cpu_page_bits; | 217 | << cpu_page_bits; |
| 216 | return cpu_addr_base + (gpu_addr & page_mask); | 218 | return dev_addr_base + (gpu_addr & page_mask); |
| 217 | } | 219 | } |
| 218 | 220 | ||
| 219 | const VAddr cpu_addr_base = | 221 | const DAddr dev_addr_base = |
| 220 | static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; | 222 | static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; |
| 221 | return cpu_addr_base + (gpu_addr & big_page_mask); | 223 | return dev_addr_base + (gpu_addr & big_page_mask); |
| 222 | } | 224 | } |
| 223 | 225 | ||
| 224 | std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | 226 | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { |
| 225 | size_t page_index{addr >> page_bits}; | 227 | size_t page_index{addr >> page_bits}; |
| 226 | const size_t page_last{(addr + size + page_size - 1) >> page_bits}; | 228 | const size_t page_last{(addr + size + page_size - 1) >> page_bits}; |
| 227 | while (page_index < page_last) { | 229 | while (page_index < page_last) { |
| @@ -274,7 +276,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { | |||
| 274 | return {}; | 276 | return {}; |
| 275 | } | 277 | } |
| 276 | 278 | ||
| 277 | return memory.GetPointer(*address); | 279 | return memory.GetPointer<u8>(*address); |
| 278 | } | 280 | } |
| 279 | 281 | ||
| 280 | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | 282 | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { |
| @@ -283,7 +285,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | |||
| 283 | return {}; | 285 | return {}; |
| 284 | } | 286 | } |
| 285 | 287 | ||
| 286 | return memory.GetPointer(*address); | 288 | return memory.GetPointer<u8>(*address); |
| 287 | } | 289 | } |
| 288 | 290 | ||
| 289 | #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. | 291 | #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. |
| @@ -367,25 +369,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
| 367 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 369 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 368 | }; | 370 | }; |
| 369 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 371 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 370 | const VAddr cpu_addr_base = | 372 | const DAddr dev_addr_base = |
| 371 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 373 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 372 | if constexpr (is_safe) { | 374 | if constexpr (is_safe) { |
| 373 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 375 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 374 | } | 376 | } |
| 375 | u8* physical = memory.GetPointer(cpu_addr_base); | 377 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 376 | std::memcpy(dest_buffer, physical, copy_amount); | 378 | std::memcpy(dest_buffer, physical, copy_amount); |
| 377 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 379 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 378 | }; | 380 | }; |
| 379 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 381 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 380 | const VAddr cpu_addr_base = | 382 | const DAddr dev_addr_base = |
| 381 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 383 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 382 | if constexpr (is_safe) { | 384 | if constexpr (is_safe) { |
| 383 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 385 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 384 | } | 386 | } |
| 385 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { | 387 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { |
| 386 | memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); | 388 | memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount); |
| 387 | } else { | 389 | } else { |
| 388 | u8* physical = memory.GetPointer(cpu_addr_base); | 390 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 389 | std::memcpy(dest_buffer, physical, copy_amount); | 391 | std::memcpy(dest_buffer, physical, copy_amount); |
| 390 | } | 392 | } |
| 391 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 393 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| @@ -416,25 +418,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe | |||
| 416 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 418 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| 417 | }; | 419 | }; |
| 418 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 420 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 419 | const VAddr cpu_addr_base = | 421 | const DAddr dev_addr_base = |
| 420 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 422 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 421 | if constexpr (is_safe) { | 423 | if constexpr (is_safe) { |
| 422 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 424 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 423 | } | 425 | } |
| 424 | u8* physical = memory.GetPointer(cpu_addr_base); | 426 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 425 | std::memcpy(physical, src_buffer, copy_amount); | 427 | std::memcpy(physical, src_buffer, copy_amount); |
| 426 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 428 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| 427 | }; | 429 | }; |
| 428 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 430 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 429 | const VAddr cpu_addr_base = | 431 | const DAddr dev_addr_base = |
| 430 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 432 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 431 | if constexpr (is_safe) { | 433 | if constexpr (is_safe) { |
| 432 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 434 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 433 | } | 435 | } |
| 434 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { | 436 | if (!IsBigPageContinuous(page_index)) [[unlikely]] { |
| 435 | memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); | 437 | memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount); |
| 436 | } else { | 438 | } else { |
| 437 | u8* physical = memory.GetPointer(cpu_addr_base); | 439 | u8* physical = memory.GetPointer<u8>(dev_addr_base); |
| 438 | std::memcpy(physical, src_buffer, copy_amount); | 440 | std::memcpy(physical, src_buffer, copy_amount); |
| 439 | } | 441 | } |
| 440 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | 442 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |
| @@ -470,14 +472,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | |||
| 470 | [[maybe_unused]] std::size_t copy_amount) {}; | 472 | [[maybe_unused]] std::size_t copy_amount) {}; |
| 471 | 473 | ||
| 472 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 474 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 473 | const VAddr cpu_addr_base = | 475 | const DAddr dev_addr_base = |
| 474 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 476 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 475 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 477 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 476 | }; | 478 | }; |
| 477 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 479 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 478 | const VAddr cpu_addr_base = | 480 | const DAddr dev_addr_base = |
| 479 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 481 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 480 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 482 | rasterizer->FlushRegion(dev_addr_base, copy_amount, which); |
| 481 | }; | 483 | }; |
| 482 | auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, | 484 | auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, |
| 483 | std::size_t copy_amount) { | 485 | std::size_t copy_amount) { |
| @@ -495,15 +497,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
| 495 | [[maybe_unused]] std::size_t copy_amount) { return false; }; | 497 | [[maybe_unused]] std::size_t copy_amount) { return false; }; |
| 496 | 498 | ||
| 497 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 499 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 498 | const VAddr cpu_addr_base = | 500 | const DAddr dev_addr_base = |
| 499 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 501 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 500 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | 502 | result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); |
| 501 | return result; | 503 | return result; |
| 502 | }; | 504 | }; |
| 503 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 505 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 504 | const VAddr cpu_addr_base = | 506 | const DAddr dev_addr_base = |
| 505 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 507 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 506 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | 508 | result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); |
| 507 | return result; | 509 | return result; |
| 508 | }; | 510 | }; |
| 509 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 511 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -517,7 +519,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
| 517 | } | 519 | } |
| 518 | 520 | ||
| 519 | size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | 521 | size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { |
| 520 | std::optional<VAddr> old_page_addr{}; | 522 | std::optional<DAddr> old_page_addr{}; |
| 521 | size_t range_so_far = 0; | 523 | size_t range_so_far = 0; |
| 522 | bool result{false}; | 524 | bool result{false}; |
| 523 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | 525 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |
| @@ -526,24 +528,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | |||
| 526 | return true; | 528 | return true; |
| 527 | }; | 529 | }; |
| 528 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 530 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 529 | const VAddr cpu_addr_base = | 531 | const DAddr dev_addr_base = |
| 530 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 532 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 531 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 533 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 532 | result = true; | 534 | result = true; |
| 533 | return true; | 535 | return true; |
| 534 | } | 536 | } |
| 535 | range_so_far += copy_amount; | 537 | range_so_far += copy_amount; |
| 536 | old_page_addr = {cpu_addr_base + copy_amount}; | 538 | old_page_addr = {dev_addr_base + copy_amount}; |
| 537 | return false; | 539 | return false; |
| 538 | }; | 540 | }; |
| 539 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 541 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 540 | const VAddr cpu_addr_base = | 542 | const DAddr dev_addr_base = |
| 541 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 543 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 542 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 544 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 543 | return true; | 545 | return true; |
| 544 | } | 546 | } |
| 545 | range_so_far += copy_amount; | 547 | range_so_far += copy_amount; |
| 546 | old_page_addr = {cpu_addr_base + copy_amount}; | 548 | old_page_addr = {dev_addr_base + copy_amount}; |
| 547 | return false; | 549 | return false; |
| 548 | }; | 550 | }; |
| 549 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 551 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -568,14 +570,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 568 | [[maybe_unused]] std::size_t copy_amount) {}; | 570 | [[maybe_unused]] std::size_t copy_amount) {}; |
| 569 | 571 | ||
| 570 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 572 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 571 | const VAddr cpu_addr_base = | 573 | const DAddr dev_addr_base = |
| 572 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 574 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 573 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 575 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 574 | }; | 576 | }; |
| 575 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 577 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 576 | const VAddr cpu_addr_base = | 578 | const DAddr dev_addr_base = |
| 577 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 579 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 578 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | 580 | rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); |
| 579 | }; | 581 | }; |
| 580 | auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, | 582 | auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, |
| 581 | std::size_t copy_amount) { | 583 | std::size_t copy_amount) { |
| @@ -587,7 +589,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 587 | 589 | ||
| 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | 590 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, |
| 589 | VideoCommon::CacheType which) { | 591 | VideoCommon::CacheType which) { |
| 590 | Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | 592 | Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( |
| 591 | *this, gpu_src_addr, size); | 593 | *this, gpu_src_addr, size); |
| 592 | data.SetAddressAndSize(gpu_dest_addr, size); | 594 | data.SetAddressAndSize(gpu_dest_addr, size); |
| 593 | FlushRegion(gpu_dest_addr, size, which); | 595 | FlushRegion(gpu_dest_addr, size, which); |
| @@ -611,7 +613,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | |||
| 611 | } | 613 | } |
| 612 | 614 | ||
| 613 | bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { | 615 | bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { |
| 614 | std::optional<VAddr> old_page_addr{}; | 616 | std::optional<DAddr> old_page_addr{}; |
| 615 | bool result{true}; | 617 | bool result{true}; |
| 616 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | 618 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |
| 617 | std::size_t copy_amount) { | 619 | std::size_t copy_amount) { |
| @@ -619,23 +621,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const | |||
| 619 | return true; | 621 | return true; |
| 620 | }; | 622 | }; |
| 621 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 623 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 622 | const VAddr cpu_addr_base = | 624 | const DAddr dev_addr_base = |
| 623 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 625 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 624 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 626 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 625 | result = false; | 627 | result = false; |
| 626 | return true; | 628 | return true; |
| 627 | } | 629 | } |
| 628 | old_page_addr = {cpu_addr_base + copy_amount}; | 630 | old_page_addr = {dev_addr_base + copy_amount}; |
| 629 | return false; | 631 | return false; |
| 630 | }; | 632 | }; |
| 631 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 633 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| 632 | const VAddr cpu_addr_base = | 634 | const DAddr dev_addr_base = |
| 633 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 635 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 634 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | 636 | if (old_page_addr && *old_page_addr != dev_addr_base) { |
| 635 | result = false; | 637 | result = false; |
| 636 | return true; | 638 | return true; |
| 637 | } | 639 | } |
| 638 | old_page_addr = {cpu_addr_base + copy_amount}; | 640 | old_page_addr = {dev_addr_base + copy_amount}; |
| 639 | return false; | 641 | return false; |
| 640 | }; | 642 | }; |
| 641 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | 643 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |
| @@ -678,11 +680,11 @@ template <bool is_gpu_address> | |||
| 678 | void MemoryManager::GetSubmappedRangeImpl( | 680 | void MemoryManager::GetSubmappedRangeImpl( |
| 679 | GPUVAddr gpu_addr, std::size_t size, | 681 | GPUVAddr gpu_addr, std::size_t size, |
| 680 | boost::container::small_vector< | 682 | boost::container::small_vector< |
| 681 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) | 683 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result) |
| 682 | const { | 684 | const { |
| 683 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | 685 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>> |
| 684 | last_segment{}; | 686 | last_segment{}; |
| 685 | std::optional<VAddr> old_page_addr{}; | 687 | std::optional<DAddr> old_page_addr{}; |
| 686 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | 688 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
| 687 | [[maybe_unused]] std::size_t offset, | 689 | [[maybe_unused]] std::size_t offset, |
| 688 | [[maybe_unused]] std::size_t copy_amount) { | 690 | [[maybe_unused]] std::size_t copy_amount) { |
| @@ -694,20 +696,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
| 694 | const auto extend_size_big = [this, &split, &old_page_addr, | 696 | const auto extend_size_big = [this, &split, &old_page_addr, |
| 695 | &last_segment](std::size_t page_index, std::size_t offset, | 697 | &last_segment](std::size_t page_index, std::size_t offset, |
| 696 | std::size_t copy_amount) { | 698 | std::size_t copy_amount) { |
| 697 | const VAddr cpu_addr_base = | 699 | const DAddr dev_addr_base = |
| 698 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | 700 | (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; |
| 699 | if (old_page_addr) { | 701 | if (old_page_addr) { |
| 700 | if (*old_page_addr != cpu_addr_base) { | 702 | if (*old_page_addr != dev_addr_base) { |
| 701 | split(0, 0, 0); | 703 | split(0, 0, 0); |
| 702 | } | 704 | } |
| 703 | } | 705 | } |
| 704 | old_page_addr = {cpu_addr_base + copy_amount}; | 706 | old_page_addr = {dev_addr_base + copy_amount}; |
| 705 | if (!last_segment) { | 707 | if (!last_segment) { |
| 706 | if constexpr (is_gpu_address) { | 708 | if constexpr (is_gpu_address) { |
| 707 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | 709 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
| 708 | last_segment = {new_base_addr, copy_amount}; | 710 | last_segment = {new_base_addr, copy_amount}; |
| 709 | } else { | 711 | } else { |
| 710 | last_segment = {cpu_addr_base, copy_amount}; | 712 | last_segment = {dev_addr_base, copy_amount}; |
| 711 | } | 713 | } |
| 712 | } else { | 714 | } else { |
| 713 | last_segment->second += copy_amount; | 715 | last_segment->second += copy_amount; |
| @@ -716,20 +718,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
| 716 | const auto extend_size_short = [this, &split, &old_page_addr, | 718 | const auto extend_size_short = [this, &split, &old_page_addr, |
| 717 | &last_segment](std::size_t page_index, std::size_t offset, | 719 | &last_segment](std::size_t page_index, std::size_t offset, |
| 718 | std::size_t copy_amount) { | 720 | std::size_t copy_amount) { |
| 719 | const VAddr cpu_addr_base = | 721 | const DAddr dev_addr_base = |
| 720 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | 722 | (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; |
| 721 | if (old_page_addr) { | 723 | if (old_page_addr) { |
| 722 | if (*old_page_addr != cpu_addr_base) { | 724 | if (*old_page_addr != dev_addr_base) { |
| 723 | split(0, 0, 0); | 725 | split(0, 0, 0); |
| 724 | } | 726 | } |
| 725 | } | 727 | } |
| 726 | old_page_addr = {cpu_addr_base + copy_amount}; | 728 | old_page_addr = {dev_addr_base + copy_amount}; |
| 727 | if (!last_segment) { | 729 | if (!last_segment) { |
| 728 | if constexpr (is_gpu_address) { | 730 | if constexpr (is_gpu_address) { |
| 729 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | 731 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
| 730 | last_segment = {new_base_addr, copy_amount}; | 732 | last_segment = {new_base_addr, copy_amount}; |
| 731 | } else { | 733 | } else { |
| 732 | last_segment = {cpu_addr_base, copy_amount}; | 734 | last_segment = {dev_addr_base, copy_amount}; |
| 733 | } | 735 | } |
| 734 | } else { | 736 | } else { |
| 735 | last_segment->second += copy_amount; | 737 | last_segment->second += copy_amount; |
| @@ -756,9 +758,9 @@ void MemoryManager::FlushCaching() { | |||
| 756 | } | 758 | } |
| 757 | 759 | ||
| 758 | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | 760 | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { |
| 759 | auto cpu_addr = GpuToCpuAddress(src_addr); | 761 | auto dev_addr = GpuToCpuAddress(src_addr); |
| 760 | if (cpu_addr) { | 762 | if (dev_addr) { |
| 761 | return memory.GetSpan(*cpu_addr, size); | 763 | return memory.GetSpan(*dev_addr, size); |
| 762 | } | 764 | } |
| 763 | return nullptr; | 765 | return nullptr; |
| 764 | } | 766 | } |
| @@ -767,9 +769,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | |||
| 767 | if (!IsContinuousRange(src_addr, size)) { | 769 | if (!IsContinuousRange(src_addr, size)) { |
| 768 | return nullptr; | 770 | return nullptr; |
| 769 | } | 771 | } |
| 770 | auto cpu_addr = GpuToCpuAddress(src_addr); | 772 | auto dev_addr = GpuToCpuAddress(src_addr); |
| 771 | if (cpu_addr) { | 773 | if (dev_addr) { |
| 772 | return memory.GetSpan(*cpu_addr, size); | 774 | return memory.GetSpan(*dev_addr, size); |
| 773 | } | 775 | } |
| 774 | return nullptr; | 776 | return nullptr; |
| 775 | } | 777 | } |
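A recurring pattern in this file: the GPU page tables store only a 32-bit device page number (the DAddr shifted right by cpu_page_bits), and every lookup rebuilds the full device address from that entry plus the in-page offset of the GPU address. A stripped-down sketch of the packing, with hypothetical helper names:

    // Pack: keep only the device page number in a 32-bit table entry.
    u32 PackEntry(DAddr dev_addr, size_t cpu_page_bits) {
        return static_cast<u32>(dev_addr >> cpu_page_bits);
    }

    // Unpack: rebuild the device address; the offset comes from the low bits
    // of the GPU address (page_mask or big_page_mask, depending on the table).
    DAddr UnpackEntry(u32 entry, u64 in_page_offset, size_t cpu_page_bits) {
        return (static_cast<DAddr>(entry) << cpu_page_bits) + in_page_offset;
    }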
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 9b311b9e5..e2912a73f 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "common/virtual_buffer.h" | 17 | #include "common/virtual_buffer.h" |
| 18 | #include "core/memory.h" | 18 | #include "core/memory.h" |
| 19 | #include "video_core/cache_types.h" | 19 | #include "video_core/cache_types.h" |
| 20 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 20 | #include "video_core/pte_kind.h" | 21 | #include "video_core/pte_kind.h" |
| 21 | 22 | ||
| 22 | namespace VideoCore { | 23 | namespace VideoCore { |
| @@ -28,10 +29,6 @@ class InvalidationAccumulator; | |||
| 28 | } | 29 | } |
| 29 | 30 | ||
| 30 | namespace Core { | 31 | namespace Core { |
| 31 | class DeviceMemory; | ||
| 32 | namespace Memory { | ||
| 33 | class Memory; | ||
| 34 | } // namespace Memory | ||
| 35 | class System; | 32 | class System; |
| 36 | } // namespace Core | 33 | } // namespace Core |
| 37 | 34 | ||
| @@ -50,9 +47,9 @@ public: | |||
| 50 | /// Binds a renderer to the memory manager. | 47 | /// Binds a renderer to the memory manager. |
| 51 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | 48 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 52 | 49 | ||
| 53 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | 50 | [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const; |
| 54 | 51 | ||
| 55 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | 52 | [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; |
| 56 | 53 | ||
| 57 | template <typename T> | 54 | template <typename T> |
| 58 | [[nodiscard]] T Read(GPUVAddr addr) const; | 55 | [[nodiscard]] T Read(GPUVAddr addr) const; |
| @@ -110,7 +107,7 @@ public: | |||
| 110 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; | 107 | [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 111 | 108 | ||
| 112 | /** | 109 | /** |
| 113 | * Checks if a gpu region is mapped by a single range of cpu addresses. | 110 | * Checks if a gpu region is mapped by a single range of device addresses. |
| 114 | */ | 111 | */ |
| 115 | [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; | 112 | [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 116 | 113 | ||
| @@ -120,14 +117,14 @@ public: | |||
| 120 | [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; | 117 | [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; |
| 121 | 118 | ||
| 122 | /** | 119 | /** |
| 123 | * Returns a vector with all the subranges of cpu addresses mapped beneath. | 120 | * Returns a vector with all the subranges of device addresses mapped beneath. |
| 124 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty | 121 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty |
| 125 | * vector will be returned; | 122 | * vector will be returned; |
| 126 | */ | 123 | */ |
| 127 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( | 124 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( |
| 128 | GPUVAddr gpu_addr, std::size_t size) const; | 125 | GPUVAddr gpu_addr, std::size_t size) const; |
| 129 | 126 | ||
| 130 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | 127 | GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, |
| 131 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | 128 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |
| 132 | GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); | 129 | GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); |
| 133 | void Unmap(GPUVAddr gpu_addr, std::size_t size); | 130 | void Unmap(GPUVAddr gpu_addr, std::size_t size); |
| @@ -186,12 +183,11 @@ private: | |||
| 186 | void GetSubmappedRangeImpl( | 183 | void GetSubmappedRangeImpl( |
| 187 | GPUVAddr gpu_addr, std::size_t size, | 184 | GPUVAddr gpu_addr, std::size_t size, |
| 188 | boost::container::small_vector< | 185 | boost::container::small_vector< |
| 189 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& | 186 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& |
| 190 | result) const; | 187 | result) const; |
| 191 | 188 | ||
| 192 | Core::System& system; | 189 | Core::System& system; |
| 193 | Core::Memory::Memory& memory; | 190 | MaxwellDeviceMemoryManager& memory; |
| 194 | Core::DeviceMemory& device_memory; | ||
| 195 | 191 | ||
| 196 | const u64 address_space_bits; | 192 | const u64 address_space_bits; |
| 197 | const u64 page_bits; | 193 | const u64 page_bits; |
| @@ -218,11 +214,11 @@ private: | |||
| 218 | std::vector<u64> big_entries; | 214 | std::vector<u64> big_entries; |
| 219 | 215 | ||
| 220 | template <EntryType entry_type> | 216 | template <EntryType entry_type> |
| 221 | GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 217 | GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 222 | PTEKind kind); | 218 | PTEKind kind); |
| 223 | 219 | ||
| 224 | template <EntryType entry_type> | 220 | template <EntryType entry_type> |
| 225 | GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | 221 | GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, |
| 226 | PTEKind kind); | 222 | PTEKind kind); |
| 227 | 223 | ||
| 228 | template <bool is_big_page> | 224 | template <bool is_big_page> |
| @@ -233,11 +229,11 @@ private: | |||
| 233 | 229 | ||
| 234 | Common::MultiLevelPageTable<u32> page_table; | 230 | Common::MultiLevelPageTable<u32> page_table; |
| 235 | Common::RangeMap<GPUVAddr, PTEKind> kind_map; | 231 | Common::RangeMap<GPUVAddr, PTEKind> kind_map; |
| 236 | Common::VirtualBuffer<u32> big_page_table_cpu; | 232 | Common::VirtualBuffer<u32> big_page_table_dev; |
| 237 | 233 | ||
| 238 | std::vector<u64> big_page_continuous; | 234 | std::vector<u64> big_page_continuous; |
| 239 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; | 235 | boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{}; |
| 240 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; | 236 | boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{}; |
| 241 | 237 | ||
| 242 | mutable std::mutex guard; | 238 | mutable std::mutex guard; |
| 243 | 239 | ||
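A hedged usage sketch of the updated interface: mapping a device-address range into the GPU address space and translating a byte back, relying on the default PTEKind and big-page arguments. The helper name is hypothetical, page-aligned inputs are assumed, and ASSERT is the macro from common/assert.h:

    GPUVAddr MapAndCheck(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                         DAddr dev_addr, std::size_t size) {
        const GPUVAddr mapped = gpu_memory.Map(gpu_addr, dev_addr, size);
        // For aligned inputs, translating the mapped base back should recover
        // the device address that was just mapped.
        const std::optional<DAddr> back = gpu_memory.GpuToCpuAddress(mapped);
        ASSERT(back.has_value() && *back == dev_addr);
        return mapped;
    }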
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index a64404ce4..b01d843e4 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "core/memory.h" | 21 | #include "core/memory.h" |
| 22 | #include "video_core/control/channel_state_cache.h" | 22 | #include "video_core/control/channel_state_cache.h" |
| 23 | #include "video_core/engines/maxwell_3d.h" | 23 | #include "video_core/engines/maxwell_3d.h" |
| 24 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 24 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 25 | #include "video_core/rasterizer_interface.h" | 26 | #include "video_core/rasterizer_interface.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | 27 | #include "video_core/texture_cache/slot_vector.h" |
| @@ -102,18 +103,19 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo | |||
| 102 | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 103 | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 103 | public: | 104 | public: |
| 104 | explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | 105 | explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, |
| 105 | Core::Memory::Memory& cpu_memory_) | 106 | Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 106 | : rasterizer{rasterizer_}, | 107 | : rasterizer{rasterizer_}, |
| 107 | // Use reinterpret_cast instead of static_cast as workaround for | 108 | // Use reinterpret_cast instead of static_cast as workaround for |
| 108 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | 109 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |
| 109 | cpu_memory{cpu_memory_}, streams{{ | 110 | device_memory{device_memory_}, |
| 110 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 111 | streams{{ |
| 111 | VideoCore::QueryType::SamplesPassed}}, | 112 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 112 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 113 | VideoCore::QueryType::SamplesPassed}}, |
| 113 | VideoCore::QueryType::PrimitivesGenerated}}, | 114 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 114 | {CounterStream{reinterpret_cast<QueryCache&>(*this), | 115 | VideoCore::QueryType::PrimitivesGenerated}}, |
| 115 | VideoCore::QueryType::TfbPrimitivesWritten}}, | 116 | {CounterStream{reinterpret_cast<QueryCache&>(*this), |
| 116 | }} { | 117 | VideoCore::QueryType::TfbPrimitivesWritten}}, |
| 118 | }} { | ||
| 117 | (void)slot_async_jobs.insert(); // Null value | 119 | (void)slot_async_jobs.insert(); // Null value |
| 118 | } | 120 | } |
| 119 | 121 | ||
| @@ -322,13 +324,14 @@ private: | |||
| 322 | local_lock.unlock(); | 324 | local_lock.unlock(); |
| 323 | if (timestamp) { | 325 | if (timestamp) { |
| 324 | u64 timestamp_value = *timestamp; | 326 | u64 timestamp_value = *timestamp; |
| 325 | cpu_memory.WriteBlockUnsafe(address + sizeof(u64), ×tamp_value, sizeof(u64)); | 327 | device_memory.WriteBlockUnsafe(address + sizeof(u64), ×tamp_value, |
| 326 | cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | 328 | sizeof(u64)); |
| 329 | device_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||
| 327 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | 330 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, |
| 328 | VideoCommon::CacheType::NoQueryCache); | 331 | VideoCommon::CacheType::NoQueryCache); |
| 329 | } else { | 332 | } else { |
| 330 | u32 small_value = static_cast<u32>(value); | 333 | u32 small_value = static_cast<u32>(value); |
| 331 | cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | 334 | device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); |
| 332 | rasterizer.InvalidateRegion(address, sizeof(u32), | 335 | rasterizer.InvalidateRegion(address, sizeof(u32), |
| 333 | VideoCommon::CacheType::NoQueryCache); | 336 | VideoCommon::CacheType::NoQueryCache); |
| 334 | } | 337 | } |
| @@ -342,7 +345,7 @@ private: | |||
| 342 | SlotVector<AsyncJob> slot_async_jobs; | 345 | SlotVector<AsyncJob> slot_async_jobs; |
| 343 | 346 | ||
| 344 | VideoCore::RasterizerInterface& rasterizer; | 347 | VideoCore::RasterizerInterface& rasterizer; |
| 345 | Core::Memory::Memory& cpu_memory; | 348 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 346 | 349 | ||
| 347 | mutable std::recursive_mutex mutex; | 350 | mutable std::recursive_mutex mutex; |
| 348 | 351 | ||
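The legacy query cache now writes results back through the Maxwell device memory manager instead of core CPU memory, then invalidates the written range. A condensed, standalone rendering of that path (parameter names hypothetical, behaviour as in the hunk above):

    void WriteBackQuery(Tegra::MaxwellDeviceMemoryManager& device_memory,
                        VideoCore::RasterizerInterface& rasterizer, DAddr address,
                        u64 value, std::optional<u64> timestamp) {
        if (timestamp) {
            u64 timestamp_value = *timestamp;
            device_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
            device_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
            rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
                                        VideoCommon::CacheType::NoQueryCache);
        } else {
            const u32 small_value = static_cast<u32>(value);
            device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
            rasterizer.InvalidateRegion(address, sizeof(u32),
                                        VideoCommon::CacheType::NoQueryCache);
        }
    }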
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 1d786b3a7..aca6a6447 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | |||
| 23 | 23 | ||
| 24 | class QueryBase { | 24 | class QueryBase { |
| 25 | public: | 25 | public: |
| 26 | VAddr guest_address{}; | 26 | DAddr guest_address{}; |
| 27 | QueryFlagBits flags{}; | 27 | QueryFlagBits flags{}; |
| 28 | u64 value{}; | 28 | u64 value{}; |
| 29 | 29 | ||
| @@ -32,7 +32,7 @@ protected: | |||
| 32 | QueryBase() = default; | 32 | QueryBase() = default; |
| 33 | 33 | ||
| 34 | // Parameterized constructor | 34 | // Parameterized constructor |
| 35 | QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | 35 | QueryBase(DAddr address, QueryFlagBits flags_, u64 value_) |
| 36 | : guest_address(address), flags(flags_), value{value_} {} | 36 | : guest_address(address), flags(flags_), value{value_} {} |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 94f0c4466..508afb10a 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "core/memory.h" | 18 | #include "core/memory.h" |
| 19 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/gpu.h" | 20 | #include "video_core/gpu.h" |
| 21 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 21 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 22 | #include "video_core/query_cache/bank_base.h" | 23 | #include "video_core/query_cache/bank_base.h" |
| 23 | #include "video_core/query_cache/query_base.h" | 24 | #include "video_core/query_cache/query_base.h" |
| @@ -113,9 +114,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 113 | using RuntimeType = typename Traits::RuntimeType; | 114 | using RuntimeType = typename Traits::RuntimeType; |
| 114 | 115 | ||
| 115 | QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | 116 | QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, |
| 116 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) | 117 | Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_, |
| 118 | Tegra::GPU& gpu_) | ||
| 117 | : owner{owner_}, rasterizer{rasterizer_}, | 119 | : owner{owner_}, rasterizer{rasterizer_}, |
| 118 | cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { | 120 | device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} { |
| 119 | streamer_mask = 0; | 121 | streamer_mask = 0; |
| 120 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | 122 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |
| 121 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | 123 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |
| @@ -158,7 +160,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 158 | 160 | ||
| 159 | QueryCacheBase<Traits>* owner; | 161 | QueryCacheBase<Traits>* owner; |
| 160 | VideoCore::RasterizerInterface& rasterizer; | 162 | VideoCore::RasterizerInterface& rasterizer; |
| 161 | Core::Memory::Memory& cpu_memory; | 163 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 162 | RuntimeType& runtime; | 164 | RuntimeType& runtime; |
| 163 | Tegra::GPU& gpu; | 165 | Tegra::GPU& gpu; |
| 164 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | 166 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |
| @@ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 171 | template <typename Traits> | 173 | template <typename Traits> |
| 172 | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | 174 | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, |
| 173 | VideoCore::RasterizerInterface& rasterizer_, | 175 | VideoCore::RasterizerInterface& rasterizer_, |
| 174 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) | 176 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 177 | RuntimeType& runtime_) | ||
| 175 | : cached_queries{} { | 178 | : cached_queries{} { |
| 176 | impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | 179 | impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( |
| 177 | this, rasterizer_, cpu_memory_, runtime_, gpu_); | 180 | this, rasterizer_, device_memory_, runtime_, gpu_); |
| 178 | } | 181 | } |
| 179 | 182 | ||
| 180 | template <typename Traits> | 183 | template <typename Traits> |
| @@ -240,7 +243,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 240 | if (!cpu_addr_opt) [[unlikely]] { | 243 | if (!cpu_addr_opt) [[unlikely]] { |
| 241 | return; | 244 | return; |
| 242 | } | 245 | } |
| 243 | VAddr cpu_addr = *cpu_addr_opt; | 246 | DAddr cpu_addr = *cpu_addr_opt; |
| 244 | const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | 247 | const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); |
| 245 | auto* query = streamer->GetQuery(new_query_id); | 248 | auto* query = streamer->GetQuery(new_query_id); |
| 246 | if (is_fence) { | 249 | if (is_fence) { |
| @@ -253,10 +256,9 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 253 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | 256 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
| 254 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | 257 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
| 255 | }; | 258 | }; |
| 256 | u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | 259 | u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); |
| 257 | u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | 260 | u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); |
| 258 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | 261 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
| 259 | |||
| 260 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | 262 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |
| 261 | pointer, pointer_timestamp] { | 263 | pointer, pointer_timestamp] { |
| 262 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | 264 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |
| @@ -559,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo | |||
| 559 | } | 561 | } |
| 560 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | 562 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |
| 561 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | 563 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |
| 562 | auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); | 564 | auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); |
| 563 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | 565 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
| 564 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | 566 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |
| 565 | return false; | 567 | return false; |
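In the rewritten cache, CounterReport resolves the GPU address to a DAddr, fetches host pointers through the device memory manager, and files the query under its guest page. The page/offset split it relies on boils down to this (sketch, helper name hypothetical):

    // Split a device address into (page number, offset within page) using the
    // same YUZU_PAGEBITS/YUZU_PAGEMASK constants as the hunk above.
    std::pair<u64, u32> SplitQueryAddress(DAddr addr) {
        return {addr >> Core::Memory::YUZU_PAGEBITS,
                static_cast<u32>(addr & Core::Memory::YUZU_PAGEMASK)};
    }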
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h index 07be421c6..7720456ff 100644 --- a/src/video_core/query_cache/query_cache_base.h +++ b/src/video_core/query_cache/query_cache_base.h | |||
| @@ -17,10 +17,7 @@ | |||
| 17 | #include "video_core/control/channel_state_cache.h" | 17 | #include "video_core/control/channel_state_cache.h" |
| 18 | #include "video_core/query_cache/query_base.h" | 18 | #include "video_core/query_cache/query_base.h" |
| 19 | #include "video_core/query_cache/types.h" | 19 | #include "video_core/query_cache/types.h" |
| 20 | 20 | #include "video_core/host1x/gpu_device_memory_manager.h" | |
| 21 | namespace Core::Memory { | ||
| 22 | class Memory; | ||
| 23 | } | ||
| 24 | 21 | ||
| 25 | namespace VideoCore { | 22 | namespace VideoCore { |
| 26 | class RasterizerInterface; | 23 | class RasterizerInterface; |
| @@ -53,7 +50,7 @@ public: | |||
| 53 | }; | 50 | }; |
| 54 | 51 | ||
| 55 | explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | 52 | explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, |
| 56 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); | 53 | Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_); |
| 57 | 54 | ||
| 58 | ~QueryCacheBase(); | 55 | ~QueryCacheBase(); |
| 59 | 56 | ||
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp deleted file mode 100644 index f200a650f..000000000 --- a/src/video_core/rasterizer_accelerated.cpp +++ /dev/null | |||
| @@ -1,72 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <atomic> | ||
| 5 | |||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/div_ceil.h" | ||
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/rasterizer_accelerated.h" | ||
| 11 | |||
| 12 | namespace VideoCore { | ||
| 13 | |||
| 14 | using namespace Core::Memory; | ||
| 15 | |||
| 16 | RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) | ||
| 17 | : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} | ||
| 18 | |||
| 19 | RasterizerAccelerated::~RasterizerAccelerated() = default; | ||
| 20 | |||
| 21 | void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 22 | u64 uncache_begin = 0; | ||
| 23 | u64 cache_begin = 0; | ||
| 24 | u64 uncache_bytes = 0; | ||
| 25 | u64 cache_bytes = 0; | ||
| 26 | |||
| 27 | std::atomic_thread_fence(std::memory_order_acquire); | ||
| 28 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | ||
| 29 | for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { | ||
| 30 | std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); | ||
| 31 | |||
| 32 | if (delta > 0) { | ||
| 33 | ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); | ||
| 34 | } else if (delta < 0) { | ||
| 35 | ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||
| 36 | } else { | ||
| 37 | ASSERT_MSG(false, "Delta must be non-zero!"); | ||
| 38 | } | ||
| 39 | |||
| 40 | // Adds or subtracts 1, as count is an unsigned 16-bit value | ||
| 41 | count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | ||
| 42 | |||
| 43 | // Assume delta is either -1 or 1 | ||
| 44 | if (count.load(std::memory_order::relaxed) == 0) { | ||
| 45 | if (uncache_bytes == 0) { | ||
| 46 | uncache_begin = page; | ||
| 47 | } | ||
| 48 | uncache_bytes += YUZU_PAGESIZE; | ||
| 49 | } else if (uncache_bytes > 0) { | ||
| 50 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, | ||
| 51 | false); | ||
| 52 | uncache_bytes = 0; | ||
| 53 | } | ||
| 54 | if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | ||
| 55 | if (cache_bytes == 0) { | ||
| 56 | cache_begin = page; | ||
| 57 | } | ||
| 58 | cache_bytes += YUZU_PAGESIZE; | ||
| 59 | } else if (cache_bytes > 0) { | ||
| 60 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||
| 61 | cache_bytes = 0; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | if (uncache_bytes > 0) { | ||
| 65 | cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); | ||
| 66 | } | ||
| 67 | if (cache_bytes > 0) { | ||
| 68 | cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace VideoCore | ||
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h deleted file mode 100644 index e6c0ea87a..000000000 --- a/src/video_core/rasterizer_accelerated.h +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <atomic> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/rasterizer_interface.h" | ||
| 11 | |||
| 12 | namespace Core::Memory { | ||
| 13 | class Memory; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCore { | ||
| 17 | |||
| 18 | /// Implements the parts of RasterizerInterface shared by GPU-accelerated rasterizers. | ||
| 19 | class RasterizerAccelerated : public RasterizerInterface { | ||
| 20 | public: | ||
| 21 | explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); | ||
| 22 | ~RasterizerAccelerated() override; | ||
| 23 | |||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | ||
| 25 | |||
| 26 | private: | ||
| 27 | class CacheEntry final { | ||
| 28 | public: | ||
| 29 | CacheEntry() = default; | ||
| 30 | |||
| 31 | std::atomic_uint16_t& Count(std::size_t page) { | ||
| 32 | return values[page & 3]; | ||
| 33 | } | ||
| 34 | |||
| 35 | const std::atomic_uint16_t& Count(std::size_t page) const { | ||
| 36 | return values[page & 3]; | ||
| 37 | } | ||
| 38 | |||
| 39 | private: | ||
| 40 | std::array<std::atomic_uint16_t, 4> values{}; | ||
| 41 | }; | ||
| 42 | static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); | ||
| 43 | |||
| 44 | using CachedPages = std::array<CacheEntry, 0x2000000>; | ||
| 45 | std::unique_ptr<CachedPages> cached_pages; | ||
| 46 | Core::Memory::Memory& cpu_memory; | ||
| 47 | }; | ||
| 48 | |||
| 49 | } // namespace VideoCore | ||
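The two deletions above remove VideoCore::RasterizerAccelerated, whose only job was per-page reference counting of cached objects so that runs of guest memory pages could be marked cached or uncached in batches. The following is a simplified, self-contained sketch of that counting scheme, with illustrative names and a plain callback in place of the memory subsystem; it is not the replacement DeviceMemoryManager API introduced by this commit.

```cpp
// Standalone sketch of the page reference counting removed above. The real
// code keeps one std::atomic_uint16_t per 4 KiB page (packed four to a cache
// entry) and batches contiguous cache/uncache transitions into single calls.
#include <atomic>
#include <cstdint>
#include <functional>
#include <vector>

constexpr std::uint64_t PAGE_BITS = 12;
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

class PageCacheCounter {
public:
    // mark_region(addr, size, cached) stands in for marking guest pages as
    // cached/uncached; in the deleted code this was RasterizerMarkRegionCached.
    explicit PageCacheCounter(
        std::size_t num_pages,
        std::function<void(std::uint64_t, std::uint64_t, bool)> mark_region)
        : counts(num_pages), mark(std::move(mark_region)) {}

    // delta is +1 when an object starts touching [addr, addr + size) and -1
    // when it stops touching it.
    void UpdatePagesCachedCount(std::uint64_t addr, std::uint64_t size, int delta) {
        std::uint64_t cache_begin = 0, cache_bytes = 0;
        std::uint64_t uncache_begin = 0, uncache_bytes = 0;
        const std::uint64_t page_end = (addr + size + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page != page_end; ++page) {
            auto& count = counts[page];
            count.fetch_add(static_cast<std::uint16_t>(delta), std::memory_order_release);
            const std::uint16_t now = count.load(std::memory_order_relaxed);
            if (now == 0) { // last reference gone: extend the pending uncache run
                if (uncache_bytes == 0) {
                    uncache_begin = page;
                }
                uncache_bytes += PAGE_SIZE;
            } else if (uncache_bytes > 0) {
                mark(uncache_begin << PAGE_BITS, uncache_bytes, false);
                uncache_bytes = 0;
            }
            if (now == 1 && delta > 0) { // first reference: extend the pending cache run
                if (cache_bytes == 0) {
                    cache_begin = page;
                }
                cache_bytes += PAGE_SIZE;
            } else if (cache_bytes > 0) {
                mark(cache_begin << PAGE_BITS, cache_bytes, true);
                cache_bytes = 0;
            }
        }
        if (uncache_bytes > 0) {
            mark(uncache_begin << PAGE_BITS, uncache_bytes, false);
        }
        if (cache_bytes > 0) {
            mark(cache_begin << PAGE_BITS, cache_bytes, true);
        }
    }

private:
    std::vector<std::atomic_uint16_t> counts;
    std::function<void(std::uint64_t, std::uint64_t, bool)> mark;
};
```

In the deleted implementation the callback was Core::Memory::Memory::RasterizerMarkRegionCached; after this commit the equivalent bookkeeping is expected to live behind the SMMU-backed device memory manager rather than in video_core.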
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 49224ca85..8fa4e4d9a 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -86,35 +86,35 @@ public: | |||
| 86 | virtual void FlushAll() = 0; | 86 | virtual void FlushAll() = 0; |
| 87 | 87 | ||
| 88 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 88 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 89 | virtual void FlushRegion(VAddr addr, u64 size, | 89 | virtual void FlushRegion(DAddr addr, u64 size, |
| 90 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 90 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 91 | 91 | ||
| 92 | /// Check if the specified memory area requires flushing to CPU Memory. | 92 | /// Check if the specified memory area requires flushing to CPU Memory. |
| 93 | virtual bool MustFlushRegion(VAddr addr, u64 size, | 93 | virtual bool MustFlushRegion(DAddr addr, u64 size, |
| 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 95 | 95 | ||
| 96 | virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; | 96 | virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0; |
| 97 | 97 | ||
| 98 | /// Notify rasterizer that any caches of the specified region should be invalidated | 98 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 99 | virtual void InvalidateRegion(VAddr addr, u64 size, | 99 | virtual void InvalidateRegion(DAddr addr, u64 size, |
| 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 101 | 101 | ||
| 102 | virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | 102 | virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { |
| 103 | for (const auto& [cpu_addr, size] : sequences) { | 103 | for (const auto& [cpu_addr, size] : sequences) { |
| 104 | InvalidateRegion(cpu_addr, size); | 104 | InvalidateRegion(cpu_addr, size); |
| 105 | } | 105 | } |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest | 108 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest |
| 109 | virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; | 109 | virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0; |
| 110 | 110 | ||
| 111 | virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | 111 | virtual bool OnCPUWrite(PAddr addr, u64 size) = 0; |
| 112 | 112 | ||
| 113 | /// Sync memory between guest and host. | 113 | /// Sync memory between guest and host. |
| 114 | virtual void InvalidateGPUCache() = 0; | 114 | virtual void InvalidateGPUCache() = 0; |
| 115 | 115 | ||
| 116 | /// Unmap memory range | 116 | /// Unmap memory range |
| 117 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; | 117 | virtual void UnmapMemory(DAddr addr, u64 size) = 0; |
| 118 | 118 | ||
| 119 | /// Remap GPU memory range. This means the underlying backing memory has changed | 119 | /// Remap GPU memory range. This means the underlying backing memory has changed |
| 120 | virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; | 120 | virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; |
| @@ -122,7 +122,7 @@ public: | |||
| 122 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 122 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 123 | /// and invalidated | 123 | /// and invalidated |
| 124 | virtual void FlushAndInvalidateRegion( | 124 | virtual void FlushAndInvalidateRegion( |
| 125 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 125 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 126 | 126 | ||
| 127 | /// Notify the host renderer to wait for previous primitive and compute operations. | 127 | /// Notify the host renderer to wait for previous primitive and compute operations. |
| 128 | virtual void WaitForIdle() = 0; | 128 | virtual void WaitForIdle() = 0; |
| @@ -157,13 +157,10 @@ public: | |||
| 157 | 157 | ||
| 158 | /// Attempt to use a faster method to display the framebuffer to screen | 158 | /// Attempt to use a faster method to display the framebuffer to screen |
| 159 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | 159 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 160 | VAddr framebuffer_addr, u32 pixel_stride) { | 160 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 161 | return false; | 161 | return false; |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | /// Increase/decrease the number of objects in pages touching the specified region | ||
| 165 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | ||
| 166 | |||
| 167 | /// Initialize disk cached resources for the game being emulated | 164 | /// Initialize disk cached resources for the game being emulated |
| 168 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 165 | virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 169 | const DiskResourceLoadCallback& callback) {} | 166 | const DiskResourceLoadCallback& callback) {} |
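Throughout the interface above, the address parameters are renamed rather than retyped: the flush/invalidate entry points now take DAddr (addresses in the SMMU/device space the GPU-side caches operate on), the CPU-side notifications OnCPUWrite and OnCacheInvalidation take PAddr (guest physical addresses), and GPUVAddr remains the GPU's own virtual space. All of them are 64-bit aliases; the definitions below are an assumption for illustration, not a quote of common_types.h.

```cpp
#include <cstdint>

// Assumed shape of yuzu's address aliases; only the name (i.e. which address
// space a value belongs to) changes in the interface above, not the width.
using VAddr    = std::uint64_t; // guest CPU virtual address
using PAddr    = std::uint64_t; // guest physical address
using DAddr    = std::uint64_t; // device (SMMU) address seen by the GPU-side caches
using GPUVAddr = std::uint64_t; // GPU virtual address, translated by Tegra::MemoryManager
```

Because the aliases share one underlying type, a mismatch such as GetFlushArea taking PAddr in one backend and DAddr in another still compiles; the name serves as documentation rather than enforcement.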
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 4f1d5b548..11b93fdc9 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -19,8 +19,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | |||
| 19 | return true; | 19 | return true; |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) | 22 | RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {} |
| 23 | : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {} | ||
| 24 | RasterizerNull::~RasterizerNull() = default; | 23 | RasterizerNull::~RasterizerNull() = default; |
| 25 | 24 | ||
| 26 | void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | 25 | void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} |
| @@ -45,16 +44,16 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr | |||
| 45 | u32 size) {} | 44 | u32 size) {} |
| 46 | void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} | 45 | void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} |
| 47 | void RasterizerNull::FlushAll() {} | 46 | void RasterizerNull::FlushAll() {} |
| 48 | void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 47 | void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 49 | bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { | 48 | bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) { |
| 50 | return false; | 49 | return false; |
| 51 | } | 50 | } |
| 52 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 51 | void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 53 | bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | 52 | bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) { |
| 54 | return false; | 53 | return false; |
| 55 | } | 54 | } |
| 56 | void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | 55 | void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {} |
| 57 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | 56 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) { |
| 58 | VideoCore::RasterizerDownloadArea new_area{ | 57 | VideoCore::RasterizerDownloadArea new_area{ |
| 59 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 58 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), |
| 60 | .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | 59 | .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), |
| @@ -63,7 +62,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 s | |||
| 63 | return new_area; | 62 | return new_area; |
| 64 | } | 63 | } |
| 65 | void RasterizerNull::InvalidateGPUCache() {} | 64 | void RasterizerNull::InvalidateGPUCache() {} |
| 66 | void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} | 65 | void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {} |
| 67 | void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} | 66 | void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} |
| 68 | void RasterizerNull::SignalFence(std::function<void()>&& func) { | 67 | void RasterizerNull::SignalFence(std::function<void()>&& func) { |
| 69 | func(); | 68 | func(); |
| @@ -78,7 +77,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||
| 78 | } | 77 | } |
| 79 | void RasterizerNull::SignalReference() {} | 78 | void RasterizerNull::SignalReference() {} |
| 80 | void RasterizerNull::ReleaseFences(bool) {} | 79 | void RasterizerNull::ReleaseFences(bool) {} |
| 81 | void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 80 | void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} |
| 82 | void RasterizerNull::WaitForIdle() {} | 81 | void RasterizerNull::WaitForIdle() {} |
| 83 | void RasterizerNull::FragmentBarrier() {} | 82 | void RasterizerNull::FragmentBarrier() {} |
| 84 | void RasterizerNull::TiledCacheBarrier() {} | 83 | void RasterizerNull::TiledCacheBarrier() {} |
| @@ -95,7 +94,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac | |||
| 95 | void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 94 | void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 96 | std::span<const u8> memory) {} | 95 | std::span<const u8> memory) {} |
| 97 | bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 96 | bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 98 | VAddr framebuffer_addr, u32 pixel_stride) { | 97 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 99 | return true; | 98 | return true; |
| 100 | } | 99 | } |
| 101 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 100 | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 23001eeb8..a5789604f 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include "common/common_types.h" | 6 | #include "common/common_types.h" |
| 7 | #include "video_core/control/channel_state_cache.h" | 7 | #include "video_core/control/channel_state_cache.h" |
| 8 | #include "video_core/engines/maxwell_dma.h" | 8 | #include "video_core/engines/maxwell_dma.h" |
| 9 | #include "video_core/rasterizer_accelerated.h" | ||
| 10 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 11 | 10 | ||
| 12 | namespace Core { | 11 | namespace Core { |
| @@ -32,10 +31,10 @@ public: | |||
| 32 | } | 31 | } |
| 33 | }; | 32 | }; |
| 34 | 33 | ||
| 35 | class RasterizerNull final : public VideoCore::RasterizerAccelerated, | 34 | class RasterizerNull final : public VideoCore::RasterizerInterface, |
| 36 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 35 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 37 | public: | 36 | public: |
| 38 | explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); | 37 | explicit RasterizerNull(Tegra::GPU& gpu); |
| 39 | ~RasterizerNull() override; | 38 | ~RasterizerNull() override; |
| 40 | 39 | ||
| 41 | void Draw(bool is_indexed, u32 instance_count) override; | 40 | void Draw(bool is_indexed, u32 instance_count) override; |
| @@ -48,17 +47,17 @@ public: | |||
| 48 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 47 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 49 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 48 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 50 | void FlushAll() override; | 49 | void FlushAll() override; |
| 51 | void FlushRegion(VAddr addr, u64 size, | 50 | void FlushRegion(DAddr addr, u64 size, |
| 52 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 51 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 53 | bool MustFlushRegion(VAddr addr, u64 size, | 52 | bool MustFlushRegion(DAddr addr, u64 size, |
| 54 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 55 | void InvalidateRegion(VAddr addr, u64 size, | 54 | void InvalidateRegion(DAddr addr, u64 size, |
| 56 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 57 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 56 | void OnCacheInvalidation(DAddr addr, u64 size) override; |
| 58 | bool OnCPUWrite(VAddr addr, u64 size) override; | 57 | bool OnCPUWrite(DAddr addr, u64 size) override; |
| 59 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 58 | VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; |
| 60 | void InvalidateGPUCache() override; | 59 | void InvalidateGPUCache() override; |
| 61 | void UnmapMemory(VAddr addr, u64 size) override; | 60 | void UnmapMemory(DAddr addr, u64 size) override; |
| 62 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 61 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 63 | void SignalFence(std::function<void()>&& func) override; | 62 | void SignalFence(std::function<void()>&& func) override; |
| 64 | void SyncOperation(std::function<void()>&& func) override; | 63 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -66,7 +65,7 @@ public: | |||
| 66 | void SignalReference() override; | 65 | void SignalReference() override; |
| 67 | void ReleaseFences(bool force) override; | 66 | void ReleaseFences(bool force) override; |
| 68 | void FlushAndInvalidateRegion( | 67 | void FlushAndInvalidateRegion( |
| 69 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 68 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 70 | void WaitForIdle() override; | 69 | void WaitForIdle() override; |
| 71 | void FragmentBarrier() override; | 70 | void FragmentBarrier() override; |
| 72 | void TiledCacheBarrier() override; | 71 | void TiledCacheBarrier() override; |
| @@ -78,7 +77,7 @@ public: | |||
| 78 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 77 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 79 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 78 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 80 | std::span<const u8> memory) override; | 79 | std::span<const u8> memory) override; |
| 81 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 80 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 82 | u32 pixel_stride) override; | 81 | u32 pixel_stride) override; |
| 83 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 82 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 84 | const VideoCore::DiskResourceLoadCallback& callback) override; | 83 | const VideoCore::DiskResourceLoadCallback& callback) override; |
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index be92cc2f4..078feb925 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp | |||
| @@ -7,10 +7,9 @@ | |||
| 7 | 7 | ||
| 8 | namespace Null { | 8 | namespace Null { |
| 9 | 9 | ||
| 10 | RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | 10 | RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 11 | Tegra::GPU& gpu, | ||
| 12 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 11 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 13 | : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} | 12 | : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {} |
| 14 | 13 | ||
| 15 | RendererNull::~RendererNull() = default; | 14 | RendererNull::~RendererNull() = default; |
| 16 | 15 | ||
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 967ff5645..9531b43f6 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h | |||
| @@ -13,8 +13,7 @@ namespace Null { | |||
| 13 | 13 | ||
| 14 | class RendererNull final : public VideoCore::RendererBase { | 14 | class RendererNull final : public VideoCore::RendererBase { |
| 15 | public: | 15 | public: |
| 16 | explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | 16 | explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 17 | Tegra::GPU& gpu, | ||
| 18 | std::unique_ptr<Core::Frontend::GraphicsContext> context); | 17 | std::unique_ptr<Core::Frontend::GraphicsContext> context); |
| 19 | ~RendererNull() override; | 18 | ~RendererNull() override; |
| 20 | 19 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 517ac14dd..ade72e1f9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{ | |||
| 47 | } // Anonymous namespace | 47 | } // Anonymous namespace |
| 48 | 48 | ||
| 49 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | 49 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 50 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | 50 | : VideoCommon::BufferBase(null_params) {} |
| 51 | 51 | ||
| 52 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 52 | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) |
| 53 | VAddr cpu_addr_, u64 size_bytes_) | 53 | : VideoCommon::BufferBase(cpu_addr_, size_bytes_) { |
| 54 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 55 | buffer.Create(); | 54 | buffer.Create(); |
| 56 | if (runtime.device.HasDebuggingToolAttached()) { | 55 | if (runtime.device.HasDebuggingToolAttached()) { |
| 57 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | 56 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 2c18de166..e6ad030cb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache_base.h" | 11 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 12 | #include "video_core/buffer_cache/memory_tracker_base.h" | 12 | #include "video_core/buffer_cache/memory_tracker_base.h" |
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 16 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | 15 | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" |
| @@ -19,9 +18,9 @@ namespace OpenGL { | |||
| 19 | 18 | ||
| 20 | class BufferCacheRuntime; | 19 | class BufferCacheRuntime; |
| 21 | 20 | ||
| 22 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | 21 | class Buffer : public VideoCommon::BufferBase { |
| 23 | public: | 22 | public: |
| 24 | explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, | 23 | explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr, |
| 25 | u64 size_bytes); | 24 | u64 size_bytes); |
| 26 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | 25 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); |
| 27 | 26 | ||
| @@ -244,7 +243,7 @@ struct BufferCacheParams { | |||
| 244 | using Runtime = OpenGL::BufferCacheRuntime; | 243 | using Runtime = OpenGL::BufferCacheRuntime; |
| 245 | using Buffer = OpenGL::Buffer; | 244 | using Buffer = OpenGL::Buffer; |
| 246 | using Async_Buffer = OpenGL::StagingBufferMap; | 245 | using Async_Buffer = OpenGL::StagingBufferMap; |
| 247 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | 246 | using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; |
| 248 | 247 | ||
| 249 | static constexpr bool IS_OPENGL = true; | 248 | static constexpr bool IS_OPENGL = true; |
| 250 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | 249 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |
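Note the MemoryTracker change at the bottom of this header: it is now parameterized on Tegra::MaxwellDeviceMemoryManager instead of VideoCore::RasterizerInterface, and the Buffer constructor drops its rasterizer reference accordingly. Judging from the helper deleted earlier in this commit, the hook the tracker needs from that template parameter is the page reference counter; the sketch below is inferred from that deleted code, not quoted from device_memory_manager.h.

```cpp
#include <cstdint>

using DAddr = std::uint64_t;
using u64 = std::uint64_t;

// Inferred shape of what MemoryTrackerBase<T> relies on after this commit,
// modeled on the UpdatePagesCachedCount helper removed from
// RasterizerAccelerated above.
struct DeviceMemoryManagerLike {
    // delta is +1 when a cached object starts touching [addr, addr + size)
    // and -1 when it stops; the manager (un)protects the backing pages.
    void UpdatePagesCachedCount(DAddr addr, u64 size, int delta);
};
```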
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index fef7360ed..567292e1c 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -35,8 +35,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 35 | 35 | ||
| 36 | } // Anonymous namespace | 36 | } // Anonymous namespace |
| 37 | 37 | ||
| 38 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | 38 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 39 | : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { | 39 | : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} { |
| 40 | EnableCounters(); | 40 | EnableCounters(); |
| 41 | } | 41 | } |
| 42 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 0721e0b3d..04a1b39c9 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "video_core/query_cache.h" | 11 | #include "video_core/query_cache.h" |
| 12 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| 16 | class System; | 17 | class System; |
| @@ -28,7 +29,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | |||
| 28 | class QueryCache final | 29 | class QueryCache final |
| 29 | : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | 30 | : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 30 | public: | 31 | public: |
| 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | 32 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_); |
| 32 | ~QueryCache(); | 33 | ~QueryCache(); |
| 33 | 34 | ||
| 34 | OGLQuery AllocateQuery(VideoCore::QueryType type); | 35 | OGLQuery AllocateQuery(VideoCore::QueryType type); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7a5fad735..ca31e2fbd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy | |||
| 70 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 71 | 71 | ||
| 72 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 72 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 73 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 73 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 74 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 74 | const Device& device_, ScreenInfo& screen_info_, |
| 75 | StateTracker& state_tracker_) | 75 | ProgramManager& program_manager_, StateTracker& state_tracker_) |
| 76 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), | 76 | : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), |
| 77 | program_manager(program_manager_), state_tracker(state_tracker_), | 77 | program_manager(program_manager_), state_tracker(state_tracker_), |
| 78 | texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), | 78 | texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), |
| 79 | texture_cache(texture_cache_runtime, *this), | 79 | texture_cache(texture_cache_runtime, device_memory_), |
| 80 | buffer_cache_runtime(device, staging_buffer_pool), | 80 | buffer_cache_runtime(device, staging_buffer_pool), |
| 81 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 81 | buffer_cache(device_memory_, buffer_cache_runtime), |
| 82 | shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, | 82 | shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache, |
| 83 | state_tracker, gpu.ShaderNotify()), | 83 | program_manager, state_tracker, gpu.ShaderNotify()), |
| 84 | query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), | 84 | query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache), |
| 85 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 85 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 86 | blit_image(program_manager_) {} | 86 | blit_image(program_manager_) {} |
| 87 | 87 | ||
| @@ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { | |||
| 475 | 475 | ||
| 476 | void RasterizerOpenGL::FlushAll() {} | 476 | void RasterizerOpenGL::FlushAll() {} |
| 477 | 477 | ||
| 478 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 478 | void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 479 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 479 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 480 | if (addr == 0 || size == 0) { | 480 | if (addr == 0 || size == 0) { |
| 481 | return; | 481 | return; |
| @@ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
| 493 | } | 493 | } |
| 494 | } | 494 | } |
| 495 | 495 | ||
| 496 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 496 | bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 497 | if ((True(which & VideoCommon::CacheType::BufferCache))) { | 497 | if ((True(which & VideoCommon::CacheType::BufferCache))) { |
| 498 | std::scoped_lock lock{buffer_cache.mutex}; | 498 | std::scoped_lock lock{buffer_cache.mutex}; |
| 499 | if (buffer_cache.IsRegionGpuModified(addr, size)) { | 499 | if (buffer_cache.IsRegionGpuModified(addr, size)) { |
| @@ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
| 510 | return false; | 510 | return false; |
| 511 | } | 511 | } |
| 512 | 512 | ||
| 513 | VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { | 513 | VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) { |
| 514 | { | 514 | { |
| 515 | std::scoped_lock lock{texture_cache.mutex}; | 515 | std::scoped_lock lock{texture_cache.mutex}; |
| 516 | auto area = texture_cache.GetFlushArea(addr, size); | 516 | auto area = texture_cache.GetFlushArea(addr, size); |
| @@ -533,7 +533,7 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 | |||
| 533 | return new_area; | 533 | return new_area; |
| 534 | } | 534 | } |
| 535 | 535 | ||
| 536 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 536 | void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 537 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 537 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 538 | if (addr == 0 || size == 0) { | 538 | if (addr == 0 || size == 0) { |
| 539 | return; | 539 | return; |
| @@ -554,8 +554,9 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 554 | } | 554 | } |
| 555 | } | 555 | } |
| 556 | 556 | ||
| 557 | bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | 557 | bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) { |
| 558 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 558 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 559 | const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||
| 559 | if (addr == 0 || size == 0) { | 560 | if (addr == 0 || size == 0) { |
| 560 | return false; | 561 | return false; |
| 561 | } | 562 | } |
| @@ -576,8 +577,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 576 | return false; | 577 | return false; |
| 577 | } | 578 | } |
| 578 | 579 | ||
| 579 | void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | 580 | void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) { |
| 580 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 581 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 582 | const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||
| 581 | if (addr == 0 || size == 0) { | 583 | if (addr == 0 || size == 0) { |
| 582 | return; | 584 | return; |
| 583 | } | 585 | } |
| @@ -596,7 +598,7 @@ void RasterizerOpenGL::InvalidateGPUCache() { | |||
| 596 | gpu.InvalidateGPUCache(); | 598 | gpu.InvalidateGPUCache(); |
| 597 | } | 599 | } |
| 598 | 600 | ||
| 599 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 601 | void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) { |
| 600 | { | 602 | { |
| 601 | std::scoped_lock lock{texture_cache.mutex}; | 603 | std::scoped_lock lock{texture_cache.mutex}; |
| 602 | texture_cache.UnmapMemory(addr, size); | 604 | texture_cache.UnmapMemory(addr, size); |
| @@ -635,7 +637,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) { | |||
| 635 | fence_manager.WaitPendingFences(force); | 637 | fence_manager.WaitPendingFences(force); |
| 636 | } | 638 | } |
| 637 | 639 | ||
| 638 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | 640 | void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size, |
| 639 | VideoCommon::CacheType which) { | 641 | VideoCommon::CacheType which) { |
| 640 | if (Settings::IsGPULevelExtreme()) { | 642 | if (Settings::IsGPULevelExtreme()) { |
| 641 | FlushRegion(addr, size, which); | 643 | FlushRegion(addr, size, which); |
| @@ -739,7 +741,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| 739 | } | 741 | } |
| 740 | 742 | ||
| 741 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 743 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 742 | VAddr framebuffer_addr, u32 pixel_stride) { | 744 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 743 | if (framebuffer_addr == 0) { | 745 | if (framebuffer_addr == 0) { |
| 744 | return false; | 746 | return false; |
| 745 | } | 747 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ce3460938..f197774ed 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "video_core/control/channel_state_cache.h" | 15 | #include "video_core/control/channel_state_cache.h" |
| 16 | #include "video_core/engines/maxwell_dma.h" | 16 | #include "video_core/engines/maxwell_dma.h" |
| 17 | #include "video_core/rasterizer_accelerated.h" | ||
| 18 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 19 | #include "video_core/renderer_opengl/blit_image.h" | 18 | #include "video_core/renderer_opengl/blit_image.h" |
| 20 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 19 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| @@ -72,11 +71,11 @@ private: | |||
| 72 | TextureCache& texture_cache; | 71 | TextureCache& texture_cache; |
| 73 | }; | 72 | }; |
| 74 | 73 | ||
| 75 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, | 74 | class RasterizerOpenGL : public VideoCore::RasterizerInterface, |
| 76 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 75 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 77 | public: | 76 | public: |
| 78 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 77 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 79 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 78 | Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, |
| 80 | ScreenInfo& screen_info_, ProgramManager& program_manager_, | 79 | ScreenInfo& screen_info_, ProgramManager& program_manager_, |
| 81 | StateTracker& state_tracker_); | 80 | StateTracker& state_tracker_); |
| 82 | ~RasterizerOpenGL() override; | 81 | ~RasterizerOpenGL() override; |
| @@ -92,17 +91,17 @@ public: | |||
| 92 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 91 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 93 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 92 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 94 | void FlushAll() override; | 93 | void FlushAll() override; |
| 95 | void FlushRegion(VAddr addr, u64 size, | 94 | void FlushRegion(DAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 95 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 97 | bool MustFlushRegion(VAddr addr, u64 size, | 96 | bool MustFlushRegion(DAddr addr, u64 size, |
| 98 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 99 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 98 | VideoCore::RasterizerDownloadArea GetFlushArea(PAddr addr, u64 size) override; |
| 100 | void InvalidateRegion(VAddr addr, u64 size, | 99 | void InvalidateRegion(DAddr addr, u64 size, |
| 101 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 102 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 101 | void OnCacheInvalidation(PAddr addr, u64 size) override; |
| 103 | bool OnCPUWrite(VAddr addr, u64 size) override; | 102 | bool OnCPUWrite(PAddr addr, u64 size) override; |
| 104 | void InvalidateGPUCache() override; | 103 | void InvalidateGPUCache() override; |
| 105 | void UnmapMemory(VAddr addr, u64 size) override; | 104 | void UnmapMemory(DAddr addr, u64 size) override; |
| 106 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 107 | void SignalFence(std::function<void()>&& func) override; | 106 | void SignalFence(std::function<void()>&& func) override; |
| 108 | void SyncOperation(std::function<void()>&& func) override; | 107 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -110,7 +109,7 @@ public: | |||
| 110 | void SignalReference() override; | 109 | void SignalReference() override; |
| 111 | void ReleaseFences(bool force = true) override; | 110 | void ReleaseFences(bool force = true) override; |
| 112 | void FlushAndInvalidateRegion( | 111 | void FlushAndInvalidateRegion( |
| 113 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 112 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 114 | void WaitForIdle() override; | 113 | void WaitForIdle() override; |
| 115 | void FragmentBarrier() override; | 114 | void FragmentBarrier() override; |
| 116 | void TiledCacheBarrier() override; | 115 | void TiledCacheBarrier() override; |
| @@ -123,7 +122,7 @@ public: | |||
| 123 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 122 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 124 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 123 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 125 | std::span<const u8> memory) override; | 124 | std::span<const u8> memory) override; |
| 126 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 125 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 127 | u32 pixel_stride) override; | 126 | u32 pixel_stride) override; |
| 128 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 127 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 129 | const VideoCore::DiskResourceLoadCallback& callback) override; | 128 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| @@ -235,6 +234,7 @@ private: | |||
| 235 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | 234 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); |
| 236 | 235 | ||
| 237 | Tegra::GPU& gpu; | 236 | Tegra::GPU& gpu; |
| 237 | Tegra::MaxwellDeviceMemoryManager& device_memory; | ||
| 238 | 238 | ||
| 239 | const Device& device; | 239 | const Device& device; |
| 240 | ScreenInfo& screen_info; | 240 | ScreenInfo& screen_info; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 30df41b7d..50462cdde 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs | |||
| 168 | } | 168 | } |
| 169 | } // Anonymous namespace | 169 | } // Anonymous namespace |
| 170 | 170 | ||
| 171 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 171 | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 172 | const Device& device_, TextureCache& texture_cache_, | 172 | Core::Frontend::EmuWindow& emu_window_, const Device& device_, |
| 173 | BufferCache& buffer_cache_, ProgramManager& program_manager_, | 173 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 174 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) | 174 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 175 | : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, | 175 | VideoCore::ShaderNotify& shader_notify_) |
| 176 | : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_}, | ||
| 176 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, | 177 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, |
| 177 | state_tracker{state_tracker_}, shader_notify{shader_notify_}, | 178 | state_tracker{state_tracker_}, shader_notify{shader_notify_}, |
| 178 | use_asynchronous_shaders{device.UseAsynchronousShaders()}, | 179 | use_asynchronous_shaders{device.UseAsynchronousShaders()}, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6b9732fca..5ac413529 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | 17 | ||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | class MemoryManager; | 19 | class MemoryManager; |
| 20 | } | 20 | } // namespace Tegra |
| 21 | 21 | ||
| 22 | namespace OpenGL { | 22 | namespace OpenGL { |
| 23 | 23 | ||
| @@ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | |||
| 28 | 28 | ||
| 29 | class ShaderCache : public VideoCommon::ShaderCache { | 29 | class ShaderCache : public VideoCommon::ShaderCache { |
| 30 | public: | 30 | public: |
| 31 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 31 | explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 32 | const Device& device_, TextureCache& texture_cache_, | 32 | Core::Frontend::EmuWindow& emu_window_, const Device& device_, |
| 33 | BufferCache& buffer_cache_, ProgramManager& program_manager_, | 33 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 34 | StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); | 34 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 35 | VideoCore::ShaderNotify& shader_notify_); | ||
| 35 | ~ShaderCache(); | 36 | ~ShaderCache(); |
| 36 | 37 | ||
| 37 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 38 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2933718b6..821a045ad 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -144,12 +144,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 144 | 144 | ||
| 145 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 145 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| 146 | Core::Frontend::EmuWindow& emu_window_, | 146 | Core::Frontend::EmuWindow& emu_window_, |
| 147 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 147 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 148 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 148 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 149 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | 149 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |
| 150 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, | 150 | emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, |
| 151 | state_tracker{}, program_manager{device}, | 151 | state_tracker{}, program_manager{device}, |
| 152 | rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | 152 | rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, |
| 153 | state_tracker) { | ||
| 153 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | 154 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 154 | glEnable(GL_DEBUG_OUTPUT); | 155 | glEnable(GL_DEBUG_OUTPUT); |
| 155 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | 156 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); |
| @@ -242,7 +243,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 242 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; | 243 | const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |
| 243 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( | 244 | const u64 size_in_bytes{Tegra::Texture::CalculateSize( |
| 244 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | 245 | true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; |
| 245 | const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | 246 | const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; |
| 246 | const std::span<const u8> input_data(host_ptr, size_in_bytes); | 247 | const std::span<const u8> input_data(host_ptr, size_in_bytes); |
| 247 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | 248 | Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, |
| 248 | framebuffer.width, framebuffer.height, 1, block_height_log2, | 249 | framebuffer.width, framebuffer.height, 1, block_height_log2, |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b70607635..18699610a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase { | |||
| 61 | public: | 61 | public: |
| 62 | explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 62 | explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| 63 | Core::Frontend::EmuWindow& emu_window_, | 63 | Core::Frontend::EmuWindow& emu_window_, |
| 64 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 64 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 65 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 65 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 66 | ~RendererOpenGL() override; | 66 | ~RendererOpenGL() override; |
| 67 | 67 | ||
| @@ -101,7 +101,7 @@ private: | |||
| 101 | 101 | ||
| 102 | Core::TelemetrySession& telemetry_session; | 102 | Core::TelemetrySession& telemetry_session; |
| 103 | Core::Frontend::EmuWindow& emu_window; | 103 | Core::Frontend::EmuWindow& emu_window; |
| 104 | Core::Memory::Memory& cpu_memory; | 104 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 105 | Tegra::GPU& gpu; | 105 | Tegra::GPU& gpu; |
| 106 | 106 | ||
| 107 | Device device; | 107 | Device device; |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 100b70918..0e1815076 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl | |||
| 82 | 82 | ||
| 83 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | 83 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, |
| 84 | Core::Frontend::EmuWindow& emu_window, | 84 | Core::Frontend::EmuWindow& emu_window, |
| 85 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 85 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 86 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | 86 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |
| 87 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | 87 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), |
| 88 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | 88 | device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())), |
| 89 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 89 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 90 | Settings::values.renderer_debug.GetValue())), | 90 | Settings::values.renderer_debug.GetValue())), |
| 91 | debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) | 91 | debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) |
| @@ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 97 | render_window.GetFramebufferLayout().height), | 97 | render_window.GetFramebufferLayout().height), |
| 98 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, | 98 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, |
| 99 | surface), | 99 | surface), |
| 100 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, | 100 | blit_screen(device_memory, render_window, device, memory_allocator, swapchain, present_manager, |
| 101 | scheduler, screen_info), | 101 | scheduler, screen_info), |
| 102 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | 102 | rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, |
| 103 | state_tracker, scheduler) { | 103 | state_tracker, scheduler) { |
| 104 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | 104 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { |
| 105 | turbo_mode.emplace(instance, dld); | 105 | turbo_mode.emplace(instance, dld); |
| @@ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 128 | screen_info.width = framebuffer->width; | 128 | screen_info.width = framebuffer->width; |
| 129 | screen_info.height = framebuffer->height; | 129 | screen_info.height = framebuffer->height; |
| 130 | 130 | ||
| 131 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 131 | const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 132 | const bool use_accelerated = | 132 | const bool use_accelerated = |
| 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 133 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 134 | RenderScreenshot(*framebuffer, use_accelerated); | 134 | RenderScreenshot(*framebuffer, use_accelerated); |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 14e257cf7..e5ce4692d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | 20 | #include "video_core/vulkan_common/vulkan_device.h" |
| 21 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 21 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 23 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 23 | 24 | ||
| 24 | namespace Core { | 25 | namespace Core { |
| 25 | class TelemetrySession; | 26 | class TelemetrySession; |
| @@ -42,7 +43,7 @@ class RendererVulkan final : public VideoCore::RendererBase { | |||
| 42 | public: | 43 | public: |
| 43 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | 44 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |
| 44 | Core::Frontend::EmuWindow& emu_window, | 45 | Core::Frontend::EmuWindow& emu_window, |
| 45 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 46 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, |
| 46 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 47 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 47 | ~RendererVulkan() override; | 48 | ~RendererVulkan() override; |
| 48 | 49 | ||
| @@ -62,7 +63,7 @@ private: | |||
| 62 | void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); | 63 | void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); |
| 63 | 64 | ||
| 64 | Core::TelemetrySession& telemetry_session; | 65 | Core::TelemetrySession& telemetry_session; |
| 65 | Core::Memory::Memory& cpu_memory; | 66 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 66 | Tegra::GPU& gpu; | 67 | Tegra::GPU& gpu; |
| 67 | 68 | ||
| 68 | std::shared_ptr<Common::DynamicLibrary> library; | 69 | std::shared_ptr<Common::DynamicLibrary> library; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 60432f5ad..610f27c84 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 15 | #include "core/core.h" | 15 | #include "core/core.h" |
| 16 | #include "core/frontend/emu_window.h" | 16 | #include "core/frontend/emu_window.h" |
| 17 | #include "core/memory.h" | ||
| 18 | #include "video_core/gpu.h" | 17 | #include "video_core/gpu.h" |
| 18 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 19 | #include "video_core/host_shaders/fxaa_frag_spv.h" | 19 | #include "video_core/host_shaders/fxaa_frag_spv.h" |
| 20 | #include "video_core/host_shaders/fxaa_vert_spv.h" | 20 | #include "video_core/host_shaders/fxaa_vert_spv.h" |
| 21 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" | 21 | #include "video_core/host_shaders/present_bicubic_frag_spv.h" |
| @@ -121,11 +121,12 @@ struct BlitScreen::BufferData { | |||
| 121 | // Unaligned image data goes here | 121 | // Unaligned image data goes here |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
| 124 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | 124 | BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 125 | const Device& device_, MemoryAllocator& memory_allocator_, | 125 | Core::Frontend::EmuWindow& render_window_, const Device& device_, |
| 126 | Swapchain& swapchain_, PresentManager& present_manager_, | 126 | MemoryAllocator& memory_allocator_, Swapchain& swapchain_, |
| 127 | Scheduler& scheduler_, const ScreenInfo& screen_info_) | 127 | PresentManager& present_manager_, Scheduler& scheduler_, |
| 128 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | 128 | const ScreenInfo& screen_info_) |
| 129 | : device_memory{device_memory_}, render_window{render_window_}, device{device_}, | ||
| 129 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, | 130 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, |
| 130 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 131 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { |
| 131 | resource_ticks.resize(image_count); | 132 | resource_ticks.resize(image_count); |
| @@ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 219 | if (!use_accelerated) { | 220 | if (!use_accelerated) { |
| 220 | const u64 image_offset = GetRawImageOffset(framebuffer); | 221 | const u64 image_offset = GetRawImageOffset(framebuffer); |
| 221 | 222 | ||
| 222 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 223 | const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 223 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 224 | const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); |
| 224 | 225 | ||
| 225 | // TODO(Rodrigo): Read this from HLE | 226 | // TODO(Rodrigo): Read this from HLE |
| 226 | constexpr u32 block_height_log2 = 4; | 227 | constexpr u32 block_height_log2 = 4; |
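With the device memory manager in place, BlitScreen's non-accelerated path resolves the framebuffer through device_memory.GetPointer<u8>(DAddr) instead of cpu_memory.GetPointer(VAddr). A compilable sketch of that lookup pattern; DeviceMemoryStandIn is a hypothetical stand-in, reduced to the one call the hunk relies on, not the real Tegra::MaxwellDeviceMemoryManager:

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

using DAddr = std::uint64_t;

// Hypothetical stand-in: a typed pointer lookup keyed on device addresses.
class DeviceMemoryStandIn {
public:
    void Map(DAddr addr, std::vector<std::uint8_t> bytes) {
        backing[addr] = std::move(bytes);
    }
    template <typename T>
    T* GetPointer(DAddr addr) {
        const auto it = backing.find(addr);
        return it == backing.end() ? nullptr : reinterpret_cast<T*>(it->second.data());
    }

private:
    std::unordered_map<DAddr, std::vector<std::uint8_t>> backing;
};

int main() {
    DeviceMemoryStandIn device_memory;
    device_memory.Map(0x2000, std::vector<std::uint8_t>(64, 0xAB));

    // Mirrors the non-accelerated branch of BlitScreen::Draw after this change.
    const DAddr framebuffer_addr = 0x2000;
    const std::uint8_t* const host_ptr = device_memory.GetPointer<std::uint8_t>(framebuffer_addr);
    return host_ptr != nullptr && *host_ptr == 0xAB ? 0 : 1;
}
```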
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 78b32416d..cb941a956 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -8,15 +8,12 @@ | |||
| 8 | #include "core/frontend/framebuffer_layout.h" | 8 | #include "core/frontend/framebuffer_layout.h" |
| 9 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 9 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
| 13 | class System; | 14 | class System; |
| 14 | } | 15 | } |
| 15 | 16 | ||
| 16 | namespace Core::Memory { | ||
| 17 | class Memory; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace Core::Frontend { | 17 | namespace Core::Frontend { |
| 21 | class EmuWindow; | 18 | class EmuWindow; |
| 22 | } | 19 | } |
| @@ -56,7 +53,7 @@ struct ScreenInfo { | |||
| 56 | 53 | ||
| 57 | class BlitScreen { | 54 | class BlitScreen { |
| 58 | public: | 55 | public: |
| 59 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, | 56 | explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, Core::Frontend::EmuWindow& render_window, |
| 60 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | 57 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, |
| 61 | PresentManager& present_manager, Scheduler& scheduler, | 58 | PresentManager& present_manager, Scheduler& scheduler, |
| 62 | const ScreenInfo& screen_info); | 59 | const ScreenInfo& screen_info); |
| @@ -109,7 +106,7 @@ private: | |||
| 109 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | 106 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |
| 110 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; | 107 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; |
| 111 | 108 | ||
| 112 | Core::Memory::Memory& cpu_memory; | 109 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 113 | Core::Frontend::EmuWindow& render_window; | 110 | Core::Frontend::EmuWindow& render_window; |
| 114 | const Device& device; | 111 | const Device& device; |
| 115 | MemoryAllocator& memory_allocator; | 112 | MemoryAllocator& memory_allocator; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 3c61799fa..31001d142 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| 79 | } // Anonymous namespace | 79 | } // Anonymous namespace |
| 80 | 80 | ||
| 81 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) | 81 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) |
| 82 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { | 82 | : VideoCommon::BufferBase(null_params), tracker{4096} { |
| 83 | if (runtime.device.HasNullDescriptor()) { | 83 | if (runtime.device.HasNullDescriptor()) { |
| 84 | return; | 84 | return; |
| 85 | } | 85 | } |
| @@ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p | |||
| 88 | is_null = true; | 88 | is_null = true; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 91 | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) |
| 92 | VAddr cpu_addr_, u64 size_bytes_) | 92 | : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device}, |
| 93 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | 93 | buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} { |
| 94 | device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, | ||
| 95 | tracker{SizeBytes()} { | ||
| 96 | if (runtime.device.HasDebuggingToolAttached()) { | 94 | if (runtime.device.HasDebuggingToolAttached()) { |
| 97 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 95 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 98 | } | 96 | } |
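The vk_buffer_cache.cpp hunk drops the rasterizer reference from Buffer: BufferBase is no longer templated on VideoCore::RasterizerInterface, and a buffer is now built from a device address and a size alone. A sketch of the new constructor shape under stand-in names (RuntimeStandIn/BufferStandIn are hypothetical, and std::snprintf replaces the fmt dependency from the hunk):

```cpp
#include <cstdint>
#include <cstdio>

using DAddr = std::uint64_t;

struct RuntimeStandIn {};

// Stand-in mirroring Buffer(BufferCacheRuntime&, DAddr, u64) after the change.
class BufferStandIn {
public:
    explicit BufferStandIn(RuntimeStandIn&, DAddr cpu_addr_, std::uint64_t size_bytes_)
        : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}

    DAddr CpuAddr() const { return cpu_addr; }
    std::uint64_t SizeBytes() const { return size_bytes; }

private:
    DAddr cpu_addr{};
    std::uint64_t size_bytes{};
};

int main() {
    RuntimeStandIn runtime;
    BufferStandIn buffer(runtime, DAddr{0x8000}, 4096);
    // Debug-name pattern from the hunk ("Buffer 0x{:x}"), minus fmt.
    char name[32];
    std::snprintf(name, sizeof(name), "Buffer 0x%llx",
                  static_cast<unsigned long long>(buffer.CpuAddr()));
    return (buffer.SizeBytes() == 4096 && name[0] == 'B') ? 0 : 1;
}
```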
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index dc300d7cb..e273f4988 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -23,11 +23,10 @@ struct HostVertexBinding; | |||
| 23 | 23 | ||
| 24 | class BufferCacheRuntime; | 24 | class BufferCacheRuntime; |
| 25 | 25 | ||
| 26 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | 26 | class Buffer : public VideoCommon::BufferBase { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | 28 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); |
| 29 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 29 | explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_); |
| 30 | VAddr cpu_addr_, u64 size_bytes_); | ||
| 31 | 30 | ||
| 32 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | 31 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); |
| 33 | 32 | ||
| @@ -173,7 +172,7 @@ struct BufferCacheParams { | |||
| 173 | using Runtime = Vulkan::BufferCacheRuntime; | 172 | using Runtime = Vulkan::BufferCacheRuntime; |
| 174 | using Buffer = Vulkan::Buffer; | 173 | using Buffer = Vulkan::Buffer; |
| 175 | using Async_Buffer = Vulkan::StagingBufferRef; | 174 | using Async_Buffer = Vulkan::StagingBufferRef; |
| 176 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | 175 | using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; |
| 177 | 176 | ||
| 178 | static constexpr bool IS_OPENGL = false; | 177 | static constexpr bool IS_OPENGL = false; |
| 179 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | 178 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |
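BufferCacheParams now instantiates MemoryTrackerBase with Tegra::MaxwellDeviceMemoryManager, so dirty-page bookkeeping is keyed on the device memory manager rather than the rasterizer. A sketch of why that template argument is all the tracker needs, kept to the one call (UpdatePagesCachedCount) that appears elsewhere in this patch; both classes below are hypothetical stand-ins:

```cpp
#include <cstdint>
#include <iostream>

using DAddr = std::uint64_t;

// The tracker only needs something that can bump cached-page counters.
template <typename DeviceMemory>
class TrackerStandIn {
public:
    explicit TrackerStandIn(DeviceMemory& memory_) : memory{memory_} {}
    void CacheRegion(DAddr addr, std::uint64_t size) { memory.UpdatePagesCachedCount(addr, size, 1); }
    void UncacheRegion(DAddr addr, std::uint64_t size) { memory.UpdatePagesCachedCount(addr, size, -1); }

private:
    DeviceMemory& memory;
};

struct CountingMemory {
    std::int64_t cached_pages = 0;
    void UpdatePagesCachedCount(DAddr, std::uint64_t size, int delta) {
        cached_pages += delta * static_cast<std::int64_t>(size / 4096);
    }
};

int main() {
    CountingMemory memory;
    TrackerStandIn<CountingMemory> tracker{memory};
    tracker.CacheRegion(0x10000, 8192);
    tracker.UncacheRegion(0x10000, 4096);
    std::cout << memory.cached_pages << '\n'; // prints 1
}
```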
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1841198d..bec20c21a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 30 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 31 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 31 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 33 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 35 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 34 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 36 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 35 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| @@ -299,12 +298,12 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | |||
| 299 | return std::memcmp(&rhs, this, Size()) == 0; | 298 | return std::memcmp(&rhs, this, Size()) == 0; |
| 300 | } | 299 | } |
| 301 | 300 | ||
| 302 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, | 301 | PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, |
| 303 | Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | 302 | Scheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 304 | GuestDescriptorQueue& guest_descriptor_queue_, | 303 | GuestDescriptorQueue& guest_descriptor_queue_, |
| 305 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | 304 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 306 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) | 305 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |
| 307 | : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, | 306 | : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_}, |
| 308 | descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, | 307 | descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, |
| 309 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, | 308 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, |
| 310 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, | 309 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e323ea0fd..354fdc8ed 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 26 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 28 | #include "video_core/shader_cache.h" | 28 | #include "video_core/shader_cache.h" |
| 29 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 29 | 30 | ||
| 30 | namespace Core { | 31 | namespace Core { |
| 31 | class System; | 32 | class System; |
| @@ -79,7 +80,6 @@ class ComputePipeline; | |||
| 79 | class DescriptorPool; | 80 | class DescriptorPool; |
| 80 | class Device; | 81 | class Device; |
| 81 | class PipelineStatistics; | 82 | class PipelineStatistics; |
| 82 | class RasterizerVulkan; | ||
| 83 | class RenderPassCache; | 83 | class RenderPassCache; |
| 84 | class Scheduler; | 84 | class Scheduler; |
| 85 | 85 | ||
| @@ -99,7 +99,7 @@ struct ShaderPools { | |||
| 99 | 99 | ||
| 100 | class PipelineCache : public VideoCommon::ShaderCache { | 100 | class PipelineCache : public VideoCommon::ShaderCache { |
| 101 | public: | 101 | public: |
| 102 | explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, | 102 | explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device, Scheduler& scheduler, |
| 103 | DescriptorPool& descriptor_pool, | 103 | DescriptorPool& descriptor_pool, |
| 104 | GuestDescriptorQueue& guest_descriptor_queue, | 104 | GuestDescriptorQueue& guest_descriptor_queue, |
| 105 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | 105 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
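After the pipeline-cache hunks, PipelineCache no longer reaches its ShaderCache base through the rasterizer; it forwards the device memory manager directly, which is why the RasterizerVulkan forward declaration could be removed. A minimal sketch of that ownership chain with hypothetical stand-in names:

```cpp
struct DeviceMemoryStandIn {};

class ShaderCacheStandIn {
public:
    explicit ShaderCacheStandIn(DeviceMemoryStandIn& device_memory_) : device_memory{device_memory_} {}

protected:
    DeviceMemoryStandIn& device_memory; // what the base used to obtain via the rasterizer
};

class PipelineCacheStandIn : public ShaderCacheStandIn {
public:
    explicit PipelineCacheStandIn(DeviceMemoryStandIn& device_memory_)
        : ShaderCacheStandIn{device_memory_} {}
};

int main() {
    DeviceMemoryStandIn device_memory;
    PipelineCacheStandIn cache{device_memory};
    return 0;
}
```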
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ad4caf688..d59fe698c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -14,7 +14,9 @@ | |||
| 14 | #include "common/bit_util.h" | 14 | #include "common/bit_util.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| 17 | #include "video_core/rasterizer_interface.h" | ||
| 17 | #include "video_core/engines/draw_manager.h" | 18 | #include "video_core/engines/draw_manager.h" |
| 19 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 18 | #include "video_core/query_cache/query_cache.h" | 20 | #include "video_core/query_cache/query_cache.h" |
| 19 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 20 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -102,7 +104,7 @@ private: | |||
| 102 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | 104 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; |
| 103 | 105 | ||
| 104 | struct HostSyncValues { | 106 | struct HostSyncValues { |
| 105 | VAddr address; | 107 | DAddr address; |
| 106 | size_t size; | 108 | size_t size; |
| 107 | size_t offset; | 109 | size_t offset; |
| 108 | 110 | ||
| @@ -317,7 +319,7 @@ public: | |||
| 317 | pending_sync.clear(); | 319 | pending_sync.clear(); |
| 318 | } | 320 | } |
| 319 | 321 | ||
| 320 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 322 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 321 | [[maybe_unused]] std::optional<u32> subreport) override { | 323 | [[maybe_unused]] std::optional<u32> subreport) override { |
| 322 | PauseCounter(); | 324 | PauseCounter(); |
| 323 | auto index = BuildQuery(); | 325 | auto index = BuildQuery(); |
| @@ -738,7 +740,7 @@ public: | |||
| 738 | pending_sync.clear(); | 740 | pending_sync.clear(); |
| 739 | } | 741 | } |
| 740 | 742 | ||
| 741 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 743 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 742 | std::optional<u32> subreport_) override { | 744 | std::optional<u32> subreport_) override { |
| 743 | auto index = BuildQuery(); | 745 | auto index = BuildQuery(); |
| 744 | auto* new_query = GetQuery(index); | 746 | auto* new_query = GetQuery(index); |
| @@ -769,9 +771,9 @@ public: | |||
| 769 | return index; | 771 | return index; |
| 770 | } | 772 | } |
| 771 | 773 | ||
| 772 | std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { | 774 | std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) { |
| 773 | if (last_queries[stream] != 0) { | 775 | if (last_queries[stream] != 0) { |
| 774 | std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); | 776 | std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); |
| 775 | return result; | 777 | return result; |
| 776 | } | 778 | } |
| 777 | return std::nullopt; | 779 | return std::nullopt; |
| @@ -974,7 +976,7 @@ private: | |||
| 974 | size_t buffers_count{}; | 976 | size_t buffers_count{}; |
| 975 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | 977 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; |
| 976 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | 978 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; |
| 977 | std::array<VAddr, NUM_STREAMS> last_queries; | 979 | std::array<DAddr, NUM_STREAMS> last_queries; |
| 978 | std::array<size_t, NUM_STREAMS> last_queries_stride; | 980 | std::array<size_t, NUM_STREAMS> last_queries_stride; |
| 979 | Maxwell3D::Regs::PrimitiveTopology out_topology; | 981 | Maxwell3D::Regs::PrimitiveTopology out_topology; |
| 980 | u64 streams_mask; | 982 | u64 streams_mask; |
| @@ -987,7 +989,7 @@ public: | |||
| 987 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} | 989 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} |
| 988 | 990 | ||
| 989 | // Parameterized constructor | 991 | // Parameterized constructor |
| 990 | PrimitivesQueryBase(bool has_timestamp, VAddr address) | 992 | PrimitivesQueryBase(bool has_timestamp, DAddr address) |
| 991 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { | 993 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { |
| 992 | if (has_timestamp) { | 994 | if (has_timestamp) { |
| 993 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; | 995 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; |
| @@ -995,7 +997,7 @@ public: | |||
| 995 | } | 997 | } |
| 996 | 998 | ||
| 997 | u64 stride{}; | 999 | u64 stride{}; |
| 998 | VAddr dependant_address{}; | 1000 | DAddr dependant_address{}; |
| 999 | Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; | 1001 | Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; |
| 1000 | size_t dependant_index{}; | 1002 | size_t dependant_index{}; |
| 1001 | bool dependant_manage{}; | 1003 | bool dependant_manage{}; |
| @@ -1005,15 +1007,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive | |||
| 1005 | public: | 1007 | public: |
| 1006 | explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, | 1008 | explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, |
| 1007 | TFBCounterStreamer& tfb_streamer_, | 1009 | TFBCounterStreamer& tfb_streamer_, |
| 1008 | Core::Memory::Memory& cpu_memory_) | 1010 | Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 1009 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, | 1011 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, |
| 1010 | tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { | 1012 | tfb_streamer{tfb_streamer_}, device_memory{device_memory_} { |
| 1011 | MakeDependent(&tfb_streamer); | 1013 | MakeDependent(&tfb_streamer); |
| 1012 | } | 1014 | } |
| 1013 | 1015 | ||
| 1014 | ~PrimitivesSucceededStreamer() = default; | 1016 | ~PrimitivesSucceededStreamer() = default; |
| 1015 | 1017 | ||
| 1016 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 1018 | size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, |
| 1017 | std::optional<u32> subreport_) override { | 1019 | std::optional<u32> subreport_) override { |
| 1018 | auto index = BuildQuery(); | 1020 | auto index = BuildQuery(); |
| 1019 | auto* new_query = GetQuery(index); | 1021 | auto* new_query = GetQuery(index); |
| @@ -1063,6 +1065,8 @@ public: | |||
| 1063 | } | 1065 | } |
| 1064 | }); | 1066 | }); |
| 1065 | } | 1067 | } |
| 1068 | auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address); | ||
| 1069 | ASSERT(ptr != nullptr); | ||
| 1066 | 1070 | ||
| 1067 | new_query->dependant_manage = must_manage_dependance; | 1071 | new_query->dependant_manage = must_manage_dependance; |
| 1068 | pending_flush_queries.push_back(index); | 1072 | pending_flush_queries.push_back(index); |
| @@ -1100,7 +1104,7 @@ public: | |||
| 1100 | num_vertices = dependant_query->value / query->stride; | 1104 | num_vertices = dependant_query->value / query->stride; |
| 1101 | tfb_streamer.Free(query->dependant_index); | 1105 | tfb_streamer.Free(query->dependant_index); |
| 1102 | } else { | 1106 | } else { |
| 1103 | u8* pointer = cpu_memory.GetPointer(query->dependant_address); | 1107 | u8* pointer = device_memory.GetPointer<u8>(query->dependant_address); |
| 1104 | u32 result; | 1108 | u32 result; |
| 1105 | std::memcpy(&result, pointer, sizeof(u32)); | 1109 | std::memcpy(&result, pointer, sizeof(u32)); |
| 1106 | num_vertices = static_cast<u64>(result) / query->stride; | 1110 | num_vertices = static_cast<u64>(result) / query->stride; |
| @@ -1137,7 +1141,7 @@ public: | |||
| 1137 | private: | 1141 | private: |
| 1138 | QueryCacheRuntime& runtime; | 1142 | QueryCacheRuntime& runtime; |
| 1139 | TFBCounterStreamer& tfb_streamer; | 1143 | TFBCounterStreamer& tfb_streamer; |
| 1140 | Core::Memory::Memory& cpu_memory; | 1144 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 1141 | 1145 | ||
| 1142 | // syncing queue | 1146 | // syncing queue |
| 1143 | std::vector<size_t> pending_sync; | 1147 | std::vector<size_t> pending_sync; |
| @@ -1152,12 +1156,12 @@ private: | |||
| 1152 | 1156 | ||
| 1153 | struct QueryCacheRuntimeImpl { | 1157 | struct QueryCacheRuntimeImpl { |
| 1154 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | 1158 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, |
| 1155 | Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, | 1159 | Tegra::MaxwellDeviceMemoryManager& device_memory_, Vulkan::BufferCache& buffer_cache_, |
| 1156 | const Device& device_, const MemoryAllocator& memory_allocator_, | 1160 | const Device& device_, const MemoryAllocator& memory_allocator_, |
| 1157 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | 1161 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 1158 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | 1162 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
| 1159 | DescriptorPool& descriptor_pool) | 1163 | DescriptorPool& descriptor_pool) |
| 1160 | : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, | 1164 | : rasterizer{rasterizer_}, device_memory{device_memory_}, |
| 1161 | buffer_cache{buffer_cache_}, device{device_}, | 1165 | buffer_cache{buffer_cache_}, device{device_}, |
| 1162 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | 1166 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, |
| 1163 | guest_streamer(0, runtime), | 1167 | guest_streamer(0, runtime), |
| @@ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1168 | scheduler, memory_allocator, staging_pool), | 1172 | scheduler, memory_allocator, staging_pool), |
| 1169 | primitives_succeeded_streamer( | 1173 | primitives_succeeded_streamer( |
| 1170 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, | 1174 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, |
| 1171 | cpu_memory_), | 1175 | device_memory_), |
| 1172 | primitives_needed_minus_succeeded_streamer( | 1176 | primitives_needed_minus_succeeded_streamer( |
| 1173 | static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), | 1177 | static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), |
| 1174 | hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { | 1178 | hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { |
| @@ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1195 | } | 1199 | } |
| 1196 | 1200 | ||
| 1197 | VideoCore::RasterizerInterface* rasterizer; | 1201 | VideoCore::RasterizerInterface* rasterizer; |
| 1198 | Core::Memory::Memory& cpu_memory; | 1202 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 1199 | Vulkan::BufferCache& buffer_cache; | 1203 | Vulkan::BufferCache& buffer_cache; |
| 1200 | 1204 | ||
| 1201 | const Device& device; | 1205 | const Device& device; |
| @@ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1210 | PrimitivesSucceededStreamer primitives_succeeded_streamer; | 1214 | PrimitivesSucceededStreamer primitives_succeeded_streamer; |
| 1211 | VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; | 1215 | VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; |
| 1212 | 1216 | ||
| 1213 | std::vector<std::pair<VAddr, VAddr>> little_cache; | 1217 | std::vector<std::pair<DAddr, DAddr>> little_cache; |
| 1214 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | 1218 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| 1215 | std::vector<size_t> redirect_cache; | 1219 | std::vector<size_t> redirect_cache; |
| 1216 | std::vector<std::vector<VkBufferCopy>> copies_setup; | 1220 | std::vector<std::vector<VkBufferCopy>> copies_setup; |
| @@ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl { | |||
| 1229 | }; | 1233 | }; |
| 1230 | 1234 | ||
| 1231 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | 1235 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 1232 | Core::Memory::Memory& cpu_memory_, | 1236 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 1233 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | 1237 | Vulkan::BufferCache& buffer_cache_, const Device& device_, |
| 1234 | const MemoryAllocator& memory_allocator_, | 1238 | const MemoryAllocator& memory_allocator_, |
| 1235 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | 1239 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 1236 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | 1240 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, |
| 1237 | DescriptorPool& descriptor_pool) { | 1241 | DescriptorPool& descriptor_pool) { |
| 1238 | impl = std::make_unique<QueryCacheRuntimeImpl>( | 1242 | impl = std::make_unique<QueryCacheRuntimeImpl>( |
| 1239 | *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | 1243 | *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, |
| 1240 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | 1244 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); |
| 1241 | } | 1245 | } |
| 1242 | 1246 | ||
| @@ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo | |||
| 1309 | ResumeHostConditionalRendering(); | 1313 | ResumeHostConditionalRendering(); |
| 1310 | } | 1314 | } |
| 1311 | 1315 | ||
| 1312 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { | 1316 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { |
| 1313 | VkBuffer to_resolve; | 1317 | VkBuffer to_resolve; |
| 1314 | u32 to_resolve_offset; | 1318 | u32 to_resolve_offset; |
| 1315 | { | 1319 | { |
| @@ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1350 | return false; | 1354 | return false; |
| 1351 | } | 1355 | } |
| 1352 | 1356 | ||
| 1353 | const auto check_in_bc = [&](VAddr address) { | 1357 | const auto check_in_bc = [&](DAddr address) { |
| 1354 | return impl->buffer_cache.IsRegionGpuModified(address, 8); | 1358 | return impl->buffer_cache.IsRegionGpuModified(address, 8); |
| 1355 | }; | 1359 | }; |
| 1356 | const auto check_value = [&](VAddr address) { | 1360 | const auto check_value = [&](DAddr address) { |
| 1357 | u8* ptr = impl->cpu_memory.GetPointer(address); | 1361 | u8* ptr = impl->device_memory.GetPointer<u8>(address); |
| 1358 | u64 value{}; | 1362 | u64 value{}; |
| 1359 | std::memcpy(&value, ptr, sizeof(value)); | 1363 | std::memcpy(&value, ptr, sizeof(value)); |
| 1360 | return value == 0; | 1364 | return value == 0; |
| @@ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba | |||
| 1477 | for (auto& sync_val : values) { | 1481 | for (auto& sync_val : values) { |
| 1478 | total_size += sync_val.size; | 1482 | total_size += sync_val.size; |
| 1479 | bool found = false; | 1483 | bool found = false; |
| 1480 | VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | 1484 | DAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); |
| 1481 | VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | 1485 | DAddr base_end = base + Core::Memory::YUZU_PAGESIZE; |
| 1482 | for (size_t i = 0; i < impl->little_cache.size(); i++) { | 1486 | for (size_t i = 0; i < impl->little_cache.size(); i++) { |
| 1483 | const auto set_found = [&] { | 1487 | const auto set_found = [&] { |
| 1484 | impl->redirect_cache.push_back(i); | 1488 | impl->redirect_cache.push_back(i); |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index e9a1ea169..f6151123e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl; | |||
| 27 | class QueryCacheRuntime { | 27 | class QueryCacheRuntime { |
| 28 | public: | 28 | public: |
| 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 30 | Core::Memory::Memory& cpu_memory_, | 30 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, |
| 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, |
| 33 | StagingBufferPool& staging_pool_, | 33 | StagingBufferPool& staging_pool_, |
| @@ -61,7 +61,7 @@ public: | |||
| 61 | 61 | ||
| 62 | private: | 62 | private: |
| 63 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | 63 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |
| 64 | void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); | 64 | void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal); |
| 65 | friend struct QueryCacheRuntimeImpl; | 65 | friend struct QueryCacheRuntimeImpl; |
| 66 | std::unique_ptr<QueryCacheRuntimeImpl> impl; | 66 | std::unique_ptr<QueryCacheRuntimeImpl> impl; |
| 67 | }; | 67 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 241fc34be..efcc349a0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "video_core/engines/draw_manager.h" | 18 | #include "video_core/engines/draw_manager.h" |
| 19 | #include "video_core/engines/kepler_compute.h" | 19 | #include "video_core/engines/kepler_compute.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 21 | #include "video_core/renderer_vulkan/blit_image.h" | 22 | #include "video_core/renderer_vulkan/blit_image.h" |
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 23 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| @@ -37,6 +38,7 @@ | |||
| 37 | #include "video_core/vulkan_common/vulkan_device.h" | 38 | #include "video_core/vulkan_common/vulkan_device.h" |
| 38 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 39 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 39 | 40 | ||
| 41 | |||
| 40 | namespace Vulkan { | 42 | namespace Vulkan { |
| 41 | 43 | ||
| 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -163,10 +165,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | |||
| 163 | } // Anonymous namespace | 165 | } // Anonymous namespace |
| 164 | 166 | ||
| 165 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 167 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 166 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | 168 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 167 | const Device& device_, MemoryAllocator& memory_allocator_, | 169 | ScreenInfo& screen_info_, const Device& device_, |
| 168 | StateTracker& state_tracker_, Scheduler& scheduler_) | 170 | MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, |
| 169 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, | 171 | Scheduler& scheduler_) |
| 172 | : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, | ||
| 170 | memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, | 173 | memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 171 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 174 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 172 | guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), | 175 | guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), |
| @@ -174,14 +177,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 174 | texture_cache_runtime{ | 177 | texture_cache_runtime{ |
| 175 | device, scheduler, memory_allocator, staging_pool, | 178 | device, scheduler, memory_allocator, staging_pool, |
| 176 | blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue}, | 179 | blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue}, |
| 177 | texture_cache(texture_cache_runtime, *this), | 180 | texture_cache(texture_cache_runtime, device_memory), |
| 178 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 181 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 179 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | 182 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |
| 180 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 183 | buffer_cache(device_memory, buffer_cache_runtime), |
| 181 | query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | 184 | query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler, |
| 182 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), | 185 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), |
| 183 | query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | 186 | query_cache(gpu, *this, device_memory, query_cache_runtime), |
| 184 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | 187 | pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue, |
| 185 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | 188 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 186 | accelerate_dma(buffer_cache, texture_cache, scheduler), | 189 | accelerate_dma(buffer_cache, texture_cache, scheduler), |
| 187 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 190 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| @@ -508,7 +511,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in | |||
| 508 | 511 | ||
| 509 | void RasterizerVulkan::FlushAll() {} | 512 | void RasterizerVulkan::FlushAll() {} |
| 510 | 513 | ||
| 511 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 514 | void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 512 | if (addr == 0 || size == 0) { | 515 | if (addr == 0 || size == 0) { |
| 513 | return; | 516 | return; |
| 514 | } | 517 | } |
| @@ -525,7 +528,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
| 525 | } | 528 | } |
| 526 | } | 529 | } |
| 527 | 530 | ||
| 528 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 531 | bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 529 | if ((True(which & VideoCommon::CacheType::BufferCache))) { | 532 | if ((True(which & VideoCommon::CacheType::BufferCache))) { |
| 530 | std::scoped_lock lock{buffer_cache.mutex}; | 533 | std::scoped_lock lock{buffer_cache.mutex}; |
| 531 | if (buffer_cache.IsRegionGpuModified(addr, size)) { | 534 | if (buffer_cache.IsRegionGpuModified(addr, size)) { |
| @@ -542,7 +545,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
| 542 | return false; | 545 | return false; |
| 543 | } | 546 | } |
| 544 | 547 | ||
| 545 | VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) { | 548 | VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) { |
| 546 | { | 549 | { |
| 547 | std::scoped_lock lock{texture_cache.mutex}; | 550 | std::scoped_lock lock{texture_cache.mutex}; |
| 548 | auto area = texture_cache.GetFlushArea(addr, size); | 551 | auto area = texture_cache.GetFlushArea(addr, size); |
| @@ -558,7 +561,7 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 | |||
| 558 | return new_area; | 561 | return new_area; |
| 559 | } | 562 | } |
| 560 | 563 | ||
| 561 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | 564 | void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { |
| 562 | if (addr == 0 || size == 0) { | 565 | if (addr == 0 || size == 0) { |
| 563 | return; | 566 | return; |
| 564 | } | 567 | } |
| @@ -578,7 +581,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 578 | } | 581 | } |
| 579 | } | 582 | } |
| 580 | 583 | ||
| 581 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | 584 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { |
| 582 | { | 585 | { |
| 583 | std::scoped_lock lock{texture_cache.mutex}; | 586 | std::scoped_lock lock{texture_cache.mutex}; |
| 584 | for (const auto& [addr, size] : sequences) { | 587 | for (const auto& [addr, size] : sequences) { |
| @@ -599,7 +602,8 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||
| 599 | } | 602 | } |
| 600 | } | 603 | } |
| 601 | 604 | ||
| 602 | bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 605 | bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) { |
| 606 | const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||
| 603 | if (addr == 0 || size == 0) { | 607 | if (addr == 0 || size == 0) { |
| 604 | return false; | 608 | return false; |
| 605 | } | 609 | } |
| @@ -620,7 +624,8 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 620 | return false; | 624 | return false; |
| 621 | } | 625 | } |
| 622 | 626 | ||
| 623 | void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | 627 | void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) { |
| 628 | const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||
| 624 | if (addr == 0 || size == 0) { | 629 | if (addr == 0 || size == 0) { |
| 625 | return; | 630 | return; |
| 626 | } | 631 | } |
| @@ -640,7 +645,7 @@ void RasterizerVulkan::InvalidateGPUCache() { | |||
| 640 | gpu.InvalidateGPUCache(); | 645 | gpu.InvalidateGPUCache(); |
| 641 | } | 646 | } |
| 642 | 647 | ||
| 643 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 648 | void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) { |
| 644 | { | 649 | { |
| 645 | std::scoped_lock lock{texture_cache.mutex}; | 650 | std::scoped_lock lock{texture_cache.mutex}; |
| 646 | texture_cache.UnmapMemory(addr, size); | 651 | texture_cache.UnmapMemory(addr, size); |
| @@ -679,7 +684,7 @@ void RasterizerVulkan::ReleaseFences(bool force) { | |||
| 679 | fence_manager.WaitPendingFences(force); | 684 | fence_manager.WaitPendingFences(force); |
| 680 | } | 685 | } |
| 681 | 686 | ||
| 682 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | 687 | void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size, |
| 683 | VideoCommon::CacheType which) { | 688 | VideoCommon::CacheType which) { |
| 684 | if (Settings::IsGPULevelExtreme()) { | 689 | if (Settings::IsGPULevelExtreme()) { |
| 685 | FlushRegion(addr, size, which); | 690 | FlushRegion(addr, size, which); |
| @@ -782,7 +787,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| 782 | } | 787 | } |
| 783 | 788 | ||
| 784 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 789 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 785 | VAddr framebuffer_addr, u32 pixel_stride) { | 790 | DAddr framebuffer_addr, u32 pixel_stride) { |
| 786 | if (!framebuffer_addr) { | 791 | if (!framebuffer_addr) { |
| 787 | return false; | 792 | return false; |
| 788 | } | 793 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ad069556c..d593f35df 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/control/channel_state_cache.h" | 13 | #include "video_core/control/channel_state_cache.h" |
| 14 | #include "video_core/engines/maxwell_dma.h" | 14 | #include "video_core/engines/maxwell_dma.h" |
| 15 | #include "video_core/rasterizer_accelerated.h" | ||
| 16 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| 17 | #include "video_core/renderer_vulkan/blit_image.h" | 16 | #include "video_core/renderer_vulkan/blit_image.h" |
| 18 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 17 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| @@ -25,6 +24,7 @@ | |||
| 25 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 24 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 26 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 25 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 27 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 26 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 27 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 28 | 28 | ||
| 29 | namespace Core { | 29 | namespace Core { |
| 30 | class System; | 30 | class System; |
| @@ -34,10 +34,14 @@ namespace Core::Frontend { | |||
| 34 | class EmuWindow; | 34 | class EmuWindow; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | namespace Tegra::Engines { | 37 | namespace Tegra { |
| 38 | |||
| 39 | namespace Engines { | ||
| 38 | class Maxwell3D; | 40 | class Maxwell3D; |
| 39 | } | 41 | } |
| 40 | 42 | ||
| 43 | } // namespace Tegra | ||
| 44 | |||
| 41 | namespace Vulkan { | 45 | namespace Vulkan { |
| 42 | 46 | ||
| 43 | struct ScreenInfo; | 47 | struct ScreenInfo; |
| @@ -70,13 +74,14 @@ private: | |||
| 70 | Scheduler& scheduler; | 74 | Scheduler& scheduler; |
| 71 | }; | 75 | }; |
| 72 | 76 | ||
| 73 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | 77 | class RasterizerVulkan final : public VideoCore::RasterizerInterface, |
| 74 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 78 | protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 75 | public: | 79 | public: |
| 76 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 80 | explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 77 | Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | 81 | Tegra::MaxwellDeviceMemoryManager& device_memory_, |
| 78 | const Device& device_, MemoryAllocator& memory_allocator_, | 82 | ScreenInfo& screen_info_, const Device& device_, |
| 79 | StateTracker& state_tracker_, Scheduler& scheduler_); | 83 | MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, |
| 84 | Scheduler& scheduler_); | ||
| 80 | ~RasterizerVulkan() override; | 85 | ~RasterizerVulkan() override; |
| 81 | 86 | ||
| 82 | void Draw(bool is_indexed, u32 instance_count) override; | 87 | void Draw(bool is_indexed, u32 instance_count) override; |
| @@ -90,18 +95,18 @@ public: | |||
| 90 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 95 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 91 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 96 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 92 | void FlushAll() override; | 97 | void FlushAll() override; |
| 93 | void FlushRegion(VAddr addr, u64 size, | 98 | void FlushRegion(DAddr addr, u64 size, |
| 94 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 99 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 95 | bool MustFlushRegion(VAddr addr, u64 size, | 100 | bool MustFlushRegion(DAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 101 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 97 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 102 | VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; |
| 98 | void InvalidateRegion(VAddr addr, u64 size, | 103 | void InvalidateRegion(DAddr addr, u64 size, |
| 99 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 104 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 100 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | 105 | void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override; |
| 101 | void OnCacheInvalidation(VAddr addr, u64 size) override; | 106 | void OnCacheInvalidation(DAddr addr, u64 size) override; |
| 102 | bool OnCPUWrite(VAddr addr, u64 size) override; | 107 | bool OnCPUWrite(DAddr addr, u64 size) override; |
| 103 | void InvalidateGPUCache() override; | 108 | void InvalidateGPUCache() override; |
| 104 | void UnmapMemory(VAddr addr, u64 size) override; | 109 | void UnmapMemory(DAddr addr, u64 size) override; |
| 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 110 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 106 | void SignalFence(std::function<void()>&& func) override; | 111 | void SignalFence(std::function<void()>&& func) override; |
| 107 | void SyncOperation(std::function<void()>&& func) override; | 112 | void SyncOperation(std::function<void()>&& func) override; |
| @@ -109,7 +114,7 @@ public: | |||
| 109 | void SignalReference() override; | 114 | void SignalReference() override; |
| 110 | void ReleaseFences(bool force = true) override; | 115 | void ReleaseFences(bool force = true) override; |
| 111 | void FlushAndInvalidateRegion( | 116 | void FlushAndInvalidateRegion( |
| 112 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 117 | DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 113 | void WaitForIdle() override; | 118 | void WaitForIdle() override; |
| 114 | void FragmentBarrier() override; | 119 | void FragmentBarrier() override; |
| 115 | void TiledCacheBarrier() override; | 120 | void TiledCacheBarrier() override; |
| @@ -122,7 +127,7 @@ public: | |||
| 122 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 127 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 123 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 128 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 124 | std::span<const u8> memory) override; | 129 | std::span<const u8> memory) override; |
| 125 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 130 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, |
| 126 | u32 pixel_stride) override; | 131 | u32 pixel_stride) override; |
| 127 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 132 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 128 | const VideoCore::DiskResourceLoadCallback& callback) override; | 133 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| @@ -176,6 +181,7 @@ private: | |||
| 176 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | 181 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); |
| 177 | 182 | ||
| 178 | Tegra::GPU& gpu; | 183 | Tegra::GPU& gpu; |
| 184 | Tegra::MaxwellDeviceMemoryManager& device_memory; | ||
| 179 | 185 | ||
| 180 | ScreenInfo& screen_info; | 186 | ScreenInfo& screen_info; |
| 181 | const Device& device; | 187 | const Device& device; |
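The header hunks show the same migration at the interface level: RasterizerVulkan now derives from VideoCore::RasterizerInterface directly instead of RasterizerAccelerated, and its overrides take DAddr. A minimal sketch of that override shape under stand-in names (only the two region methods are shown, and the names are not the real yuzu classes):

```cpp
#include <cstdint>

using DAddr = std::uint64_t;

class RasterizerInterfaceStandIn {
public:
    virtual ~RasterizerInterfaceStandIn() = default;
    virtual void FlushRegion(DAddr addr, std::uint64_t size) = 0;
    virtual void InvalidateRegion(DAddr addr, std::uint64_t size) = 0;
};

class RasterizerStandIn final : public RasterizerInterfaceStandIn {
public:
    void FlushRegion(DAddr, std::uint64_t) override {}
    void InvalidateRegion(DAddr, std::uint64_t) override {}
};

int main() {
    RasterizerStandIn rasterizer;
    rasterizer.FlushRegion(0x1000, 0x100);
    return 0;
}
```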
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index e81cd031b..86fd62428 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 13 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| 15 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 15 | #include "video_core/memory_manager.h" | 16 | #include "video_core/memory_manager.h" |
| 16 | #include "video_core/shader_cache.h" | 17 | #include "video_core/shader_cache.h" |
| 17 | #include "video_core/shader_environment.h" | 18 | #include "video_core/shader_environment.h" |
| @@ -34,7 +35,7 @@ void ShaderCache::SyncGuestHost() { | |||
| 34 | RemovePendingShaders(); | 35 | RemovePendingShaders(); |
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | 38 | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_) : device_memory{device_memory_} {} |
| 38 | 39 | ||
| 39 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | 40 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { |
| 40 | auto& dirty{maxwell3d->dirty.flags}; | 41 | auto& dirty{maxwell3d->dirty.flags}; |
| @@ -132,7 +133,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t | |||
| 132 | 133 | ||
| 133 | storage.push_back(std::move(data)); | 134 | storage.push_back(std::move(data)); |
| 134 | 135 | ||
| 135 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | 136 | device_memory.UpdatePagesCachedCount(addr, size, 1); |
| 136 | } | 137 | } |
| 137 | 138 | ||
| 138 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | 139 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { |
| @@ -209,7 +210,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { | |||
| 209 | 210 | ||
| 210 | const VAddr addr = entry->addr_start; | 211 | const VAddr addr = entry->addr_start; |
| 211 | const size_t size = entry->addr_end - addr; | 212 | const size_t size = entry->addr_end - addr; |
| 212 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | 213 | device_memory.UpdatePagesCachedCount(addr, size, -1); |
| 213 | } | 214 | } |
| 214 | 215 | ||
| 215 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | 216 | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { |
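With the rasterizer dependency gone, the shader cache's Register/UnmarkMemory paths call UpdatePagesCachedCount on the device memory manager directly. A self-contained sketch of what that kind of reference-counted page bookkeeping amounts to, assuming a simplified manager with 4 KiB pages (the class and the map below are stand-ins, not yuzu's implementation):

#include <cstddef>
#include <cstdint>
#include <unordered_map>

using DAddr = std::uint64_t;

// Hypothetical, simplified stand-in for a device memory manager that keeps a
// per-page count of how many cache entries currently cover that page.
class PageTracker {
public:
    static constexpr std::size_t PAGE_BITS = 12; // assume 4 KiB pages

    // delta is +1 when a cache entry starts covering [addr, addr + size),
    // and -1 when it stops (mirroring Register/UnmarkMemory above).
    void UpdatePagesCachedCount(DAddr addr, std::size_t size, int delta) {
        const DAddr first_page = addr >> PAGE_BITS;
        const DAddr last_page = (addr + size - 1) >> PAGE_BITS;
        for (DAddr page = first_page; page <= last_page; ++page) {
            counts[page] += delta;
            // A real manager would also protect or unprotect the backing
            // memory when a count transitions between zero and non-zero.
        }
    }

    bool IsCached(DAddr addr) const {
        const auto it = counts.find(addr >> PAGE_BITS);
        return it != counts.end() && it->second > 0;
    }

private:
    std::unordered_map<DAddr, int> counts;
};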
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index a76896620..02ef39483 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "video_core/control/channel_state_cache.h" | 16 | #include "video_core/control/channel_state_cache.h" |
| 17 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/shader_environment.h" | 18 | #include "video_core/shader_environment.h" |
| 19 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 19 | 20 | ||
| 20 | namespace Tegra { | 21 | namespace Tegra { |
| 21 | class MemoryManager; | 22 | class MemoryManager; |
| @@ -77,7 +78,7 @@ protected: | |||
| 77 | } | 78 | } |
| 78 | }; | 79 | }; |
| 79 | 80 | ||
| 80 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); | 81 | explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory); |
| 81 | 82 | ||
| 82 | /// @brief Update the hashes and information of shader stages | 83 | /// @brief Update the hashes and information of shader stages |
| 83 | /// @param unique_hashes Shader hashes to store into when a stage is enabled | 84 | /// @param unique_hashes Shader hashes to store into when a stage is enabled |
| @@ -145,7 +146,7 @@ private: | |||
| 145 | /// @brief Create a new shader entry and register it | 146 | /// @brief Create a new shader entry and register it |
| 146 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); | 147 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); |
| 147 | 148 | ||
| 148 | VideoCore::RasterizerInterface& rasterizer; | 149 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 149 | 150 | ||
| 150 | mutable std::mutex lookup_mutex; | 151 | mutable std::mutex lookup_mutex; |
| 151 | std::mutex invalidation_mutex; | 152 | std::mutex invalidation_mutex; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..7398ed2ec 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -8,10 +8,11 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| 11 | #include "core/memory.h" | ||
| 12 | #include "video_core/control/channel_state.h" | 11 | #include "video_core/control/channel_state.h" |
| 13 | #include "video_core/dirty_flags.h" | 12 | #include "video_core/dirty_flags.h" |
| 14 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/guest_memory.h" | ||
| 15 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 15 | #include "video_core/texture_cache/image_view_base.h" | 16 | #include "video_core/texture_cache/image_view_base.h" |
| 16 | #include "video_core/texture_cache/samples_helper.h" | 17 | #include "video_core/texture_cache/samples_helper.h" |
| 17 | #include "video_core/texture_cache/texture_cache_base.h" | 18 | #include "video_core/texture_cache/texture_cache_base.h" |
| @@ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 27 | using namespace Common::Literals; | 28 | using namespace Common::Literals; |
| 28 | 29 | ||
| 29 | template <class P> | 30 | template <class P> |
| 30 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) | 31 | TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_) |
| 31 | : runtime{runtime_}, rasterizer{rasterizer_} { | 32 | : runtime{runtime_}, device_memory{device_memory_} { |
| 32 | // Configure null sampler | 33 | // Configure null sampler |
| 33 | TSCEntry sampler_descriptor{}; | 34 | TSCEntry sampler_descriptor{}; |
| 34 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | 35 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); |
| @@ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 49 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 50 | 51 | ||
| 51 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 52 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | 53 | const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 53 | const s64 min_spacing_expected = device_memory - 1_GiB; | 54 | const s64 min_spacing_expected = device_local_memory - 1_GiB; |
| 54 | const s64 min_spacing_critical = device_memory - 512_MiB; | 55 | const s64 min_spacing_critical = device_local_memory - 512_MiB; |
| 55 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | 56 | const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); |
| 56 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | 57 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 57 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | 58 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 58 | expected_memory = static_cast<u64>( | 59 | expected_memory = static_cast<u64>( |
| 59 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | 60 | std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), |
| 60 | DEFAULT_EXPECTED_MEMORY)); | 61 | DEFAULT_EXPECTED_MEMORY)); |
| 61 | critical_memory = static_cast<u64>( | 62 | critical_memory = static_cast<u64>( |
| 62 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | 63 | std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), |
| 63 | DEFAULT_CRITICAL_MEMORY)); | 64 | DEFAULT_CRITICAL_MEMORY)); |
| 64 | minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); | 65 | minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2); |
| 65 | } else { | 66 | } else { |
| 66 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 67 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 67 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 68 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| @@ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | |||
| 513 | } | 514 | } |
| 514 | 515 | ||
| 515 | template <class P> | 516 | template <class P> |
| 516 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | 517 | void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) { |
| 517 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | 518 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |
| 518 | if (True(image.flags & ImageFlagBits::CpuModified)) { | 519 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 519 | return; | 520 | return; |
| @@ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 526 | } | 527 | } |
| 527 | 528 | ||
| 528 | template <class P> | 529 | template <class P> |
| 529 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 530 | void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { |
| 530 | boost::container::small_vector<ImageId, 16> images; | 531 | boost::container::small_vector<ImageId, 16> images; |
| 531 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 532 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 532 | if (!image.IsSafeDownload()) { | 533 | if (!image.IsSafeDownload()) { |
| @@ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 553 | } | 554 | } |
| 554 | 555 | ||
| 555 | template <class P> | 556 | template <class P> |
| 556 | std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, | 557 | std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr, |
| 557 | u64 size) { | 558 | u64 size) { |
| 558 | std::optional<VideoCore::RasterizerDownloadArea> area{}; | 559 | std::optional<VideoCore::RasterizerDownloadArea> area{}; |
| 559 | ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { | 560 | ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { |
| @@ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 579 | } | 580 | } |
| 580 | 581 | ||
| 581 | template <class P> | 582 | template <class P> |
| 582 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 583 | void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) { |
| 583 | boost::container::small_vector<ImageId, 16> deleted_images; | 584 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 584 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 585 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 585 | for (const ImageId id : deleted_images) { | 586 | for (const ImageId id : deleted_images) { |
| @@ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 713 | 714 | ||
| 714 | template <class P> | 715 | template <class P> |
| 715 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( | 716 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( |
| 716 | const Tegra::FramebufferConfig& config, VAddr cpu_addr) { | 717 | const Tegra::FramebufferConfig& config, DAddr cpu_addr) { |
| 717 | // TODO: Properly implement this | 718 | // TODO: Properly implement this |
| 718 | const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); | 719 | const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); |
| 719 | if (it == page_table.end()) { | 720 | if (it == page_table.end()) { |
| @@ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep | |||
| 940 | } | 941 | } |
| 941 | 942 | ||
| 942 | template <class P> | 943 | template <class P> |
| 943 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 944 | bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { |
| 944 | bool is_modified = false; | 945 | bool is_modified = false; |
| 945 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | 946 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| 946 | if (False(image.flags & ImageFlagBits::GpuModified)) { | 947 | if (False(image.flags & ImageFlagBits::GpuModified)) { |
| @@ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 1059 | return; | 1060 | return; |
| 1060 | } | 1061 | } |
| 1061 | 1062 | ||
| 1062 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | 1063 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1063 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | 1064 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1064 | 1065 | ||
| 1065 | if (True(image.flags & ImageFlagBits::Converted)) { | 1066 | if (True(image.flags & ImageFlagBits::Converted)) { |
| @@ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | |||
| 1124 | template <class P> | 1125 | template <class P> |
| 1125 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1126 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1126 | RelaxedOptions options) { | 1127 | RelaxedOptions options) { |
| 1127 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1128 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1128 | if (!cpu_addr) { | 1129 | if (!cpu_addr) { |
| 1129 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | 1130 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 1130 | if (!cpu_addr) { | 1131 | if (!cpu_addr) { |
| @@ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | |||
| 1265 | 1266 | ||
| 1266 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; | 1267 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; |
| 1267 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | 1268 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1268 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | 1269 | Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1269 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | 1270 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1270 | 1271 | ||
| 1271 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | 1272 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, |
| @@ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) { | |||
| 1339 | template <class P> | 1340 | template <class P> |
| 1340 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 1341 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1341 | RelaxedOptions options) { | 1342 | RelaxedOptions options) { |
| 1342 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1343 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1343 | if (!cpu_addr) { | 1344 | if (!cpu_addr) { |
| 1344 | const auto size = CalculateGuestSizeInBytes(info); | 1345 | const auto size = CalculateGuestSizeInBytes(info); |
| 1345 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); | 1346 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); |
| 1346 | if (!cpu_addr) { | 1347 | if (!cpu_addr) { |
| 1347 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | 1348 | const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; |
| 1348 | virtual_invalid_space += Common::AlignUp(size, 32); | 1349 | virtual_invalid_space += Common::AlignUp(size, 32); |
| 1349 | cpu_addr = std::optional<VAddr>(fake_addr); | 1350 | cpu_addr = std::optional<DAddr>(fake_addr); |
| 1350 | } | 1351 | } |
| 1351 | } | 1352 | } |
| 1352 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | 1353 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); |
| @@ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1362 | } | 1363 | } |
| 1363 | 1364 | ||
| 1364 | template <class P> | 1365 | template <class P> |
| 1365 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | 1366 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { |
| 1366 | ImageInfo new_info = info; | 1367 | ImageInfo new_info = info; |
| 1367 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | 1368 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); |
| 1368 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | 1369 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| @@ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag | |||
| 1650 | 1651 | ||
| 1651 | template <class P> | 1652 | template <class P> |
| 1652 | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | 1653 | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { |
| 1653 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1654 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1654 | if (!cpu_addr) { | 1655 | if (!cpu_addr) { |
| 1655 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | 1656 | cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |
| 1656 | if (!cpu_addr) { | 1657 | if (!cpu_addr) { |
| @@ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd | |||
| 1780 | 1781 | ||
| 1781 | template <class P> | 1782 | template <class P> |
| 1782 | template <typename Func> | 1783 | template <typename Func> |
| 1783 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | 1784 | void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) { |
| 1784 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | 1785 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; |
| 1785 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | 1786 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; |
| 1786 | boost::container::small_vector<ImageId, 32> images; | 1787 | boost::container::small_vector<ImageId, 32> images; |
| @@ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, | |||
| 1924 | template <class P> | 1925 | template <class P> |
| 1925 | template <typename Func> | 1926 | template <typename Func> |
| 1926 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | 1927 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { |
| 1927 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | 1928 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type; |
| 1928 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | 1929 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; |
| 1929 | const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | 1930 | const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); |
| 1930 | for (const auto& [gpu_addr, size] : segments) { | 1931 | for (const auto& [gpu_addr, size] : segments) { |
| 1931 | std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1932 | std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1932 | ASSERT(cpu_addr); | 1933 | ASSERT(cpu_addr); |
| 1933 | if constexpr (RETURNS_BOOL) { | 1934 | if constexpr (RETURNS_BOOL) { |
| 1934 | if (func(gpu_addr, *cpu_addr, size)) { | 1935 | if (func(gpu_addr, *cpu_addr, size)) { |
| @@ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1980 | } | 1981 | } |
| 1981 | boost::container::small_vector<ImageViewId, 16> sparse_maps; | 1982 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1982 | ForEachSparseSegment( | 1983 | ForEachSparseSegment( |
| 1983 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1984 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { |
| 1984 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1985 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| 1985 | ForEachCPUPage(cpu_addr, size, | 1986 | ForEachCPUPage(cpu_addr, size, |
| 1986 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | 1987 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); |
| @@ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 2048 | auto& sparse_maps = it->second; | 2049 | auto& sparse_maps = it->second; |
| 2049 | for (auto& map_view_id : sparse_maps) { | 2050 | for (auto& map_view_id : sparse_maps) { |
| 2050 | const auto& map_range = slot_map_views[map_view_id]; | 2051 | const auto& map_range = slot_map_views[map_view_id]; |
| 2051 | const VAddr cpu_addr = map_range.cpu_addr; | 2052 | const DAddr cpu_addr = map_range.cpu_addr; |
| 2052 | const std::size_t size = map_range.size; | 2053 | const std::size_t size = map_range.size; |
| 2053 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | 2054 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { |
| 2054 | const auto page_it = page_table.find(page); | 2055 | const auto page_it = page_table.find(page); |
| @@ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2080 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 2081 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 2081 | image.flags |= ImageFlagBits::Tracked; | 2082 | image.flags |= ImageFlagBits::Tracked; |
| 2082 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2083 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2083 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 2084 | device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 2084 | return; | 2085 | return; |
| 2085 | } | 2086 | } |
| 2086 | if (True(image.flags & ImageFlagBits::Registered)) { | 2087 | if (True(image.flags & ImageFlagBits::Registered)) { |
| @@ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
| 2089 | auto& sparse_maps = it->second; | 2090 | auto& sparse_maps = it->second; |
| 2090 | for (auto& map_view_id : sparse_maps) { | 2091 | for (auto& map_view_id : sparse_maps) { |
| 2091 | const auto& map = slot_map_views[map_view_id]; | 2092 | const auto& map = slot_map_views[map_view_id]; |
| 2092 | const VAddr cpu_addr = map.cpu_addr; | 2093 | const DAddr cpu_addr = map.cpu_addr; |
| 2093 | const std::size_t size = map.size; | 2094 | const std::size_t size = map.size; |
| 2094 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2095 | device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 2095 | } | 2096 | } |
| 2096 | return; | 2097 | return; |
| 2097 | } | 2098 | } |
| 2098 | ForEachSparseSegment(image, | 2099 | ForEachSparseSegment(image, |
| 2099 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 2100 | [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { |
| 2100 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 2101 | device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 2101 | }); | 2102 | }); |
| 2102 | } | 2103 | } |
| 2103 | 2104 | ||
| @@ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2106 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | 2107 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 2107 | image.flags &= ~ImageFlagBits::Tracked; | 2108 | image.flags &= ~ImageFlagBits::Tracked; |
| 2108 | if (False(image.flags & ImageFlagBits::Sparse)) { | 2109 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 2109 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | 2110 | device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); |
| 2110 | return; | 2111 | return; |
| 2111 | } | 2112 | } |
| 2112 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | 2113 | ASSERT(True(image.flags & ImageFlagBits::Registered)); |
| @@ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
| 2115 | auto& sparse_maps = it->second; | 2116 | auto& sparse_maps = it->second; |
| 2116 | for (auto& map_view_id : sparse_maps) { | 2117 | for (auto& map_view_id : sparse_maps) { |
| 2117 | const auto& map = slot_map_views[map_view_id]; | 2118 | const auto& map = slot_map_views[map_view_id]; |
| 2118 | const VAddr cpu_addr = map.cpu_addr; | 2119 | const DAddr cpu_addr = map.cpu_addr; |
| 2119 | const std::size_t size = map.size; | 2120 | const std::size_t size = map.size; |
| 2120 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 2121 | device_memory.UpdatePagesCachedCount(cpu_addr, size, -1); |
| 2121 | } | 2122 | } |
| 2122 | } | 2123 | } |
| 2123 | 2124 | ||
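Most of the texture cache changes above are mechanical VAddr-to-DAddr substitutions, but the InsertImage hunk keeps one notable pattern: when GpuToCpuAddress cannot resolve a backing device address, the cache still needs a unique, stable key, so it carves one out of a reserved range and bumps a counter. A self-contained sketch of that fallback, keeping the constants from the diff while the helper and surrounding types are stand-ins:

#include <cstddef>
#include <cstdint>

using DAddr = std::uint64_t;

// Illustrative align-up helper; yuzu uses its own Common::AlignUp.
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) / align * align;
}

class FakeAddressAllocator {
public:
    // Called when no real device address exists for a GPU range: return an
    // address far outside the real device address space so it cannot collide
    // with a genuine mapping, and reserve the (aligned) size after it.
    DAddr Allocate(std::size_t size) {
        const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
        virtual_invalid_space += AlignUp(size, 32);
        return fake_addr;
    }

private:
    DAddr virtual_invalid_space{};
};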
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 6caf75b46..8699d40d4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -36,9 +36,11 @@ | |||
| 36 | #include "video_core/texture_cache/types.h" | 36 | #include "video_core/texture_cache/types.h" |
| 37 | #include "video_core/textures/texture.h" | 37 | #include "video_core/textures/texture.h" |
| 38 | 38 | ||
| 39 | namespace Tegra::Control { | 39 | namespace Tegra { |
| 40 | namespace Control { | ||
| 40 | struct ChannelState; | 41 | struct ChannelState; |
| 41 | } | 42 | } |
| 43 | } // namespace Tegra | ||
| 42 | 44 | ||
| 43 | namespace VideoCommon { | 45 | namespace VideoCommon { |
| 44 | 46 | ||
| @@ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
| 126 | }; | 128 | }; |
| 127 | 129 | ||
| 128 | public: | 130 | public: |
| 129 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); | 131 | explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&); |
| 130 | 132 | ||
| 131 | /// Notify the cache that a new frame has been queued | 133 | /// Notify the cache that a new frame has been queued |
| 132 | void TickFrame(); | 134 | void TickFrame(); |
| @@ -190,15 +192,15 @@ public: | |||
| 190 | Framebuffer* GetFramebuffer(); | 192 | Framebuffer* GetFramebuffer(); |
| 191 | 193 | ||
| 192 | /// Mark images in a range as modified from the CPU | 194 | /// Mark images in a range as modified from the CPU |
| 193 | void WriteMemory(VAddr cpu_addr, size_t size); | 195 | void WriteMemory(DAddr cpu_addr, size_t size); |
| 194 | 196 | ||
| 195 | /// Download contents of host images to guest memory in a region | 197 | /// Download contents of host images to guest memory in a region |
| 196 | void DownloadMemory(VAddr cpu_addr, size_t size); | 198 | void DownloadMemory(DAddr cpu_addr, size_t size); |
| 197 | 199 | ||
| 198 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 200 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size); |
| 199 | 201 | ||
| 200 | /// Remove images in a region | 202 | /// Remove images in a region |
| 201 | void UnmapMemory(VAddr cpu_addr, size_t size); | 203 | void UnmapMemory(DAddr cpu_addr, size_t size); |
| 202 | 204 | ||
| 203 | /// Remove images in a region | 205 | /// Remove images in a region |
| 204 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | 206 | void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); |
| @@ -210,7 +212,7 @@ public: | |||
| 210 | 212 | ||
| 211 | /// Try to find a cached image view in the given CPU address | 213 | /// Try to find a cached image view in the given CPU address |
| 212 | [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, | 214 | [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, |
| 213 | VAddr cpu_addr); | 215 | DAddr cpu_addr); |
| 214 | 216 | ||
| 215 | /// Return true when there are uncommitted images to be downloaded | 217 | /// Return true when there are uncommitted images to be downloaded |
| 216 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | 218 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| @@ -235,7 +237,7 @@ public: | |||
| 235 | GPUVAddr address = 0, size_t size = 0); | 237 | GPUVAddr address = 0, size_t size = 0); |
| 236 | 238 | ||
| 237 | /// Return true when a CPU region is modified from the GPU | 239 | /// Return true when a CPU region is modified from the GPU |
| 238 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 240 | [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); |
| 239 | 241 | ||
| 240 | [[nodiscard]] bool IsRescaling() const noexcept; | 242 | [[nodiscard]] bool IsRescaling() const noexcept; |
| 241 | 243 | ||
| @@ -252,7 +254,7 @@ public: | |||
| 252 | private: | 254 | private: |
| 253 | /// Iterate over all page indices in a range | 255 | /// Iterate over all page indices in a range |
| 254 | template <typename Func> | 256 | template <typename Func> |
| 255 | static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | 257 | static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) { |
| 256 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | 258 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; |
| 257 | const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; | 259 | const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; |
| 258 | for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { | 260 | for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { |
| @@ -326,7 +328,7 @@ private: | |||
| 326 | 328 | ||
| 327 | /// Create a new image and join perfectly matching existing images | 329 | /// Create a new image and join perfectly matching existing images |
| 328 | /// Remove joined images from the cache | 330 | /// Remove joined images from the cache |
| 329 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 331 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr); |
| 330 | 332 | ||
| 331 | [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); | 333 | [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); |
| 332 | 334 | ||
| @@ -349,7 +351,7 @@ private: | |||
| 349 | 351 | ||
| 350 | /// Iterates over all the images in a region calling func | 352 | /// Iterates over all the images in a region calling func |
| 351 | template <typename Func> | 353 | template <typename Func> |
| 352 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | 354 | void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func); |
| 353 | 355 | ||
| 354 | template <typename Func> | 356 | template <typename Func> |
| 355 | void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | 357 | void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); |
| @@ -421,7 +423,7 @@ private: | |||
| 421 | 423 | ||
| 422 | Runtime& runtime; | 424 | Runtime& runtime; |
| 423 | 425 | ||
| 424 | VideoCore::RasterizerInterface& rasterizer; | 426 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 425 | std::deque<TextureCacheGPUMap> gpu_page_table_storage; | 427 | std::deque<TextureCacheGPUMap> gpu_page_table_storage; |
| 426 | 428 | ||
| 427 | RenderTargets render_targets; | 429 | RenderTargets render_targets; |
| @@ -432,7 +434,7 @@ private: | |||
| 432 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 434 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 433 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; | 435 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 434 | 436 | ||
| 435 | VAddr virtual_invalid_space{}; | 437 | DAddr virtual_invalid_space{}; |
| 436 | 438 | ||
| 437 | bool has_deleted_images = false; | 439 | bool has_deleted_images = false; |
| 438 | bool is_rescaling = false; | 440 | bool is_rescaling = false; |
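One small hunk in this header is easy to misread as a behavioral change: the forward declaration of Tegra::Control::ChannelState is rewritten from the compact C++17 nested-namespace form into explicitly nested namespaces, presumably so further Tegra-level declarations can sit alongside it. Both spellings declare exactly the same entity; a short comparison sketch:

// C++17 compact form: declares Tegra::Control::ChannelState in one shot.
namespace Tegra::Control {
struct ChannelState;
}

// Explicitly nested form used after this change: the outer Tegra namespace is
// reopened, leaving room for additional Tegra-level forward declarations.
namespace Tegra {
namespace Control {
struct ChannelState;
} // namespace Control
} // namespace Tegra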
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index fcf70068e..96f04b6c8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "core/memory.h" | 23 | #include "core/memory.h" |
| 24 | #include "video_core/compatible_formats.h" | 24 | #include "video_core/compatible_formats.h" |
| 25 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 26 | #include "video_core/guest_memory.h" | ||
| 26 | #include "video_core/memory_manager.h" | 27 | #include "video_core/memory_manager.h" |
| 27 | #include "video_core/surface.h" | 28 | #include "video_core/surface.h" |
| 28 | #include "video_core/texture_cache/decode_bc.h" | 29 | #include "video_core/texture_cache/decode_bc.h" |
| @@ -552,7 +553,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 552 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 553 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 553 | const std::span<const u8> src = input.subspan(host_offset); | 554 | const std::span<const u8> src = input.subspan(host_offset); |
| 554 | { | 555 | { |
| 555 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | 556 | Tegra::Memory::GpuGuestMemoryScoped<u8, |
| 557 | Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||
| 556 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); | 558 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); |
| 557 | 559 | ||
| 558 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 560 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
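The swizzling helper in util.cpp now pulls its scoped guest-memory wrapper from Tegra::Memory instead of Core::Memory, but its job is unchanged: materialize a writable view of a guest range, let the caller swizzle into it, and write the result back when the scope ends. A hedged, self-contained sketch of that RAII shape, with a deliberately simplified memory-manager interface (ReadBlock/WriteBlock and ScopedGuestSpan are stand-ins, not yuzu's exact API, and there is no bounds checking):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

using GPUVAddr = std::uint64_t;

// Hypothetical memory manager with byte-level block accessors backed by a
// plain vector, standing in for GPU-visible guest memory.
struct MemoryManager {
    std::vector<std::uint8_t> backing = std::vector<std::uint8_t>(1 << 20);

    void ReadBlock(GPUVAddr addr, void* dest, std::size_t size) const {
        std::memcpy(dest, backing.data() + addr, size);
    }
    void WriteBlock(GPUVAddr addr, const void* src, std::size_t size) {
        std::memcpy(backing.data() + addr, src, size);
    }
};

// Scoped read-modify-write view of guest memory: read on construction,
// expose a span while alive, write back on destruction.
class ScopedGuestSpan {
public:
    ScopedGuestSpan(MemoryManager& memory_, GPUVAddr addr_, std::size_t size_)
        : memory{memory_}, addr{addr_}, buffer(size_) {
        memory.ReadBlock(addr, buffer.data(), buffer.size());
    }
    ~ScopedGuestSpan() {
        memory.WriteBlock(addr, buffer.data(), buffer.size());
    }

    std::span<std::uint8_t> Span() { return buffer; }

private:
    MemoryManager& memory;
    GPUVAddr addr;
    std::vector<std::uint8_t> buffer;
};

The write-back-on-destruction behavior appears to be the point of the extra braces around dst in the SwizzleBlockLinearImage hunk above: the swizzled layer is produced into the scoped destination and committed to guest memory as the object goes out of scope.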
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index b42d48416..0efb7b49d 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/settings.h" | 7 | #include "common/settings.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 10 | #include "video_core/host1x/host1x.h" | ||
| 9 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 10 | #include "video_core/renderer_null/renderer_null.h" | 12 | #include "video_core/renderer_null/renderer_null.h" |
| 11 | #include "video_core/renderer_opengl/renderer_opengl.h" | 13 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| @@ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | |||
| 18 | Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | 20 | Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| 19 | std::unique_ptr<Core::Frontend::GraphicsContext> context) { | 21 | std::unique_ptr<Core::Frontend::GraphicsContext> context) { |
| 20 | auto& telemetry_session = system.TelemetrySession(); | 22 | auto& telemetry_session = system.TelemetrySession(); |
| 21 | auto& cpu_memory = system.ApplicationMemory(); | 23 | auto& device_memory = system.Host1x().MemoryManager(); |
| 22 | 24 | ||
| 23 | switch (Settings::values.renderer_backend.GetValue()) { | 25 | switch (Settings::values.renderer_backend.GetValue()) { |
| 24 | case Settings::RendererBackend::OpenGL: | 26 | case Settings::RendererBackend::OpenGL: |
| 25 | return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, | 27 | return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, |
| 26 | gpu, std::move(context)); | 28 | device_memory, gpu, std::move(context)); |
| 27 | case Settings::RendererBackend::Vulkan: | 29 | case Settings::RendererBackend::Vulkan: |
| 28 | return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, | 30 | return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, |
| 29 | gpu, std::move(context)); | 31 | device_memory, gpu, std::move(context)); |
| 30 | case Settings::RendererBackend::Null: | 32 | case Settings::RendererBackend::Null: |
| 31 | return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, | 33 | return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context)); |
| 32 | std::move(context)); | ||
| 33 | default: | 34 | default: |
| 34 | return nullptr; | 35 | return nullptr; |
| 35 | } | 36 | } |
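Finally, CreateRenderer now resolves the device memory manager from Host1x and forwards it only to the backends that actually touch guest memory; the null renderer stops taking a memory argument altogether. A compilable sketch of that factory shape with stand-in types (Backend, Renderer and the concrete classes below are illustrative, not yuzu's API):

#include <memory>

// Stand-in types for the sketch.
struct DeviceMemoryManager {};
struct Window {};

enum class Backend { OpenGL, Vulkan, Null };

struct Renderer {
    virtual ~Renderer() = default;
};

struct OpenGLRenderer : Renderer {
    OpenGLRenderer(Window&, DeviceMemoryManager&) {}
};
struct VulkanRenderer : Renderer {
    VulkanRenderer(Window&, DeviceMemoryManager&) {}
};
struct NullRenderer : Renderer {
    explicit NullRenderer(Window&) {} // no guest-memory access needed
};

std::unique_ptr<Renderer> CreateRenderer(Backend backend, Window& window,
                                         DeviceMemoryManager& device_memory) {
    switch (backend) {
    case Backend::OpenGL:
        return std::make_unique<OpenGLRenderer>(window, device_memory);
    case Backend::Vulkan:
        return std::make_unique<VulkanRenderer>(window, device_memory);
    case Backend::Null:
        return std::make_unique<NullRenderer>(window);
    default:
        return nullptr;
    }
}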