author     Fernando Sahmkow   2023-12-29 07:53:52 +0100
committer  Liam               2024-01-18 21:12:30 -0500
commit     34a8d0cc8e04b4b9d8e5a75e552f0adb31b5d718 (patch)
tree       afa899bb63e97df9c80e5de49395495143799dbd
parent     SMMU: Initial adaptation to video_core. (diff)
SMMU: Implement physical memory mirroring
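
The SMMU keeps a reverse map, compressed_device_addr, from each physical page to the device page that mirrors it; until now that map could record only one device page per physical page. This commit reserves bit 31 of each entry so a physical page can be mirrored by several device addresses: with the bit clear the entry is a single device page number, and with the bit set the low 31 bits name a chain of device pages held in the new PhysicalAddressContainer. A minimal sketch of the encoding (constants copied from the diff; the helper is illustrative, not part of the commit):

    #include <cstdint>

    constexpr std::uint32_t MULTI_FLAG_BITS = 31;
    constexpr std::uint32_t MULTI_FLAG = 1U << MULTI_FLAG_BITS;
    constexpr std::uint32_t MULTI_MASK = ~MULTI_FLAG;

    // entry == 0               : the physical page has no device mapping
    // bit 31 clear, entry != 0 : entry is the single mirroring device page
    // bit 31 set               : entry & MULTI_MASK is the id of a chain of
    //                            device pages in PhysicalAddressContainer
    constexpr bool IsMultiMapped(std::uint32_t entry) {
        return (entry & MULTI_FLAG) != 0;
    }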
-rw-r--r--  src/core/device_memory_manager.h  27
-rw-r--r--  src/core/device_memory_manager.inc  154
-rw-r--r--  src/core/hle/service/nvdrv/core/container.cpp  6
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.cpp  7
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.h  6
-rw-r--r--  src/core/memory.cpp  53
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp  7
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp  6
8 files changed, 226 insertions(+), 40 deletions(-)
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 1a63cbd09..7c7726348 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -10,8 +10,10 @@
 #include <mutex>
 
 #include "common/common_types.h"
+#include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 
+
 namespace Core {
 
 class DeviceMemory;
@@ -49,9 +51,25 @@ public:
     template <typename T>
     const T* GetPointer(DAddr address) const;
 
-    DAddr GetAddressFromPAddr(PAddr address) const {
+    template <typename Func>
+    void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
         DAddr subbits = static_cast<DAddr>(address & page_mask);
-        return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
+        const u32 base = compressed_device_addr[(address >> page_bits)];
+        if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
+            const DAddr d_address = static_cast<DAddr>(base << page_bits) + subbits;
+            operation(d_address);
+            return;
+        }
+        InnerGatherDeviceAddresses(buffer, address);
+        for (u32 value : buffer) {
+            operation(static_cast<DAddr>(value << page_bits) + subbits);
+        }
+    }
+
+    template <typename Func>
+    void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
+        PAddr address = GetRawPhysicalAddr<u8>(p);
+        ApplyOpOnPAddr(address, buffer, operation);
     }
 
     PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
@@ -98,6 +116,9 @@ private:
     static constexpr size_t page_size = 1ULL << page_bits;
     static constexpr size_t page_mask = page_size - 1ULL;
     static constexpr u32 physical_address_base = 1U << page_bits;
+    static constexpr u32 MULTI_FLAG_BITS = 31;
+    static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
+    static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
 
     template <typename T>
     T* GetPointerFromRaw(PAddr addr) {
@@ -117,6 +138,8 @@ private:
     void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
                    auto increment);
 
+    void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
+
     std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
 
     const uintptr_t physical_base;
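
The new ApplyOpOnPAddr above is what callers use in place of the removed GetAddressFromPAddr: the common single-mapping case invokes the callback once and never touches the scratch buffer, and only multi-mapped pages pay for a gather. A stand-alone sketch of that dispatch, using plain containers as stand-ins for the manager's members (not the commit's code):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    constexpr std::uint32_t kMultiFlagBits = 31;
    constexpr std::uint32_t kMultiMask = ~(1U << kMultiFlagBits);
    constexpr std::uint64_t kPageBits = 12;
    constexpr std::uint64_t kPageMask = (1ULL << kPageBits) - 1;

    // reverse_map plays the role of compressed_device_addr; chains stands in
    // for PhysicalAddressContainer, keyed by chain id.
    template <typename Func>
    void ApplyOpOnPAddrSketch(
        const std::vector<std::uint32_t>& reverse_map,
        const std::unordered_map<std::uint32_t, std::vector<std::uint32_t>>& chains,
        std::uint64_t address, Func&& op) {
        const std::uint64_t subbits = address & kPageMask;
        const std::uint32_t base = reverse_map[address >> kPageBits];
        if ((base >> kMultiFlagBits) == 0) {
            // Fast path: exactly one mirroring device page.
            op((static_cast<std::uint64_t>(base) << kPageBits) + subbits);
            return;
        }
        // Slow path: fan the operation out over every mirroring device page.
        for (const std::uint32_t page : chains.at(base & kMultiMask)) {
            op((static_cast<std::uint64_t>(page) << kPageBits) + subbits);
        }
    }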
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 8c5f82d31..4fb3ad3ab 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -18,10 +18,117 @@
 
 namespace Core {
 
+namespace {
+
+class PhysicalAddressContainer {
+public:
+    PhysicalAddressContainer() = default;
+    ~PhysicalAddressContainer() = default;
+
+    void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
+        buffer.resize(8);
+        buffer.resize(0);
+        size_t index = 0;
+        const auto add_value = [&](u32 value) {
+            buffer[index] = value;
+            index++;
+            buffer.resize(index);
+        };
+
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        add_value(current->value);
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+            add_value(current->value);
+        }
+    }
+
+    u32 Register(u32 value) {
+        return RegisterImplementation(value);
+    }
+
+    void Register(u32 value, u32 start_entry) {
+        auto entry_id = RegisterImplementation(value);
+        u32 iter_entry = start_entry;
+        Entry* current = &storage[iter_entry - 1];
+        while (current->next_entry != 0) {
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        current->next_entry = entry_id;
+    }
+
+    std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
+        u32 iter_entry = start_entry;
+        Entry* previous{};
+        Entry* current = &storage[iter_entry - 1];
+        Entry* next{};
+        bool more_than_one_remaining = false;
+        u32 result_start{start_entry};
+        size_t count = 0;
+        while (current->value != value) {
+            count++;
+            previous = current;
+            iter_entry = current->next_entry;
+            current = &storage[iter_entry - 1];
+        }
+        // Find next
+        u32 next_entry = current->next_entry;
+        if (next_entry != 0) {
+            next = &storage[next_entry - 1];
+            more_than_one_remaining = next->next_entry != 0;
+        }
+        if (previous) {
+            previous->next_entry = next_entry;
+        } else {
+            result_start = next_entry;
+        }
+        free_entries.emplace_back(iter_entry);
+        return std::make_pair(more_than_one_remaining || count > 1, result_start);
+    }
+
+    u32 ReleaseEntry(u32 start_entry) {
+        Entry* current = &storage[start_entry - 1];
+        free_entries.emplace_back(start_entry);
+        return current->value;
+    }
+
+private:
+    u32 RegisterImplementation(u32 value) {
+        auto entry_id = GetNewEntry();
+        auto& entry = storage[entry_id - 1];
+        entry.next_entry = 0;
+        entry.value = value;
+        return entry_id;
+    }
+    u32 GetNewEntry() {
+        if (!free_entries.empty()) {
+            u32 result = free_entries.front();
+            free_entries.pop_front();
+            return result;
+        }
+        storage.emplace_back();
+        u32 new_entry = static_cast<u32>(storage.size());
+        return new_entry;
+    }
+
+    struct Entry {
+        u32 next_entry{};
+        u32 value{};
+    };
+
+    std::deque<Entry> storage;
+    std::deque<u32> free_entries;
+};
+
 struct EmptyAllocator {
     EmptyAllocator([[maybe_unused]] DAddr address) {}
 };
 
+} // namespace
+
 template <typename DTraits>
 struct DeviceMemoryManagerAllocator {
     static constexpr bool supports_pinning = DTraits::supports_pinning;
@@ -38,6 +145,7 @@ struct DeviceMemoryManagerAllocator {
     std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
         pin_allocator;
     Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
+    PhysicalAddressContainer multi_dev_address;
 
     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
     template <bool pin_area>
@@ -109,6 +217,9 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
       cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
     cached_pages = std::make_unique<CachedPages>();
+    for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
+        compressed_device_addr[i] = 0;
+    }
 }
 
 template <typename Traits>
@@ -155,8 +266,19 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
         }
         auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
         compressed_physical_ptr[start_page_d + i] = phys_addr;
-        compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
         InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
+        const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+        const u32 new_dev = static_cast<u32>(start_page_d + i);
+        if (base_dev == 0) [[likely]] {
+            compressed_device_addr[phys_addr - 1U] = new_dev;
+            continue;
+        }
+        u32 start_id = base_dev & MULTI_MASK;
+        if ((base_dev >> MULTI_FLAG_BITS) == 0) {
+            start_id = impl->multi_dev_address.Register(base_dev);
+            compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
+        }
+        impl->multi_dev_address.Register(new_dev, start_id);
     }
 }
 
@@ -170,13 +292,39 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
         auto phys_addr = compressed_physical_ptr[start_page_d + i];
         compressed_physical_ptr[start_page_d + i] = 0;
         cpu_backing_address[start_page_d + i] = 0;
-        if (phys_addr != 0) {
-            compressed_device_addr[phys_addr - 1] = 0;
+        if (phys_addr != 0) [[likely]] {
+            const u32 base_dev = compressed_device_addr[phys_addr - 1U];
+            if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
+                compressed_device_addr[phys_addr - 1] = 0;
+                continue;
+            }
+            const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
+                static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
+            if (!more_entries) {
+                compressed_device_addr[phys_addr - 1] =
+                    impl->multi_dev_address.ReleaseEntry(new_start);
+                continue;
+            }
+            compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
         }
     }
 }
 
 template <typename Traits>
+void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
+                                                             PAddr address) {
+    size_t phys_addr = address >> page_bits;
+    std::scoped_lock lk(mapping_guard);
+    u32 backing = compressed_device_addr[phys_addr];
+    if ((backing >> MULTI_FLAG_BITS) != 0) {
+        impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
+        return;
+    }
+    buffer.resize(1);
+    buffer[0] = backing;
+}
+
+template <typename Traits>
 template <typename T>
 T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
     const size_t index = address >> Memory::YUZU_PAGEBITS;
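
Map and Unmap above follow a promote/demote protocol around PhysicalAddressContainer: the first mapping of a physical page is stored inline in compressed_device_addr, a second mapping promotes the entry to a MULTI_FLAG-tagged chain, and unmapping down to a single survivor demotes it back to an inline value via ReleaseEntry. A behavioral model of that lifecycle, using a map of vectors in place of the deque-backed chains (a sketch of the protocol, not the commit's data structure):

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    constexpr std::uint32_t MULTI_FLAG = 1U << 31;
    constexpr std::uint32_t MULTI_MASK = ~MULTI_FLAG;

    std::unordered_map<std::uint32_t, std::uint32_t> reverse_map; // phys page -> entry
    std::unordered_map<std::uint32_t, std::vector<std::uint32_t>> chains;
    std::uint32_t next_chain_id = 1;

    void MapPage(std::uint32_t phys, std::uint32_t dev) {
        std::uint32_t& entry = reverse_map[phys];
        if (entry == 0) {                       // first mapping: store inline
            entry = dev;
        } else if ((entry & MULTI_FLAG) == 0) { // second mapping: promote
            const std::uint32_t id = next_chain_id++;
            chains[id] = {entry, dev};
            entry = MULTI_FLAG | id;
        } else {                                // already a chain: append
            chains[entry & MULTI_MASK].push_back(dev);
        }
    }

    void UnmapPage(std::uint32_t phys, std::uint32_t dev) {
        std::uint32_t& entry = reverse_map[phys];
        if ((entry & MULTI_FLAG) == 0) {        // inline entry: clear it
            entry = 0;
            return;
        }
        const std::uint32_t id = entry & MULTI_MASK;
        auto& chain = chains[id];
        std::erase(chain, dev);
        if (chain.size() == 1) {                // one survivor: demote
            entry = chain.front();
            chains.erase(id);
        }
    }

    int main() {
        MapPage(7, 100);
        MapPage(7, 200);   // promotes physical page 7 to a chain
        UnmapPage(7, 100); // demotes back to the single value 200
        assert(reverse_map[7] == 200);
    }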
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 7c2231fe6..e12ce05c1 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -16,8 +16,8 @@
 namespace Service::Nvidia::NvCore {
 
 struct ContainerImpl {
-    explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
-        : host1x{host1x_}, file{host1x_}, manager{host1x_}, device_file_data{} {}
+    explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
+        : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
     Tegra::Host1x::Host1x& host1x;
     NvMap file;
     SyncpointManager manager;
@@ -29,7 +29,7 @@ struct ContainerImpl {
 };
 
 Container::Container(Tegra::Host1x::Host1x& host1x_) {
-    impl = std::make_unique<ContainerImpl>(host1x_);
+    impl = std::make_unique<ContainerImpl>(*this, host1x_);
 }
 
 Container::~Container() = default;
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 7879c6f04..e4168a37c 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -7,6 +7,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/memory.h"
 #include "video_core/host1x/host1x.h"
@@ -64,7 +65,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
     return NvResult::Success;
 }
 
-NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
+NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {}
 
 void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
     std::scoped_lock lock(handles_lock);
@@ -160,6 +161,8 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
         // If not then allocate some space and map it
         DAddr address{};
         auto& smmu = host1x.MemoryManager();
+        auto* session = core.GetSession(session_id);
+
         auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
         //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
         while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
@@ -179,7 +182,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
             handle_description->d_address = address;
 
             smmu.Map(address, handle_description->address, handle_description->aligned_size,
-                     session_id);
+                     session->smmu_id);
         }
 
         handle_description->pins++;
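
PinHandle now resolves the session through the container before mapping, so the SMMU is keyed by the session's SMMU-side id rather than the raw nvdrv session id. The shape this relies on is assumed from the calls above (the real definitions live in container.h, outside this diff), so the sketch below is hypothetical:

    // Assumed shapes; only GetSession and smmu_id appear in the diff itself.
    struct Session {
        size_t smmu_id; // id the SMMU tracks for the owning process, distinct
                        // from the nvdrv session id used for the lookup
    };

    class Container {
    public:
        Session* GetSession(size_t session_id); // assumed signature
    };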
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index e9e9e8b5b..7dd6d26c3 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -25,6 +25,8 @@ class Host1x;
 } // namespace Tegra
 
 namespace Service::Nvidia::NvCore {
+
+class Container;
 /**
  * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
  * handles
@@ -109,7 +111,7 @@ public:
         bool can_unlock; //!< If the address region is ready to be unlocked
     };
 
-    explicit NvMap(Tegra::Host1x::Host1x& host1x);
+    explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
 
     /**
      * @brief Creates an unallocated handle of the given size
@@ -173,5 +175,7 @@ private:
      * @return If the handle was removed from the map
      */
     bool TryRemoveHandle(const Handle& handle_description);
+
+    Container& core;
 };
 } // namespace Service::Nvidia::NvCore
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 609e775ae..f126840cb 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -44,7 +44,8 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
 // from outside classes. This also allows modification to the internals of the memory
 // subsystem without needing to rebuild all files that make use of the memory interface.
 struct Memory::Impl {
-    explicit Impl(Core::System& system_) : system{system_} {}
+    explicit Impl(Core::System& system_)
+        : system{system_} {}
 
     void SetCurrentPageTable(Kernel::KProcess& process) {
         current_page_table = &process.GetPageTable().GetImpl();
@@ -817,26 +818,31 @@ struct Memory::Impl {
     void HandleRasterizerDownload(VAddr v_address, size_t size) {
         const auto* p = GetPointerImpl(
             v_address, []() {}, []() {});
-        auto& gpu_device_memory = system.Host1x().MemoryManager();
-        DAddr address =
-            gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p));
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         const size_t core = system.GetCurrentHostThreadID();
         auto& current_area = rasterizer_read_areas[core];
-        const DAddr end_address = address + size;
-        if (current_area.start_address <= address && end_address <= current_area.end_address)
-            [[likely]] {
-            return;
-        }
-        current_area = system.GPU().OnCPURead(address, size);
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) {
+                const DAddr end_address = address + size;
+                if (current_area.start_address <= address && end_address <= current_area.end_address)
+                    [[likely]] {
+                    return;
+                }
+                current_area = system.GPU().OnCPURead(address, size);
+            });
     }
 
     void HandleRasterizerWrite(VAddr v_address, size_t size) {
         const auto* p = GetPointerImpl(
             v_address, []() {}, []() {});
-        PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
         const size_t core = std::min(system.GetCurrentHostThreadID(),
                                      sys_core); // any other calls threads go to syscore.
+        if (!gpu_device_memory) [[unlikely]] {
+            gpu_device_memory = &system.Host1x().MemoryManager();
+        }
         // Guard on sys_core;
         if (core == sys_core) [[unlikely]] {
             sys_core_guard.lock();
@@ -846,17 +852,20 @@ struct Memory::Impl {
             sys_core_guard.unlock();
         }
     });
-        auto& current_area = rasterizer_write_areas[core];
-        PAddr subaddress = address >> YUZU_PAGEBITS;
-        bool do_collection = current_area.last_address == subaddress;
-        if (!do_collection) [[unlikely]] {
-            do_collection = system.GPU().OnCPUWrite(address, size);
-            if (!do_collection) {
-                return;
-            }
-            current_area.last_address = subaddress;
-        }
-        gpu_dirty_managers[core].Collect(address, size);
+        gpu_device_memory->ApplyOpOnPointer(
+            p, scratch_buffers[core], [&](DAddr address) {
+                auto& current_area = rasterizer_write_areas[core];
+                PAddr subaddress = address >> YUZU_PAGEBITS;
+                bool do_collection = current_area.last_address == subaddress;
+                if (!do_collection) [[unlikely]] {
+                    do_collection = system.GPU().OnCPUWrite(address, size);
+                    if (!do_collection) {
+                        return;
+                    }
+                    current_area.last_address = subaddress;
+                }
+                gpu_dirty_managers[core].Collect(address, size);
+            });
     }
 
     struct GPUDirtyState {
@@ -872,10 +881,12 @@ struct Memory::Impl {
     }
 
     Core::System& system;
+    Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
     Common::PageTable* current_page_table = nullptr;
     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
         rasterizer_read_areas{};
     std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+    std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
     std::mutex sys_core_guard;
 
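
Two supporting changes in Memory::Impl keep the fan-out cheap on the hot path: the Host1x memory-manager pointer is fetched once and cached in gpu_device_memory, and each host core owns a reusable Common::ScratchBuffer, so the gather in ApplyOpOnPointer neither allocates per call nor contends across threads. A minimal sketch of that per-core reuse pattern, with std::vector standing in for Common::ScratchBuffer and an invented FanOut helper:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr std::size_t kNumCores = 4; // stand-in for Core::Hardware::NUM_CPU_CORES

    // One buffer per core: callers index by their host core id, so concurrent
    // threads never share a buffer and capacity persists between calls.
    std::array<std::vector<std::uint32_t>, kNumCores> scratch_buffers{};

    void FanOut(std::size_t core, const std::vector<std::uint32_t>& mirrors) {
        auto& buffer = scratch_buffers[core];
        buffer.assign(mirrors.begin(), mirrors.end()); // reuses prior capacity
        for (const std::uint32_t dev_page : buffer) {
            // ... apply the rasterizer operation to each mirrored page ...
        }
    }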
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ca31e2fbd..71b748c74 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -554,9 +554,8 @@ void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::Cache
     }
 }
 
-bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
+bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
     if (addr == 0 || size == 0) {
         return false;
     }
@@ -577,9 +576,9 @@ bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
     return false;
 }
 
-void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) {
+void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+
     if (addr == 0 || size == 0) {
         return;
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index efcc349a0..7db131985 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -602,8 +602,7 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::s
     }
 }
 
-bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return false;
     }
@@ -624,8 +623,7 @@ bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
     return false;
 }
 
-void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) {
-    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
+void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }