author    Fernando Sahmkow    2023-12-25 07:32:16 +0100
committer Liam                2024-01-18 21:12:30 -0500
commit    0a2536a0df1f4aea406f2132d3edda0430acc9d1 (patch)
tree      c0ad53890581c9c7e180c5ccb3b66e3c63e3ba64
parent    SMMU: Implement backing CPU page protect/unprotect (diff)
SMMU: Initial adaptation to video_core.
-rw-r--r-- src/audio_core/device/device_session.cpp | 1
-rw-r--r-- src/audio_core/renderer/command/data_source/decode.cpp | 1
-rw-r--r-- src/core/CMakeLists.txt | 2
-rw-r--r-- src/core/core.cpp | 2
-rw-r--r-- src/core/core.h | 2
-rw-r--r-- src/core/device_memory_manager.h | 43
-rw-r--r-- src/core/device_memory_manager.inc | 72
-rw-r--r-- src/core/gpu_dirty_memory_manager.h | 10
-rw-r--r-- src/core/guest_memory.h | 218
-rw-r--r-- src/core/hle/service/hle_ipc.cpp | 61
-rw-r--r-- src/core/hle/service/hle_ipc.h | 9
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.cpp | 64
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.h | 19
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 57
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 20
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 8
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvmap.cpp | 4
-rw-r--r-- src/core/hle/service/nvdrv/nvdrv_interface.cpp | 6
-rw-r--r-- src/core/memory.cpp | 25
-rw-r--r-- src/core/memory.h | 205
-rw-r--r-- src/video_core/CMakeLists.txt | 3
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h | 3
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 450
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h | 98
-rw-r--r-- src/video_core/buffer_cache/memory_tracker_base.h | 18
-rw-r--r-- src/video_core/buffer_cache/word_manager.h | 24
-rw-r--r-- src/video_core/dma_pusher.cpp | 10
-rw-r--r-- src/video_core/engines/engine_upload.cpp | 5
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 25
-rw-r--r-- src/video_core/engines/sw_blitter/blitter.cpp | 5
-rw-r--r-- src/video_core/framebuffer_config.h | 2
-rw-r--r-- src/video_core/gpu.cpp | 28
-rw-r--r-- src/video_core/gpu.h | 12
-rw-r--r-- src/video_core/gpu_thread.cpp | 6
-rw-r--r-- src/video_core/gpu_thread.h | 18
-rw-r--r-- src/video_core/guest_memory.h | 29
-rw-r--r-- src/video_core/host1x/gpu_device_memory_manager.h | 6
-rw-r--r-- src/video_core/memory_manager.cpp | 204
-rw-r--r-- src/video_core/memory_manager.h | 30
-rw-r--r-- src/video_core/query_cache.h | 29
-rw-r--r-- src/video_core/query_cache/query_base.h | 4
-rw-r--r-- src/video_core/query_cache/query_cache.h | 22
-rw-r--r-- src/video_core/query_cache/query_cache_base.h | 7
-rw-r--r-- src/video_core/rasterizer_accelerated.cpp | 72
-rw-r--r-- src/video_core/rasterizer_accelerated.h | 49
-rw-r--r-- src/video_core/rasterizer_interface.h | 23
-rw-r--r-- src/video_core/renderer_null/null_rasterizer.cpp | 21
-rw-r--r-- src/video_core/renderer_null/null_rasterizer.h | 23
-rw-r--r-- src/video_core/renderer_null/renderer_null.cpp | 5
-rw-r--r-- src/video_core/renderer_null/renderer_null.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.cpp | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_query_cache.h | 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 38
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 24
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 11
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 11
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 9
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 10
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h | 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.h | 9
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 56
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 43
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 38
-rw-r--r-- src/video_core/shader_cache.cpp | 7
-rw-r--r-- src/video_core/shader_cache.h | 5
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 75
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 28
-rw-r--r-- src/video_core/texture_cache/util.cpp | 4
-rw-r--r-- src/video_core/video_core.cpp | 15
79 files changed, 1262 insertions(+), 1263 deletions(-)
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index 3c214ec00..d9fc8c3e0 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -9,6 +9,7 @@
9#include "core/core.h" 9#include "core/core.h"
10#include "core/core_timing.h" 10#include "core/core_timing.h"
11#include "core/memory.h" 11#include "core/memory.h"
12#include "core/guest_memory.h"
12 13
13#include "core/hle/kernel/k_process.h" 14#include "core/hle/kernel/k_process.h"
14 15
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index 911dae3c1..77a33a87a 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -10,6 +10,7 @@
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/scratch_buffer.h" 11#include "common/scratch_buffer.h"
12#include "core/memory.h" 12#include "core/memory.h"
13#include "core/guest_memory.h"
13 14
14namespace AudioCore::Renderer { 15namespace AudioCore::Renderer {
15 16
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 293d9647b..ca54eb6c6 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(core STATIC
37 debugger/gdbstub_arch.h 37 debugger/gdbstub_arch.h
38 debugger/gdbstub.cpp 38 debugger/gdbstub.cpp
39 debugger/gdbstub.h 39 debugger/gdbstub.h
40 device_memory_manager.h
41 device_memory_manager.inc
40 device_memory.cpp 42 device_memory.cpp
41 device_memory.h 43 device_memory.h
42 file_sys/fssystem/fs_i_storage.h 44 file_sys/fssystem/fs_i_storage.h
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 461eea9c8..04e1f13ff 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -651,7 +651,7 @@ size_t System::GetCurrentHostThreadID() const {
651 return impl->kernel.GetCurrentHostThreadID(); 651 return impl->kernel.GetCurrentHostThreadID();
652} 652}
653 653
654void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { 654void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) {
655 return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); 655 return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
656} 656}
657 657
diff --git a/src/core/core.h b/src/core/core.h
index ba5add0dc..20ec2ffff 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -224,7 +224,7 @@ public:
224 /// Prepare the core emulation for a reschedule 224 /// Prepare the core emulation for a reschedule
225 void PrepareReschedule(u32 core_index); 225 void PrepareReschedule(u32 core_index);
226 226
227 void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); 227 void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback);
228 228
229 [[nodiscard]] size_t GetCurrentHostThreadID() const; 229 [[nodiscard]] size_t GetCurrentHostThreadID() const;
230 230
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 71b95016c..1a63cbd09 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -3,10 +3,11 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <deque>
7#include <memory>
8#include <array> 6#include <array>
9#include <atomic> 7#include <atomic>
8#include <deque>
9#include <memory>
10#include <mutex>
10 11
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "common/virtual_buffer.h" 13#include "common/virtual_buffer.h"
@@ -48,26 +49,54 @@ public:
48 template <typename T> 49 template <typename T>
49 const T* GetPointer(DAddr address) const; 50 const T* GetPointer(DAddr address) const;
50 51
52 DAddr GetAddressFromPAddr(PAddr address) const {
53 DAddr subbits = static_cast<DAddr>(address & page_mask);
54 return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
55 }
56
57 PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
58 PAddr subbits = static_cast<PAddr>(address & page_mask);
59 auto paddr = compressed_physical_ptr[(address >> page_bits)];
60 if (paddr == 0) {
61 return 0;
62 }
63 return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
64 }
65
51 template <typename T> 66 template <typename T>
52 void Write(DAddr address, T value); 67 void Write(DAddr address, T value);
53 68
54 template <typename T> 69 template <typename T>
55 T Read(DAddr address) const; 70 T Read(DAddr address) const;
56 71
72 const u8* GetSpan(const DAddr src_addr, const std::size_t size) const {
73 return nullptr;
74 }
75
76 u8* GetSpan(const DAddr src_addr, const std::size_t size) {
77 return nullptr;
78 }
79
57 void ReadBlock(DAddr address, void* dest_pointer, size_t size); 80 void ReadBlock(DAddr address, void* dest_pointer, size_t size);
58 void WriteBlock(DAddr address, void* src_pointer, size_t size); 81 void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
82 void WriteBlock(DAddr address, const void* src_pointer, size_t size);
83 void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
59 84
60 size_t RegisterProcess(Memory::Memory* memory); 85 size_t RegisterProcess(Memory::Memory* memory);
61 void UnregisterProcess(size_t id); 86 void UnregisterProcess(size_t id);
62 87
63 void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); 88 void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
64 89
90 static constexpr size_t AS_BITS = Traits::device_virtual_bits;
91
65private: 92private:
66 static constexpr bool supports_pinning = Traits::supports_pinning; 93 static constexpr bool supports_pinning = Traits::supports_pinning;
67 static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; 94 static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
68 static constexpr size_t device_as_size = 1ULL << device_virtual_bits; 95 static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
69 static constexpr size_t physical_max_bits = 33; 96 static constexpr size_t physical_max_bits = 33;
70 static constexpr size_t page_bits = 12; 97 static constexpr size_t page_bits = 12;
98 static constexpr size_t page_size = 1ULL << page_bits;
99 static constexpr size_t page_mask = page_size - 1ULL;
71 static constexpr u32 physical_address_base = 1U << page_bits; 100 static constexpr u32 physical_address_base = 1U << page_bits;
72 101
73 template <typename T> 102 template <typename T>
@@ -136,11 +165,15 @@ private:
136 private: 165 private:
137 std::array<std::atomic_uint16_t, subentries> values{}; 166 std::array<std::atomic_uint16_t, subentries> values{};
138 }; 167 };
139 static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); 168 static_assert(sizeof(CounterEntry) == subentries * sizeof(u16),
169 "CounterEntry should be 8 bytes!");
140 170
141 static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; 171 static constexpr size_t num_counter_entries =
172 (1ULL << (device_virtual_bits - page_bits)) / subentries;
142 using CachedPages = std::array<CounterEntry, num_counter_entries>; 173 using CachedPages = std::array<CounterEntry, num_counter_entries>;
143 std::unique_ptr<CachedPages> cached_pages; 174 std::unique_ptr<CachedPages> cached_pages;
175 std::mutex counter_guard;
176 std::mutex mapping_guard;
144}; 177};
145 178
146} // namespace Core \ No newline at end of file 179} // namespace Core \ No newline at end of file
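
The two translation helpers added above convert between the SMMU's device address space (DAddr) and raw physical addresses (PAddr), with GetPhysicalRawAddressFromDAddr returning 0 for an unmapped page. A minimal sketch of the intended round trip, assuming an already mapped DeviceMemoryManager<Traits> instance named smmu and a mapped device address daddr (both hypothetical, not part of this diff):

    // Hypothetical round trip through the new helpers.
    const PAddr paddr = smmu.GetPhysicalRawAddressFromDAddr(daddr);
    if (paddr != 0) {
        // For a backed page, GetAddressFromPAddr recovers the device address.
        const DAddr round_trip = smmu.GetAddressFromPAddr(paddr);
        ASSERT(round_trip == daddr);
    }
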
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 77410f72f..8c5f82d31 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -105,7 +105,8 @@ template <typename Traits>
105DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) 105DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
106 : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, 106 : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
107 interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), 107 interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
108 compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { 108 compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)),
109 cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
109 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); 110 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
110 cached_pages = std::make_unique<CachedPages>(); 111 cached_pages = std::make_unique<CachedPages>();
111} 112}
@@ -144,10 +145,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
144 Core::Memory::Memory* process_memory = registered_processes[process_id]; 145 Core::Memory::Memory* process_memory = registered_processes[process_id];
145 size_t start_page_d = address >> Memory::YUZU_PAGEBITS; 146 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
146 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; 147 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
147 std::atomic_thread_fence(std::memory_order_acquire); 148 std::scoped_lock lk(mapping_guard);
148 for (size_t i = 0; i < num_pages; i++) { 149 for (size_t i = 0; i < num_pages; i++) {
149 const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; 150 const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
150 auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); 151 auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
151 if (ptr == nullptr) [[unlikely]] { 152 if (ptr == nullptr) [[unlikely]] {
152 compressed_physical_ptr[start_page_d + i] = 0; 153 compressed_physical_ptr[start_page_d + i] = 0;
153 continue; 154 continue;
@@ -157,14 +158,14 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
157 compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); 158 compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
158 InsertCPUBacking(start_page_d + i, new_vaddress, process_id); 159 InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
159 } 160 }
160 std::atomic_thread_fence(std::memory_order_release);
161} 161}
162 162
163template <typename Traits> 163template <typename Traits>
164void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { 164void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
165 size_t start_page_d = address >> Memory::YUZU_PAGEBITS; 165 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
166 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; 166 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
167 std::atomic_thread_fence(std::memory_order_acquire); 167 interface->InvalidateRegion(address, size);
168 std::scoped_lock lk(mapping_guard);
168 for (size_t i = 0; i < num_pages; i++) { 169 for (size_t i = 0; i < num_pages; i++) {
169 auto phys_addr = compressed_physical_ptr[start_page_d + i]; 170 auto phys_addr = compressed_physical_ptr[start_page_d + i];
170 compressed_physical_ptr[start_page_d + i] = 0; 171 compressed_physical_ptr[start_page_d + i] = 0;
@@ -173,7 +174,6 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
173 compressed_device_addr[phys_addr - 1] = 0; 174 compressed_device_addr[phys_addr - 1] = 0;
174 } 175 }
175 } 176 }
176 std::atomic_thread_fence(std::memory_order_release);
177} 177}
178 178
179template <typename Traits> 179template <typename Traits>
@@ -256,6 +256,45 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto o
256 256
257template <typename Traits> 257template <typename Traits>
258void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { 258void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
259 interface->FlushRegion(address, size);
260 WalkBlock(
261 address, size,
262 [&](size_t copy_amount, DAddr current_vaddr) {
263 LOG_ERROR(
264 HW_Memory,
265 "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
266 current_vaddr, address, size);
267 std::memset(dest_pointer, 0, copy_amount);
268 },
269 [&](size_t copy_amount, const u8* const src_ptr) {
270 std::memcpy(dest_pointer, src_ptr, copy_amount);
271 },
272 [&](const std::size_t copy_amount) {
273 dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
274 });
275}
276
277template <typename Traits>
278void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
279 WalkBlock(
280 address, size,
281 [&](size_t copy_amount, DAddr current_vaddr) {
282 LOG_ERROR(
283 HW_Memory,
284 "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
285 current_vaddr, address, size);
286 },
287 [&](size_t copy_amount, u8* const dst_ptr) {
288 std::memcpy(dst_ptr, src_pointer, copy_amount);
289 },
290 [&](const std::size_t copy_amount) {
291 src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
292 });
293 interface->InvalidateRegion(address, size);
294}
295
296template <typename Traits>
297void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
259 WalkBlock( 298 WalkBlock(
260 address, size, 299 address, size,
261 [&](size_t copy_amount, DAddr current_vaddr) { 300 [&](size_t copy_amount, DAddr current_vaddr) {
@@ -274,7 +313,8 @@ void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, s
274} 313}
275 314
276template <typename Traits> 315template <typename Traits>
277void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) { 316void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
317 size_t size) {
278 WalkBlock( 318 WalkBlock(
279 address, size, 319 address, size,
280 [&](size_t copy_amount, DAddr current_vaddr) { 320 [&](size_t copy_amount, DAddr current_vaddr) {
@@ -287,7 +327,7 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, s
287 std::memcpy(dst_ptr, src_pointer, copy_amount); 327 std::memcpy(dst_ptr, src_pointer, copy_amount);
288 }, 328 },
289 [&](const std::size_t copy_amount) { 329 [&](const std::size_t copy_amount) {
290 src_pointer = static_cast<u8*>(src_pointer) + copy_amount; 330 src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
291 }); 331 });
292} 332}
293 333
@@ -313,6 +353,18 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) {
313 353
314template <typename Traits> 354template <typename Traits>
315void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { 355void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
356 bool locked = false;
357 auto lock = [&] {
358 if (!locked) {
359 counter_guard.lock();
360 locked = true;
361 }
362 };
363 SCOPE_EXIT({
364 if (locked) {
365 counter_guard.unlock();
366 }
367 });
316 u64 uncache_begin = 0; 368 u64 uncache_begin = 0;
317 u64 cache_begin = 0; 369 u64 cache_begin = 0;
318 u64 uncache_bytes = 0; 370 u64 uncache_bytes = 0;
@@ -347,6 +399,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
347 } 399 }
348 uncache_bytes += Memory::YUZU_PAGESIZE; 400 uncache_bytes += Memory::YUZU_PAGESIZE;
349 } else if (uncache_bytes > 0) { 401 } else if (uncache_bytes > 0) {
402 lock();
350 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, 403 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS,
351 uncache_bytes, false); 404 uncache_bytes, false);
352 uncache_bytes = 0; 405 uncache_bytes = 0;
@@ -357,6 +410,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
357 } 410 }
358 cache_bytes += Memory::YUZU_PAGESIZE; 411 cache_bytes += Memory::YUZU_PAGESIZE;
359 } else if (cache_bytes > 0) { 412 } else if (cache_bytes > 0) {
413 lock();
360 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, 414 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
361 true); 415 true);
362 cache_bytes = 0; 416 cache_bytes = 0;
@@ -364,10 +418,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
364 vpage++; 418 vpage++;
365 } 419 }
366 if (uncache_bytes > 0) { 420 if (uncache_bytes > 0) {
421 lock();
367 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, 422 MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
368 false); 423 false);
369 } 424 }
370 if (cache_bytes > 0) { 425 if (cache_bytes > 0) {
426 lock();
371 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, 427 MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
372 true); 428 true);
373 } 429 }
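
As the hunks above show, the safe block accessors now bracket the copy with cache maintenance through the rasterizer interface (FlushRegion before ReadBlock, InvalidateRegion after WriteBlock), while the new ReadBlockUnsafe/WriteBlockUnsafe variants skip it. A hedged sketch of the calling pattern, where smmu, daddr and the staging buffer are placeholders:

    std::array<u8, 0x1000> staging{};
    // Coherent path: flushes GPU-cached data covering [daddr, daddr + size).
    smmu.ReadBlock(daddr, staging.data(), staging.size());
    // Fast path: no cache maintenance, the caller guarantees coherence.
    smmu.ReadBlockUnsafe(daddr, staging.data(), staging.size());
    // Writes mirror this: the safe variant invalidates the region afterwards.
    smmu.WriteBlock(daddr, staging.data(), staging.size());
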
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9687531e8..f1abf4f83 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -23,7 +23,7 @@ public:
23 23
24 ~GPUDirtyMemoryManager() = default; 24 ~GPUDirtyMemoryManager() = default;
25 25
26 void Collect(VAddr address, size_t size) { 26 void Collect(PAddr address, size_t size) {
27 TransformAddress t = BuildTransform(address, size); 27 TransformAddress t = BuildTransform(address, size);
28 TransformAddress tmp, original; 28 TransformAddress tmp, original;
29 do { 29 do {
@@ -47,7 +47,7 @@ public:
47 std::memory_order_relaxed)); 47 std::memory_order_relaxed));
48 } 48 }
49 49
50 void Gather(std::function<void(VAddr, size_t)>& callback) { 50 void Gather(std::function<void(PAddr, size_t)>& callback) {
51 { 51 {
52 std::scoped_lock lk(guard); 52 std::scoped_lock lk(guard);
53 TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); 53 TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
@@ -65,7 +65,7 @@ public:
65 mask = mask >> empty_bits; 65 mask = mask >> empty_bits;
66 66
67 const size_t continuous_bits = std::countr_one(mask); 67 const size_t continuous_bits = std::countr_one(mask);
68 callback((static_cast<VAddr>(transform.address) << page_bits) + offset, 68 callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
69 continuous_bits << align_bits); 69 continuous_bits << align_bits);
70 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; 70 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
71 offset += continuous_bits << align_bits; 71 offset += continuous_bits << align_bits;
@@ -89,7 +89,7 @@ private:
89 constexpr static size_t align_mask = align_size - 1; 89 constexpr static size_t align_mask = align_size - 1;
90 constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; 90 constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
91 91
92 bool IsValid(VAddr address) { 92 bool IsValid(PAddr address) {
93 return address < (1ULL << 39); 93 return address < (1ULL << 39);
94 } 94 }
95 95
@@ -103,7 +103,7 @@ private:
103 return mask; 103 return mask;
104 } 104 }
105 105
106 TransformAddress BuildTransform(VAddr address, size_t size) { 106 TransformAddress BuildTransform(PAddr address, size_t size) {
107 const size_t minor_address = address & page_mask; 107 const size_t minor_address = address & page_mask;
108 const size_t minor_bit = minor_address >> align_bits; 108 const size_t minor_bit = minor_address >> align_bits;
109 const size_t top_bit = (minor_address + size + align_mask) >> align_bits; 109 const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
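
With Collect and Gather now tracking PAddr instead of VAddr, the dirty-memory callback plumbed through System::GatherGPUDirtyMemory receives physical ranges. A sketch of a consumer-side callback (the lambda body is illustrative only; system is an assumed Core::System reference):

    std::function<void(PAddr, size_t)> on_dirty = [](PAddr addr, size_t size) {
        // Forward the dirty physical range to the GPU caches; in this commit
        // the real handler lives on the video_core side.
        LOG_DEBUG(HW_Memory, "dirty region 0x{:X}+0x{:X}", addr, size);
    };
    system.GatherGPUDirtyMemory(on_dirty);
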
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h
new file mode 100644
index 000000000..0b349cc17
--- /dev/null
+++ b/src/core/guest_memory.h
@@ -0,0 +1,218 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <iterator>
7#include <memory>
8#include <optional>
9#include <span>
10#include <vector>
11
12#include "common/scratch_buffer.h"
13#include "core/memory.h"
14
15namespace Core::Memory {
16
17enum GuestMemoryFlags : u32 {
18 Read = 1 << 0,
19 Write = 1 << 1,
20 Safe = 1 << 2,
21 Cached = 1 << 3,
22
23 SafeRead = Read | Safe,
24 SafeWrite = Write | Safe,
25 SafeReadWrite = SafeRead | SafeWrite,
26 SafeReadCachedWrite = SafeReadWrite | Cached,
27
28 UnsafeRead = Read,
29 UnsafeWrite = Write,
30 UnsafeReadWrite = UnsafeRead | UnsafeWrite,
31 UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
32};
33
34namespace {
35template <typename M, typename T, GuestMemoryFlags FLAGS>
36class GuestMemory {
37 using iterator = T*;
38 using const_iterator = const T*;
39 using value_type = T;
40 using element_type = T;
41 using iterator_category = std::contiguous_iterator_tag;
42
43public:
44 GuestMemory() = delete;
45 explicit GuestMemory(M& memory, u64 addr, std::size_t size,
46 Common::ScratchBuffer<T>* backup = nullptr)
47 : m_memory{memory}, m_addr{addr}, m_size{size} {
48 static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
49 if constexpr (FLAGS & GuestMemoryFlags::Read) {
50 Read(addr, size, backup);
51 }
52 }
53
54 ~GuestMemory() = default;
55
56 T* data() noexcept {
57 return m_data_span.data();
58 }
59
60 const T* data() const noexcept {
61 return m_data_span.data();
62 }
63
64 size_t size() const noexcept {
65 return m_size;
66 }
67
68 size_t size_bytes() const noexcept {
69 return this->size() * sizeof(T);
70 }
71
72 [[nodiscard]] T* begin() noexcept {
73 return this->data();
74 }
75
76 [[nodiscard]] const T* begin() const noexcept {
77 return this->data();
78 }
79
80 [[nodiscard]] T* end() noexcept {
81 return this->data() + this->size();
82 }
83
84 [[nodiscard]] const T* end() const noexcept {
85 return this->data() + this->size();
86 }
87
88 T& operator[](size_t index) noexcept {
89 return m_data_span[index];
90 }
91
92 const T& operator[](size_t index) const noexcept {
93 return m_data_span[index];
94 }
95
96 void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
97 m_addr = addr;
98 m_size = size;
99 m_addr_changed = true;
100 }
101
102 std::span<T> Read(u64 addr, std::size_t size,
103 Common::ScratchBuffer<T>* backup = nullptr) noexcept {
104 m_addr = addr;
105 m_size = size;
106 if (m_size == 0) {
107 m_is_data_copy = true;
108 return {};
109 }
110
111 if (this->TrySetSpan()) {
112 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
113 m_memory.FlushRegion(m_addr, this->size_bytes());
114 }
115 } else {
116 if (backup) {
117 backup->resize_destructive(this->size());
118 m_data_span = *backup;
119 } else {
120 m_data_copy.resize(this->size());
121 m_data_span = std::span(m_data_copy);
122 }
123 m_is_data_copy = true;
124 m_span_valid = true;
125 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
126 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
127 } else {
128 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
129 }
130 }
131 return m_data_span;
132 }
133
134 void Write(std::span<T> write_data) noexcept {
135 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
136 m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
137 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
138 m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
139 } else {
140 m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
141 }
142 }
143
144 bool TrySetSpan() noexcept {
145 if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
146 m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
147 m_span_valid = true;
148 return true;
149 }
150 return false;
151 }
152
153protected:
154 bool IsDataCopy() const noexcept {
155 return m_is_data_copy;
156 }
157
158 bool AddressChanged() const noexcept {
159 return m_addr_changed;
160 }
161
162 M& m_memory;
163 u64 m_addr{};
164 size_t m_size{};
165 std::span<T> m_data_span{};
166 std::vector<T> m_data_copy{};
167 bool m_span_valid{false};
168 bool m_is_data_copy{false};
169 bool m_addr_changed{false};
170};
171
172template <typename M, typename T, GuestMemoryFlags FLAGS>
173class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
174public:
175 GuestMemoryScoped() = delete;
176 explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
177 Common::ScratchBuffer<T>* backup = nullptr)
178 : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
179 if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
180 if (!this->TrySetSpan()) {
181 if (backup) {
182 this->m_data_span = *backup;
183 this->m_span_valid = true;
184 this->m_is_data_copy = true;
185 }
186 }
187 }
188 }
189
190 ~GuestMemoryScoped() {
191 if constexpr (FLAGS & GuestMemoryFlags::Write) {
192 if (this->size() == 0) [[unlikely]] {
193 return;
194 }
195
196 if (this->AddressChanged() || this->IsDataCopy()) {
197 ASSERT(this->m_span_valid);
198 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
199 this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
200 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
201 this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
202 } else {
203 this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
204 }
205 } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || (FLAGS & GuestMemoryFlags::Cached)) {
206 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
207 }
208 }
209 }
210};
211} // namespace
212
213template <typename T, GuestMemoryFlags FLAGS>
214using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>;
215template <typename T, GuestMemoryFlags FLAGS>
216using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>;
217
218} // namespace Tegra::Memory
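
The new header extracts the guest-memory span helpers into core/guest_memory.h so both core and video_core can instantiate them. A short usage sketch under assumed names (memory is a Core::Memory::Memory&, addresses and sizes are placeholders):

    // Read-only view: copies (or directly spans) guest memory on construction.
    Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> src(
        memory, src_addr, copy_size);

    {
        // Scoped read/write view: written back to the guest on destruction
        // (WriteBlock, WriteBlockCached or WriteBlockUnsafe depending on flags).
        Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite>
            dst(memory, dst_addr, copy_size);
        std::memcpy(dst.data(), src.data(), dst.size_bytes());
    } // destructor flushes the modified bytes back
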
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp
index 3f38ceb03..9f6274c7d 100644
--- a/src/core/hle/service/hle_ipc.cpp
+++ b/src/core/hle/service/hle_ipc.cpp
@@ -22,19 +22,7 @@
22#include "core/hle/service/hle_ipc.h" 22#include "core/hle/service/hle_ipc.h"
23#include "core/hle/service/ipc_helpers.h" 23#include "core/hle/service/ipc_helpers.h"
24#include "core/memory.h" 24#include "core/memory.h"
25 25#include "core/guest_memory.h"
26namespace {
27static thread_local std::array read_buffer_data_a{
28 Common::ScratchBuffer<u8>(),
29 Common::ScratchBuffer<u8>(),
30 Common::ScratchBuffer<u8>(),
31};
32static thread_local std::array read_buffer_data_x{
33 Common::ScratchBuffer<u8>(),
34 Common::ScratchBuffer<u8>(),
35 Common::ScratchBuffer<u8>(),
36};
37} // Anonymous namespace
38 26
39namespace Service { 27namespace Service {
40 28
@@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons
343} 331}
344 332
345std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { 333std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const {
346 static thread_local std::array read_buffer_a{ 334 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
347 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
348 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
349 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
350 };
351 335
352 ASSERT_OR_EXECUTE_MSG( 336 ASSERT_OR_EXECUTE_MSG(
353 BufferDescriptorA().size() > buffer_index, { return {}; }, 337 BufferDescriptorA().size() > buffer_index, { return {}; },
354 "BufferDescriptorA invalid buffer_index {}", buffer_index); 338 "BufferDescriptorA invalid buffer_index {}", buffer_index);
355 auto& read_buffer = read_buffer_a[buffer_index]; 339 return gm.Read(BufferDescriptorA()[buffer_index].Address(),
356 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), 340 BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
357 BufferDescriptorA()[buffer_index].Size(),
358 &read_buffer_data_a[buffer_index]);
359} 341}
360 342
361std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { 343std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const {
362 static thread_local std::array read_buffer_x{ 344 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
363 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
364 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
365 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
366 };
367 345
368 ASSERT_OR_EXECUTE_MSG( 346 ASSERT_OR_EXECUTE_MSG(
369 BufferDescriptorX().size() > buffer_index, { return {}; }, 347 BufferDescriptorX().size() > buffer_index, { return {}; },
370 "BufferDescriptorX invalid buffer_index {}", buffer_index); 348 "BufferDescriptorX invalid buffer_index {}", buffer_index);
371 auto& read_buffer = read_buffer_x[buffer_index]; 349 return gm.Read(BufferDescriptorX()[buffer_index].Address(),
372 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), 350 BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
373 BufferDescriptorX()[buffer_index].Size(),
374 &read_buffer_data_x[buffer_index]);
375} 351}
376 352
377std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { 353std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
378 static thread_local std::array read_buffer_a{ 354 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
379 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
380 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
381 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
382 };
383 static thread_local std::array read_buffer_x{
384 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
385 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
386 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
387 };
388 355
389 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 356 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
390 BufferDescriptorA()[buffer_index].Size()}; 357 BufferDescriptorA()[buffer_index].Size()};
@@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons
401 ASSERT_OR_EXECUTE_MSG( 368 ASSERT_OR_EXECUTE_MSG(
402 BufferDescriptorA().size() > buffer_index, { return {}; }, 369 BufferDescriptorA().size() > buffer_index, { return {}; },
403 "BufferDescriptorA invalid buffer_index {}", buffer_index); 370 "BufferDescriptorA invalid buffer_index {}", buffer_index);
404 auto& read_buffer = read_buffer_a[buffer_index]; 371 return gm.Read(BufferDescriptorA()[buffer_index].Address(),
405 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), 372 BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
406 BufferDescriptorA()[buffer_index].Size(),
407 &read_buffer_data_a[buffer_index]);
408 } else { 373 } else {
409 ASSERT_OR_EXECUTE_MSG( 374 ASSERT_OR_EXECUTE_MSG(
410 BufferDescriptorX().size() > buffer_index, { return {}; }, 375 BufferDescriptorX().size() > buffer_index, { return {}; },
411 "BufferDescriptorX invalid buffer_index {}", buffer_index); 376 "BufferDescriptorX invalid buffer_index {}", buffer_index);
412 auto& read_buffer = read_buffer_x[buffer_index]; 377 return gm.Read(BufferDescriptorX()[buffer_index].Address(),
413 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), 378 BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
414 BufferDescriptorX()[buffer_index].Size(),
415 &read_buffer_data_x[buffer_index]);
416 } 379 }
417} 380}
418 381
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h
index d550a11b7..8329d7265 100644
--- a/src/core/hle/service/hle_ipc.h
+++ b/src/core/hle/service/hle_ipc.h
@@ -19,8 +19,6 @@
19#include "core/hle/ipc.h" 19#include "core/hle/ipc.h"
20#include "core/hle/kernel/k_handle_table.h" 20#include "core/hle/kernel/k_handle_table.h"
21#include "core/hle/kernel/svc_common.h" 21#include "core/hle/kernel/svc_common.h"
22#include "core/hle/kernel/k_auto_object.h"
23#include "core/hle/kernel/k_handle_table.h"
24 22
25union Result; 23union Result;
26 24
@@ -377,10 +375,6 @@ public:
377 return nullptr; 375 return nullptr;
378 } 376 }
379 377
380 Kernel::KScopedAutoObject<Kernel::KAutoObject> GetObjectFromHandle(u32 handle) {
381 return GetClientHandleTable().GetObjectForIpc(handle, thread);
382 }
383
384 [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { 378 [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const {
385 return manager.lock(); 379 return manager.lock();
386 } 380 }
@@ -432,6 +426,9 @@ private:
432 426
433 Kernel::KernelCore& kernel; 427 Kernel::KernelCore& kernel;
434 Core::Memory::Memory& memory; 428 Core::Memory::Memory& memory;
429
430 mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{};
431 mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{};
435}; 432};
436 433
437} // namespace Service 434} // namespace Service
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index fd6c9aa0c..7879c6f04 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -2,6 +2,8 @@
2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors 2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
3// SPDX-License-Identifier: GPL-3.0-or-later 3// SPDX-License-Identifier: GPL-3.0-or-later
4 4
5#include <functional>
6
5#include "common/alignment.h" 7#include "common/alignment.h"
6#include "common/assert.h" 8#include "common/assert.h"
7#include "common/logging/log.h" 9#include "common/logging/log.h"
@@ -18,6 +20,7 @@ NvMap::Handle::Handle(u64 size_, Id id_)
18} 20}
19 21
20NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { 22NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) {
23 std::scoped_lock lock(mutex);
21 // Handles cannot be allocated twice 24 // Handles cannot be allocated twice
22 if (allocated) { 25 if (allocated) {
23 return NvResult::AccessDenied; 26 return NvResult::AccessDenied;
@@ -78,11 +81,9 @@ void NvMap::UnmapHandle(Handle& handle_description) {
78 81
79 // Free and unmap the handle from the SMMU 82 // Free and unmap the handle from the SMMU
80 auto& smmu = host1x.MemoryManager(); 83 auto& smmu = host1x.MemoryManager();
81 smmu.Unmap(static_cast<DAddr>(handle_description.pin_virt_address), 84 smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
82 handle_description.aligned_size); 85 smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));
83 smmu.Free(handle_description.pin_virt_address, 86 handle_description.d_address = 0;
84 static_cast<size_t>(handle_description.aligned_size));
85 handle_description.pin_virt_address = 0;
86} 87}
87 88
88bool NvMap::TryRemoveHandle(const Handle& handle_description) { 89bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -123,41 +124,16 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
123 } 124 }
124} 125}
125 126
126VAddr NvMap::GetHandleAddress(Handle::Id handle) { 127DAddr NvMap::GetHandleAddress(Handle::Id handle) {
127 std::scoped_lock lock(handles_lock); 128 std::scoped_lock lock(handles_lock);
128 try { 129 try {
129 return handles.at(handle)->address; 130 return handles.at(handle)->d_address;
130 } catch (std::out_of_range&) { 131 } catch (std::out_of_range&) {
131 return 0; 132 return 0;
132 } 133 }
133} 134}
134 135
135NvResult NvMap::AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id) { 136DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_area_pin) {
136 auto handle_description{GetHandle(handle)};
137 if (!handle_description) [[unlikely]] {
138 return NvResult::BadParameter;
139 }
140
141 if (handle_description->allocated) [[unlikely]] {
142 return NvResult::InsufficientMemory;
143 }
144
145 std::scoped_lock lock(handle_description->mutex);
146 NvResult result = handle_description->Alloc(pFlags, pAlign, pKind, pAddress);
147 if (result != NvResult::Success) {
148 return result;
149 }
150 auto& smmu = host1x.MemoryManager();
151 size_t total_size = static_cast<size_t>(handle_description->aligned_size);
152 handle_description->d_address = smmu.Allocate(total_size);
153 if (handle_description->d_address == 0) {
154 return NvResult::InsufficientMemory;
155 }
156 smmu.Map(handle_description->d_address, handle_description->address, total_size, session_id);
157 return NvResult::Success;
158}
159
160u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) {
161 auto handle_description{GetHandle(handle)}; 137 auto handle_description{GetHandle(handle)};
162 if (!handle_description) [[unlikely]] { 138 if (!handle_description) [[unlikely]] {
163 return 0; 139 return 0;
@@ -176,35 +152,38 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) {
176 handle_description->unmap_queue_entry.reset(); 152 handle_description->unmap_queue_entry.reset();
177 153
178 handle_description->pins++; 154 handle_description->pins++;
179 return handle_description->pin_virt_address; 155 return handle_description->d_address;
180 } 156 }
181 } 157 }
182 158
159 using namespace std::placeholders;
183 // If not then allocate some space and map it 160 // If not then allocate some space and map it
184 DAddr address{}; 161 DAddr address{};
185 auto& smmu = host1x.MemoryManager(); 162 auto& smmu = host1x.MemoryManager();
186 while ((address = smmu.AllocatePinned( 163 auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
187 static_cast<size_t>(handle_description->aligned_size))) == 0) { 164 //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
165 while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
188 // Free handles until the allocation succeeds 166 // Free handles until the allocation succeeds
189 std::scoped_lock queueLock(unmap_queue_lock); 167 std::scoped_lock queueLock(unmap_queue_lock);
190 if (auto freeHandleDesc{unmap_queue.front()}) { 168 if (auto freeHandleDesc{unmap_queue.front()}) {
191 // Handles in the unmap queue are guaranteed not to be pinned so don't bother 169 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
192 // checking if they are before unmapping 170 // checking if they are before unmapping
193 std::scoped_lock freeLock(freeHandleDesc->mutex); 171 std::scoped_lock freeLock(freeHandleDesc->mutex);
194 if (handle_description->pin_virt_address) 172 if (handle_description->d_address)
195 UnmapHandle(*freeHandleDesc); 173 UnmapHandle(*freeHandleDesc);
196 } else { 174 } else {
197 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); 175 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
198 } 176 }
199 } 177 }
200 178
179 handle_description->d_address = address;
180
201 smmu.Map(address, handle_description->address, handle_description->aligned_size, 181 smmu.Map(address, handle_description->address, handle_description->aligned_size,
202 session_id); 182 session_id);
203 handle_description->pin_virt_address = static_cast<u32>(address);
204 } 183 }
205 184
206 handle_description->pins++; 185 handle_description->pins++;
207 return handle_description->pin_virt_address; 186 return handle_description->d_address;
208} 187}
209 188
210void NvMap::UnpinHandle(Handle::Id handle) { 189void NvMap::UnpinHandle(Handle::Id handle) {
@@ -255,15 +234,10 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna
255 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); 234 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
256 } else if (handle_description->dupes == 0) { 235 } else if (handle_description->dupes == 0) {
257 // Force unmap the handle 236 // Force unmap the handle
258 if (handle_description->pin_virt_address) { 237 if (handle_description->d_address) {
259 std::scoped_lock queueLock(unmap_queue_lock); 238 std::scoped_lock queueLock(unmap_queue_lock);
260 UnmapHandle(*handle_description); 239 UnmapHandle(*handle_description);
261 } 240 }
262 if (handle_description->allocated) {
263 auto& smmu = host1x.MemoryManager();
264 smmu.Free(handle_description->d_address, handle_description->aligned_size);
265 smmu.Unmap(handle_description->d_address, handle_description->aligned_size);
266 }
267 241
268 handle_description->pins = 0; 242 handle_description->pins = 0;
269 } 243 }
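
PinHandle now hands back a device address (DAddr) in the SMMU address space instead of a 32-bit pin_virt_address, and takes the owning session id plus a low-area hint. A hedged sketch of the call pattern from an nvdrv device, using the per-fd session bookkeeping introduced for nvhost_as_gpu below (names taken from those hunks, otherwise placeholders):

    // sessions[fd] is recorded in OnOpen; false requests a regular (non-low-area) pin.
    const DAddr pinned = nvmap.PinHandle(params.handle, sessions[fd], false);
    gmmu->Map(params.offset, pinned + params.buffer_offset, size,
              static_cast<Tegra::PTEKind>(params.kind), use_big_pages);
    // ... and every PinHandle must later be balanced by an UnpinHandle:
    nvmap.UnpinHandle(params.handle);
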
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 7c3110d91..e9e9e8b5b 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -48,7 +48,7 @@ public:
48 using Id = u32; 48 using Id = u32;
49 Id id; //!< A globally unique identifier for this handle 49 Id id; //!< A globally unique identifier for this handle
50 50
51 s32 pins{}; 51 s64 pins{};
52 u32 pin_virt_address{}; 52 u32 pin_virt_address{};
53 std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; 53 std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
54 54
@@ -63,15 +63,14 @@ public:
63 63
64 VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to, 64 VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to,
65 //!< this can also be in the nvdrv tmem 65 //!< this can also be in the nvdrv tmem
66 DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
67 //!< this can also be in the nvdrv tmem
68 bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC 66 bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
69 //!< call 67 //!< call
70 68
71 u8 kind{}; //!< Used for memory compression 69 u8 kind{}; //!< Used for memory compression
72 bool allocated{}; //!< If the handle has been allocated with `Alloc` 70 bool allocated{}; //!< If the handle has been allocated with `Alloc`
73 71
74 u64 dma_map_addr{}; //! remove me after implementing pinning. 72 DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
73 //!< this can also be in the nvdrv tmem
75 74
76 Handle(u64 size, Id id); 75 Handle(u64 size, Id id);
77 76
@@ -119,15 +118,7 @@ public:
119 118
120 std::shared_ptr<Handle> GetHandle(Handle::Id handle); 119 std::shared_ptr<Handle> GetHandle(Handle::Id handle);
121 120
122 VAddr GetHandleAddress(Handle::Id handle); 121 DAddr GetHandleAddress(Handle::Id handle);
123
124 /**
125 * @brief Maps a handle into the SMMU address space
126 * @note This operation is refcounted, the number of calls to this must eventually match the
127 * number of calls to `UnpinHandle`
128 * @return The SMMU virtual address that the handle has been mapped to
129 */
130 u32 PinHandle(Handle::Id handle, size_t session_id);
131 122
132 /** 123 /**
133 * @brief Maps a handle into the SMMU address space 124 * @brief Maps a handle into the SMMU address space
@@ -135,7 +126,7 @@ public:
135 * number of calls to `UnpinHandle` 126 * number of calls to `UnpinHandle`
136 * @return The SMMU virtual address that the handle has been mapped to 127 * @return The SMMU virtual address that the handle has been mapped to
137 */ 128 */
138 NvResult AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id); 129 DAddr PinHandle(Handle::Id handle, size_t session_id, bool low_area_pin);
139 130
140 /** 131 /**
141 * @brief When this has been called an equal number of times to `PinHandle` for the supplied 132 * @brief When this has been called an equal number of times to `PinHandle` for the supplied
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 0ff41c6b2..f1404b9da 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -42,7 +42,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
42 u32 height, u32 stride, android::BufferTransformFlags transform, 42 u32 height, u32 stride, android::BufferTransformFlags transform,
43 const Common::Rectangle<int>& crop_rect, 43 const Common::Rectangle<int>& crop_rect,
44 std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { 44 std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
45 const VAddr addr = nvmap.GetHandleAddress(buffer_handle); 45 const DAddr addr = nvmap.GetHandleAddress(buffer_handle);
46 LOG_TRACE(Service, 46 LOG_TRACE(Service,
47 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 47 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
48 addr, offset, width, height, stride, format); 48 addr, offset, width, height, stride, format);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index c92a7b2f6..8bc10eac2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -40,15 +40,15 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
40 case 0x3: 40 case 0x3:
41 return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); 41 return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output);
42 case 0x5: 42 case 0x5:
43 return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output); 43 return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output, fd);
44 case 0x6: 44 case 0x6:
45 return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output); 45 return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output, fd);
46 case 0x8: 46 case 0x8:
47 return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); 47 return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output);
48 case 0x9: 48 case 0x9:
49 return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); 49 return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output);
50 case 0x14: 50 case 0x14:
51 return WrapVariable(this, &nvhost_as_gpu::Remap, input, output); 51 return WrapVariable(this, &nvhost_as_gpu::Remap, input, output, fd);
52 default: 52 default:
53 break; 53 break;
54 } 54 }
@@ -86,8 +86,15 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
86 return NvResult::NotImplemented; 86 return NvResult::NotImplemented;
87} 87}
88 88
89void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {} 89void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {
90void nvhost_as_gpu::OnClose(DeviceFD fd) {} 90 sessions[fd] = session_id;
91}
92void nvhost_as_gpu::OnClose(DeviceFD fd) {
93 auto it = sessions.find(fd);
94 if (it != sessions.end()) {
95 sessions.erase(it);
96 }
97}
91 98
92NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { 99NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
93 LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); 100 LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size);
@@ -206,6 +213,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
206 static_cast<u32>(aligned_size >> page_size_bits)); 213 static_cast<u32>(aligned_size >> page_size_bits));
207 } 214 }
208 215
216 nvmap.UnpinHandle(mapping->handle);
217
209 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state 218 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
210 // Only FreeSpace can unmap them fully 219 // Only FreeSpace can unmap them fully
211 if (mapping->sparse_alloc) { 220 if (mapping->sparse_alloc) {
@@ -259,7 +268,7 @@ NvResult nvhost_as_gpu::FreeSpace(IoctlFreeSpace& params) {
259 return NvResult::Success; 268 return NvResult::Success;
260} 269}
261 270
262NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { 271NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) {
263 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); 272 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size());
264 273
265 if (!vm.initialised) { 274 if (!vm.initialised) {
@@ -293,19 +302,19 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) {
293 return NvResult::BadValue; 302 return NvResult::BadValue;
294 } 303 }
295 304
296 VAddr cpu_address{static_cast<VAddr>( 305 DAddr base = nvmap.PinHandle(entry.handle, sessions[fd], false);
297 handle->address + 306 DAddr device_address{static_cast<DAddr>(
298 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; 307 base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
299 308
300 gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), 309 gmmu->Map(virtual_address, device_address, size,
301 use_big_pages); 310 static_cast<Tegra::PTEKind>(entry.kind), use_big_pages);
302 } 311 }
303 } 312 }
304 313
305 return NvResult::Success; 314 return NvResult::Success;
306} 315}
307 316
308NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { 317NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) {
309 LOG_DEBUG(Service_NVDRV, 318 LOG_DEBUG(Service_NVDRV,
310 "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" 319 "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}"
311 ", offset={}", 320 ", offset={}",
@@ -331,9 +340,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
331 } 340 }
332 341
333 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; 342 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
334 VAddr cpu_address{mapping->ptr + params.buffer_offset}; 343 VAddr device_address{mapping->ptr + params.buffer_offset};
335 344
336 gmmu->Map(gpu_address, cpu_address, params.mapping_size, 345 gmmu->Map(gpu_address, device_address, params.mapping_size,
337 static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); 346 static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
338 347
339 return NvResult::Success; 348 return NvResult::Success;
@@ -349,7 +358,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
349 return NvResult::BadValue; 358 return NvResult::BadValue;
350 } 359 }
351 360
352 VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; 361 DAddr device_address{static_cast<DAddr>(nvmap.PinHandle(params.handle, sessions[fd], false) +
362 params.buffer_offset)};
353 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; 363 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
354 364
355 bool big_page{[&]() { 365 bool big_page{[&]() {
@@ -373,15 +383,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
373 } 383 }
374 384
375 const bool use_big_pages = alloc->second.big_pages && big_page; 385 const bool use_big_pages = alloc->second.big_pages && big_page;
376 gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), 386 gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind),
377 use_big_pages); 387 use_big_pages);
378 388
379 auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, 389 auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
380 use_big_pages, alloc->second.sparse)}; 390 true, use_big_pages, alloc->second.sparse)};
381 alloc->second.mappings.push_back(mapping); 391 alloc->second.mappings.push_back(mapping);
382 mapping_map[params.offset] = mapping; 392 mapping_map[params.offset] = mapping;
383 } else { 393 } else {
384
385 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; 394 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
386 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; 395 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
387 u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; 396 u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
@@ -394,18 +403,18 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
394 return NvResult::InsufficientMemory; 403 return NvResult::InsufficientMemory;
395 } 404 }
396 405
397 gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), 406 gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size),
398 static_cast<Tegra::PTEKind>(params.kind), big_page); 407 static_cast<Tegra::PTEKind>(params.kind), big_page);
399 408
400 auto mapping{ 409 auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
401 std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; 410 false, big_page, false)};
402 mapping_map[params.offset] = mapping; 411 mapping_map[params.offset] = mapping;
403 } 412 }
404 413
405 return NvResult::Success; 414 return NvResult::Success;
406} 415}
407 416
408NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { 417NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) {
409 LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); 418 LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
410 419
411 std::scoped_lock lock(mutex); 420 std::scoped_lock lock(mutex);
@@ -433,6 +442,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) {
433 gmmu->Unmap(params.offset, mapping->size); 442 gmmu->Unmap(params.offset, mapping->size);
434 } 443 }
435 444
445 nvmap.UnpinHandle(mapping->handle);
446
436 mapping_map.erase(params.offset); 447 mapping_map.erase(params.offset);
437 } catch (const std::out_of_range&) { 448 } catch (const std::out_of_range&) {
438 LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); 449 LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
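A sketch that is not part of the commit, to make the new flow above easier to follow: Remap and MapBufferEx now pin the nvmap handle for the caller's session and map the returned device address, while UnmapBuffer unpins it again. The wrapper function below and its name are hypothetical; only the calls visible in the hunks above are used, and the meaning of PinHandle's final bool argument is assumed from its use here.

    // Illustrative only; assumes the nvdrv/video_core headers of this tree.
    void MapAndUnmapOnce(NvCore::NvMap& nvmap, Tegra::MemoryManager& gmmu,
                         NvCore::NvMap::Handle::Id handle, size_t session_id,
                         u64 gpu_offset, u64 size, Tegra::PTEKind kind, bool big_page) {
        // Pin the handle for this session; the SMMU hands back a device address.
        const DAddr device_address = nvmap.PinHandle(handle, session_id, false);
        // Point the GPU address space at that device address, as MapBufferEx does.
        gmmu.Map(gpu_offset, device_address, size, kind, big_page);
        // UnmapBuffer later reverses both steps, as in the hunk above.
        gmmu.Unmap(gpu_offset, size);
        nvmap.UnpinHandle(handle);
    }

Storing the handle id in the Mapping record (next file) is what lets UnmapBuffer find the handle to unpin.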
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 0dd279f88..4b28f5078 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -141,9 +141,9 @@ private:
141 141
142 NvResult AllocAsEx(IoctlAllocAsEx& params); 142 NvResult AllocAsEx(IoctlAllocAsEx& params);
143 NvResult AllocateSpace(IoctlAllocSpace& params); 143 NvResult AllocateSpace(IoctlAllocSpace& params);
144 NvResult Remap(std::span<IoctlRemapEntry> params); 144 NvResult Remap(std::span<IoctlRemapEntry> params, DeviceFD fd);
145 NvResult MapBufferEx(IoctlMapBufferEx& params); 145 NvResult MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd);
146 NvResult UnmapBuffer(IoctlUnmapBuffer& params); 146 NvResult UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd);
147 NvResult FreeSpace(IoctlFreeSpace& params); 147 NvResult FreeSpace(IoctlFreeSpace& params);
148 NvResult BindChannel(IoctlBindChannel& params); 148 NvResult BindChannel(IoctlBindChannel& params);
149 149
@@ -159,16 +159,18 @@ private:
159 NvCore::NvMap& nvmap; 159 NvCore::NvMap& nvmap;
160 160
161 struct Mapping { 161 struct Mapping {
162 VAddr ptr; 162 NvCore::NvMap::Handle::Id handle;
163 DAddr ptr;
163 u64 offset; 164 u64 offset;
164 u64 size; 165 u64 size;
165 bool fixed; 166 bool fixed;
166 bool big_page; // Only valid if fixed == false 167 bool big_page; // Only valid if fixed == false
167 bool sparse_alloc; 168 bool sparse_alloc;
168 169
169 Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) 170 Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_,
170 : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), 171 bool big_page_, bool sparse_alloc_)
171 sparse_alloc(sparse_alloc_) {} 172 : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_),
173 big_page(big_page_), sparse_alloc(sparse_alloc_) {}
172 }; 174 };
173 175
174 struct Allocation { 176 struct Allocation {
@@ -212,9 +214,7 @@ private:
212 bool initialised{}; 214 bool initialised{};
213 } vm; 215 } vm;
214 std::shared_ptr<Tegra::MemoryManager> gmmu; 216 std::shared_ptr<Tegra::MemoryManager> gmmu;
215 217 std::unordered_map<DeviceFD, size_t> sessions;
216 // s32 channel{};
217 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
218}; 218};
219 219
220} // namespace Service::Nvidia::Devices 220} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 9ab0ae4d8..78bc5f3c4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -95,6 +95,9 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
95 offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); 95 offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
96 96
97 auto& gpu = system.GPU(); 97 auto& gpu = system.GPU();
98 //auto& device_memory = system.Host1x().MemoryManager();
99 auto* session = core.GetSession(sessions[fd]);
100
98 if (gpu.UseNvdec()) { 101 if (gpu.UseNvdec()) {
99 for (std::size_t i = 0; i < syncpt_increments.size(); i++) { 102 for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
100 const SyncptIncr& syncpt_incr = syncpt_increments[i]; 103 const SyncptIncr& syncpt_incr = syncpt_increments[i];
@@ -106,7 +109,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
106 const auto object = nvmap.GetHandle(cmd_buffer.memory_id); 109 const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
107 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 110 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
108 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); 111 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
109 system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), 112 session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
110 cmdlist.size() * sizeof(u32)); 113 cmdlist.size() * sizeof(u32));
111 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); 114 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
112 } 115 }
@@ -136,7 +139,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
136NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { 139NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) {
137 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); 140 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
138 for (size_t i = 0; i < num_entries; i++) { 141 for (size_t i = 0; i < num_entries; i++) {
139 entries[i].map_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd]); 142 DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
143 entries[i].map_address = static_cast<u32>(pin_address);
140 } 144 }
141 145
142 return NvResult::Success; 146 return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 2b107f009..7765ca1be 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -123,8 +123,8 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) {
123 return NvResult::InsufficientMemory; 123 return NvResult::InsufficientMemory;
124 } 124 }
125 125
126 const auto result = file.AllocateHandle(params.handle, params.flags, params.align, params.kind, 126 const auto result =
127 params.address, sessions[fd]); 127 handle_description->Alloc(params.flags, params.align, params.kind, params.address);
128 if (result != NvResult::Success) { 128 if (result != NvResult::Success) {
129 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); 129 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
130 return result; 130 return result;
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index 492ad849a..6e4825313 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -13,8 +13,6 @@
13#include "core/hle/service/nvdrv/nvdrv.h" 13#include "core/hle/service/nvdrv/nvdrv.h"
14#include "core/hle/service/nvdrv/nvdrv_interface.h" 14#include "core/hle/service/nvdrv/nvdrv_interface.h"
15 15
16#pragma optimize("", off)
17
18namespace Service::Nvidia { 16namespace Service::Nvidia {
19 17
20void NVDRV::Open(HLERequestContext& ctx) { 18void NVDRV::Open(HLERequestContext& ctx) {
@@ -173,8 +171,8 @@ void NVDRV::Initialize(HLERequestContext& ctx) {
173 [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); 171 [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>();
174 172
175 auto& container = nvdrv->GetContainer(); 173 auto& container = nvdrv->GetContainer();
176 auto process = ctx.GetObjectFromHandle(process_handle); 174 auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle);
177 session_id = container.OpenSession(process->DynamicCast<Kernel::KProcess*>()); 175 session_id = container.OpenSession(process.GetPointerUnsafe());
178 176
179 is_initialized = true; 177 is_initialized = true;
180} 178}
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8176a41be..609e775ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -24,6 +24,8 @@
24#include "core/hle/kernel/k_process.h" 24#include "core/hle/kernel/k_process.h"
25#include "core/memory.h" 25#include "core/memory.h"
26#include "video_core/gpu.h" 26#include "video_core/gpu.h"
27#include "video_core/host1x/gpu_device_memory_manager.h"
28#include "video_core/host1x/host1x.h"
27#include "video_core/rasterizer_download_area.h" 29#include "video_core/rasterizer_download_area.h"
28 30
29namespace Core::Memory { 31namespace Core::Memory {
@@ -638,15 +640,16 @@ struct Memory::Impl {
638 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); 640 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
639 641
640 // During boot, current_page_table might not be set yet, in which case we need not flush 642 // During boot, current_page_table might not be set yet, in which case we need not flush
641 if (system.IsPoweredOn()) { 643 /*if (system.IsPoweredOn()) {
642 auto& gpu = system.GPU(); 644 auto& gpu = system.GPU();
643 for (u64 i = 0; i < size; i++) { 645 for (u64 i = 0; i < size; i++) {
644 const auto page = base + i; 646 const auto page = base + i;
645 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { 647 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
648
646 gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); 649 gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
647 } 650 }
648 } 651 }
649 } 652 }*/
650 653
651 const auto end = base + size; 654 const auto end = base + size;
652 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", 655 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -811,10 +814,15 @@ struct Memory::Impl {
811 return true; 814 return true;
812 } 815 }
813 816
814 void HandleRasterizerDownload(VAddr address, size_t size) { 817 void HandleRasterizerDownload(VAddr v_address, size_t size) {
818 const auto* p = GetPointerImpl(
819 v_address, []() {}, []() {});
820 auto& gpu_device_memory = system.Host1x().MemoryManager();
821 DAddr address =
822 gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p));
815 const size_t core = system.GetCurrentHostThreadID(); 823 const size_t core = system.GetCurrentHostThreadID();
816 auto& current_area = rasterizer_read_areas[core]; 824 auto& current_area = rasterizer_read_areas[core];
817 const VAddr end_address = address + size; 825 const DAddr end_address = address + size;
818 if (current_area.start_address <= address && end_address <= current_area.end_address) 826 if (current_area.start_address <= address && end_address <= current_area.end_address)
819 [[likely]] { 827 [[likely]] {
820 return; 828 return;
@@ -822,7 +830,10 @@ struct Memory::Impl {
822 current_area = system.GPU().OnCPURead(address, size); 830 current_area = system.GPU().OnCPURead(address, size);
823 } 831 }
824 832
825 void HandleRasterizerWrite(VAddr address, size_t size) { 833 void HandleRasterizerWrite(VAddr v_address, size_t size) {
834 const auto* p = GetPointerImpl(
835 v_address, []() {}, []() {});
836 PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
826 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; 837 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
827 const size_t core = std::min(system.GetCurrentHostThreadID(), 838 const size_t core = std::min(system.GetCurrentHostThreadID(),
828 sys_core); // any other calls threads go to syscore. 839 sys_core); // any other calls threads go to syscore.
@@ -836,7 +847,7 @@ struct Memory::Impl {
836 } 847 }
837 }); 848 });
838 auto& current_area = rasterizer_write_areas[core]; 849 auto& current_area = rasterizer_write_areas[core];
839 VAddr subaddress = address >> YUZU_PAGEBITS; 850 PAddr subaddress = address >> YUZU_PAGEBITS;
840 bool do_collection = current_area.last_address == subaddress; 851 bool do_collection = current_area.last_address == subaddress;
841 if (!do_collection) [[unlikely]] { 852 if (!do_collection) [[unlikely]] {
842 do_collection = system.GPU().OnCPUWrite(address, size); 853 do_collection = system.GPU().OnCPUWrite(address, size);
@@ -849,7 +860,7 @@ struct Memory::Impl {
849 } 860 }
850 861
851 struct GPUDirtyState { 862 struct GPUDirtyState {
852 VAddr last_address; 863 PAddr last_address;
853 }; 864 };
854 865
855 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { 866 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
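Not from the commit itself: the translation added at the top of HandleRasterizerDownload and HandleRasterizerWrite above goes from guest virtual address to host pointer, then to guest physical address, then to SMMU device address. A small sketch of that chain, assuming the same System accessors used in the hunks; the helper name is hypothetical.

    // Hypothetical helper combining the calls used above.
    DAddr TranslateToDeviceAddress(Core::System& system, const u8* host_pointer) {
        // Host pointer (obtained via GetPointerImpl) -> guest physical address.
        const PAddr p_address = system.DeviceMemory().GetRawPhysicalAddr(host_pointer);
        // Guest physical address -> device address the GPU-side caches are keyed on.
        return system.Host1x().MemoryManager().GetAddressFromPAddr(p_address);
    }

As the hunks show, HandleRasterizerDownload performs the full chain, while HandleRasterizerWrite stops at the physical address.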
diff --git a/src/core/memory.h b/src/core/memory.h
index dddfaf4a4..47ca6a35a 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -498,209 +498,4 @@ private:
498 std::unique_ptr<Impl> impl; 498 std::unique_ptr<Impl> impl;
499}; 499};
500 500
501enum GuestMemoryFlags : u32 {
502 Read = 1 << 0,
503 Write = 1 << 1,
504 Safe = 1 << 2,
505 Cached = 1 << 3,
506
507 SafeRead = Read | Safe,
508 SafeWrite = Write | Safe,
509 SafeReadWrite = SafeRead | SafeWrite,
510 SafeReadCachedWrite = SafeReadWrite | Cached,
511
512 UnsafeRead = Read,
513 UnsafeWrite = Write,
514 UnsafeReadWrite = UnsafeRead | UnsafeWrite,
515 UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
516};
517
518namespace {
519template <typename M, typename T, GuestMemoryFlags FLAGS>
520class GuestMemory {
521 using iterator = T*;
522 using const_iterator = const T*;
523 using value_type = T;
524 using element_type = T;
525 using iterator_category = std::contiguous_iterator_tag;
526
527public:
528 GuestMemory() = delete;
529 explicit GuestMemory(M& memory, u64 addr, std::size_t size,
530 Common::ScratchBuffer<T>* backup = nullptr)
531 : m_memory{memory}, m_addr{addr}, m_size{size} {
532 static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
533 if constexpr (FLAGS & GuestMemoryFlags::Read) {
534 Read(addr, size, backup);
535 }
536 }
537
538 ~GuestMemory() = default;
539
540 T* data() noexcept {
541 return m_data_span.data();
542 }
543
544 const T* data() const noexcept {
545 return m_data_span.data();
546 }
547
548 size_t size() const noexcept {
549 return m_size;
550 }
551
552 size_t size_bytes() const noexcept {
553 return this->size() * sizeof(T);
554 }
555
556 [[nodiscard]] T* begin() noexcept {
557 return this->data();
558 }
559
560 [[nodiscard]] const T* begin() const noexcept {
561 return this->data();
562 }
563
564 [[nodiscard]] T* end() noexcept {
565 return this->data() + this->size();
566 }
567
568 [[nodiscard]] const T* end() const noexcept {
569 return this->data() + this->size();
570 }
571
572 T& operator[](size_t index) noexcept {
573 return m_data_span[index];
574 }
575
576 const T& operator[](size_t index) const noexcept {
577 return m_data_span[index];
578 }
579
580 void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
581 m_addr = addr;
582 m_size = size;
583 m_addr_changed = true;
584 }
585
586 std::span<T> Read(u64 addr, std::size_t size,
587 Common::ScratchBuffer<T>* backup = nullptr) noexcept {
588 m_addr = addr;
589 m_size = size;
590 if (m_size == 0) {
591 m_is_data_copy = true;
592 return {};
593 }
594
595 if (this->TrySetSpan()) {
596 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
597 m_memory.FlushRegion(m_addr, this->size_bytes());
598 }
599 } else {
600 if (backup) {
601 backup->resize_destructive(this->size());
602 m_data_span = *backup;
603 } else {
604 m_data_copy.resize(this->size());
605 m_data_span = std::span(m_data_copy);
606 }
607 m_is_data_copy = true;
608 m_span_valid = true;
609 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
610 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
611 } else {
612 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
613 }
614 }
615 return m_data_span;
616 }
617
618 void Write(std::span<T> write_data) noexcept {
619 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
620 m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
621 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
622 m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
623 } else {
624 m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
625 }
626 }
627
628 bool TrySetSpan() noexcept {
629 if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
630 m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
631 m_span_valid = true;
632 return true;
633 }
634 return false;
635 }
636
637protected:
638 bool IsDataCopy() const noexcept {
639 return m_is_data_copy;
640 }
641
642 bool AddressChanged() const noexcept {
643 return m_addr_changed;
644 }
645
646 M& m_memory;
647 u64 m_addr{};
648 size_t m_size{};
649 std::span<T> m_data_span{};
650 std::vector<T> m_data_copy{};
651 bool m_span_valid{false};
652 bool m_is_data_copy{false};
653 bool m_addr_changed{false};
654};
655
656template <typename M, typename T, GuestMemoryFlags FLAGS>
657class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
658public:
659 GuestMemoryScoped() = delete;
660 explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
661 Common::ScratchBuffer<T>* backup = nullptr)
662 : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
663 if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
664 if (!this->TrySetSpan()) {
665 if (backup) {
666 this->m_data_span = *backup;
667 this->m_span_valid = true;
668 this->m_is_data_copy = true;
669 }
670 }
671 }
672 }
673
674 ~GuestMemoryScoped() {
675 if constexpr (FLAGS & GuestMemoryFlags::Write) {
676 if (this->size() == 0) [[unlikely]] {
677 return;
678 }
679
680 if (this->AddressChanged() || this->IsDataCopy()) {
681 ASSERT(this->m_span_valid);
682 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
683 this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
684 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
685 this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
686 } else {
687 this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
688 }
689 } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
690 (FLAGS & GuestMemoryFlags::Cached)) {
691 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
692 }
693 }
694 }
695};
696} // namespace
697
698template <typename T, GuestMemoryFlags FLAGS>
699using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
700template <typename T, GuestMemoryFlags FLAGS>
701using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
702template <typename T, GuestMemoryFlags FLAGS>
703using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
704template <typename T, GuestMemoryFlags FLAGS>
705using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
706} // namespace Core::Memory 501} // namespace Core::Memory
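For reference (not from the commit): the block removed above defines the GuestMemory/GuestMemoryScoped helpers, which this commit relocates; the buffer cache changes below consume them through video_core/guest_memory.h as Tegra::Memory::DeviceGuestMemoryScoped. A usage sketch based only on the interface shown above; the variable names are illustrative.

    // Usage sketch for the class removed above.
    Common::ScratchBuffer<u32> backup;
    Core::Memory::CpuGuestMemoryScoped<u32, Core::Memory::GuestMemoryFlags::SafeReadWrite>
        words(memory, addr, count, &backup);
    for (u32& w : words) {
        w ^= 0xFFFFFFFFu; // mutate through the span
    }
    // On destruction, the data is written back if a temporary copy was used, or the
    // region is invalidated if the span aliases guest memory directly, per the
    // GuestMemoryScoped destructor shown above.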
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2dda8ebc2..5ed0ad0ed 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -95,6 +95,7 @@ add_library(video_core STATIC
95 gpu.h 95 gpu.h
96 gpu_thread.cpp 96 gpu_thread.cpp
97 gpu_thread.h 97 gpu_thread.h
98 guest_memory.h
98 invalidation_accumulator.h 99 invalidation_accumulator.h
99 memory_manager.cpp 100 memory_manager.cpp
100 memory_manager.h 101 memory_manager.h
@@ -107,8 +108,6 @@ add_library(video_core STATIC
107 query_cache/query_stream.h 108 query_cache/query_stream.h
108 query_cache/types.h 109 query_cache/types.h
109 query_cache.h 110 query_cache.h
110 rasterizer_accelerated.cpp
111 rasterizer_accelerated.h
112 rasterizer_interface.h 111 rasterizer_interface.h
113 renderer_base.cpp 112 renderer_base.cpp
114 renderer_base.h 113 renderer_base.h
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 0bb3bf8ae..40e98e395 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -33,13 +33,12 @@ struct NullBufferParams {};
33 * 33 *
34 * The buffer size and address is forcefully aligned to CPU page boundaries. 34 * The buffer size and address is forcefully aligned to CPU page boundaries.
35 */ 35 */
36template <class RasterizerInterface>
37class BufferBase { 36class BufferBase {
38public: 37public:
39 static constexpr u64 BASE_PAGE_BITS = 16; 38 static constexpr u64 BASE_PAGE_BITS = 16;
40 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
41 40
42 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) 41 explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_)
43 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} 42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
44 43
45 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6d1fc3887..6fe2e8b93 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -8,16 +8,16 @@
8#include <numeric> 8#include <numeric>
9 9
10#include "video_core/buffer_cache/buffer_cache_base.h" 10#include "video_core/buffer_cache/buffer_cache_base.h"
11#include "video_core/guest_memory.h"
12#include "video_core/host1x/gpu_device_memory_manager.h"
11 13
12namespace VideoCommon { 14namespace VideoCommon {
13 15
14using Core::Memory::YUZU_PAGESIZE; 16using Core::Memory::YUZU_PAGESIZE;
15 17
16template <class P> 18template <class P>
17BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, 19BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
18 Core::Memory::Memory& cpu_memory_, Runtime& runtime_) 20 : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
19 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
20 rasterizer} {
21 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
22 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
23 common_ranges.clear(); 23 common_ranges.clear();
@@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
29 return; 29 return;
30 } 30 }
31 31
32 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 32 const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
33 const s64 min_spacing_expected = device_memory - 1_GiB; 33 const s64 min_spacing_expected = device_local_memory - 1_GiB;
34 const s64 min_spacing_critical = device_memory - 512_MiB; 34 const s64 min_spacing_critical = device_local_memory - 512_MiB;
35 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 35 const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
38 minimum_memory = static_cast<u64>( 38 minimum_memory = static_cast<u64>(
39 std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), 39 std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
40 DEFAULT_EXPECTED_MEMORY)); 40 DEFAULT_EXPECTED_MEMORY));
41 critical_memory = static_cast<u64>( 41 critical_memory = static_cast<u64>(
42 std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), 42 std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
43 DEFAULT_CRITICAL_MEMORY)); 43 DEFAULT_CRITICAL_MEMORY));
44} 44}
45 45
@@ -105,71 +105,72 @@ void BufferCache<P>::TickFrame() {
105} 105}
106 106
107template <class P> 107template <class P>
108void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 108void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
109 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 109 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
110 const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; 110 const IntervalType subtract_interval{device_addr, device_addr + size};
111 ClearDownload(subtract_interval); 111 ClearDownload(subtract_interval);
112 common_ranges.subtract(subtract_interval); 112 common_ranges.subtract(subtract_interval);
113 } 113 }
114 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); 114 memory_tracker.MarkRegionAsCpuModified(device_addr, size);
115} 115}
116 116
117template <class P> 117template <class P>
118void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 118void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) {
119 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 119 const bool is_dirty = IsRegionRegistered(device_addr, size);
120 if (!is_dirty) { 120 if (!is_dirty) {
121 return; 121 return;
122 } 122 }
123 VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); 123 DAddr aligned_start = Common::AlignDown(device_addr, YUZU_PAGESIZE);
124 VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); 124 DAddr aligned_end = Common::AlignUp(device_addr + size, YUZU_PAGESIZE);
125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
126 WriteMemory(cpu_addr, size); 126 WriteMemory(device_addr, size);
127 return; 127 return;
128 } 128 }
129 129
130 tmp_buffer.resize_destructive(size); 130 tmp_buffer.resize_destructive(size);
131 cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); 131 device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size);
132 132
133 InlineMemoryImplementation(cpu_addr, size, tmp_buffer); 133 InlineMemoryImplementation(device_addr, size, tmp_buffer);
134} 134}
135 135
136template <class P> 136template <class P>
137bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { 137bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) {
138 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 138 const bool is_dirty = IsRegionRegistered(device_addr, size);
139 if (!is_dirty) { 139 if (!is_dirty) {
140 return false; 140 return false;
141 } 141 }
142 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 142 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
143 return true; 143 return true;
144 } 144 }
145 WriteMemory(cpu_addr, size); 145 WriteMemory(device_addr, size);
146 return false; 146 return false;
147} 147}
148 148
149template <class P> 149template <class P>
150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, 150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr,
151 u64 size) { 151 u64 size) {
152 std::optional<VideoCore::RasterizerDownloadArea> area{}; 152 std::optional<VideoCore::RasterizerDownloadArea> area{};
153 area.emplace(); 153 area.emplace();
154 VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE); 154 DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::Memory::YUZU_PAGESIZE);
155 VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 155 DAddr device_addr_end_aligned =
156 area->start_address = cpu_addr_start_aligned; 156 Common::AlignUp(device_addr + size, Core::Memory::YUZU_PAGESIZE);
157 area->end_address = cpu_addr_end_aligned; 157 area->start_address = device_addr_start_aligned;
158 if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) { 158 area->end_address = device_addr_end_aligned;
159 if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
159 area->preemtive = true; 160 area->preemtive = true;
160 return area; 161 return area;
161 }; 162 };
162 area->preemtive = 163 area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
163 !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned); 164 device_addr_end_aligned - device_addr_start_aligned);
164 memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned, 165 memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
165 cpu_addr_end_aligned - cpu_addr_start_aligned); 166 device_addr_end_aligned - device_addr_start_aligned);
166 return area; 167 return area;
167} 168}
168 169
169template <class P> 170template <class P>
170void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 171void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
171 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 172 ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) {
172 DownloadBufferMemory(buffer, cpu_addr, size); 173 DownloadBufferMemory(buffer, device_addr, size);
173 }); 174 });
174} 175}
175 176
@@ -184,8 +185,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
184 185
185template <class P> 186template <class P>
186bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { 187bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
187 const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); 188 const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
188 const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); 189 const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
189 if (!cpu_src_address || !cpu_dest_address) { 190 if (!cpu_src_address || !cpu_dest_address) {
190 return false; 191 return false;
191 } 192 }
@@ -216,10 +217,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
216 }}; 217 }};
217 218
218 boost::container::small_vector<IntervalType, 4> tmp_intervals; 219 boost::container::small_vector<IntervalType, 4> tmp_intervals;
219 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 220 auto mirror = [&](DAddr base_address, DAddr base_address_end) {
220 const u64 size = base_address_end - base_address; 221 const u64 size = base_address_end - base_address;
221 const VAddr diff = base_address - *cpu_src_address; 222 const DAddr diff = base_address - *cpu_src_address;
222 const VAddr new_base_address = *cpu_dest_address + diff; 223 const DAddr new_base_address = *cpu_dest_address + diff;
223 const IntervalType add_interval{new_base_address, new_base_address + size}; 224 const IntervalType add_interval{new_base_address, new_base_address + size};
224 tmp_intervals.push_back(add_interval); 225 tmp_intervals.push_back(add_interval);
225 uncommitted_ranges.add(add_interval); 226 uncommitted_ranges.add(add_interval);
@@ -239,15 +240,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
239 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 240 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
240 } 241 }
241 242
242 Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( 243 Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
243 cpu_memory, *cpu_src_address, amount, &tmp_buffer); 244 device_memory, *cpu_src_address, amount, &tmp_buffer);
244 tmp.SetAddressAndSize(*cpu_dest_address, amount); 245 tmp.SetAddressAndSize(*cpu_dest_address, amount);
245 return true; 246 return true;
246} 247}
247 248
248template <class P> 249template <class P>
249bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { 250bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
250 const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); 251 const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
251 if (!cpu_dst_address) { 252 if (!cpu_dst_address) {
252 return false; 253 return false;
253 } 254 }
@@ -273,23 +274,23 @@ template <class P>
273std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, 274std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
274 ObtainBufferSynchronize sync_info, 275 ObtainBufferSynchronize sync_info,
275 ObtainBufferOperation post_op) { 276 ObtainBufferOperation post_op) {
276 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 277 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
277 if (!cpu_addr) { 278 if (!device_addr) {
278 return {&slot_buffers[NULL_BUFFER_ID], 0}; 279 return {&slot_buffers[NULL_BUFFER_ID], 0};
279 } 280 }
280 return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); 281 return ObtainCPUBuffer(*device_addr, size, sync_info, post_op);
281} 282}
282 283
283template <class P> 284template <class P>
284std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( 285std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
285 VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { 286 DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
286 const BufferId buffer_id = FindBuffer(cpu_addr, size); 287 const BufferId buffer_id = FindBuffer(device_addr, size);
287 Buffer& buffer = slot_buffers[buffer_id]; 288 Buffer& buffer = slot_buffers[buffer_id];
288 289
289 // synchronize op 290 // synchronize op
290 switch (sync_info) { 291 switch (sync_info) {
291 case ObtainBufferSynchronize::FullSynchronize: 292 case ObtainBufferSynchronize::FullSynchronize:
292 SynchronizeBuffer(buffer, cpu_addr, size); 293 SynchronizeBuffer(buffer, device_addr, size);
293 break; 294 break;
294 default: 295 default:
295 break; 296 break;
@@ -297,12 +298,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
297 298
298 switch (post_op) { 299 switch (post_op) {
299 case ObtainBufferOperation::MarkAsWritten: 300 case ObtainBufferOperation::MarkAsWritten:
300 MarkWrittenBuffer(buffer_id, cpu_addr, size); 301 MarkWrittenBuffer(buffer_id, device_addr, size);
301 break; 302 break;
302 case ObtainBufferOperation::DiscardWrite: { 303 case ObtainBufferOperation::DiscardWrite: {
303 VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); 304 DAddr device_addr_start = Common::AlignDown(device_addr, 64);
304 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); 305 DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
305 IntervalType interval{cpu_addr_start, cpu_addr_end}; 306 IntervalType interval{device_addr_start, device_addr_end};
306 ClearDownload(interval); 307 ClearDownload(interval);
307 common_ranges.subtract(interval); 308 common_ranges.subtract(interval);
308 break; 309 break;
@@ -311,15 +312,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
311 break; 312 break;
312 } 313 }
313 314
314 return {&buffer, buffer.Offset(cpu_addr)}; 315 return {&buffer, buffer.Offset(device_addr)};
315} 316}
316 317
317template <class P> 318template <class P>
318void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 319void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
319 u32 size) { 320 u32 size) {
320 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 321 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
321 const Binding binding{ 322 const Binding binding{
322 .cpu_addr = *cpu_addr, 323 .device_addr = *device_addr,
323 .size = size, 324 .size = size,
324 .buffer_id = BufferId{}, 325 .buffer_id = BufferId{},
325 }; 326 };
@@ -555,16 +556,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
555 for (const IntervalSet& intervals : committed_ranges) { 556 for (const IntervalSet& intervals : committed_ranges) {
556 for (auto& interval : intervals) { 557 for (auto& interval : intervals) {
557 const std::size_t size = interval.upper() - interval.lower(); 558 const std::size_t size = interval.upper() - interval.lower();
558 const VAddr cpu_addr = interval.lower(); 559 const DAddr device_addr = interval.lower();
559 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 560 ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
560 const VAddr buffer_start = buffer.CpuAddr(); 561 const DAddr buffer_start = buffer.CpuAddr();
561 const VAddr buffer_end = buffer_start + buffer.SizeBytes(); 562 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
562 const VAddr new_start = std::max(buffer_start, cpu_addr); 563 const DAddr new_start = std::max(buffer_start, device_addr);
563 const VAddr new_end = std::min(buffer_end, cpu_addr + size); 564 const DAddr new_end = std::min(buffer_end, device_addr + size);
564 memory_tracker.ForEachDownloadRange( 565 memory_tracker.ForEachDownloadRange(
565 new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { 566 new_start, new_end - new_start, false,
566 const VAddr buffer_addr = buffer.CpuAddr(); 567 [&](u64 device_addr_out, u64 range_size) {
567 const auto add_download = [&](VAddr start, VAddr end) { 568 const DAddr buffer_addr = buffer.CpuAddr();
569 const auto add_download = [&](DAddr start, DAddr end) {
568 const u64 new_offset = start - buffer_addr; 570 const u64 new_offset = start - buffer_addr;
569 const u64 new_size = end - start; 571 const u64 new_size = end - start;
570 downloads.push_back({ 572 downloads.push_back({
@@ -582,7 +584,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
582 largest_copy = std::max(largest_copy, new_size); 584 largest_copy = std::max(largest_copy, new_size);
583 }; 585 };
584 586
585 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); 587 ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
586 }); 588 });
587 }); 589 });
588 } 590 }
@@ -605,8 +607,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
605 BufferCopy second_copy{copy}; 607 BufferCopy second_copy{copy};
606 Buffer& buffer = slot_buffers[buffer_id]; 608 Buffer& buffer = slot_buffers[buffer_id];
607 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; 609 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
608 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); 610 DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
609 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; 611 const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
610 async_downloads += std::make_pair(base_interval, 1); 612 async_downloads += std::make_pair(base_interval, 1);
611 buffer.MarkUsage(copy.src_offset, copy.size); 613 buffer.MarkUsage(copy.src_offset, copy.size);
612 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); 614 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
@@ -635,11 +637,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
635 runtime.Finish(); 637 runtime.Finish();
636 for (const auto& [copy, buffer_id] : downloads) { 638 for (const auto& [copy, buffer_id] : downloads) {
637 const Buffer& buffer = slot_buffers[buffer_id]; 639 const Buffer& buffer = slot_buffers[buffer_id];
638 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 640 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
639 // Undo the modified offset 641 // Undo the modified offset
640 const u64 dst_offset = copy.dst_offset - download_staging.offset; 642 const u64 dst_offset = copy.dst_offset - download_staging.offset;
641 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 643 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
642 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 644 device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
643 } 645 }
644 } else { 646 } else {
645 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 647 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
@@ -647,8 +649,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
647 Buffer& buffer = slot_buffers[buffer_id]; 649 Buffer& buffer = slot_buffers[buffer_id];
648 buffer.ImmediateDownload(copy.src_offset, 650 buffer.ImmediateDownload(copy.src_offset,
649 immediate_buffer.subspan(0, copy.size)); 651 immediate_buffer.subspan(0, copy.size));
650 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 652 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
651 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 653 device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
652 } 654 }
653 } 655 }
654 } 656 }
@@ -681,19 +683,19 @@ void BufferCache<P>::PopAsyncBuffers() {
681 u8* base = async_buffer->mapped_span.data(); 683 u8* base = async_buffer->mapped_span.data();
682 const size_t base_offset = async_buffer->offset; 684 const size_t base_offset = async_buffer->offset;
683 for (const auto& copy : downloads) { 685 for (const auto& copy : downloads) {
684 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); 686 const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
685 const u64 dst_offset = copy.dst_offset - base_offset; 687 const u64 dst_offset = copy.dst_offset - base_offset;
686 const u8* read_mapped_memory = base + dst_offset; 688 const u8* read_mapped_memory = base + dst_offset;
687 ForEachInOverlapCounter( 689 ForEachInOverlapCounter(
688 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { 690 async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
689 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], 691 device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
690 end - start); 692 end - start);
691 if (count == 1) { 693 if (count == 1) {
692 const IntervalType base_interval{start, end}; 694 const IntervalType base_interval{start, end};
693 common_ranges.subtract(base_interval); 695 common_ranges.subtract(base_interval);
694 } 696 }
695 }); 697 });
696 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; 698 const IntervalType subtract_interval{device_addr, device_addr + copy.size};
697 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); 699 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
698 } 700 }
699 async_buffers_death_ring.emplace_back(*async_buffer); 701 async_buffers_death_ring.emplace_back(*async_buffer);
@@ -703,15 +705,15 @@ void BufferCache<P>::PopAsyncBuffers() {
703} 705}
704 706
705template <class P> 707template <class P>
706bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 708bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
707 bool is_dirty = false; 709 bool is_dirty = false;
708 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); 710 ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
709 return is_dirty; 711 return is_dirty;
710} 712}
711 713
712template <class P> 714template <class P>
713bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 715bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) {
714 const VAddr end_addr = addr + size; 716 const DAddr end_addr = addr + size;
715 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); 717 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
716 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { 718 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
717 const BufferId buffer_id = page_table[page]; 719 const BufferId buffer_id = page_table[page];
@@ -720,8 +722,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
720 continue; 722 continue;
721 } 723 }
722 Buffer& buffer = slot_buffers[buffer_id]; 724 Buffer& buffer = slot_buffers[buffer_id];
723 const VAddr buf_start_addr = buffer.CpuAddr(); 725 const DAddr buf_start_addr = buffer.CpuAddr();
724 const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); 726 const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
725 if (buf_start_addr < end_addr && addr < buf_end_addr) { 727 if (buf_start_addr < end_addr && addr < buf_end_addr) {
726 return true; 728 return true;
727 } 729 }
@@ -731,7 +733,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
731} 733}
732 734
733template <class P> 735template <class P>
734bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 736bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) {
735 return memory_tracker.IsRegionCpuModified(addr, size); 737 return memory_tracker.IsRegionCpuModified(addr, size);
736} 738}
737 739
@@ -739,7 +741,7 @@ template <class P>
739void BufferCache<P>::BindHostIndexBuffer() { 741void BufferCache<P>::BindHostIndexBuffer() {
740 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; 742 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
741 TouchBuffer(buffer, channel_state->index_buffer.buffer_id); 743 TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
742 const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); 744 const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
743 const u32 size = channel_state->index_buffer.size; 745 const u32 size = channel_state->index_buffer.size;
744 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 746 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
745 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { 747 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
@@ -754,7 +756,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
754 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); 756 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
755 } 757 }
756 } else { 758 } else {
757 SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); 759 SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size);
758 } 760 }
759 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { 761 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
760 const u32 new_offset = 762 const u32 new_offset =
@@ -777,7 +779,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
777 const Binding& binding = channel_state->vertex_buffers[index]; 779 const Binding& binding = channel_state->vertex_buffers[index];
778 Buffer& buffer = slot_buffers[binding.buffer_id]; 780 Buffer& buffer = slot_buffers[binding.buffer_id];
779 TouchBuffer(buffer, binding.buffer_id); 781 TouchBuffer(buffer, binding.buffer_id);
780 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 782 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
781 if (!flags[Dirty::VertexBuffer0 + index]) { 783 if (!flags[Dirty::VertexBuffer0 + index]) {
782 continue; 784 continue;
783 } 785 }
@@ -797,7 +799,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
797 Buffer& buffer = slot_buffers[binding.buffer_id]; 799 Buffer& buffer = slot_buffers[binding.buffer_id];
798 800
799 const u32 stride = maxwell3d->regs.vertex_streams[index].stride; 801 const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
800 const u32 offset = buffer.Offset(binding.cpu_addr); 802 const u32 offset = buffer.Offset(binding.device_addr);
801 buffer.MarkUsage(offset, binding.size); 803 buffer.MarkUsage(offset, binding.size);
802 804
803 host_bindings.buffers.push_back(&buffer); 805 host_bindings.buffers.push_back(&buffer);
@@ -814,7 +816,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
814 const auto bind_buffer = [this](const Binding& binding) { 816 const auto bind_buffer = [this](const Binding& binding) {
815 Buffer& buffer = slot_buffers[binding.buffer_id]; 817 Buffer& buffer = slot_buffers[binding.buffer_id];
816 TouchBuffer(buffer, binding.buffer_id); 818 TouchBuffer(buffer, binding.buffer_id);
817 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 819 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
818 }; 820 };
819 if (current_draw_indirect->include_count) { 821 if (current_draw_indirect->include_count) {
820 bind_buffer(channel_state->count_buffer_binding); 822 bind_buffer(channel_state->count_buffer_binding);
@@ -842,13 +844,13 @@ template <class P>
842void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, 844void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
843 bool needs_bind) { 845 bool needs_bind) {
844 const Binding& binding = channel_state->uniform_buffers[stage][index]; 846 const Binding& binding = channel_state->uniform_buffers[stage][index];
845 const VAddr cpu_addr = binding.cpu_addr; 847 const DAddr device_addr = binding.device_addr;
846 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); 848 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
847 Buffer& buffer = slot_buffers[binding.buffer_id]; 849 Buffer& buffer = slot_buffers[binding.buffer_id];
848 TouchBuffer(buffer, binding.buffer_id); 850 TouchBuffer(buffer, binding.buffer_id);
849 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 851 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
850 size <= channel_state->uniform_buffer_skip_cache_size && 852 size <= channel_state->uniform_buffer_skip_cache_size &&
851 !memory_tracker.IsRegionGpuModified(cpu_addr, size); 853 !memory_tracker.IsRegionGpuModified(device_addr, size);
852 if (use_fast_buffer) { 854 if (use_fast_buffer) {
853 if constexpr (IS_OPENGL) { 855 if constexpr (IS_OPENGL) {
854 if (runtime.HasFastBufferSubData()) { 856 if (runtime.HasFastBufferSubData()) {
@@ -862,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
862 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; 864 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
863 runtime.BindFastUniformBuffer(stage, binding_index, size); 865 runtime.BindFastUniformBuffer(stage, binding_index, size);
864 } 866 }
865 const auto span = ImmediateBufferWithData(cpu_addr, size); 867 const auto span = ImmediateBufferWithData(device_addr, size);
866 runtime.PushFastUniformBuffer(stage, binding_index, span); 868 runtime.PushFastUniformBuffer(stage, binding_index, span);
867 return; 869 return;
868 } 870 }
@@ -873,11 +875,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
873 } 875 }
874 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 876 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
875 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 877 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
876 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 878 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
877 return; 879 return;
878 } 880 }
879 // Classic cached path 881 // Classic cached path
880 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); 882 const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size);
881 if (sync_cached) { 883 if (sync_cached) {
882 ++channel_state->uniform_cache_hits[0]; 884 ++channel_state->uniform_cache_hits[0];
883 } 885 }
@@ -892,7 +894,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
892 if (!needs_bind) { 894 if (!needs_bind) {
893 return; 895 return;
894 } 896 }
895 const u32 offset = buffer.Offset(cpu_addr); 897 const u32 offset = buffer.Offset(device_addr);
896 if constexpr (IS_OPENGL) { 898 if constexpr (IS_OPENGL) {
897 // Fast buffer will be unbound 899 // Fast buffer will be unbound
898 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); 900 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
@@ -920,14 +922,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
920 Buffer& buffer = slot_buffers[binding.buffer_id]; 922 Buffer& buffer = slot_buffers[binding.buffer_id];
921 TouchBuffer(buffer, binding.buffer_id); 923 TouchBuffer(buffer, binding.buffer_id);
922 const u32 size = binding.size; 924 const u32 size = binding.size;
923 SynchronizeBuffer(buffer, binding.cpu_addr, size); 925 SynchronizeBuffer(buffer, binding.device_addr, size);
924 926
925 const u32 offset = buffer.Offset(binding.cpu_addr); 927 const u32 offset = buffer.Offset(binding.device_addr);
926 buffer.MarkUsage(offset, size); 928 buffer.MarkUsage(offset, size);
927 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; 929 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
928 930
929 if (is_written) { 931 if (is_written) {
930 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 932 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
931 } 933 }
932 934
933 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 935 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -945,14 +947,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
945 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; 947 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
946 Buffer& buffer = slot_buffers[binding.buffer_id]; 948 Buffer& buffer = slot_buffers[binding.buffer_id];
947 const u32 size = binding.size; 949 const u32 size = binding.size;
948 SynchronizeBuffer(buffer, binding.cpu_addr, size); 950 SynchronizeBuffer(buffer, binding.device_addr, size);
949 951
950 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; 952 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
951 if (is_written) { 953 if (is_written) {
952 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 954 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
953 } 955 }
954 956
955 const u32 offset = buffer.Offset(binding.cpu_addr); 957 const u32 offset = buffer.Offset(binding.device_addr);
956 const PixelFormat format = binding.format; 958 const PixelFormat format = binding.format;
957 buffer.MarkUsage(offset, size); 959 buffer.MarkUsage(offset, size);
958 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 960 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -982,11 +984,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
982 Buffer& buffer = slot_buffers[binding.buffer_id]; 984 Buffer& buffer = slot_buffers[binding.buffer_id];
983 TouchBuffer(buffer, binding.buffer_id); 985 TouchBuffer(buffer, binding.buffer_id);
984 const u32 size = binding.size; 986 const u32 size = binding.size;
985 SynchronizeBuffer(buffer, binding.cpu_addr, size); 987 SynchronizeBuffer(buffer, binding.device_addr, size);
986 988
987 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 989 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
988 990
989 const u32 offset = buffer.Offset(binding.cpu_addr); 991 const u32 offset = buffer.Offset(binding.device_addr);
990 buffer.MarkUsage(offset, size); 992 buffer.MarkUsage(offset, size);
991 host_bindings.buffers.push_back(&buffer); 993 host_bindings.buffers.push_back(&buffer);
992 host_bindings.offsets.push_back(offset); 994 host_bindings.offsets.push_back(offset);
@@ -1011,9 +1013,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1011 TouchBuffer(buffer, binding.buffer_id); 1013 TouchBuffer(buffer, binding.buffer_id);
1012 const u32 size = 1014 const u32 size =
1013 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); 1015 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
1014 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1016 SynchronizeBuffer(buffer, binding.device_addr, size);
1015 1017
1016 const u32 offset = buffer.Offset(binding.cpu_addr); 1018 const u32 offset = buffer.Offset(binding.device_addr);
1017 buffer.MarkUsage(offset, size); 1019 buffer.MarkUsage(offset, size);
1018 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1020 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
1019 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); 1021 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
@@ -1032,15 +1034,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1032 Buffer& buffer = slot_buffers[binding.buffer_id]; 1034 Buffer& buffer = slot_buffers[binding.buffer_id];
1033 TouchBuffer(buffer, binding.buffer_id); 1035 TouchBuffer(buffer, binding.buffer_id);
1034 const u32 size = binding.size; 1036 const u32 size = binding.size;
1035 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1037 SynchronizeBuffer(buffer, binding.device_addr, size);
1036 1038
1037 const u32 offset = buffer.Offset(binding.cpu_addr); 1039 const u32 offset = buffer.Offset(binding.device_addr);
1038 buffer.MarkUsage(offset, size); 1040 buffer.MarkUsage(offset, size);
1039 const bool is_written = 1041 const bool is_written =
1040 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; 1042 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
1041 1043
1042 if (is_written) { 1044 if (is_written) {
1043 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1045 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1044 } 1046 }
1045 1047
1046 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 1048 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -1058,15 +1060,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
1058 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; 1060 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
1059 Buffer& buffer = slot_buffers[binding.buffer_id]; 1061 Buffer& buffer = slot_buffers[binding.buffer_id];
1060 const u32 size = binding.size; 1062 const u32 size = binding.size;
1061 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1063 SynchronizeBuffer(buffer, binding.device_addr, size);
1062 1064
1063 const bool is_written = 1065 const bool is_written =
1064 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; 1066 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
1065 if (is_written) { 1067 if (is_written) {
1066 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1068 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1067 } 1069 }
1068 1070
1069 const u32 offset = buffer.Offset(binding.cpu_addr); 1071 const u32 offset = buffer.Offset(binding.device_addr);
1070 const PixelFormat format = binding.format; 1072 const PixelFormat format = binding.format;
1071 buffer.MarkUsage(offset, size); 1073 buffer.MarkUsage(offset, size);
1072 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 1074 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1131,7 +1133,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
1131 inline_buffer_id = CreateBuffer(0, buffer_size); 1133 inline_buffer_id = CreateBuffer(0, buffer_size);
1132 } 1134 }
1133 channel_state->index_buffer = Binding{ 1135 channel_state->index_buffer = Binding{
1134 .cpu_addr = 0, 1136 .device_addr = 0,
1135 .size = inline_index_size, 1137 .size = inline_index_size,
1136 .buffer_id = inline_buffer_id, 1138 .buffer_id = inline_buffer_id,
1137 }; 1139 };
@@ -1140,19 +1142,19 @@ void BufferCache<P>::UpdateIndexBuffer() {
1140 1142
1141 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); 1143 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1142 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); 1144 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1143 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1145 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1144 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1146 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1145 const u32 draw_size = 1147 const u32 draw_size =
1146 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); 1148 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1147 const u32 size = std::min(address_size, draw_size); 1149 const u32 size = std::min(address_size, draw_size);
1148 if (size == 0 || !cpu_addr) { 1150 if (size == 0 || !device_addr) {
1149 channel_state->index_buffer = NULL_BINDING; 1151 channel_state->index_buffer = NULL_BINDING;
1150 return; 1152 return;
1151 } 1153 }
1152 channel_state->index_buffer = Binding{ 1154 channel_state->index_buffer = Binding{
1153 .cpu_addr = *cpu_addr, 1155 .device_addr = *device_addr,
1154 .size = size, 1156 .size = size,
1155 .buffer_id = FindBuffer(*cpu_addr, size), 1157 .buffer_id = FindBuffer(*device_addr, size),
1156 }; 1158 };
1157} 1159}
1158 1160
@@ -1178,19 +1180,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1178 const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; 1180 const auto& limit = maxwell3d->regs.vertex_stream_limits[index];
1179 const GPUVAddr gpu_addr_begin = array.Address(); 1181 const GPUVAddr gpu_addr_begin = array.Address();
1180 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1182 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1181 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1183 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1182 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1184 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1183 u32 size = address_size; // TODO: Analyze stride and number of vertices 1185 u32 size = address_size; // TODO: Analyze stride and number of vertices
1184 if (array.enable == 0 || size == 0 || !cpu_addr) { 1186 if (array.enable == 0 || size == 0 || !device_addr) {
1185 channel_state->vertex_buffers[index] = NULL_BINDING; 1187 channel_state->vertex_buffers[index] = NULL_BINDING;
1186 return; 1188 return;
1187 } 1189 }
1188 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1190 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1189 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); 1191 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1190 } 1192 }
1191 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1193 const BufferId buffer_id = FindBuffer(*device_addr, size);
1192 channel_state->vertex_buffers[index] = Binding{ 1194 channel_state->vertex_buffers[index] = Binding{
1193 .cpu_addr = *cpu_addr, 1195 .device_addr = *device_addr,
1194 .size = size, 1196 .size = size,
1195 .buffer_id = buffer_id, 1197 .buffer_id = buffer_id,
1196 }; 1198 };
@@ -1199,15 +1201,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1199template <class P> 1201template <class P>
1200void BufferCache<P>::UpdateDrawIndirect() { 1202void BufferCache<P>::UpdateDrawIndirect() {
1201 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { 1203 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
1202 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1204 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1203 if (!cpu_addr) { 1205 if (!device_addr) {
1204 binding = NULL_BINDING; 1206 binding = NULL_BINDING;
1205 return; 1207 return;
1206 } 1208 }
1207 binding = Binding{ 1209 binding = Binding{
1208 .cpu_addr = *cpu_addr, 1210 .device_addr = *device_addr,
1209 .size = static_cast<u32>(size), 1211 .size = static_cast<u32>(size),
1210 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), 1212 .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)),
1211 }; 1213 };
1212 }; 1214 };
1213 if (current_draw_indirect->include_count) { 1215 if (current_draw_indirect->include_count) {
@@ -1231,7 +1233,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1231 channel_state->dirty_uniform_buffers[stage] |= 1U << index; 1233 channel_state->dirty_uniform_buffers[stage] |= 1U << index;
1232 } 1234 }
1233 // Resolve buffer 1235 // Resolve buffer
1234 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1236 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1235 }); 1237 });
1236} 1238}
1237 1239
@@ -1240,7 +1242,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1240 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { 1242 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
1241 // Resolve buffer 1243 // Resolve buffer
1242 Binding& binding = channel_state->storage_buffers[stage][index]; 1244 Binding& binding = channel_state->storage_buffers[stage][index];
1243 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1245 const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size);
1244 binding.buffer_id = buffer_id; 1246 binding.buffer_id = buffer_id;
1245 }); 1247 });
1246} 1248}
@@ -1249,7 +1251,7 @@ template <class P>
1249void BufferCache<P>::UpdateTextureBuffers(size_t stage) { 1251void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1250 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { 1252 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
1251 Binding& binding = channel_state->texture_buffers[stage][index]; 1253 Binding& binding = channel_state->texture_buffers[stage][index];
1252 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1254 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1253 }); 1255 });
1254} 1256}
1255 1257
@@ -1268,14 +1270,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1268 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; 1270 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index];
1269 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; 1271 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset;
1270 const u32 size = binding.size; 1272 const u32 size = binding.size;
1271 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1273 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1272 if (binding.enable == 0 || size == 0 || !cpu_addr) { 1274 if (binding.enable == 0 || size == 0 || !device_addr) {
1273 channel_state->transform_feedback_buffers[index] = NULL_BINDING; 1275 channel_state->transform_feedback_buffers[index] = NULL_BINDING;
1274 return; 1276 return;
1275 } 1277 }
1276 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1278 const BufferId buffer_id = FindBuffer(*device_addr, size);
1277 channel_state->transform_feedback_buffers[index] = Binding{ 1279 channel_state->transform_feedback_buffers[index] = Binding{
1278 .cpu_addr = *cpu_addr, 1280 .device_addr = *device_addr,
1279 .size = size, 1281 .size = size,
1280 .buffer_id = buffer_id, 1282 .buffer_id = buffer_id,
1281 }; 1283 };
@@ -1289,13 +1291,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
1289 const auto& launch_desc = kepler_compute->launch_description; 1291 const auto& launch_desc = kepler_compute->launch_description;
1290 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { 1292 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
1291 const auto& cbuf = launch_desc.const_buffer_config[index]; 1293 const auto& cbuf = launch_desc.const_buffer_config[index];
1292 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); 1294 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
1293 if (cpu_addr) { 1295 if (device_addr) {
1294 binding.cpu_addr = *cpu_addr; 1296 binding.device_addr = *device_addr;
1295 binding.size = cbuf.size; 1297 binding.size = cbuf.size;
1296 } 1298 }
1297 } 1299 }
1298 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1300 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1299 }); 1301 });
1300} 1302}
1301 1303
@@ -1304,7 +1306,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1304 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { 1306 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
1305 // Resolve buffer 1307 // Resolve buffer
1306 Binding& binding = channel_state->compute_storage_buffers[index]; 1308 Binding& binding = channel_state->compute_storage_buffers[index];
1307 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1309 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1308 }); 1310 });
1309} 1311}
1310 1312
@@ -1312,45 +1314,63 @@ template <class P>
1312void BufferCache<P>::UpdateComputeTextureBuffers() { 1314void BufferCache<P>::UpdateComputeTextureBuffers() {
1313 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { 1315 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
1314 Binding& binding = channel_state->compute_texture_buffers[index]; 1316 Binding& binding = channel_state->compute_texture_buffers[index];
1315 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1317 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1316 }); 1318 });
1317} 1319}
1318 1320
1319template <class P> 1321template <class P>
1320void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1322void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
1321 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); 1323 memory_tracker.MarkRegionAsGpuModified(device_addr, size);
1322 1324
1323 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1325 const IntervalType base_interval{device_addr, device_addr + size};
1324 common_ranges.add(base_interval); 1326 common_ranges.add(base_interval);
1325 uncommitted_ranges.add(base_interval); 1327 uncommitted_ranges.add(base_interval);
1326} 1328}
1327 1329
1328template <class P> 1330template <class P>
1329BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { 1331BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) {
1330 if (cpu_addr == 0) { 1332 if (device_addr == 0) {
1331 return NULL_BUFFER_ID; 1333 return NULL_BUFFER_ID;
1332 } 1334 }
1333 const u64 page = cpu_addr >> CACHING_PAGEBITS; 1335 const u64 page = device_addr >> CACHING_PAGEBITS;
1334 const BufferId buffer_id = page_table[page]; 1336 const BufferId buffer_id = page_table[page];
1335 if (!buffer_id) { 1337 if (!buffer_id) {
1336 return CreateBuffer(cpu_addr, size); 1338 return CreateBuffer(device_addr, size);
1337 } 1339 }
1338 const Buffer& buffer = slot_buffers[buffer_id]; 1340 const Buffer& buffer = slot_buffers[buffer_id];
1339 if (buffer.IsInBounds(cpu_addr, size)) { 1341 if (buffer.IsInBounds(device_addr, size)) {
1340 return buffer_id; 1342 return buffer_id;
1341 } 1343 }
1342 return CreateBuffer(cpu_addr, size); 1344 return CreateBuffer(device_addr, size);
1343} 1345}
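
For reference, the FindBuffer change above keeps the same flat page-table lookup, only keyed by DAddr pages now: shift the device address right by CACHING_PAGEBITS, fetch the owning BufferId, and fall back to buffer creation when there is no entry or the existing buffer is too small. A minimal standalone sketch of that lookup, using invented stand-ins (kPageBits, SimpleBuffer, and a map instead of the real slot vector):

#include <cstdint>
#include <optional>
#include <unordered_map>

// Hypothetical stand-ins for CACHING_PAGEBITS and the slot vector of buffers.
constexpr std::uint64_t kPageBits = 16;

struct SimpleBuffer {
    std::uint64_t base = 0;
    std::uint64_t size = 0;
    bool IsInBounds(std::uint64_t addr, std::uint64_t len) const {
        return addr >= base && addr + len <= base + size;
    }
};

struct SimpleBufferCache {
    std::unordered_map<std::uint64_t, std::uint32_t> page_table; // page -> buffer id
    std::unordered_map<std::uint32_t, SimpleBuffer> buffers;

    // Returns the id of a buffer covering [device_addr, device_addr + size),
    // or nullopt where the real cache would call CreateBuffer instead.
    std::optional<std::uint32_t> Find(std::uint64_t device_addr, std::uint64_t size) const {
        const auto it = page_table.find(device_addr >> kPageBits);
        if (it == page_table.end()) {
            return std::nullopt; // no buffer registered on this page
        }
        const SimpleBuffer& buffer = buffers.at(it->second);
        if (!buffer.IsInBounds(device_addr, size)) {
            return std::nullopt; // existing buffer too small, would be re-created
        }
        return it->second;
    }
};
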
1344 1346
1345template <class P> 1347template <class P>
1346typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1348typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr,
1347 u32 wanted_size) { 1349 u32 wanted_size) {
1348 static constexpr int STREAM_LEAP_THRESHOLD = 16; 1350 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1349 boost::container::small_vector<BufferId, 16> overlap_ids; 1351 boost::container::small_vector<BufferId, 16> overlap_ids;
1350 VAddr begin = cpu_addr; 1352 DAddr begin = device_addr;
1351 VAddr end = cpu_addr + wanted_size; 1353 DAddr end = device_addr + wanted_size;
1352 int stream_score = 0; 1354 int stream_score = 0;
1353 bool has_stream_leap = false; 1355 bool has_stream_leap = false;
1356 auto expand_begin = [&](DAddr add_value) {
1357 static constexpr DAddr min_page = CACHING_PAGESIZE + Core::Memory::YUZU_PAGESIZE;
1358 if (add_value > begin - min_page ) {
1359 begin = min_page;
1360 device_addr = Core::Memory::YUZU_PAGESIZE;
1361 return;
1362 }
1363 begin -= add_value;
1364 device_addr = begin - CACHING_PAGESIZE;
1365 };
1366 auto expand_end = [&](DAddr add_value) {
1367 static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS;
1368 if (add_value > max_page - end ) {
1369 end = max_page;
1370 return;
1371 }
1372 end += add_value;
1373 };
1354 if (begin == 0) { 1374 if (begin == 0) {
1355 return OverlapResult{ 1375 return OverlapResult{
1356 .ids = std::move(overlap_ids), 1376 .ids = std::move(overlap_ids),
@@ -1359,9 +1379,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1359 .has_stream_leap = has_stream_leap, 1379 .has_stream_leap = has_stream_leap,
1360 }; 1380 };
1361 } 1381 }
1362 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); 1382 for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1363 cpu_addr += CACHING_PAGESIZE) { 1383 device_addr += CACHING_PAGESIZE) {
1364 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; 1384 const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS];
1365 if (!overlap_id) { 1385 if (!overlap_id) {
1366 continue; 1386 continue;
1367 } 1387 }
@@ -1371,12 +1391,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1371 } 1391 }
1372 overlap_ids.push_back(overlap_id); 1392 overlap_ids.push_back(overlap_id);
1373 overlap.Pick(); 1393 overlap.Pick();
1374 const VAddr overlap_cpu_addr = overlap.CpuAddr(); 1394 const DAddr overlap_device_addr = overlap.CpuAddr();
1375 const bool expands_left = overlap_cpu_addr < begin; 1395 const bool expands_left = overlap_device_addr < begin;
1376 if (expands_left) { 1396 if (expands_left) {
1377 begin = overlap_cpu_addr; 1397 begin = overlap_device_addr;
1378 } 1398 }
1379 const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); 1399 const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes();
1380 const bool expands_right = overlap_end > end; 1400 const bool expands_right = overlap_end > end;
1381 if (overlap_end > end) { 1401 if (overlap_end > end) {
1382 end = overlap_end; 1402 end = overlap_end;
@@ -1387,11 +1407,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1387 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1407 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1388 has_stream_leap = true; 1408 has_stream_leap = true;
1389 if (expands_right) { 1409 if (expands_right) {
1390 begin -= CACHING_PAGESIZE * 256; 1410 expand_begin(CACHING_PAGESIZE * 128);
1391 cpu_addr = begin - CACHING_PAGESIZE;
1392 } 1411 }
1393 if (expands_left) { 1412 if (expands_left) {
1394 end += CACHING_PAGESIZE * 256; 1413 expand_end(CACHING_PAGESIZE * 128);
1395 } 1414 }
1396 } 1415 }
1397 } 1416 }
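
The expand_begin/expand_end helpers introduced above replace the old unconditional begin -= CACHING_PAGESIZE * 256 and end += CACHING_PAGESIZE * 256 stream-leap growth with a clamped version that cannot slide below the first caching page or past the 34-bit device address space. A small sketch of the clamping idea, with placeholder constants for CACHING_PAGESIZE, YUZU_PAGESIZE and the AS_BITS limit; the real lambdas additionally rewind device_addr so the scan loop revisits the newly covered pages:

#include <cstdint>

// Placeholder constants; the real values come from the buffer cache and the
// Maxwell device memory manager (CACHING_PAGESIZE, YUZU_PAGESIZE, AS_BITS = 34).
constexpr std::uint64_t kCachingPageSize = 1ULL << 16;
constexpr std::uint64_t kGuestPageSize = 1ULL << 12;
constexpr std::uint64_t kAddressSpaceEnd = 1ULL << 34;

// Grow the range start downwards, clamping at the first usable caching page
// instead of wrapping around below zero.
inline void ExpandBegin(std::uint64_t& begin, std::uint64_t add_value) {
    const std::uint64_t min_page = kCachingPageSize + kGuestPageSize;
    if (begin < min_page || begin - min_page < add_value) {
        begin = min_page;
        return;
    }
    begin -= add_value;
}

// Grow the range end upwards, clamping at the top of the device address space.
// Assumes end never starts above kAddressSpaceEnd.
inline void ExpandEnd(std::uint64_t& end, std::uint64_t add_value) {
    if (kAddressSpaceEnd - end < add_value) {
        end = kAddressSpaceEnd;
        return;
    }
    end += add_value;
}
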
@@ -1424,13 +1443,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1424} 1443}
1425 1444
1426template <class P> 1445template <class P>
1427BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1446BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) {
1428 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); 1447 DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE);
1429 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); 1448 device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE);
1430 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); 1449 wanted_size = static_cast<u32>(device_addr_end - device_addr);
1431 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1450 const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
1432 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1451 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1433 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1452 const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size);
1434 auto& new_buffer = slot_buffers[new_buffer_id]; 1453 auto& new_buffer = slot_buffers[new_buffer_id];
1435 const size_t size_bytes = new_buffer.SizeBytes(); 1454 const size_t size_bytes = new_buffer.SizeBytes();
1436 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); 1455 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
@@ -1465,10 +1484,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1465 total_used_memory -= Common::AlignUp(size, 1024); 1484 total_used_memory -= Common::AlignUp(size, 1024);
1466 lru_cache.Free(buffer.getLRUID()); 1485 lru_cache.Free(buffer.getLRUID());
1467 } 1486 }
1468 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1487 const DAddr device_addr_begin = buffer.CpuAddr();
1469 const VAddr cpu_addr_end = cpu_addr_begin + size; 1488 const DAddr device_addr_end = device_addr_begin + size;
1470 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; 1489 const u64 page_begin = device_addr_begin / CACHING_PAGESIZE;
1471 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); 1490 const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE);
1472 for (u64 page = page_begin; page != page_end; ++page) { 1491 for (u64 page = page_begin; page != page_end; ++page) {
1473 if constexpr (insert) { 1492 if constexpr (insert) {
1474 page_table[page] = buffer_id; 1493 page_table[page] = buffer_id;
@@ -1486,15 +1505,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1486} 1505}
1487 1506
1488template <class P> 1507template <class P>
1489bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1508bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) {
1490 boost::container::small_vector<BufferCopy, 4> copies; 1509 boost::container::small_vector<BufferCopy, 4> copies;
1491 u64 total_size_bytes = 0; 1510 u64 total_size_bytes = 0;
1492 u64 largest_copy = 0; 1511 u64 largest_copy = 0;
1493 VAddr buffer_start = buffer.CpuAddr(); 1512 DAddr buffer_start = buffer.CpuAddr();
1494 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1513 memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1495 copies.push_back(BufferCopy{ 1514 copies.push_back(BufferCopy{
1496 .src_offset = total_size_bytes, 1515 .src_offset = total_size_bytes,
1497 .dst_offset = cpu_addr_out - buffer_start, 1516 .dst_offset = device_addr_out - buffer_start,
1498 .size = range_size, 1517 .size = range_size,
1499 }); 1518 });
1500 total_size_bytes += range_size; 1519 total_size_bytes += range_size;
@@ -1526,14 +1545,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1526 std::span<u8> immediate_buffer; 1545 std::span<u8> immediate_buffer;
1527 for (const BufferCopy& copy : copies) { 1546 for (const BufferCopy& copy : copies) {
1528 std::span<const u8> upload_span; 1547 std::span<const u8> upload_span;
1529 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1548 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1530 if (IsRangeGranular(cpu_addr, copy.size)) { 1549 if (IsRangeGranular(device_addr, copy.size)) {
1531 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); 1550 upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
1532 } else { 1551 } else {
1533 if (immediate_buffer.empty()) { 1552 if (immediate_buffer.empty()) {
1534 immediate_buffer = ImmediateBuffer(largest_copy); 1553 immediate_buffer = ImmediateBuffer(largest_copy);
1535 } 1554 }
1536 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 1555 device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
1537 upload_span = immediate_buffer.subspan(0, copy.size); 1556 upload_span = immediate_buffer.subspan(0, copy.size);
1538 } 1557 }
1539 buffer.ImmediateUpload(copy.dst_offset, upload_span); 1558 buffer.ImmediateUpload(copy.dst_offset, upload_span);
@@ -1550,8 +1569,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1550 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1569 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1551 for (BufferCopy& copy : copies) { 1570 for (BufferCopy& copy : copies) {
1552 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1571 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1553 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1572 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1554 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1573 device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
1555 1574
1556 // Apply the staging offset 1575 // Apply the staging offset
1557 copy.src_offset += upload_staging.offset; 1576 copy.src_offset += upload_staging.offset;
@@ -1562,14 +1581,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1562} 1581}
1563 1582
1564template <class P> 1583template <class P>
1565bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, 1584bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
1566 std::span<const u8> inlined_buffer) { 1585 std::span<const u8> inlined_buffer) {
1567 const bool is_dirty = IsRegionRegistered(dest_address, copy_size); 1586 const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
1568 if (!is_dirty) { 1587 if (!is_dirty) {
1569 return false; 1588 return false;
1570 } 1589 }
1571 VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); 1590 DAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
1572 VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); 1591 DAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
1573 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 1592 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
1574 return false; 1593 return false;
1575 } 1594 }
@@ -1580,7 +1599,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1580} 1599}
1581 1600
1582template <class P> 1601template <class P>
1583void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 1602void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
1584 std::span<const u8> inlined_buffer) { 1603 std::span<const u8> inlined_buffer) {
1585 const IntervalType subtract_interval{dest_address, dest_address + copy_size}; 1604 const IntervalType subtract_interval{dest_address, dest_address + copy_size};
1586 ClearDownload(subtract_interval); 1605 ClearDownload(subtract_interval);
@@ -1612,14 +1631,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
1612} 1631}
1613 1632
1614template <class P> 1633template <class P>
1615void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { 1634void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) {
1616 boost::container::small_vector<BufferCopy, 1> copies; 1635 boost::container::small_vector<BufferCopy, 1> copies;
1617 u64 total_size_bytes = 0; 1636 u64 total_size_bytes = 0;
1618 u64 largest_copy = 0; 1637 u64 largest_copy = 0;
1619 memory_tracker.ForEachDownloadRangeAndClear( 1638 memory_tracker.ForEachDownloadRangeAndClear(
1620 cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1639 device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1621 const VAddr buffer_addr = buffer.CpuAddr(); 1640 const DAddr buffer_addr = buffer.CpuAddr();
1622 const auto add_download = [&](VAddr start, VAddr end) { 1641 const auto add_download = [&](DAddr start, DAddr end) {
1623 const u64 new_offset = start - buffer_addr; 1642 const u64 new_offset = start - buffer_addr;
1624 const u64 new_size = end - start; 1643 const u64 new_size = end - start;
1625 copies.push_back(BufferCopy{ 1644 copies.push_back(BufferCopy{
@@ -1634,8 +1653,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1634 largest_copy = std::max(largest_copy, new_size); 1653 largest_copy = std::max(largest_copy, new_size);
1635 }; 1654 };
1636 1655
1637 const VAddr start_address = cpu_addr_out; 1656 const DAddr start_address = device_addr_out;
1638 const VAddr end_address = start_address + range_size; 1657 const DAddr end_address = start_address + range_size;
1639 ForEachInRangeSet(common_ranges, start_address, range_size, add_download); 1658 ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
1640 const IntervalType subtract_interval{start_address, end_address}; 1659 const IntervalType subtract_interval{start_address, end_address};
1641 ClearDownload(subtract_interval); 1660 ClearDownload(subtract_interval);
@@ -1658,18 +1677,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1658 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); 1677 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
1659 runtime.Finish(); 1678 runtime.Finish();
1660 for (const BufferCopy& copy : copies) { 1679 for (const BufferCopy& copy : copies) {
1661 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1680 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1662 // Undo the modified offset 1681 // Undo the modified offset
1663 const u64 dst_offset = copy.dst_offset - download_staging.offset; 1682 const u64 dst_offset = copy.dst_offset - download_staging.offset;
1664 const u8* copy_mapped_memory = mapped_memory + dst_offset; 1683 const u8* copy_mapped_memory = mapped_memory + dst_offset;
1665 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); 1684 device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
1666 } 1685 }
1667 } else { 1686 } else {
1668 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 1687 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
1669 for (const BufferCopy& copy : copies) { 1688 for (const BufferCopy& copy : copies) {
1670 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 1689 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
1671 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1690 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1672 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); 1691 device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size);
1673 } 1692 }
1674 } 1693 }
1675} 1694}
@@ -1758,20 +1777,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1758 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); 1777 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1759 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; 1778 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
1760 1779
1761 const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); 1780 const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1762 if (!aligned_cpu_addr || size == 0) { 1781 if (!aligned_device_addr || size == 0) {
1763 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1782 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
1764 return NULL_BINDING; 1783 return NULL_BINDING;
1765 } 1784 }
1766 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1785 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1767 ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", 1786 ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}",
1768 cbuf_index); 1787 cbuf_index);
1769 // The end address used for size calculation does not need to be aligned 1788 // The end address used for size calculation does not need to be aligned
1770 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1789 const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::Memory::YUZU_PAGESIZE);
1771 1790
1772 const Binding binding{ 1791 const Binding binding{
1773 .cpu_addr = *aligned_cpu_addr, 1792 .device_addr = *aligned_device_addr,
1774 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), 1793 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr),
1775 .buffer_id = BufferId{}, 1794 .buffer_id = BufferId{},
1776 }; 1795 };
1777 return binding; 1796 return binding;
@@ -1780,15 +1799,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1780template <class P> 1799template <class P>
1781TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 1800TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
1782 PixelFormat format) { 1801 PixelFormat format) {
1783 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1802 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1784 TextureBufferBinding binding; 1803 TextureBufferBinding binding;
1785 if (!cpu_addr || size == 0) { 1804 if (!device_addr || size == 0) {
1786 binding.cpu_addr = 0; 1805 binding.device_addr = 0;
1787 binding.size = 0; 1806 binding.size = 0;
1788 binding.buffer_id = NULL_BUFFER_ID; 1807 binding.buffer_id = NULL_BUFFER_ID;
1789 binding.format = PixelFormat::Invalid; 1808 binding.format = PixelFormat::Invalid;
1790 } else { 1809 } else {
1791 binding.cpu_addr = *cpu_addr; 1810 binding.device_addr = *device_addr;
1792 binding.size = size; 1811 binding.size = size;
1793 binding.buffer_id = BufferId{}; 1812 binding.buffer_id = BufferId{};
1794 binding.format = format; 1813 binding.format = format;
@@ -1797,14 +1816,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr,
1797} 1816}
1798 1817
1799template <class P> 1818template <class P>
1800std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { 1819std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) {
1801 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); 1820 u8* const base_pointer = device_memory.GetPointer<u8>(device_addr);
1802 if (IsRangeGranular(cpu_addr, size) || 1821 if (IsRangeGranular(device_addr, size) ||
1803 base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { 1822 base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) {
1804 return std::span(base_pointer, size); 1823 return std::span(base_pointer, size);
1805 } else { 1824 } else {
1806 const std::span<u8> span = ImmediateBuffer(size); 1825 const std::span<u8> span = ImmediateBuffer(size);
1807 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 1826 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
1808 return span; 1827 return span;
1809 } 1828 }
1810} 1829}
@@ -1828,13 +1847,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
1828template <class P> 1847template <class P>
1829std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { 1848std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
1830 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; 1849 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
1831 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); 1850 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr));
1832} 1851}
1833 1852
1834template <class P> 1853template <class P>
1835std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { 1854std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
1836 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; 1855 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
1837 return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); 1856 return std::make_pair(&buffer,
1857 buffer.Offset(channel_state->indirect_buffer_binding.device_addr));
1838} 1858}
1839 1859
1840} // namespace VideoCommon 1860} // namespace VideoCommon
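
Taken together, the buffer_cache.h hunks reroute every binding through device addresses: the GPU virtual address is still translated with gpu_memory->GpuToCpuAddress, but the result now feeds FindBuffer, SynchronizeBuffer and the device_memory read/write helpers as a DAddr. A compressed, hypothetical sketch of that resolution step; FakeGpuMemory and ResolveBinding are invented names standing in for the real translation and the UpdateVertexBuffer-style callers:

#include <cstdint>
#include <optional>

using GPUVAddr = std::uint64_t;
using DAddr = std::uint64_t;

// Hypothetical stand-in for the GPU MMU: a fixed offset plays the role of the
// page-table walk the real memory manager performs.
struct FakeGpuMemory {
    DAddr base = 0x1000;
    std::optional<DAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
        if (gpu_addr == 0) {
            return std::nullopt; // unmapped
        }
        return base + gpu_addr; // despite the legacy name, the result is a device address
    }
};

struct Binding {
    DAddr device_addr = 0;
    std::uint32_t size = 0;
};

// Translate once, then every later step (buffer lookup, sync, upload) works
// purely in DAddr space.
inline Binding ResolveBinding(const FakeGpuMemory& gpu, GPUVAddr gpu_addr, std::uint32_t size) {
    const std::optional<DAddr> device_addr = gpu.GpuToCpuAddress(gpu_addr);
    if (!device_addr || size == 0) {
        return Binding{}; // stands in for NULL_BINDING
    }
    return Binding{.device_addr = *device_addr, .size = size};
}
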
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index d6d696d8c..4074003e4 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -32,7 +32,6 @@
32#include "common/microprofile.h" 32#include "common/microprofile.h"
33#include "common/scope_exit.h" 33#include "common/scope_exit.h"
34#include "common/settings.h" 34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h" 35#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h" 36#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h" 37#include "video_core/delayed_destruction_ring.h"
@@ -41,7 +40,6 @@
41#include "video_core/engines/kepler_compute.h" 40#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h" 41#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h" 42#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h" 43#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h" 44#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h" 45#include "video_core/texture_cache/types.h"
@@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0};
94static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); 92static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
95 93
96struct Binding { 94struct Binding {
97 VAddr cpu_addr{}; 95 DAddr device_addr{};
98 u32 size{}; 96 u32 size{};
99 BufferId buffer_id; 97 BufferId buffer_id;
100}; 98};
@@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding {
104}; 102};
105 103
106static constexpr Binding NULL_BINDING{ 104static constexpr Binding NULL_BINDING{
107 .cpu_addr = 0, 105 .device_addr = 0,
108 .size = 0, 106 .size = 0,
109 .buffer_id = NULL_BUFFER_ID, 107 .buffer_id = NULL_BUFFER_ID,
110}; 108};
@@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
204 using Async_Buffer = typename P::Async_Buffer; 202 using Async_Buffer = typename P::Async_Buffer;
205 using MemoryTracker = typename P::MemoryTracker; 203 using MemoryTracker = typename P::MemoryTracker;
206 204
207 using IntervalCompare = std::less<VAddr>; 205 using IntervalCompare = std::less<DAddr>;
208 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; 206 using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
209 using IntervalAllocator = boost::fast_pool_allocator<VAddr>; 207 using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
210 using IntervalSet = boost::icl::interval_set<VAddr>; 208 using IntervalSet = boost::icl::interval_set<DAddr>;
211 using IntervalType = typename IntervalSet::interval_type; 209 using IntervalType = typename IntervalSet::interval_type;
212 210
213 template <typename Type> 211 template <typename Type>
@@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
230 228
231 using OverlapCombine = counter_add_functor<int>; 229 using OverlapCombine = counter_add_functor<int>;
232 using OverlapSection = boost::icl::inter_section<int>; 230 using OverlapSection = boost::icl::inter_section<int>;
233 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 231 using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
234 232
235 struct OverlapResult { 233 struct OverlapResult {
236 boost::container::small_vector<BufferId, 16> ids; 234 boost::container::small_vector<BufferId, 16> ids;
237 VAddr begin; 235 DAddr begin;
238 VAddr end; 236 DAddr end;
239 bool has_stream_leap = false; 237 bool has_stream_leap = false;
240 }; 238 };
241 239
242public: 240public:
243 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 241 explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
244 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
245 242
246 void TickFrame(); 243 void TickFrame();
247 244
248 void WriteMemory(VAddr cpu_addr, u64 size); 245 void WriteMemory(DAddr device_addr, u64 size);
249 246
250 void CachedWriteMemory(VAddr cpu_addr, u64 size); 247 void CachedWriteMemory(DAddr device_addr, u64 size);
251 248
252 bool OnCPUWrite(VAddr cpu_addr, u64 size); 249 bool OnCPUWrite(DAddr device_addr, u64 size);
253 250
254 void DownloadMemory(VAddr cpu_addr, u64 size); 251 void DownloadMemory(DAddr device_addr, u64 size);
255 252
256 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); 253 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size);
257 254
258 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); 255 bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
259 256
260 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); 257 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
261 258
@@ -300,7 +297,7 @@ public:
300 ObtainBufferSynchronize sync_info, 297 ObtainBufferSynchronize sync_info,
301 ObtainBufferOperation post_op); 298 ObtainBufferOperation post_op);
302 299
303 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, 300 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size,
304 ObtainBufferSynchronize sync_info, 301 ObtainBufferSynchronize sync_info,
305 ObtainBufferOperation post_op); 302 ObtainBufferOperation post_op);
306 void FlushCachedWrites(); 303 void FlushCachedWrites();
@@ -326,13 +323,13 @@ public:
326 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); 323 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
327 324
328 /// Return true when a CPU region is modified from the GPU 325 /// Return true when a CPU region is modified from the GPU
329 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 326 [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size);
330 327
331 /// Return true when a region is registered on the cache 328 /// Return true when a region is registered on the cache
332 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); 329 [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size);
333 330
334 /// Return true when a CPU region is modified from the CPU 331 /// Return true when a CPU region is modified from the CPU
335 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 332 [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size);
336 333
337 void SetDrawIndirect( 334 void SetDrawIndirect(
338 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { 335 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
@@ -366,9 +363,9 @@ private:
366 } 363 }
367 364
368 template <typename Func> 365 template <typename Func>
369 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { 366 void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) {
370 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); 367 const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
371 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { 368 for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
372 const BufferId buffer_id = page_table[page]; 369 const BufferId buffer_id = page_table[page];
373 if (!buffer_id) { 370 if (!buffer_id) {
374 ++page; 371 ++page;
@@ -377,15 +374,15 @@ private:
377 Buffer& buffer = slot_buffers[buffer_id]; 374 Buffer& buffer = slot_buffers[buffer_id];
378 func(buffer_id, buffer); 375 func(buffer_id, buffer);
379 376
380 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 377 const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
381 page = Common::DivCeil(end_addr, CACHING_PAGESIZE); 378 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
382 } 379 }
383 } 380 }
384 381
385 template <typename Func> 382 template <typename Func>
386 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { 383 void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
387 const VAddr start_address = cpu_addr; 384 const DAddr start_address = device_addr;
388 const VAddr end_address = start_address + size; 385 const DAddr end_address = start_address + size;
389 const IntervalType search_interval{start_address, end_address}; 386 const IntervalType search_interval{start_address, end_address};
390 auto it = current_range.lower_bound(search_interval); 387 auto it = current_range.lower_bound(search_interval);
391 if (it == current_range.end()) { 388 if (it == current_range.end()) {
@@ -393,8 +390,8 @@ private:
393 } 390 }
394 auto end_it = current_range.upper_bound(search_interval); 391 auto end_it = current_range.upper_bound(search_interval);
395 for (; it != end_it; it++) { 392 for (; it != end_it; it++) {
396 VAddr inter_addr_end = it->upper(); 393 DAddr inter_addr_end = it->upper();
397 VAddr inter_addr = it->lower(); 394 DAddr inter_addr = it->lower();
398 if (inter_addr_end > end_address) { 395 if (inter_addr_end > end_address) {
399 inter_addr_end = end_address; 396 inter_addr_end = end_address;
400 } 397 }
@@ -406,10 +403,10 @@ private:
406 } 403 }
407 404
408 template <typename Func> 405 template <typename Func>
409 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, 406 void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
410 Func&& func) { 407 Func&& func) {
411 const VAddr start_address = cpu_addr; 408 const DAddr start_address = device_addr;
412 const VAddr end_address = start_address + size; 409 const DAddr end_address = start_address + size;
413 const IntervalType search_interval{start_address, end_address}; 410 const IntervalType search_interval{start_address, end_address};
414 auto it = current_range.lower_bound(search_interval); 411 auto it = current_range.lower_bound(search_interval);
415 if (it == current_range.end()) { 412 if (it == current_range.end()) {
@@ -418,8 +415,8 @@ private:
418 auto end_it = current_range.upper_bound(search_interval); 415 auto end_it = current_range.upper_bound(search_interval);
419 for (; it != end_it; it++) { 416 for (; it != end_it; it++) {
420 auto& inter = it->first; 417 auto& inter = it->first;
421 VAddr inter_addr_end = inter.upper(); 418 DAddr inter_addr_end = inter.upper();
422 VAddr inter_addr = inter.lower(); 419 DAddr inter_addr = inter.lower();
423 if (inter_addr_end > end_address) { 420 if (inter_addr_end > end_address) {
424 inter_addr_end = end_address; 421 inter_addr_end = end_address;
425 } 422 }
@@ -451,9 +448,9 @@ private:
451 } while (any_removals); 448 } while (any_removals);
452 } 449 }
453 450
454 static bool IsRangeGranular(VAddr cpu_addr, size_t size) { 451 static bool IsRangeGranular(DAddr device_addr, size_t size) {
455 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == 452 return (device_addr & ~Core::Memory::YUZU_PAGEMASK) ==
456 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); 453 ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
457 } 454 }
458 455
459 void RunGarbageCollector(); 456 void RunGarbageCollector();
@@ -508,15 +505,15 @@ private:
508 505
509 void UpdateComputeTextureBuffers(); 506 void UpdateComputeTextureBuffers();
510 507
511 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); 508 void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size);
512 509
513 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); 510 [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size);
514 511
515 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); 512 [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size);
516 513
517 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); 514 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
518 515
519 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); 516 [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size);
520 517
521 void Register(BufferId buffer_id); 518 void Register(BufferId buffer_id);
522 519
@@ -527,7 +524,7 @@ private:
527 524
528 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; 525 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
529 526
530 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 527 bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size);
531 528
532 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 529 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
533 std::span<BufferCopy> copies); 530 std::span<BufferCopy> copies);
@@ -539,7 +536,7 @@ private:
539 536
540 void DownloadBufferMemory(Buffer& buffer_id); 537 void DownloadBufferMemory(Buffer& buffer_id);
541 538
542 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); 539 void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size);
543 540
544 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); 541 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
545 542
@@ -549,7 +546,7 @@ private:
549 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 546 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
550 PixelFormat format); 547 PixelFormat format);
551 548
552 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); 549 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size);
553 550
554 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); 551 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
555 552
@@ -557,11 +554,10 @@ private:
557 554
558 void ClearDownload(IntervalType subtract_interval); 555 void ClearDownload(IntervalType subtract_interval);
559 556
560 void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 557 void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
561 std::span<const u8> inlined_buffer); 558 std::span<const u8> inlined_buffer);
562 559
563 VideoCore::RasterizerInterface& rasterizer; 560 Tegra::MaxwellDeviceMemoryManager& device_memory;
564 Core::Memory::Memory& cpu_memory;
565 561
566 SlotVector<Buffer> slot_buffers; 562 SlotVector<Buffer> slot_buffers;
567 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 563 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -598,7 +594,7 @@ private:
598 u64 critical_memory = 0; 594 u64 critical_memory = 0;
599 BufferId inline_buffer_id; 595 BufferId inline_buffer_id;
600 596
601 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; 597 std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
602 Common::ScratchBuffer<u8> tmp_buffer; 598 Common::ScratchBuffer<u8> tmp_buffer;
603}; 599};
604 600
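
The page_table shrink at the end of this hunk follows from keying the cache on DAddr: the SMMU address space is 34 bits wide, against the 39-bit guest virtual space used before, so the flat array only needs (1ULL << 34) >> CACHING_PAGEBITS entries. A quick check of the arithmetic, assuming the usual 64 KiB caching page (CACHING_PAGEBITS = 16) and a 4-byte BufferId:

#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint64_t kPageBits = 16;                          // assumed CACHING_PAGEBITS
    constexpr std::uint64_t kOldEntries = (1ULL << 39) >> kPageBits; // 8,388,608 entries
    constexpr std::uint64_t kNewEntries = (1ULL << 34) >> kPageBits; //   262,144 entries
    constexpr std::uint64_t kIdSize = 4;                             // assumed sizeof(BufferId)
    std::printf("old table: %llu entries (%llu KiB)\n",
                static_cast<unsigned long long>(kOldEntries),
                static_cast<unsigned long long>(kOldEntries * kIdSize / 1024)); // 32 MiB
    std::printf("new table: %llu entries (%llu KiB)\n",
                static_cast<unsigned long long>(kNewEntries),
                static_cast<unsigned long long>(kNewEntries * kIdSize / 1024)); // 1 MiB
    return 0;
}
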
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
index 6c1c8287b..c95eed1f6 100644
--- a/src/video_core/buffer_cache/memory_tracker_base.h
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -17,19 +17,19 @@
17 17
18namespace VideoCommon { 18namespace VideoCommon {
19 19
20template <class RasterizerInterface> 20template <typename DeviceTracker>
21class MemoryTrackerBase { 21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39; 22 static constexpr size_t MAX_CPU_PAGE_BITS = 34;
23 static constexpr size_t HIGHER_PAGE_BITS = 22; 23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; 24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; 25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); 26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32; 27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; 28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; 29 using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>;
30 30
31public: 31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} 32 MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {}
33 ~MemoryTrackerBase() = default; 33 ~MemoryTrackerBase() = default;
34 34
35 /// Returns the inclusive CPU modified range in a begin end pair 35 /// Returns the inclusive CPU modified range in a begin end pair
@@ -74,7 +74,7 @@ public:
74 }); 74 });
75 } 75 }
76 76
77 /// Mark region as CPU modified, notifying the rasterizer about this change 77 /// Mark region as CPU modified, notifying the device_tracker about this change
78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
79 IteratePages<true>(dirty_cpu_addr, query_size, 79 IteratePages<true>(dirty_cpu_addr, query_size,
80 [](Manager* manager, u64 offset, size_t size) { 80 [](Manager* manager, u64 offset, size_t size) {
@@ -83,7 +83,7 @@ public:
83 }); 83 });
84 } 84 }
85 85
86 /// Unmark region as CPU modified, notifying the rasterizer about this change 86 /// Unmark region as CPU modified, notifying the device_tracker about this change
87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
88 IteratePages<true>(dirty_cpu_addr, query_size, 88 IteratePages<true>(dirty_cpu_addr, query_size,
89 [](Manager* manager, u64 offset, size_t size) { 89 [](Manager* manager, u64 offset, size_t size) {
@@ -139,7 +139,7 @@ public:
139 }); 139 });
140 } 140 }
141 141
142 /// Flushes cached CPU writes, and notify the rasterizer about the deltas 142 /// Flushes cached CPU writes, and notify the device_tracker about the deltas
143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { 143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
144 IteratePages<false>(query_cpu_addr, query_size, 144 IteratePages<false>(query_cpu_addr, query_size,
145 [](Manager* manager, [[maybe_unused]] u64 offset, 145 [](Manager* manager, [[maybe_unused]] u64 offset,
@@ -280,7 +280,7 @@ private:
280 manager_pool.emplace_back(); 280 manager_pool.emplace_back();
281 auto& last_pool = manager_pool.back(); 281 auto& last_pool = manager_pool.back();
282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { 282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
283 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); 283 new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE);
284 free_managers.push_back(&last_pool[i]); 284 free_managers.push_back(&last_pool[i]);
285 } 285 }
286 return on_return(); 286 return on_return();
@@ -293,7 +293,7 @@ private:
293 293
294 std::unordered_set<u32> cached_pages; 294 std::unordered_set<u32> cached_pages;
295 295
296 RasterizerInterface* rasterizer = nullptr; 296 DeviceTracker* device_tracker = nullptr;
297}; 297};
298 298
299} // namespace VideoCommon 299} // namespace VideoCommon
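Lowering MAX_CPU_PAGE_BITS to 34 shrinks the high-page directory the tracker keeps per 4 MiB region. A quick worked check of the constants in this hunk:

    constexpr std::size_t MAX_CPU_PAGE_BITS = 34;
    constexpr std::size_t HIGHER_PAGE_BITS = 22;  // 4 MiB tracking regions
    constexpr std::size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
    static_assert(NUM_HIGH_PAGES == 4096);        // was 131072 with the 39-bit space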
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index a336bde41..56ab4f5f1 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -163,11 +163,11 @@ struct Words {
163 WordsArray<stack_words> preflushable; 163 WordsArray<stack_words> preflushable;
164}; 164};
165 165
166template <class RasterizerInterface, size_t stack_words = 1> 166template <class DeviceTracker, size_t stack_words = 1>
167class WordManager { 167class WordManager {
168public: 168public:
169 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) 169 explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes)
170 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} 170 : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {}
171 171
172 explicit WordManager() = default; 172 explicit WordManager() = default;
173 173
@@ -279,7 +279,7 @@ public:
279 } 279 }
280 280
281 /** 281 /**
282 * Loop over each page in the given range, turn off those bits and notify the rasterizer if 282 * Loop over each page in the given range, turn off those bits and notify the tracker if
283 * needed. Call the given function on each turned off range. 283 * needed. Call the given function on each turned off range.
284 * 284 *
285 * @param query_cpu_range Base CPU address to loop over 285 * @param query_cpu_range Base CPU address to loop over
@@ -459,26 +459,26 @@ private:
459 } 459 }
460 460
461 /** 461 /**
462 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer 462 * Notify tracker about changes in the CPU tracking state of a word in the buffer
463 * 463 *
464 * @param word_index Index to the word to notify to the rasterizer 464 * @param word_index Index to the word to notify to the tracker
465 * @param current_bits Current state of the word 465 * @param current_bits Current state of the word
466 * @param new_bits New state of the word 466 * @param new_bits New state of the word
467 * 467 *
468 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 468 * @tparam add_to_tracker True when the tracker should start tracking the new pages
469 */ 469 */
470 template <bool add_to_rasterizer> 470 template <bool add_to_tracker>
471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { 471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
472 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 472 u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
474 IteratePages(changed_bits, [&](size_t offset, size_t size) { 474 IteratePages(changed_bits, [&](size_t offset, size_t size) {
475 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, 475 tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
476 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); 476 size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1);
477 }); 477 });
478 } 478 }
479 479
480 VAddr cpu_addr = 0; 480 VAddr cpu_addr = 0;
481 RasterizerInterface* rasterizer = nullptr; 481 DeviceTracker* tracker = nullptr;
482 Words<stack_words> words; 482 Words<stack_words> words;
483}; 483};
484 484
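NotifyRasterizer keeps its name, but its target is now the templated DeviceTracker. The interesting part is unchanged: it diffs the old and new word bits and reports each contiguous run of changed pages once. A standalone sketch of that delta walk (DummyTracker, BYTES_PER_PAGE and the function name are illustrative stand-ins, not the real interface):

    #include <bit>
    #include <cstdint>

    struct DummyTracker {
        void UpdatePagesCachedCount(std::uint64_t addr, std::uint64_t size, int delta) {}
    };

    constexpr std::uint64_t BYTES_PER_PAGE = 4096;  // assumed page granularity

    template <bool add_to_tracker>
    void NotifyChangedPages(DummyTracker& tracker, std::uint64_t word_base_addr,
                            std::uint64_t current_bits, std::uint64_t new_bits) {
        // Pages that flipped state in this word.
        std::uint64_t changed = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
        while (changed != 0) {
            const int first = std::countr_zero(changed);          // start of a run of set bits
            const int length = std::countr_one(changed >> first); // run length in pages
            tracker.UpdatePagesCachedCount(word_base_addr + first * BYTES_PER_PAGE,
                                           length * BYTES_PER_PAGE, add_to_tracker ? 1 : -1);
            const std::uint64_t run_mask =
                (length == 64 ? ~0ULL : ((1ULL << length) - 1)) << first;
            changed &= ~run_mask;  // clear the run just reported
        }
    }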
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 58ce0d8c2..d461c5be8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -5,10 +5,10 @@
5#include "common/microprofile.h" 5#include "common/microprofile.h"
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/memory.h"
9#include "video_core/dma_pusher.h" 8#include "video_core/dma_pusher.h"
10#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
11#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/guest_memory.h"
12#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
13 13
14namespace Tegra { 14namespace Tegra {
@@ -85,15 +85,15 @@ bool DmaPusher::Step() {
85 } 85 }
86 } 86 }
87 const auto safe_process = [&] { 87 const auto safe_process = [&] {
88 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 88 Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
89 Core::Memory::GuestMemoryFlags::SafeRead> 89 Tegra::Memory::GuestMemoryFlags::SafeRead>
90 headers(memory_manager, dma_state.dma_get, command_list_header.size, 90 headers(memory_manager, dma_state.dma_get, command_list_header.size,
91 &command_headers); 91 &command_headers);
92 ProcessCommands(headers); 92 ProcessCommands(headers);
93 }; 93 };
94 const auto unsafe_process = [&] { 94 const auto unsafe_process = [&] {
95 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 95 Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
96 Core::Memory::GuestMemoryFlags::UnsafeRead> 96 Tegra::Memory::GuestMemoryFlags::UnsafeRead>
97 headers(memory_manager, dma_state.dma_get, command_list_header.size, 97 headers(memory_manager, dma_state.dma_get, command_list_header.size,
98 &command_headers); 98 &command_headers);
99 ProcessCommands(headers); 99 ProcessCommands(headers);
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index bc64d4486..e5cc04ec4 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -5,8 +5,8 @@
5 5
6#include "common/algorithm.h" 6#include "common/algorithm.h"
7#include "common/assert.h" 7#include "common/assert.h"
8#include "core/memory.h"
9#include "video_core/engines/engine_upload.h" 8#include "video_core/engines/engine_upload.h"
9#include "video_core/guest_memory.h"
10#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/textures/decoders.h" 12#include "video_core/textures/decoders.h"
@@ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) {
68 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, 68 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
69 regs.dest.BlockHeight(), regs.dest.BlockDepth()); 69 regs.dest.BlockHeight(), regs.dest.BlockDepth());
70 70
71 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 71 Tegra::Memory::GpuGuestMemoryScoped<u8,
72 Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
72 tmp(memory_manager, address, dst_size, &tmp_buffer); 73 tmp(memory_manager, address, dst_size, &tmp_buffer);
73 74
74 Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, 75 Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height,
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 56fbff306..4bf461fb0 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -11,6 +11,7 @@
11#include "core/memory.h" 11#include "core/memory.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 13#include "video_core/engines/maxwell_dma.h"
14#include "video_core/guest_memory.h"
14#include "video_core/memory_manager.h" 15#include "video_core/memory_manager.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
16#include "video_core/textures/decoders.h" 17#include "video_core/textures/decoders.h"
@@ -133,8 +134,8 @@ void MaxwellDMA::Launch() {
133 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 134 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
134 read_buffer.resize_destructive(16); 135 read_buffer.resize_destructive(16);
135 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 136 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
136 Core::Memory::GpuGuestMemoryScoped< 137 Tegra::Memory::GpuGuestMemoryScoped<
137 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 138 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
138 tmp_write_buffer(memory_manager, 139 tmp_write_buffer(memory_manager,
139 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 140 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
140 16, &read_buffer); 141 16, &read_buffer);
@@ -146,16 +147,16 @@ void MaxwellDMA::Launch() {
146 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 147 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
147 read_buffer.resize_destructive(16); 148 read_buffer.resize_destructive(16);
148 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 149 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
149 Core::Memory::GpuGuestMemoryScoped< 150 Tegra::Memory::GpuGuestMemoryScoped<
150 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 151 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
151 tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); 152 tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer);
152 tmp_write_buffer.SetAddressAndSize( 153 tmp_write_buffer.SetAddressAndSize(
153 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); 154 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16);
154 } 155 }
155 } else { 156 } else {
156 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 157 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
157 Core::Memory::GpuGuestMemoryScoped< 158 Tegra::Memory::GpuGuestMemoryScoped<
158 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 159 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
159 tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, 160 tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in,
160 &read_buffer); 161 &read_buffer);
161 tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); 162 tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in);
@@ -226,9 +227,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
226 227
227 const size_t dst_size = dst_operand.pitch * regs.line_count; 228 const size_t dst_size = dst_operand.pitch * regs.line_count;
228 229
229 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 230 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
230 memory_manager, src_operand.address, src_size, &read_buffer); 231 memory_manager, src_operand.address, src_size, &read_buffer);
231 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> 232 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite>
232 tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); 233 tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer);
233 234
234 UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, 235 UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
@@ -290,9 +291,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
290 291
291 GPUVAddr src_addr = regs.offset_in; 292 GPUVAddr src_addr = regs.offset_in;
292 GPUVAddr dst_addr = regs.offset_out; 293 GPUVAddr dst_addr = regs.offset_out;
293 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 294 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
294 memory_manager, src_addr, src_size, &read_buffer); 295 memory_manager, src_addr, src_size, &read_buffer);
295 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> 296 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite>
296 tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); 297 tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer);
297 298
298 // If the input is linear and the output is tiled, swizzle the input and copy it over. 299 // If the input is linear and the output is tiled, swizzle the input and copy it over.
@@ -344,9 +345,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
344 345
345 intermediate_buffer.resize_destructive(mid_buffer_size); 346 intermediate_buffer.resize_destructive(mid_buffer_size);
346 347
347 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 348 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
348 memory_manager, regs.offset_in, src_size, &read_buffer); 349 memory_manager, regs.offset_in, src_size, &read_buffer);
349 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 350 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
350 tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); 351 tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer);
351 352
352 UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, 353 UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height,
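All of these DMA paths use the same scoped pattern: read a span through one GPU address, optionally retarget it, and let the destructor flush the contents back. A hedged usage sketch of that read-modify-writeback flow (memory_manager, the addresses, the scratch buffer and the transform are placeholders; the wrapper type and flags are the ones used in the hunks above):

    // Read `size` bytes at src_gpu_addr into a scratch-backed span.
    Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
        span(memory_manager, src_gpu_addr, size, &scratch_buffer);
    for (u8& byte : span) {
        byte ^= 0xFF;  // placeholder transform
    }
    // Retarget the span; the destructor writes the modified bytes to dst_gpu_addr.
    span.SetAddressAndSize(dst_gpu_addr, size);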
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
index 67ce9134b..b67589daf 100644
--- a/src/video_core/engines/sw_blitter/blitter.cpp
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -11,6 +11,7 @@
11#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
12#include "video_core/surface.h" 12#include "video_core/surface.h"
13#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
14#include "video_core/guest_memory.h"
14 15
15namespace Tegra { 16namespace Tegra {
16class MemoryManager; 17class MemoryManager;
@@ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
160 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); 161 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
161 const size_t src_size = get_surface_size(src, src_bytes_per_pixel); 162 const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
162 163
163 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( 164 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer(
164 memory_manager, src.Address(), src_size, &impl->tmp_buffer); 165 memory_manager, src.Address(), src_size, &impl->tmp_buffer);
165 166
166 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; 167 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
@@ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
220 } 221 }
221 222
222 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); 223 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
223 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> 224 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite>
224 tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); 225 tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer);
225 226
226 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { 227 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index 5f3bffcab..856f4bd52 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -14,7 +14,7 @@ namespace Tegra {
14 * Struct describing framebuffer configuration 14 * Struct describing framebuffer configuration
15 */ 15 */
16struct FramebufferConfig { 16struct FramebufferConfig {
17 VAddr address{}; 17 DAddr address{};
18 u32 offset{}; 18 u32 offset{};
19 u32 width{}; 19 u32 width{};
20 u32 height{}; 20 u32 height{};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 1e915682f..5f780507b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -34,6 +34,8 @@
34#include "video_core/renderer_base.h" 34#include "video_core/renderer_base.h"
35#include "video_core/shader_notify.h" 35#include "video_core/shader_notify.h"
36 36
37#pragma optimize("", off)
38
37namespace Tegra { 39namespace Tegra {
38 40
39struct GPU::Impl { 41struct GPU::Impl {
@@ -95,8 +97,8 @@ struct GPU::Impl {
95 97
96 /// Synchronizes CPU writes with Host GPU memory. 98 /// Synchronizes CPU writes with Host GPU memory.
97 void InvalidateGPUCache() { 99 void InvalidateGPUCache() {
98 std::function<void(VAddr, size_t)> callback_writes( 100 std::function<void(PAddr, size_t)> callback_writes(
99 [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); 101 [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
100 system.GatherGPUDirtyMemory(callback_writes); 102 system.GatherGPUDirtyMemory(callback_writes);
101 } 103 }
102 104
@@ -279,11 +281,11 @@ struct GPU::Impl {
279 } 281 }
280 282
281 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 283 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
282 void FlushRegion(VAddr addr, u64 size) { 284 void FlushRegion(DAddr addr, u64 size) {
283 gpu_thread.FlushRegion(addr, size); 285 gpu_thread.FlushRegion(addr, size);
284 } 286 }
285 287
286 VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { 288 VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
287 auto raster_area = rasterizer->GetFlushArea(addr, size); 289 auto raster_area = rasterizer->GetFlushArea(addr, size);
288 if (raster_area.preemtive) { 290 if (raster_area.preemtive) {
289 return raster_area; 291 return raster_area;
@@ -299,16 +301,16 @@ struct GPU::Impl {
299 } 301 }
300 302
301 /// Notify rasterizer that any caches of the specified region should be invalidated 303 /// Notify rasterizer that any caches of the specified region should be invalidated
302 void InvalidateRegion(VAddr addr, u64 size) { 304 void InvalidateRegion(DAddr addr, u64 size) {
303 gpu_thread.InvalidateRegion(addr, size); 305 gpu_thread.InvalidateRegion(addr, size);
304 } 306 }
305 307
306 bool OnCPUWrite(VAddr addr, u64 size) { 308 bool OnCPUWrite(DAddr addr, u64 size) {
307 return rasterizer->OnCPUWrite(addr, size); 309 return rasterizer->OnCPUWrite(addr, size);
308 } 310 }
309 311
310 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 312 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
311 void FlushAndInvalidateRegion(VAddr addr, u64 size) { 313 void FlushAndInvalidateRegion(DAddr addr, u64 size) {
312 gpu_thread.FlushAndInvalidateRegion(addr, size); 314 gpu_thread.FlushAndInvalidateRegion(addr, size);
313 } 315 }
314 316
@@ -437,7 +439,7 @@ void GPU::OnCommandListEnd() {
437 impl->OnCommandListEnd(); 439 impl->OnCommandListEnd();
438} 440}
439 441
440u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 442u64 GPU::RequestFlush(DAddr addr, std::size_t size) {
441 return impl->RequestSyncOperation( 443 return impl->RequestSyncOperation(
442 [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); 444 [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
443} 445}
@@ -557,23 +559,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
557 impl->SwapBuffers(framebuffer); 559 impl->SwapBuffers(framebuffer);
558} 560}
559 561
560VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { 562VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
561 return impl->OnCPURead(addr, size); 563 return impl->OnCPURead(addr, size);
562} 564}
563 565
564void GPU::FlushRegion(VAddr addr, u64 size) { 566void GPU::FlushRegion(DAddr addr, u64 size) {
565 impl->FlushRegion(addr, size); 567 impl->FlushRegion(addr, size);
566} 568}
567 569
568void GPU::InvalidateRegion(VAddr addr, u64 size) { 570void GPU::InvalidateRegion(DAddr addr, u64 size) {
569 impl->InvalidateRegion(addr, size); 571 impl->InvalidateRegion(addr, size);
570} 572}
571 573
572bool GPU::OnCPUWrite(VAddr addr, u64 size) { 574bool GPU::OnCPUWrite(DAddr addr, u64 size) {
573 return impl->OnCPUWrite(addr, size); 575 return impl->OnCPUWrite(addr, size);
574} 576}
575 577
576void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 578void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) {
577 impl->FlushAndInvalidateRegion(addr, size); 579 impl->FlushAndInvalidateRegion(addr, size);
578} 580}
579 581
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba2838b89..b3c1d15bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -158,7 +158,7 @@ public:
158 void InitAddressSpace(Tegra::MemoryManager& memory_manager); 158 void InitAddressSpace(Tegra::MemoryManager& memory_manager);
159 159
160 /// Request a host GPU memory flush from the CPU. 160 /// Request a host GPU memory flush from the CPU.
161 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 161 [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size);
162 162
163 /// Obtains current flush request fence id. 163 /// Obtains current flush request fence id.
164 [[nodiscard]] u64 CurrentSyncRequestFence() const; 164 [[nodiscard]] u64 CurrentSyncRequestFence() const;
@@ -242,20 +242,20 @@ public:
242 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 242 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
243 243
244 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 244 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
245 [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); 245 [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size);
246 246
247 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 247 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
248 void FlushRegion(VAddr addr, u64 size); 248 void FlushRegion(DAddr addr, u64 size);
249 249
250 /// Notify rasterizer that any caches of the specified region should be invalidated 250 /// Notify rasterizer that any caches of the specified region should be invalidated
251 void InvalidateRegion(VAddr addr, u64 size); 251 void InvalidateRegion(DAddr addr, u64 size);
252 252
253 /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is 253 /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
254 /// sensible, false otherwise 254 /// sensible, false otherwise
255 bool OnCPUWrite(VAddr addr, u64 size); 255 bool OnCPUWrite(DAddr addr, u64 size);
256 256
257 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 257 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
258 void FlushAndInvalidateRegion(VAddr addr, u64 size); 258 void FlushAndInvalidateRegion(DAddr addr, u64 size);
259 259
260private: 260private:
261 struct Impl; 261 struct Impl;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 2f0f9f593..788d4f61e 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
82 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 82 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
83} 83}
84 84
85void ThreadManager::FlushRegion(VAddr addr, u64 size) { 85void ThreadManager::FlushRegion(DAddr addr, u64 size) {
86 if (!is_async) { 86 if (!is_async) {
87 // Always flush with synchronous GPU mode 87 // Always flush with synchronous GPU mode
88 PushCommand(FlushRegionCommand(addr, size)); 88 PushCommand(FlushRegionCommand(addr, size));
@@ -101,11 +101,11 @@ void ThreadManager::TickGPU() {
101 PushCommand(GPUTickCommand()); 101 PushCommand(GPUTickCommand());
102} 102}
103 103
104void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 104void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
105 rasterizer->OnCacheInvalidation(addr, size); 105 rasterizer->OnCacheInvalidation(addr, size);
106} 106}
107 107
108void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 108void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) {
109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
110 rasterizer->OnCacheInvalidation(addr, size); 110 rasterizer->OnCacheInvalidation(addr, size);
111} 111}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 43940bd6d..2de25e9ef 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -54,26 +54,26 @@ struct SwapBuffersCommand final {
54 54
55/// Command to signal to the GPU thread to flush a region 55/// Command to signal to the GPU thread to flush a region
56struct FlushRegionCommand final { 56struct FlushRegionCommand final {
57 explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} 57 explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
58 58
59 VAddr addr; 59 DAddr addr;
60 u64 size; 60 u64 size;
61}; 61};
62 62
63/// Command to signal to the GPU thread to invalidate a region 63/// Command to signal to the GPU thread to invalidate a region
64struct InvalidateRegionCommand final { 64struct InvalidateRegionCommand final {
65 explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} 65 explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
66 66
67 VAddr addr; 67 DAddr addr;
68 u64 size; 68 u64 size;
69}; 69};
70 70
71/// Command to signal to the GPU thread to flush and invalidate a region 71/// Command to signal to the GPU thread to flush and invalidate a region
72struct FlushAndInvalidateRegionCommand final { 72struct FlushAndInvalidateRegionCommand final {
73 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) 73 explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_)
74 : addr{addr_}, size{size_} {} 74 : addr{addr_}, size{size_} {}
75 75
76 VAddr addr; 76 DAddr addr;
77 u64 size; 77 u64 size;
78}; 78};
79 79
@@ -122,13 +122,13 @@ public:
122 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 122 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
123 123
124 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 124 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
125 void FlushRegion(VAddr addr, u64 size); 125 void FlushRegion(DAddr addr, u64 size);
126 126
127 /// Notify rasterizer that any caches of the specified region should be invalidated 127 /// Notify rasterizer that any caches of the specified region should be invalidated
128 void InvalidateRegion(VAddr addr, u64 size); 128 void InvalidateRegion(DAddr addr, u64 size);
129 129
130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
131 void FlushAndInvalidateRegion(VAddr addr, u64 size); 131 void FlushAndInvalidateRegion(DAddr addr, u64 size);
132 132
133 void TickGPU(); 133 void TickGPU();
134 134
diff --git a/src/video_core/guest_memory.h b/src/video_core/guest_memory.h
new file mode 100644
index 000000000..a2132f7ea
--- /dev/null
+++ b/src/video_core/guest_memory.h
@@ -0,0 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <iterator>
7#include <memory>
8#include <optional>
9#include <span>
10#include <vector>
11
12#include "common/scratch_buffer.h"
13#include "core/guest_memory.h"
14#include "video_core/memory_manager.h"
15
16namespace Tegra::Memory {
17
18using GuestMemoryFlags = Core::Memory::GuestMemoryFlags;
19
20template <typename T, GuestMemoryFlags FLAGS>
21using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
22template <typename T, GuestMemoryFlags FLAGS>
23using DeviceGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
24template <typename T, GuestMemoryFlags FLAGS>
25using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>;
26template <typename T, GuestMemoryFlags FLAGS>
27using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
28
29} // namespace Tegra::Memory
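These aliases let video_core code choose the address space by wrapper type: GpuGuestMemory/GpuGuestMemoryScoped translate GPU virtual addresses through Tegra::MemoryManager, while the Device variants go through the MaxwellDeviceMemoryManager. A usage sketch mirroring the dma_pusher change above (memory_manager, gpu_addr and count are placeholders):

    Common::ScratchBuffer<Tegra::CommandHeader> scratch;
    Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, Tegra::Memory::GuestMemoryFlags::SafeRead>
        headers(memory_manager, gpu_addr, count, &scratch);
    for (const Tegra::CommandHeader& header : headers) {
        // ... decode the header ...
    }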
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h
index 2fb77605e..a406ce965 100644
--- a/src/video_core/host1x/gpu_device_memory_manager.h
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@@ -1,6 +1,8 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#pragma once
5
4#include "core/device_memory_manager.h" 6#include "core/device_memory_manager.h"
5 7
6namespace VideoCore { 8namespace VideoCore {
@@ -12,8 +14,8 @@ namespace Tegra {
12struct MaxwellDeviceMethods; 14struct MaxwellDeviceMethods;
13 15
14struct MaxwellDeviceTraits { 16struct MaxwellDeviceTraits {
15 static constexpr bool supports_pinning = true; 17 static constexpr bool supports_pinning = false;
16 static constexpr size_t device_virtual_bits = 34; 18 static constexpr size_t device_virtual_bits = 32;
17 using DeviceInterface = typename VideoCore::RasterizerInterface; 19 using DeviceInterface = typename VideoCore::RasterizerInterface;
18 using DeviceMethods = typename MaxwellDeviceMethods; 20 using DeviceMethods = typename MaxwellDeviceMethods;
19}; 21};
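The traits now advertise a 32-bit device address space and drop pinning support. A quick check of what that width means for the SMMU-backed manager (sizes only; the manager internals are in core/device_memory_manager.h):

    constexpr std::size_t device_virtual_bits = 32;
    constexpr std::size_t device_as_size = 1ULL << device_virtual_bits;  // 4 GiB of device VA
    static_assert(device_as_size == 4ULL * 1024 * 1024 * 1024);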
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d16040613..82f7a1c3b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,22 +7,24 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/device_memory.h"
11#include "core/hle/kernel/k_page_table.h" 10#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 11#include "core/hle/kernel/k_process.h"
12#include "video_core/guest_memory.h"
13#include "video_core/host1x/host1x.h"
13#include "video_core/invalidation_accumulator.h" 14#include "video_core/invalidation_accumulator.h"
14#include "video_core/memory_manager.h" 15#include "video_core/memory_manager.h"
15#include "video_core/rasterizer_interface.h" 16#include "video_core/rasterizer_interface.h"
16#include "video_core/renderer_base.h" 17#include "video_core/renderer_base.h"
17 18
19
18namespace Tegra { 20namespace Tegra {
19using Core::Memory::GuestMemoryFlags; 21using Tegra::Memory::GuestMemoryFlags;
20 22
21std::atomic<size_t> MemoryManager::unique_identifier_generator{}; 23std::atomic<size_t> MemoryManager::unique_identifier_generator{};
22 24
23MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, 25MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
24 u64 page_bits_) 26 u64 page_bits_)
25 : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, 27 : system{system_}, memory{system.Host1x().MemoryManager()},
26 address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, 28 address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
27 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, 29 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
28 page_bits != big_page_bits ? page_bits : 0}, 30 page_bits != big_page_bits ? page_bits : 0},
@@ -42,7 +44,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
42 big_page_table_mask = big_page_table_size - 1; 44 big_page_table_mask = big_page_table_size - 1;
43 45
44 big_entries.resize(big_page_table_size / 32, 0); 46 big_entries.resize(big_page_table_size / 32, 0);
45 big_page_table_cpu.resize(big_page_table_size); 47 big_page_table_dev.resize(big_page_table_size);
46 big_page_continuous.resize(big_page_table_size / continuous_bits, 0); 48 big_page_continuous.resize(big_page_table_size / continuous_bits, 0);
47 entries.resize(page_table_size / 32, 0); 49 entries.resize(page_table_size / 32, 0);
48} 50}
@@ -100,7 +102,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu
100} 102}
101 103
102template <MemoryManager::EntryType entry_type> 104template <MemoryManager::EntryType entry_type>
103GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 105GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
104 PTEKind kind) { 106 PTEKind kind) {
105 [[maybe_unused]] u64 remaining_size{size}; 107 [[maybe_unused]] u64 remaining_size{size};
106 if constexpr (entry_type == EntryType::Mapped) { 108 if constexpr (entry_type == EntryType::Mapped) {
@@ -114,9 +116,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
114 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); 116 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
115 } 117 }
116 if constexpr (entry_type == EntryType::Mapped) { 118 if constexpr (entry_type == EntryType::Mapped) {
117 const VAddr current_cpu_addr = cpu_addr + offset; 119 const DAddr current_dev_addr = dev_addr + offset;
118 const auto index = PageEntryIndex<false>(current_gpu_addr); 120 const auto index = PageEntryIndex<false>(current_gpu_addr);
119 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); 121 const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits);
120 page_table[index] = sub_value; 122 page_table[index] = sub_value;
121 } 123 }
122 remaining_size -= page_size; 124 remaining_size -= page_size;
@@ -126,7 +128,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
126} 128}
127 129
128template <MemoryManager::EntryType entry_type> 130template <MemoryManager::EntryType entry_type>
129GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, 131GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr,
130 size_t size, PTEKind kind) { 132 size_t size, PTEKind kind) {
131 [[maybe_unused]] u64 remaining_size{size}; 133 [[maybe_unused]] u64 remaining_size{size};
132 for (u64 offset{}; offset < size; offset += big_page_size) { 134 for (u64 offset{}; offset < size; offset += big_page_size) {
@@ -137,20 +139,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
137 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); 139 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
138 } 140 }
139 if constexpr (entry_type == EntryType::Mapped) { 141 if constexpr (entry_type == EntryType::Mapped) {
140 const VAddr current_cpu_addr = cpu_addr + offset; 142 const DAddr current_dev_addr = dev_addr + offset;
141 const auto index = PageEntryIndex<true>(current_gpu_addr); 143 const auto index = PageEntryIndex<true>(current_gpu_addr);
142 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); 144 const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits);
143 big_page_table_cpu[index] = sub_value; 145 big_page_table_dev[index] = sub_value;
144 const bool is_continuous = ([&] { 146 const bool is_continuous = ([&] {
145 uintptr_t base_ptr{ 147 uintptr_t base_ptr{
146 reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; 148 reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))};
147 if (base_ptr == 0) { 149 if (base_ptr == 0) {
148 return false; 150 return false;
149 } 151 }
150 for (VAddr start_cpu = current_cpu_addr + page_size; 152 for (DAddr start_cpu = current_dev_addr + page_size;
151 start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { 153 start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) {
152 base_ptr += page_size; 154 base_ptr += page_size;
153 auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); 155 auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu));
154 if (next_ptr == 0 || base_ptr != next_ptr) { 156 if (next_ptr == 0 || base_ptr != next_ptr) {
155 return false; 157 return false;
156 } 158 }
@@ -172,12 +174,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
172 rasterizer = rasterizer_; 174 rasterizer = rasterizer_;
173} 175}
174 176
175GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, 177GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind,
176 bool is_big_pages) { 178 bool is_big_pages) {
177 if (is_big_pages) [[likely]] { 179 if (is_big_pages) [[likely]] {
178 return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); 180 return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind);
179 } 181 }
180 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); 182 return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind);
181} 183}
182 184
183GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { 185GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
@@ -202,7 +204,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
202 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 204 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
203} 205}
204 206
205std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { 207std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
206 if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { 208 if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] {
207 return std::nullopt; 209 return std::nullopt;
208 } 210 }
@@ -211,17 +213,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
211 return std::nullopt; 213 return std::nullopt;
212 } 214 }
213 215
214 const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) 216 const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
215 << cpu_page_bits; 217 << cpu_page_bits;
216 return cpu_addr_base + (gpu_addr & page_mask); 218 return dev_addr_base + (gpu_addr & page_mask);
217 } 219 }
218 220
219 const VAddr cpu_addr_base = 221 const DAddr dev_addr_base =
220 static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; 222 static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
221 return cpu_addr_base + (gpu_addr & big_page_mask); 223 return dev_addr_base + (gpu_addr & big_page_mask);
222} 224}
223 225
224std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { 226std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
225 size_t page_index{addr >> page_bits}; 227 size_t page_index{addr >> page_bits};
226 const size_t page_last{(addr + size + page_size - 1) >> page_bits}; 228 const size_t page_last{(addr + size + page_size - 1) >> page_bits};
227 while (page_index < page_last) { 229 while (page_index < page_last) {
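The compressed page-table entries keep their layout; only the decoded type moves from VAddr to DAddr. A sketch of the encode/decode round trip shared by PageTableOp above and GpuToCpuAddress here (index, page_mask and cpu_page_bits are the surrounding members; the snippet is illustrative, not a drop-in):

    // Encode: store the device address with the low cpu_page_bits dropped, as a 32-bit entry.
    page_table[index] = static_cast<u32>(dev_addr >> cpu_page_bits);

    // Decode: shift back up and re-apply the GPU address's in-page offset.
    const DAddr dev_addr_base = static_cast<DAddr>(page_table[index]) << cpu_page_bits;
    const DAddr resolved = dev_addr_base + (gpu_addr & page_mask);

This caps resolvable device addresses at 32 + cpu_page_bits bits, consistent with the smaller device address space introduced in this change.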
@@ -274,7 +276,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
274 return {}; 276 return {};
275 } 277 }
276 278
277 return memory.GetPointer(*address); 279 return memory.GetPointer<u8>(*address);
278} 280}
279 281
280const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { 282const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -283,7 +285,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
283 return {}; 285 return {};
284 } 286 }
285 287
286 return memory.GetPointer(*address); 288 return memory.GetPointer<u8>(*address);
287} 289}
288 290
289#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. 291#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
@@ -367,25 +369,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
367 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 369 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
368 }; 370 };
369 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 371 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
370 const VAddr cpu_addr_base = 372 const DAddr dev_addr_base =
371 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 373 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
372 if constexpr (is_safe) { 374 if constexpr (is_safe) {
373 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 375 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
374 } 376 }
375 u8* physical = memory.GetPointer(cpu_addr_base); 377 u8* physical = memory.GetPointer<u8>(dev_addr_base);
376 std::memcpy(dest_buffer, physical, copy_amount); 378 std::memcpy(dest_buffer, physical, copy_amount);
377 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 379 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
378 }; 380 };
379 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 381 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
380 const VAddr cpu_addr_base = 382 const DAddr dev_addr_base =
381 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 383 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
382 if constexpr (is_safe) { 384 if constexpr (is_safe) {
383 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 385 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
384 } 386 }
385 if (!IsBigPageContinuous(page_index)) [[unlikely]] { 387 if (!IsBigPageContinuous(page_index)) [[unlikely]] {
386 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); 388 memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount);
387 } else { 389 } else {
388 u8* physical = memory.GetPointer(cpu_addr_base); 390 u8* physical = memory.GetPointer<u8>(dev_addr_base);
389 std::memcpy(dest_buffer, physical, copy_amount); 391 std::memcpy(dest_buffer, physical, copy_amount);
390 } 392 }
391 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 393 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -416,25 +418,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
416 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 418 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
417 }; 419 };
418 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 420 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
419 const VAddr cpu_addr_base = 421 const DAddr dev_addr_base =
420 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 422 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
421 if constexpr (is_safe) { 423 if constexpr (is_safe) {
422 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 424 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
423 } 425 }
424 u8* physical = memory.GetPointer(cpu_addr_base); 426 u8* physical = memory.GetPointer<u8>(dev_addr_base);
425 std::memcpy(physical, src_buffer, copy_amount); 427 std::memcpy(physical, src_buffer, copy_amount);
426 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 428 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
427 }; 429 };
428 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 430 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
429 const VAddr cpu_addr_base = 431 const DAddr dev_addr_base =
430 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 432 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
431 if constexpr (is_safe) { 433 if constexpr (is_safe) {
432 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 434 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
433 } 435 }
434 if (!IsBigPageContinuous(page_index)) [[unlikely]] { 436 if (!IsBigPageContinuous(page_index)) [[unlikely]] {
435 memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); 437 memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount);
436 } else { 438 } else {
437 u8* physical = memory.GetPointer(cpu_addr_base); 439 u8* physical = memory.GetPointer<u8>(dev_addr_base);
438 std::memcpy(physical, src_buffer, copy_amount); 440 std::memcpy(physical, src_buffer, copy_amount);
439 } 441 }
440 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 442 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -470,14 +472,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
470 [[maybe_unused]] std::size_t copy_amount) {}; 472 [[maybe_unused]] std::size_t copy_amount) {};
471 473
472 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 474 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
473 const VAddr cpu_addr_base = 475 const DAddr dev_addr_base =
474 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 476 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
475 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 477 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
476 }; 478 };
477 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 479 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
478 const VAddr cpu_addr_base = 480 const DAddr dev_addr_base =
479 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 481 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
480 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 482 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
481 }; 483 };
482 auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, 484 auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
483 std::size_t copy_amount) { 485 std::size_t copy_amount) {
@@ -495,15 +497,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
495 [[maybe_unused]] std::size_t copy_amount) { return false; }; 497 [[maybe_unused]] std::size_t copy_amount) { return false; };
496 498
497 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 499 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
498 const VAddr cpu_addr_base = 500 const DAddr dev_addr_base =
499 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 501 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
500 result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); 502 result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which);
501 return result; 503 return result;
502 }; 504 };
503 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 505 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
504 const VAddr cpu_addr_base = 506 const DAddr dev_addr_base =
505 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 507 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
506 result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); 508 result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which);
507 return result; 509 return result;
508 }; 510 };
509 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 511 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -517,7 +519,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
517} 519}
518 520
519size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { 521size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
520 std::optional<VAddr> old_page_addr{}; 522 std::optional<DAddr> old_page_addr{};
521 size_t range_so_far = 0; 523 size_t range_so_far = 0;
522 bool result{false}; 524 bool result{false};
523 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, 525 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
@@ -526,24 +528,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
526 return true; 528 return true;
527 }; 529 };
528 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 530 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
529 const VAddr cpu_addr_base = 531 const DAddr dev_addr_base =
530 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 532 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
531 if (old_page_addr && *old_page_addr != cpu_addr_base) { 533 if (old_page_addr && *old_page_addr != dev_addr_base) {
532 result = true; 534 result = true;
533 return true; 535 return true;
534 } 536 }
535 range_so_far += copy_amount; 537 range_so_far += copy_amount;
536 old_page_addr = {cpu_addr_base + copy_amount}; 538 old_page_addr = {dev_addr_base + copy_amount};
537 return false; 539 return false;
538 }; 540 };
539 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 541 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
540 const VAddr cpu_addr_base = 542 const DAddr dev_addr_base =
541 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 543 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
542 if (old_page_addr && *old_page_addr != cpu_addr_base) { 544 if (old_page_addr && *old_page_addr != dev_addr_base) {
543 return true; 545 return true;
544 } 546 }
545 range_so_far += copy_amount; 547 range_so_far += copy_amount;
546 old_page_addr = {cpu_addr_base + copy_amount}; 548 old_page_addr = {dev_addr_base + copy_amount};
547 return false; 549 return false;
548 }; 550 };
549 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 551 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -568,14 +570,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
568 [[maybe_unused]] std::size_t copy_amount) {}; 570 [[maybe_unused]] std::size_t copy_amount) {};
569 571
570 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 572 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
571 const VAddr cpu_addr_base = 573 const DAddr dev_addr_base =
572 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 574 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
573 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 575 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
574 }; 576 };
575 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 577 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
576 const VAddr cpu_addr_base = 578 const DAddr dev_addr_base =
577 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 579 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
578 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 580 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
579 }; 581 };
580 auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, 582 auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
581 std::size_t copy_amount) { 583 std::size_t copy_amount) {
@@ -587,7 +589,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
587 589
588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, 590void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
589 VideoCommon::CacheType which) { 591 VideoCommon::CacheType which) {
590 Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( 592 Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data(
591 *this, gpu_src_addr, size); 593 *this, gpu_src_addr, size);
592 data.SetAddressAndSize(gpu_dest_addr, size); 594 data.SetAddressAndSize(gpu_dest_addr, size);
593 FlushRegion(gpu_dest_addr, size, which); 595 FlushRegion(gpu_dest_addr, size, which);
@@ -611,7 +613,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
611} 613}
612 614
613bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { 615bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const {
614 std::optional<VAddr> old_page_addr{}; 616 std::optional<DAddr> old_page_addr{};
615 bool result{true}; 617 bool result{true};
616 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, 618 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
617 std::size_t copy_amount) { 619 std::size_t copy_amount) {
@@ -619,23 +621,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const
619 return true; 621 return true;
620 }; 622 };
621 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 623 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
622 const VAddr cpu_addr_base = 624 const DAddr dev_addr_base =
623 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 625 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
624 if (old_page_addr && *old_page_addr != cpu_addr_base) { 626 if (old_page_addr && *old_page_addr != dev_addr_base) {
625 result = false; 627 result = false;
626 return true; 628 return true;
627 } 629 }
628 old_page_addr = {cpu_addr_base + copy_amount}; 630 old_page_addr = {dev_addr_base + copy_amount};
629 return false; 631 return false;
630 }; 632 };
631 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 633 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
632 const VAddr cpu_addr_base = 634 const DAddr dev_addr_base =
633 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 635 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
634 if (old_page_addr && *old_page_addr != cpu_addr_base) { 636 if (old_page_addr && *old_page_addr != dev_addr_base) {
635 result = false; 637 result = false;
636 return true; 638 return true;
637 } 639 }
638 old_page_addr = {cpu_addr_base + copy_amount}; 640 old_page_addr = {dev_addr_base + copy_amount};
639 return false; 641 return false;
640 }; 642 };
641 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 643 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -678,11 +680,11 @@ template <bool is_gpu_address>
678void MemoryManager::GetSubmappedRangeImpl( 680void MemoryManager::GetSubmappedRangeImpl(
679 GPUVAddr gpu_addr, std::size_t size, 681 GPUVAddr gpu_addr, std::size_t size,
680 boost::container::small_vector< 682 boost::container::small_vector<
681 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) 683 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result)
682 const { 684 const {
683 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> 685 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>>
684 last_segment{}; 686 last_segment{};
685 std::optional<VAddr> old_page_addr{}; 687 std::optional<DAddr> old_page_addr{};
686 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, 688 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
687 [[maybe_unused]] std::size_t offset, 689 [[maybe_unused]] std::size_t offset,
688 [[maybe_unused]] std::size_t copy_amount) { 690 [[maybe_unused]] std::size_t copy_amount) {
@@ -694,20 +696,20 @@ void MemoryManager::GetSubmappedRangeImpl(
694 const auto extend_size_big = [this, &split, &old_page_addr, 696 const auto extend_size_big = [this, &split, &old_page_addr,
695 &last_segment](std::size_t page_index, std::size_t offset, 697 &last_segment](std::size_t page_index, std::size_t offset,
696 std::size_t copy_amount) { 698 std::size_t copy_amount) {
697 const VAddr cpu_addr_base = 699 const DAddr dev_addr_base =
698 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 700 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
699 if (old_page_addr) { 701 if (old_page_addr) {
700 if (*old_page_addr != cpu_addr_base) { 702 if (*old_page_addr != dev_addr_base) {
701 split(0, 0, 0); 703 split(0, 0, 0);
702 } 704 }
703 } 705 }
704 old_page_addr = {cpu_addr_base + copy_amount}; 706 old_page_addr = {dev_addr_base + copy_amount};
705 if (!last_segment) { 707 if (!last_segment) {
706 if constexpr (is_gpu_address) { 708 if constexpr (is_gpu_address) {
707 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; 709 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
708 last_segment = {new_base_addr, copy_amount}; 710 last_segment = {new_base_addr, copy_amount};
709 } else { 711 } else {
710 last_segment = {cpu_addr_base, copy_amount}; 712 last_segment = {dev_addr_base, copy_amount};
711 } 713 }
712 } else { 714 } else {
713 last_segment->second += copy_amount; 715 last_segment->second += copy_amount;
@@ -716,20 +718,20 @@ void MemoryManager::GetSubmappedRangeImpl(
716 const auto extend_size_short = [this, &split, &old_page_addr, 718 const auto extend_size_short = [this, &split, &old_page_addr,
717 &last_segment](std::size_t page_index, std::size_t offset, 719 &last_segment](std::size_t page_index, std::size_t offset,
718 std::size_t copy_amount) { 720 std::size_t copy_amount) {
719 const VAddr cpu_addr_base = 721 const DAddr dev_addr_base =
720 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 722 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
721 if (old_page_addr) { 723 if (old_page_addr) {
722 if (*old_page_addr != cpu_addr_base) { 724 if (*old_page_addr != dev_addr_base) {
723 split(0, 0, 0); 725 split(0, 0, 0);
724 } 726 }
725 } 727 }
726 old_page_addr = {cpu_addr_base + copy_amount}; 728 old_page_addr = {dev_addr_base + copy_amount};
727 if (!last_segment) { 729 if (!last_segment) {
728 if constexpr (is_gpu_address) { 730 if constexpr (is_gpu_address) {
729 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; 731 const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
730 last_segment = {new_base_addr, copy_amount}; 732 last_segment = {new_base_addr, copy_amount};
731 } else { 733 } else {
732 last_segment = {cpu_addr_base, copy_amount}; 734 last_segment = {dev_addr_base, copy_amount};
733 } 735 }
734 } else { 736 } else {
735 last_segment->second += copy_amount; 737 last_segment->second += copy_amount;
@@ -756,9 +758,9 @@ void MemoryManager::FlushCaching() {
756} 758}
757 759
758const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { 760const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const {
759 auto cpu_addr = GpuToCpuAddress(src_addr); 761 auto dev_addr = GpuToCpuAddress(src_addr);
760 if (cpu_addr) { 762 if (dev_addr) {
761 return memory.GetSpan(*cpu_addr, size); 763 return memory.GetSpan(*dev_addr, size);
762 } 764 }
763 return nullptr; 765 return nullptr;
764} 766}
@@ -767,9 +769,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) {
767 if (!IsContinuousRange(src_addr, size)) { 769 if (!IsContinuousRange(src_addr, size)) {
768 return nullptr; 770 return nullptr;
769 } 771 }
770 auto cpu_addr = GpuToCpuAddress(src_addr); 772 auto dev_addr = GpuToCpuAddress(src_addr);
771 if (cpu_addr) { 773 if (dev_addr) {
772 return memory.GetSpan(*cpu_addr, size); 774 return memory.GetSpan(*dev_addr, size);
773 } 775 }
774 return nullptr; 776 return nullptr;
775} 777}
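
Every hunk above swaps the same translation from VAddr to DAddr: a page-table entry now stores a device page number, so the target address is that entry shifted by the page size plus the in-page offset. The following is a minimal sketch of that lookup with simplified, assumed names (GpuToDeviceAddress, a flat std::vector page table); it is illustrative only, while the commit's real code additionally distinguishes short and big pages.

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    using GPUVAddr = std::uint64_t;
    using DAddr = std::uint64_t;

    // Illustrative lookup through a flat page table: entry 0 means unmapped,
    // any other value is the device page number backing that GPU page.
    std::optional<DAddr> GpuToDeviceAddress(const std::vector<std::uint32_t>& page_table,
                                            std::uint64_t page_bits, GPUVAddr gpu_addr) {
        const std::size_t page_index = static_cast<std::size_t>(gpu_addr >> page_bits);
        const std::uint64_t offset = gpu_addr & ((std::uint64_t{1} << page_bits) - 1);
        if (page_index >= page_table.size() || page_table[page_index] == 0) {
            return std::nullopt; // unmapped GPU page
        }
        return (static_cast<DAddr>(page_table[page_index]) << page_bits) + offset;
    }
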
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 9b311b9e5..e2912a73f 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -17,6 +17,7 @@
17#include "common/virtual_buffer.h" 17#include "common/virtual_buffer.h"
18#include "core/memory.h" 18#include "core/memory.h"
19#include "video_core/cache_types.h" 19#include "video_core/cache_types.h"
20#include "video_core/host1x/gpu_device_memory_manager.h"
20#include "video_core/pte_kind.h" 21#include "video_core/pte_kind.h"
21 22
22namespace VideoCore { 23namespace VideoCore {
@@ -28,10 +29,6 @@ class InvalidationAccumulator;
28} 29}
29 30
30namespace Core { 31namespace Core {
31class DeviceMemory;
32namespace Memory {
33class Memory;
34} // namespace Memory
35class System; 32class System;
36} // namespace Core 33} // namespace Core
37 34
@@ -50,9 +47,9 @@ public:
50 /// Binds a renderer to the memory manager. 47 /// Binds a renderer to the memory manager.
51 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 48 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
52 49
53 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; 50 [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const;
54 51
55 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; 52 [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
56 53
57 template <typename T> 54 template <typename T>
58 [[nodiscard]] T Read(GPUVAddr addr) const; 55 [[nodiscard]] T Read(GPUVAddr addr) const;
@@ -110,7 +107,7 @@ public:
110 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; 107 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
111 108
112 /** 109 /**
113 * Checks if a gpu region is mapped by a single range of cpu addresses. 110 * Checks if a gpu region is mapped by a single range of device addresses.
114 */ 111 */
115 [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; 112 [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const;
116 113
@@ -120,14 +117,14 @@ public:
120 [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; 117 [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
121 118
122 /** 119 /**
123 * Returns a vector with all the subranges of cpu addresses mapped beneath. 120 * Returns a vector with all the subranges of device addresses mapped beneath.
124 * If the region is continuous, a single pair will be returned. If it's unmapped, an empty 121 * If the region is continuous, a single pair will be returned. If it's unmapped, an empty
125 * vector will be returned. 122 * vector will be returned.
126 */ 123 */
127 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( 124 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
128 GPUVAddr gpu_addr, std::size_t size) const; 125 GPUVAddr gpu_addr, std::size_t size) const;
129 126
130 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, 127 GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size,
131 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); 128 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
132 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); 129 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
133 void Unmap(GPUVAddr gpu_addr, std::size_t size); 130 void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -186,12 +183,11 @@ private:
186 void GetSubmappedRangeImpl( 183 void GetSubmappedRangeImpl(
187 GPUVAddr gpu_addr, std::size_t size, 184 GPUVAddr gpu_addr, std::size_t size,
188 boost::container::small_vector< 185 boost::container::small_vector<
189 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& 186 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>&
190 result) const; 187 result) const;
191 188
192 Core::System& system; 189 Core::System& system;
193 Core::Memory::Memory& memory; 190 MaxwellDeviceMemoryManager& memory;
194 Core::DeviceMemory& device_memory;
195 191
196 const u64 address_space_bits; 192 const u64 address_space_bits;
197 const u64 page_bits; 193 const u64 page_bits;
@@ -218,11 +214,11 @@ private:
218 std::vector<u64> big_entries; 214 std::vector<u64> big_entries;
219 215
220 template <EntryType entry_type> 216 template <EntryType entry_type>
221 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 217 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
222 PTEKind kind); 218 PTEKind kind);
223 219
224 template <EntryType entry_type> 220 template <EntryType entry_type>
225 GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 221 GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
226 PTEKind kind); 222 PTEKind kind);
227 223
228 template <bool is_big_page> 224 template <bool is_big_page>
@@ -233,11 +229,11 @@ private:
233 229
234 Common::MultiLevelPageTable<u32> page_table; 230 Common::MultiLevelPageTable<u32> page_table;
235 Common::RangeMap<GPUVAddr, PTEKind> kind_map; 231 Common::RangeMap<GPUVAddr, PTEKind> kind_map;
236 Common::VirtualBuffer<u32> big_page_table_cpu; 232 Common::VirtualBuffer<u32> big_page_table_dev;
237 233
238 std::vector<u64> big_page_continuous; 234 std::vector<u64> big_page_continuous;
239 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; 235 boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{};
240 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; 236 boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{};
241 237
242 mutable std::mutex guard; 238 mutable std::mutex guard;
243 239
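
GetSubmappedRangeImpl, whose lambdas are rewritten above, grows last_segment while successive pages translate to contiguous device addresses and starts a new pair whenever they do not. A stand-alone sketch of that coalescing step follows; PushOrExtend is a hypothetical helper, not part of the commit.

    #include <cstdint>
    #include <utility>
    #include <vector>

    using DAddr = std::uint64_t;

    // Append (addr, size) to the list, merging it with the previous segment when
    // it starts exactly where that segment ends.
    void PushOrExtend(std::vector<std::pair<DAddr, std::size_t>>& segments, DAddr addr,
                      std::size_t size) {
        if (!segments.empty()) {
            auto& [last_addr, last_size] = segments.back();
            if (last_addr + last_size == addr) {
                last_size += size; // contiguous: grow the previous segment
                return;
            }
        }
        segments.emplace_back(addr, size); // discontinuity: start a new segment
    }
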
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index a64404ce4..b01d843e4 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -21,6 +21,7 @@
21#include "core/memory.h" 21#include "core/memory.h"
22#include "video_core/control/channel_state_cache.h" 22#include "video_core/control/channel_state_cache.h"
23#include "video_core/engines/maxwell_3d.h" 23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/host1x/gpu_device_memory_manager.h"
24#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
25#include "video_core/rasterizer_interface.h" 26#include "video_core/rasterizer_interface.h"
26#include "video_core/texture_cache/slot_vector.h" 27#include "video_core/texture_cache/slot_vector.h"
@@ -102,18 +103,19 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo
102class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 103class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
103public: 104public:
104 explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, 105 explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_,
105 Core::Memory::Memory& cpu_memory_) 106 Tegra::MaxwellDeviceMemoryManager& device_memory_)
106 : rasterizer{rasterizer_}, 107 : rasterizer{rasterizer_},
107 // Use reinterpret_cast instead of static_cast as workaround for 108 // Use reinterpret_cast instead of static_cast as workaround for
108 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) 109 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
109 cpu_memory{cpu_memory_}, streams{{ 110 device_memory{device_memory_},
110 {CounterStream{reinterpret_cast<QueryCache&>(*this), 111 streams{{
111 VideoCore::QueryType::SamplesPassed}}, 112 {CounterStream{reinterpret_cast<QueryCache&>(*this),
112 {CounterStream{reinterpret_cast<QueryCache&>(*this), 113 VideoCore::QueryType::SamplesPassed}},
113 VideoCore::QueryType::PrimitivesGenerated}}, 114 {CounterStream{reinterpret_cast<QueryCache&>(*this),
114 {CounterStream{reinterpret_cast<QueryCache&>(*this), 115 VideoCore::QueryType::PrimitivesGenerated}},
115 VideoCore::QueryType::TfbPrimitivesWritten}}, 116 {CounterStream{reinterpret_cast<QueryCache&>(*this),
116 }} { 117 VideoCore::QueryType::TfbPrimitivesWritten}},
118 }} {
117 (void)slot_async_jobs.insert(); // Null value 119 (void)slot_async_jobs.insert(); // Null value
118 } 120 }
119 121
@@ -322,13 +324,14 @@ private:
322 local_lock.unlock(); 324 local_lock.unlock();
323 if (timestamp) { 325 if (timestamp) {
324 u64 timestamp_value = *timestamp; 326 u64 timestamp_value = *timestamp;
325 cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); 327 device_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value,
326 cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); 328 sizeof(u64));
329 device_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
327 rasterizer.InvalidateRegion(address, sizeof(u64) * 2, 330 rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
328 VideoCommon::CacheType::NoQueryCache); 331 VideoCommon::CacheType::NoQueryCache);
329 } else { 332 } else {
330 u32 small_value = static_cast<u32>(value); 333 u32 small_value = static_cast<u32>(value);
331 cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); 334 device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
332 rasterizer.InvalidateRegion(address, sizeof(u32), 335 rasterizer.InvalidateRegion(address, sizeof(u32),
333 VideoCommon::CacheType::NoQueryCache); 336 VideoCommon::CacheType::NoQueryCache);
334 } 337 }
@@ -342,7 +345,7 @@ private:
342 SlotVector<AsyncJob> slot_async_jobs; 345 SlotVector<AsyncJob> slot_async_jobs;
343 346
344 VideoCore::RasterizerInterface& rasterizer; 347 VideoCore::RasterizerInterface& rasterizer;
345 Core::Memory::Memory& cpu_memory; 348 Tegra::MaxwellDeviceMemoryManager& device_memory;
346 349
347 mutable std::recursive_mutex mutex; 350 mutable std::recursive_mutex mutex;
348 351
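
The legacy query cache now funnels its write-backs through the device memory manager: the timestamp slot is written first, then the 64-bit (or truncated 32-bit) value, and the touched range is invalidated so other caches notice the update. A toy sketch of that ordering, where FakeDeviceMemory is an assumed stand-in for the real manager and the invalidation calls are left as comments:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Toy backing store for the sketch only; not the MaxwellDeviceMemoryManager API.
    struct FakeDeviceMemory {
        std::vector<std::uint8_t> backing = std::vector<std::uint8_t>(4096);
        void WriteBlockUnsafe(std::uint64_t addr, const void* data, std::size_t size) {
            std::memcpy(backing.data() + addr, data, size); // caller keeps addr in range
        }
    };

    // Mirrors the write-back order used above: timestamp slot, then value,
    // then an invalidation over the written range.
    void WriteQueryResult(FakeDeviceMemory& memory, std::uint64_t address,
                          std::uint64_t value, const std::uint64_t* timestamp) {
        if (timestamp != nullptr) {
            memory.WriteBlockUnsafe(address + sizeof(std::uint64_t), timestamp, sizeof(*timestamp));
            memory.WriteBlockUnsafe(address, &value, sizeof(value));
            // rasterizer.InvalidateRegion(address, 2 * sizeof(std::uint64_t));
        } else {
            const std::uint32_t small_value = static_cast<std::uint32_t>(value);
            memory.WriteBlockUnsafe(address, &small_value, sizeof(small_value));
            // rasterizer.InvalidateRegion(address, sizeof(small_value));
        }
    }
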
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 1d786b3a7..aca6a6447 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23 23
24class QueryBase { 24class QueryBase {
25public: 25public:
26 VAddr guest_address{}; 26 DAddr guest_address{};
27 QueryFlagBits flags{}; 27 QueryFlagBits flags{};
28 u64 value{}; 28 u64 value{};
29 29
@@ -32,7 +32,7 @@ protected:
32 QueryBase() = default; 32 QueryBase() = default;
33 33
34 // Parameterized constructor 34 // Parameterized constructor
35 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) 35 QueryBase(DAddr address, QueryFlagBits flags_, u64 value_)
36 : guest_address(address), flags(flags_), value{value_} {} 36 : guest_address(address), flags(flags_), value{value_} {}
37}; 37};
38 38
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 94f0c4466..508afb10a 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -18,6 +18,7 @@
18#include "core/memory.h" 18#include "core/memory.h"
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h" 20#include "video_core/gpu.h"
21#include "video_core/host1x/gpu_device_memory_manager.h"
21#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
22#include "video_core/query_cache/bank_base.h" 23#include "video_core/query_cache/bank_base.h"
23#include "video_core/query_cache/query_base.h" 24#include "video_core/query_cache/query_base.h"
@@ -113,9 +114,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
113 using RuntimeType = typename Traits::RuntimeType; 114 using RuntimeType = typename Traits::RuntimeType;
114 115
115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, 116 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
116 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) 117 Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
118 Tegra::GPU& gpu_)
117 : owner{owner_}, rasterizer{rasterizer_}, 119 : owner{owner_}, rasterizer{rasterizer_},
118 cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { 120 device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
119 streamer_mask = 0; 121 streamer_mask = 0;
120 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { 122 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
121 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); 123 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -158,7 +160,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
158 160
159 QueryCacheBase<Traits>* owner; 161 QueryCacheBase<Traits>* owner;
160 VideoCore::RasterizerInterface& rasterizer; 162 VideoCore::RasterizerInterface& rasterizer;
161 Core::Memory::Memory& cpu_memory; 163 Tegra::MaxwellDeviceMemoryManager& device_memory;
162 RuntimeType& runtime; 164 RuntimeType& runtime;
163 Tegra::GPU& gpu; 165 Tegra::GPU& gpu;
164 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; 166 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
@@ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
171template <typename Traits> 173template <typename Traits>
172QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, 174QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
173 VideoCore::RasterizerInterface& rasterizer_, 175 VideoCore::RasterizerInterface& rasterizer_,
174 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) 176 Tegra::MaxwellDeviceMemoryManager& device_memory_,
177 RuntimeType& runtime_)
175 : cached_queries{} { 178 : cached_queries{} {
176 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( 179 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
177 this, rasterizer_, cpu_memory_, runtime_, gpu_); 180 this, rasterizer_, device_memory_, runtime_, gpu_);
178} 181}
179 182
180template <typename Traits> 183template <typename Traits>
@@ -240,7 +243,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
240 if (!cpu_addr_opt) [[unlikely]] { 243 if (!cpu_addr_opt) [[unlikely]] {
241 return; 244 return;
242 } 245 }
243 VAddr cpu_addr = *cpu_addr_opt; 246 DAddr cpu_addr = *cpu_addr_opt;
244 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); 247 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
245 auto* query = streamer->GetQuery(new_query_id); 248 auto* query = streamer->GetQuery(new_query_id);
246 if (is_fence) { 249 if (is_fence) {
@@ -253,10 +256,9 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
253 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 256 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
254 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 257 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
255 }; 258 };
256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); 259 u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr);
257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); 260 u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8);
258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence; 261 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
259
260 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, 262 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
261 pointer, pointer_timestamp] { 263 pointer, pointer_timestamp] {
262 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { 264 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
@@ -559,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
559 } 561 }
560 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && 562 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
561 False(query_base->flags & QueryFlagBits::IsGuestSynced)) { 563 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
562 auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); 564 auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address);
563 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { 565 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
564 std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); 566 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
565 return false; 567 return false;
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
index 07be421c6..7720456ff 100644
--- a/src/video_core/query_cache/query_cache_base.h
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -17,10 +17,7 @@
17#include "video_core/control/channel_state_cache.h" 17#include "video_core/control/channel_state_cache.h"
18#include "video_core/query_cache/query_base.h" 18#include "video_core/query_cache/query_base.h"
19#include "video_core/query_cache/types.h" 19#include "video_core/query_cache/types.h"
20 20#include "video_core/host1x/gpu_device_memory_manager.h"
21namespace Core::Memory {
22class Memory;
23}
24 21
25namespace VideoCore { 22namespace VideoCore {
26class RasterizerInterface; 23class RasterizerInterface;
@@ -53,7 +50,7 @@ public:
53 }; 50 };
54 51
55 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, 52 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); 53 Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_);
57 54
58 ~QueryCacheBase(); 55 ~QueryCacheBase();
59 56
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
deleted file mode 100644
index f200a650f..000000000
--- a/src/video_core/rasterizer_accelerated.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <atomic>
5
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/div_ceil.h"
9#include "core/memory.h"
10#include "video_core/rasterizer_accelerated.h"
11
12namespace VideoCore {
13
14using namespace Core::Memory;
15
16RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_)
17 : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {}
18
19RasterizerAccelerated::~RasterizerAccelerated() = default;
20
21void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
22 u64 uncache_begin = 0;
23 u64 cache_begin = 0;
24 u64 uncache_bytes = 0;
25 u64 cache_bytes = 0;
26
27 std::atomic_thread_fence(std::memory_order_acquire);
28 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
29 for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
30 std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
31
32 if (delta > 0) {
33 ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
34 } else if (delta < 0) {
35 ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
36 } else {
37 ASSERT_MSG(false, "Delta must be non-zero!");
38 }
39
40 // Adds or subtracts 1, as count is an unsigned 16-bit value
41 count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
42
43 // Assume delta is either -1 or 1
44 if (count.load(std::memory_order::relaxed) == 0) {
45 if (uncache_bytes == 0) {
46 uncache_begin = page;
47 }
48 uncache_bytes += YUZU_PAGESIZE;
49 } else if (uncache_bytes > 0) {
50 cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes,
51 false);
52 uncache_bytes = 0;
53 }
54 if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
55 if (cache_bytes == 0) {
56 cache_begin = page;
57 }
58 cache_bytes += YUZU_PAGESIZE;
59 } else if (cache_bytes > 0) {
60 cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
61 cache_bytes = 0;
62 }
63 }
64 if (uncache_bytes > 0) {
65 cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false);
66 }
67 if (cache_bytes > 0) {
68 cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
69 }
70}
71
72} // namespace VideoCore
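
The deleted helper above implemented per-page reference counting: each cached object bumps a counter for every page it touches, and runs of pages whose state flips between cached and uncached are reported to the memory subsystem in single batched calls. A compact sketch of that batching idea, with assumed names (counts, mark_cached) and without the atomics of the original:

    #include <cstdint>
    #include <functional>
    #include <vector>

    // Adjust per-page reference counts for [addr, addr + size) and report each
    // maximal run of pages whose cached state changed with one callback.
    void UpdatePagesCachedCount(std::vector<std::uint16_t>& counts, std::uint64_t page_size,
                                std::uint64_t addr, std::uint64_t size, int delta,
                                const std::function<void(std::uint64_t, std::uint64_t, bool)>& mark_cached) {
        const std::uint64_t first_page = addr / page_size;
        const std::uint64_t last_page = (addr + size + page_size - 1) / page_size;
        std::uint64_t run_begin = 0;
        std::uint64_t run_bytes = 0;
        bool run_cached = false;
        auto flush_run = [&] {
            if (run_bytes != 0) {
                mark_cached(run_begin * page_size, run_bytes, run_cached);
                run_bytes = 0;
            }
        };
        for (std::uint64_t page = first_page; page != last_page; ++page) {
            counts[page] = static_cast<std::uint16_t>(counts[page] + delta);
            const bool flipped_on = delta > 0 && counts[page] == 1;  // 0 -> 1: cache it
            const bool flipped_off = delta < 0 && counts[page] == 0; // 1 -> 0: uncache it
            if (!flipped_on && !flipped_off) {
                flush_run(); // this page does not change state, so the run ends here
                continue;
            }
            const bool cached = flipped_on;
            if (run_bytes != 0 && cached != run_cached) {
                flush_run(); // state direction changed, emit the previous run
            }
            if (run_bytes == 0) {
                run_begin = page;
                run_cached = cached;
            }
            run_bytes += page_size;
        }
        flush_run();
    }
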
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
deleted file mode 100644
index e6c0ea87a..000000000
--- a/src/video_core/rasterizer_accelerated.h
+++ /dev/null
@@ -1,49 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <atomic>
8
9#include "common/common_types.h"
10#include "video_core/rasterizer_interface.h"
11
12namespace Core::Memory {
13class Memory;
14}
15
16namespace VideoCore {
17
18/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
19class RasterizerAccelerated : public RasterizerInterface {
20public:
21 explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_);
22 ~RasterizerAccelerated() override;
23
24 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
25
26private:
27 class CacheEntry final {
28 public:
29 CacheEntry() = default;
30
31 std::atomic_uint16_t& Count(std::size_t page) {
32 return values[page & 3];
33 }
34
35 const std::atomic_uint16_t& Count(std::size_t page) const {
36 return values[page & 3];
37 }
38
39 private:
40 std::array<std::atomic_uint16_t, 4> values{};
41 };
42 static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
43
44 using CachedPages = std::array<CacheEntry, 0x2000000>;
45 std::unique_ptr<CachedPages> cached_pages;
46 Core::Memory::Memory& cpu_memory;
47};
48
49} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 49224ca85..8fa4e4d9a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -86,35 +86,35 @@ public:
86 virtual void FlushAll() = 0; 86 virtual void FlushAll() = 0;
87 87
88 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 88 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
89 virtual void FlushRegion(VAddr addr, u64 size, 89 virtual void FlushRegion(DAddr addr, u64 size,
90 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 90 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
91 91
92 /// Check if the specified memory area requires flushing to CPU Memory. 92 /// Check if the specified memory area requires flushing to CPU Memory.
93 virtual bool MustFlushRegion(VAddr addr, u64 size, 93 virtual bool MustFlushRegion(DAddr addr, u64 size,
94 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 94 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
95 95
96 virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; 96 virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0;
97 97
98 /// Notify rasterizer that any caches of the specified region should be invalidated 98 /// Notify rasterizer that any caches of the specified region should be invalidated
99 virtual void InvalidateRegion(VAddr addr, u64 size, 99 virtual void InvalidateRegion(DAddr addr, u64 size,
100 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 100 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
101 101
102 virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { 102 virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
103 for (const auto& [cpu_addr, size] : sequences) { 103 for (const auto& [cpu_addr, size] : sequences) {
104 InvalidateRegion(cpu_addr, size); 104 InvalidateRegion(cpu_addr, size);
105 } 105 }
106 } 106 }
107 107
108 /// Notify rasterizer that any caches of the specified region are out of sync with the guest 108 /// Notify rasterizer that any caches of the specified region are out of sync with the guest
109 virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; 109 virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0;
110 110
111 virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; 111 virtual bool OnCPUWrite(PAddr addr, u64 size) = 0;
112 112
113 /// Sync memory between guest and host. 113 /// Sync memory between guest and host.
114 virtual void InvalidateGPUCache() = 0; 114 virtual void InvalidateGPUCache() = 0;
115 115
116 /// Unmap memory range 116 /// Unmap memory range
117 virtual void UnmapMemory(VAddr addr, u64 size) = 0; 117 virtual void UnmapMemory(DAddr addr, u64 size) = 0;
118 118
119 /// Remap GPU memory range. This means underneath backing memory changed 119 /// Remap GPU memory range. This means underneath backing memory changed
120 virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; 120 virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
@@ -122,7 +122,7 @@ public:
122 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 122 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
123 /// and invalidated 123 /// and invalidated
124 virtual void FlushAndInvalidateRegion( 124 virtual void FlushAndInvalidateRegion(
125 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 125 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
126 126
127 /// Notify the host renderer to wait for previous primitive and compute operations. 127 /// Notify the host renderer to wait for previous primitive and compute operations.
128 virtual void WaitForIdle() = 0; 128 virtual void WaitForIdle() = 0;
@@ -157,13 +157,10 @@ public:
157 157
158 /// Attempt to use a faster method to display the framebuffer to screen 158 /// Attempt to use a faster method to display the framebuffer to screen
159 [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, 159 [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
160 VAddr framebuffer_addr, u32 pixel_stride) { 160 DAddr framebuffer_addr, u32 pixel_stride) {
161 return false; 161 return false;
162 } 162 }
163 163
164 /// Increase/decrease the number of object in pages touching the specified region
165 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
166
167 /// Initialize disk cached resources for the game being emulated 164 /// Initialize disk cached resources for the game being emulated
168 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 165 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
169 const DiskResourceLoadCallback& callback) {} 166 const DiskResourceLoadCallback& callback) {}
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 4f1d5b548..11b93fdc9 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -19,8 +19,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
19 return true; 19 return true;
20} 20}
21 21
22RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) 22RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {}
23 : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {}
24RasterizerNull::~RasterizerNull() = default; 23RasterizerNull::~RasterizerNull() = default;
25 24
26void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} 25void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
@@ -45,16 +44,16 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
45 u32 size) {} 44 u32 size) {}
46void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} 45void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {}
47void RasterizerNull::FlushAll() {} 46void RasterizerNull::FlushAll() {}
48void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 47void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
49bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { 48bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {
50 return false; 49 return false;
51} 50}
52void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 51void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
53bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { 52bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) {
54 return false; 53 return false;
55} 54}
56void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} 55void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {}
57VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { 56VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) {
58 VideoCore::RasterizerDownloadArea new_area{ 57 VideoCore::RasterizerDownloadArea new_area{
59 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), 58 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
60 .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), 59 .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE),
@@ -63,7 +62,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 s
63 return new_area; 62 return new_area;
64} 63}
65void RasterizerNull::InvalidateGPUCache() {} 64void RasterizerNull::InvalidateGPUCache() {}
66void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} 65void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {}
67void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} 66void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {}
68void RasterizerNull::SignalFence(std::function<void()>&& func) { 67void RasterizerNull::SignalFence(std::function<void()>&& func) {
69 func(); 68 func();
@@ -78,7 +77,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) {
78} 77}
79void RasterizerNull::SignalReference() {} 78void RasterizerNull::SignalReference() {}
80void RasterizerNull::ReleaseFences(bool) {} 79void RasterizerNull::ReleaseFences(bool) {}
81void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 80void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
82void RasterizerNull::WaitForIdle() {} 81void RasterizerNull::WaitForIdle() {}
83void RasterizerNull::FragmentBarrier() {} 82void RasterizerNull::FragmentBarrier() {}
84void RasterizerNull::TiledCacheBarrier() {} 83void RasterizerNull::TiledCacheBarrier() {}
@@ -95,7 +94,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac
95void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 94void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
96 std::span<const u8> memory) {} 95 std::span<const u8> memory) {}
97bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, 96bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
98 VAddr framebuffer_addr, u32 pixel_stride) { 97 DAddr framebuffer_addr, u32 pixel_stride) {
99 return true; 98 return true;
100} 99}
101void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 100void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 23001eeb8..a5789604f 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -6,7 +6,6 @@
6#include "common/common_types.h" 6#include "common/common_types.h"
7#include "video_core/control/channel_state_cache.h" 7#include "video_core/control/channel_state_cache.h"
8#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
9#include "video_core/rasterizer_accelerated.h"
10#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
11 10
12namespace Core { 11namespace Core {
@@ -32,10 +31,10 @@ public:
32 } 31 }
33}; 32};
34 33
35class RasterizerNull final : public VideoCore::RasterizerAccelerated, 34class RasterizerNull final : public VideoCore::RasterizerInterface,
36 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 35 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
37public: 36public:
38 explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); 37 explicit RasterizerNull(Tegra::GPU& gpu);
39 ~RasterizerNull() override; 38 ~RasterizerNull() override;
40 39
41 void Draw(bool is_indexed, u32 instance_count) override; 40 void Draw(bool is_indexed, u32 instance_count) override;
@@ -48,17 +47,17 @@ public:
48 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 47 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
49 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 48 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
50 void FlushAll() override; 49 void FlushAll() override;
51 void FlushRegion(VAddr addr, u64 size, 50 void FlushRegion(DAddr addr, u64 size,
52 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 51 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
53 bool MustFlushRegion(VAddr addr, u64 size, 52 bool MustFlushRegion(DAddr addr, u64 size,
54 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 53 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
55 void InvalidateRegion(VAddr addr, u64 size, 54 void InvalidateRegion(DAddr addr, u64 size,
56 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 55 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
57 void OnCacheInvalidation(VAddr addr, u64 size) override; 56 void OnCacheInvalidation(DAddr addr, u64 size) override;
58 bool OnCPUWrite(VAddr addr, u64 size) override; 57 bool OnCPUWrite(DAddr addr, u64 size) override;
59 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 58 VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override;
60 void InvalidateGPUCache() override; 59 void InvalidateGPUCache() override;
61 void UnmapMemory(VAddr addr, u64 size) override; 60 void UnmapMemory(DAddr addr, u64 size) override;
62 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 61 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
63 void SignalFence(std::function<void()>&& func) override; 62 void SignalFence(std::function<void()>&& func) override;
64 void SyncOperation(std::function<void()>&& func) override; 63 void SyncOperation(std::function<void()>&& func) override;
@@ -66,7 +65,7 @@ public:
66 void SignalReference() override; 65 void SignalReference() override;
67 void ReleaseFences(bool force) override; 66 void ReleaseFences(bool force) override;
68 void FlushAndInvalidateRegion( 67 void FlushAndInvalidateRegion(
69 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 68 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
70 void WaitForIdle() override; 69 void WaitForIdle() override;
71 void FragmentBarrier() override; 70 void FragmentBarrier() override;
72 void TiledCacheBarrier() override; 71 void TiledCacheBarrier() override;
@@ -78,7 +77,7 @@ public:
78 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 77 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
79 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 78 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
80 std::span<const u8> memory) override; 79 std::span<const u8> memory) override;
81 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 80 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
82 u32 pixel_stride) override; 81 u32 pixel_stride) override;
83 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 82 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
84 const VideoCore::DiskResourceLoadCallback& callback) override; 83 const VideoCore::DiskResourceLoadCallback& callback) override;
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp
index be92cc2f4..078feb925 100644
--- a/src/video_core/renderer_null/renderer_null.cpp
+++ b/src/video_core/renderer_null/renderer_null.cpp
@@ -7,10 +7,9 @@
7 7
8namespace Null { 8namespace Null {
9 9
10RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, 10RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
11 Tegra::GPU& gpu,
12 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 11 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
13 : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} 12 : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {}
14 13
15RendererNull::~RendererNull() = default; 14RendererNull::~RendererNull() = default;
16 15
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h
index 967ff5645..9531b43f6 100644
--- a/src/video_core/renderer_null/renderer_null.h
+++ b/src/video_core/renderer_null/renderer_null.h
@@ -13,8 +13,7 @@ namespace Null {
13 13
14class RendererNull final : public VideoCore::RendererBase { 14class RendererNull final : public VideoCore::RendererBase {
15public: 15public:
16 explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, 16 explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
17 Tegra::GPU& gpu,
18 std::unique_ptr<Core::Frontend::GraphicsContext> context); 17 std::unique_ptr<Core::Frontend::GraphicsContext> context);
19 ~RendererNull() override; 18 ~RendererNull() override;
20 19
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 517ac14dd..ade72e1f9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{
47} // Anonymous namespace 47} // Anonymous namespace
48 48
49Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) 49Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
50 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} 50 : VideoCommon::BufferBase(null_params) {}
51 51
52Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 52Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_)
53 VAddr cpu_addr_, u64 size_bytes_) 53 : VideoCommon::BufferBase(cpu_addr_, size_bytes_) {
54 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
55 buffer.Create(); 54 buffer.Create();
56 if (runtime.device.HasDebuggingToolAttached()) { 55 if (runtime.device.HasDebuggingToolAttached()) {
57 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); 56 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 2c18de166..e6ad030cb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_cache_base.h" 11#include "video_core/buffer_cache/buffer_cache_base.h"
12#include "video_core/buffer_cache/memory_tracker_base.h" 12#include "video_core/buffer_cache/memory_tracker_base.h"
13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_opengl/gl_device.h" 13#include "video_core/renderer_opengl/gl_device.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" 15#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
@@ -19,9 +18,9 @@ namespace OpenGL {
19 18
20class BufferCacheRuntime; 19class BufferCacheRuntime;
21 20
22class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { 21class Buffer : public VideoCommon::BufferBase {
23public: 22public:
24 explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, 23 explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr,
25 u64 size_bytes); 24 u64 size_bytes);
26 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); 25 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
27 26
@@ -244,7 +243,7 @@ struct BufferCacheParams {
244 using Runtime = OpenGL::BufferCacheRuntime; 243 using Runtime = OpenGL::BufferCacheRuntime;
245 using Buffer = OpenGL::Buffer; 244 using Buffer = OpenGL::Buffer;
246 using Async_Buffer = OpenGL::StagingBufferMap; 245 using Async_Buffer = OpenGL::StagingBufferMap;
247 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; 246 using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>;
248 247
249 static constexpr bool IS_OPENGL = true; 248 static constexpr bool IS_OPENGL = true;
250 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; 249 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index fef7360ed..567292e1c 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -35,8 +35,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
35 35
36} // Anonymous namespace 36} // Anonymous namespace
37 37
38QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) 38QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
39 : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { 39 : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} {
40 EnableCounters(); 40 EnableCounters();
41} 41}
42 42
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 0721e0b3d..04a1b39c9 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -11,6 +11,7 @@
11#include "video_core/query_cache.h" 11#include "video_core/query_cache.h"
12#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/host1x/gpu_device_memory_manager.h"
14 15
15namespace Core { 16namespace Core {
16class System; 17class System;
@@ -28,7 +29,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
28class QueryCache final 29class QueryCache final
29 : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { 30 : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> {
30public: 31public:
31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); 32 explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_);
32 ~QueryCache(); 33 ~QueryCache();
33 34
34 OGLQuery AllocateQuery(VideoCore::QueryType type); 35 OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7a5fad735..ca31e2fbd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy
70} // Anonymous namespace 70} // Anonymous namespace
71 71
72RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 72RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
73 Core::Memory::Memory& cpu_memory_, const Device& device_, 73 Tegra::MaxwellDeviceMemoryManager& device_memory_,
74 ScreenInfo& screen_info_, ProgramManager& program_manager_, 74 const Device& device_, ScreenInfo& screen_info_,
75 StateTracker& state_tracker_) 75 ProgramManager& program_manager_, StateTracker& state_tracker_)
76 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), 76 : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_),
77 program_manager(program_manager_), state_tracker(state_tracker_), 77 program_manager(program_manager_), state_tracker(state_tracker_),
78 texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), 78 texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
79 texture_cache(texture_cache_runtime, *this), 79 texture_cache(texture_cache_runtime, device_memory_),
80 buffer_cache_runtime(device, staging_buffer_pool), 80 buffer_cache_runtime(device, staging_buffer_pool),
81 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 81 buffer_cache(device_memory_, buffer_cache_runtime),
82 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, 82 shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache,
83 state_tracker, gpu.ShaderNotify()), 83 program_manager, state_tracker, gpu.ShaderNotify()),
84 query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), 84 query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache),
85 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 85 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
86 blit_image(program_manager_) {} 86 blit_image(program_manager_) {}
87 87
@@ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
475 475
476void RasterizerOpenGL::FlushAll() {} 476void RasterizerOpenGL::FlushAll() {}
477 477
478void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 478void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
479 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 479 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
480 if (addr == 0 || size == 0) { 480 if (addr == 0 || size == 0) {
481 return; 481 return;
@@ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
493 } 493 }
494} 494}
495 495
496bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 496bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
497 if ((True(which & VideoCommon::CacheType::BufferCache))) { 497 if ((True(which & VideoCommon::CacheType::BufferCache))) {
498 std::scoped_lock lock{buffer_cache.mutex}; 498 std::scoped_lock lock{buffer_cache.mutex};
499 if (buffer_cache.IsRegionGpuModified(addr, size)) { 499 if (buffer_cache.IsRegionGpuModified(addr, size)) {
@@ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
510 return false; 510 return false;
511} 511}
512 512
513VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { 513VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) {
514 { 514 {
515 std::scoped_lock lock{texture_cache.mutex}; 515 std::scoped_lock lock{texture_cache.mutex};
516 auto area = texture_cache.GetFlushArea(addr, size); 516 auto area = texture_cache.GetFlushArea(addr, size);
@@ -533,7 +533,7 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64
533 return new_area; 533 return new_area;
534} 534}
535 535
536void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 536void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
537 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 537 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
538 if (addr == 0 || size == 0) { 538 if (addr == 0 || size == 0) {
539 return; 539 return;
@@ -554,8 +554,9 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
554 } 554 }
555} 555}
556 556
557bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { 557bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
558 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 558 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
559 const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
559 if (addr == 0 || size == 0) { 560 if (addr == 0 || size == 0) {
560 return false; 561 return false;
561 } 562 }
@@ -576,8 +577,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
576 return false; 577 return false;
577} 578}
578 579
579void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { 580void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) {
580 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 581 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
582 const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
581 if (addr == 0 || size == 0) { 583 if (addr == 0 || size == 0) {
582 return; 584 return;
583 } 585 }
@@ -596,7 +598,7 @@ void RasterizerOpenGL::InvalidateGPUCache() {
596 gpu.InvalidateGPUCache(); 598 gpu.InvalidateGPUCache();
597} 599}
598 600
599void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { 601void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) {
600 { 602 {
601 std::scoped_lock lock{texture_cache.mutex}; 603 std::scoped_lock lock{texture_cache.mutex};
602 texture_cache.UnmapMemory(addr, size); 604 texture_cache.UnmapMemory(addr, size);
@@ -635,7 +637,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) {
635 fence_manager.WaitPendingFences(force); 637 fence_manager.WaitPendingFences(force);
636} 638}
637 639
638void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, 640void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size,
639 VideoCommon::CacheType which) { 641 VideoCommon::CacheType which) {
640 if (Settings::IsGPULevelExtreme()) { 642 if (Settings::IsGPULevelExtreme()) {
641 FlushRegion(addr, size, which); 643 FlushRegion(addr, size, which);
@@ -739,7 +741,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
739} 741}
740 742
741bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, 743bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
742 VAddr framebuffer_addr, u32 pixel_stride) { 744 DAddr framebuffer_addr, u32 pixel_stride) {
743 if (framebuffer_addr == 0) { 745 if (framebuffer_addr == 0) {
744 return false; 746 return false;
745 } 747 }
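
The OnCPUWrite and OnCacheInvalidation hunks above now receive a PAddr from the CPU side and translate it into the SMMU's device address space (DAddr) before touching the caches, bailing out when the page is not device-visible. The following is a minimal, self-contained sketch of that translation pattern; only the GetAddressFromPAddr call shape mirrors the diff, the stub manager and its page map are purely illustrative:

    #include <cstdint>
    #include <unordered_map>

    using PAddr = std::uint64_t; // host/guest physical address
    using DAddr = std::uint64_t; // SMMU device address

    // Illustrative stand-in for the manager declared in
    // video_core/host1x/gpu_device_memory_manager.h.
    class StubDeviceMemoryManager {
    public:
        void Map(PAddr p_addr, DAddr d_addr) {
            p_to_d[p_addr >> 12] = d_addr >> 12;
        }

        // Same call shape as the diff: returns 0 when the physical page is not
        // mapped into the device address space.
        DAddr GetAddressFromPAddr(PAddr p_addr) const {
            const auto it = p_to_d.find(p_addr >> 12);
            if (it == p_to_d.end()) {
                return 0;
            }
            return (it->second << 12) | (p_addr & 0xFFF);
        }

    private:
        std::unordered_map<std::uint64_t, std::uint64_t> p_to_d;
    };

    // Shape of the reworked callback: translate first, then return early when
    // the write does not land in device-visible memory.
    bool OnCPUWrite(StubDeviceMemoryManager& device_memory, PAddr p_addr, std::uint64_t size) {
        const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
        if (addr == 0 || size == 0) {
            return false;
        }
        // ... invalidate (or queue invalidation of) the texture, buffer and
        // shader caches over [addr, addr + size) ...
        return true;
    }

Both the OpenGL and Vulkan rasterizers follow this shape, so the caches themselves only ever see DAddr ranges.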
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ce3460938..f197774ed 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -14,7 +14,6 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "video_core/control/channel_state_cache.h" 15#include "video_core/control/channel_state_cache.h"
16#include "video_core/engines/maxwell_dma.h" 16#include "video_core/engines/maxwell_dma.h"
17#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_opengl/blit_image.h" 18#include "video_core/renderer_opengl/blit_image.h"
20#include "video_core/renderer_opengl/gl_buffer_cache.h" 19#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -72,11 +71,11 @@ private:
72 TextureCache& texture_cache; 71 TextureCache& texture_cache;
73}; 72};
74 73
75class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, 74class RasterizerOpenGL : public VideoCore::RasterizerInterface,
76 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 75 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
77public: 76public:
78 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 77 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
79 Core::Memory::Memory& cpu_memory_, const Device& device_, 78 Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_,
80 ScreenInfo& screen_info_, ProgramManager& program_manager_, 79 ScreenInfo& screen_info_, ProgramManager& program_manager_,
81 StateTracker& state_tracker_); 80 StateTracker& state_tracker_);
82 ~RasterizerOpenGL() override; 81 ~RasterizerOpenGL() override;
@@ -92,17 +91,17 @@ public:
92 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 91 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
93 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 92 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
94 void FlushAll() override; 93 void FlushAll() override;
95 void FlushRegion(VAddr addr, u64 size, 94 void FlushRegion(DAddr addr, u64 size,
96 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 95 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
97 bool MustFlushRegion(VAddr addr, u64 size, 96 bool MustFlushRegion(DAddr addr, u64 size,
98 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 97 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
99 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 98 VideoCore::RasterizerDownloadArea GetFlushArea(PAddr addr, u64 size) override;
100 void InvalidateRegion(VAddr addr, u64 size, 99 void InvalidateRegion(DAddr addr, u64 size,
101 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 100 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
102 void OnCacheInvalidation(VAddr addr, u64 size) override; 101 void OnCacheInvalidation(PAddr addr, u64 size) override;
103 bool OnCPUWrite(VAddr addr, u64 size) override; 102 bool OnCPUWrite(PAddr addr, u64 size) override;
104 void InvalidateGPUCache() override; 103 void InvalidateGPUCache() override;
105 void UnmapMemory(VAddr addr, u64 size) override; 104 void UnmapMemory(DAddr addr, u64 size) override;
106 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 105 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
107 void SignalFence(std::function<void()>&& func) override; 106 void SignalFence(std::function<void()>&& func) override;
108 void SyncOperation(std::function<void()>&& func) override; 107 void SyncOperation(std::function<void()>&& func) override;
@@ -110,7 +109,7 @@ public:
110 void SignalReference() override; 109 void SignalReference() override;
111 void ReleaseFences(bool force = true) override; 110 void ReleaseFences(bool force = true) override;
112 void FlushAndInvalidateRegion( 111 void FlushAndInvalidateRegion(
113 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 112 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
114 void WaitForIdle() override; 113 void WaitForIdle() override;
115 void FragmentBarrier() override; 114 void FragmentBarrier() override;
116 void TiledCacheBarrier() override; 115 void TiledCacheBarrier() override;
@@ -123,7 +122,7 @@ public:
123 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 122 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
124 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 123 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
125 std::span<const u8> memory) override; 124 std::span<const u8> memory) override;
126 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 125 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
127 u32 pixel_stride) override; 126 u32 pixel_stride) override;
128 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 127 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
129 const VideoCore::DiskResourceLoadCallback& callback) override; 128 const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -235,6 +234,7 @@ private:
235 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); 234 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
236 235
237 Tegra::GPU& gpu; 236 Tegra::GPU& gpu;
237 Tegra::MaxwellDeviceMemoryManager& device_memory;
238 238
239 const Device& device; 239 const Device& device;
240 ScreenInfo& screen_info; 240 ScreenInfo& screen_info;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 30df41b7d..50462cdde 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
168} 168}
169} // Anonymous namespace 169} // Anonymous namespace
170 170
171ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 171ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
172 const Device& device_, TextureCache& texture_cache_, 172 Core::Frontend::EmuWindow& emu_window_, const Device& device_,
173 BufferCache& buffer_cache_, ProgramManager& program_manager_, 173 TextureCache& texture_cache_, BufferCache& buffer_cache_,
174 StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) 174 ProgramManager& program_manager_, StateTracker& state_tracker_,
175 : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, 175 VideoCore::ShaderNotify& shader_notify_)
176 : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_},
176 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, 177 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
177 state_tracker{state_tracker_}, shader_notify{shader_notify_}, 178 state_tracker{state_tracker_}, shader_notify{shader_notify_},
178 use_asynchronous_shaders{device.UseAsynchronousShaders()}, 179 use_asynchronous_shaders{device.UseAsynchronousShaders()},
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6b9732fca..5ac413529 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -17,7 +17,7 @@
17 17
18namespace Tegra { 18namespace Tegra {
19class MemoryManager; 19class MemoryManager;
20} 20} // namespace Tegra
21 21
22namespace OpenGL { 22namespace OpenGL {
23 23
@@ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
28 28
29class ShaderCache : public VideoCommon::ShaderCache { 29class ShaderCache : public VideoCommon::ShaderCache {
30public: 30public:
31 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 31 explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
32 const Device& device_, TextureCache& texture_cache_, 32 Core::Frontend::EmuWindow& emu_window_, const Device& device_,
33 BufferCache& buffer_cache_, ProgramManager& program_manager_, 33 TextureCache& texture_cache_, BufferCache& buffer_cache_,
34 StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); 34 ProgramManager& program_manager_, StateTracker& state_tracker_,
35 VideoCore::ShaderNotify& shader_notify_);
35 ~ShaderCache(); 36 ~ShaderCache();
36 37
37 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 38 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2933718b6..821a045ad 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -144,12 +144,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
144 144
145RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, 145RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
146 Core::Frontend::EmuWindow& emu_window_, 146 Core::Frontend::EmuWindow& emu_window_,
147 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 147 Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
148 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 148 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
149 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, 149 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
150 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, 150 emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_},
151 state_tracker{}, program_manager{device}, 151 state_tracker{}, program_manager{device},
152 rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { 152 rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager,
153 state_tracker) {
153 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { 154 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
154 glEnable(GL_DEBUG_OUTPUT); 155 glEnable(GL_DEBUG_OUTPUT);
155 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); 156 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
@@ -242,7 +243,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
242 const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; 243 const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
243 const u64 size_in_bytes{Tegra::Texture::CalculateSize( 244 const u64 size_in_bytes{Tegra::Texture::CalculateSize(
244 true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; 245 true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
245 const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; 246 const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
246 const std::span<const u8> input_data(host_ptr, size_in_bytes); 247 const std::span<const u8> input_data(host_ptr, size_in_bytes);
247 Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, 248 Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
248 framebuffer.width, framebuffer.height, 1, block_height_log2, 249 framebuffer.width, framebuffer.height, 1, block_height_log2,
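
Framebuffer reads in LoadFBToScreenInfo above now resolve a host pointer through the device memory manager with a typed GetPointer<u8>(DAddr) instead of cpu_memory.GetPointer(VAddr). A compact sketch of that access pattern, assuming a stub backing store in place of the real manager (only the GetPointer call shape comes from the diff):

    #include <cstdint>
    #include <span>
    #include <vector>

    using u8 = std::uint8_t;
    using u64 = std::uint64_t;
    using DAddr = std::uint64_t;

    // Stand-in exposing the typed-pointer lookup used by the renderer.
    class StubDeviceMemory {
    public:
        explicit StubDeviceMemory(std::size_t bytes) : backing(bytes) {}

        template <typename T>
        T* GetPointer(DAddr addr) {
            if (addr + sizeof(T) > backing.size()) {
                return nullptr; // unmapped device address
            }
            return reinterpret_cast<T*>(backing.data() + addr);
        }

    private:
        std::vector<u8> backing;
    };

    // Mirrors the framebuffer read: resolve a host pointer for the device
    // address, then view it as a byte span for unswizzling. Bounds on the full
    // span are left to the caller in this sketch.
    std::span<const u8> ReadFramebuffer(StubDeviceMemory& device_memory, DAddr framebuffer_addr,
                                        u64 size_in_bytes) {
        const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);
        if (host_ptr == nullptr) {
            return {};
        }
        return std::span<const u8>(host_ptr, size_in_bytes);
    }

The same pattern appears again in the Vulkan blit screen and query cache changes further below.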
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b70607635..18699610a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase {
61public: 61public:
62 explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, 62 explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
63 Core::Frontend::EmuWindow& emu_window_, 63 Core::Frontend::EmuWindow& emu_window_,
64 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 64 Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
65 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 65 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
66 ~RendererOpenGL() override; 66 ~RendererOpenGL() override;
67 67
@@ -101,7 +101,7 @@ private:
101 101
102 Core::TelemetrySession& telemetry_session; 102 Core::TelemetrySession& telemetry_session;
103 Core::Frontend::EmuWindow& emu_window; 103 Core::Frontend::EmuWindow& emu_window;
104 Core::Memory::Memory& cpu_memory; 104 Tegra::MaxwellDeviceMemoryManager& device_memory;
105 Tegra::GPU& gpu; 105 Tegra::GPU& gpu;
106 106
107 Device device; 107 Device device;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 100b70918..0e1815076 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
82 82
83RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 83RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
84 Core::Frontend::EmuWindow& emu_window, 84 Core::Frontend::EmuWindow& emu_window,
85 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 85 Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
86 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try 86 std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
87 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), 87 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
88 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), 88 device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
89 instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, 89 instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
90 Settings::values.renderer_debug.GetValue())), 90 Settings::values.renderer_debug.GetValue())),
91 debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) 91 debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance)
@@ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
97 render_window.GetFramebufferLayout().height), 97 render_window.GetFramebufferLayout().height),
98 present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, 98 present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
99 surface), 99 surface),
100 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, 100 blit_screen(device_memory, render_window, device, memory_allocator, swapchain, present_manager,
101 scheduler, screen_info), 101 scheduler, screen_info),
102 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 102 rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator,
103 state_tracker, scheduler) { 103 state_tracker, scheduler) {
104 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { 104 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
105 turbo_mode.emplace(instance, dld); 105 turbo_mode.emplace(instance, dld);
@@ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
128 screen_info.width = framebuffer->width; 128 screen_info.width = framebuffer->width;
129 screen_info.height = framebuffer->height; 129 screen_info.height = framebuffer->height;
130 130
131 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 131 const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
132 const bool use_accelerated = 132 const bool use_accelerated =
133 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 133 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
134 RenderScreenshot(*framebuffer, use_accelerated); 134 RenderScreenshot(*framebuffer, use_accelerated);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 14e257cf7..e5ce4692d 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -20,6 +20,7 @@
20#include "video_core/vulkan_common/vulkan_device.h" 20#include "video_core/vulkan_common/vulkan_device.h"
21#include "video_core/vulkan_common/vulkan_memory_allocator.h" 21#include "video_core/vulkan_common/vulkan_memory_allocator.h"
22#include "video_core/vulkan_common/vulkan_wrapper.h" 22#include "video_core/vulkan_common/vulkan_wrapper.h"
23#include "video_core/host1x/gpu_device_memory_manager.h"
23 24
24namespace Core { 25namespace Core {
25class TelemetrySession; 26class TelemetrySession;
@@ -42,7 +43,7 @@ class RendererVulkan final : public VideoCore::RendererBase {
42public: 43public:
43 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 44 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
44 Core::Frontend::EmuWindow& emu_window, 45 Core::Frontend::EmuWindow& emu_window,
45 Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, 46 Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
46 std::unique_ptr<Core::Frontend::GraphicsContext> context_); 47 std::unique_ptr<Core::Frontend::GraphicsContext> context_);
47 ~RendererVulkan() override; 48 ~RendererVulkan() override;
48 49
@@ -62,7 +63,7 @@ private:
62 void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); 63 void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
63 64
64 Core::TelemetrySession& telemetry_session; 65 Core::TelemetrySession& telemetry_session;
65 Core::Memory::Memory& cpu_memory; 66 Tegra::MaxwellDeviceMemoryManager& device_memory;
66 Tegra::GPU& gpu; 67 Tegra::GPU& gpu;
67 68
68 std::shared_ptr<Common::DynamicLibrary> library; 69 std::shared_ptr<Common::DynamicLibrary> library;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 60432f5ad..610f27c84 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -14,8 +14,8 @@
14#include "common/settings.h" 14#include "common/settings.h"
15#include "core/core.h" 15#include "core/core.h"
16#include "core/frontend/emu_window.h" 16#include "core/frontend/emu_window.h"
17#include "core/memory.h"
18#include "video_core/gpu.h" 17#include "video_core/gpu.h"
18#include "video_core/host1x/gpu_device_memory_manager.h"
19#include "video_core/host_shaders/fxaa_frag_spv.h" 19#include "video_core/host_shaders/fxaa_frag_spv.h"
20#include "video_core/host_shaders/fxaa_vert_spv.h" 20#include "video_core/host_shaders/fxaa_vert_spv.h"
21#include "video_core/host_shaders/present_bicubic_frag_spv.h" 21#include "video_core/host_shaders/present_bicubic_frag_spv.h"
@@ -121,11 +121,12 @@ struct BlitScreen::BufferData {
121 // Unaligned image data goes here 121 // Unaligned image data goes here
122}; 122};
123 123
124BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, 124BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_,
125 const Device& device_, MemoryAllocator& memory_allocator_, 125 Core::Frontend::EmuWindow& render_window_, const Device& device_,
126 Swapchain& swapchain_, PresentManager& present_manager_, 126 MemoryAllocator& memory_allocator_, Swapchain& swapchain_,
127 Scheduler& scheduler_, const ScreenInfo& screen_info_) 127 PresentManager& present_manager_, Scheduler& scheduler_,
128 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, 128 const ScreenInfo& screen_info_)
129 : device_memory{device_memory_}, render_window{render_window_}, device{device_},
129 memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, 130 memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
130 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 131 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
131 resource_ticks.resize(image_count); 132 resource_ticks.resize(image_count);
@@ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
219 if (!use_accelerated) { 220 if (!use_accelerated) {
220 const u64 image_offset = GetRawImageOffset(framebuffer); 221 const u64 image_offset = GetRawImageOffset(framebuffer);
221 222
222 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 223 const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
223 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 224 const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);
224 225
225 // TODO(Rodrigo): Read this from HLE 226 // TODO(Rodrigo): Read this from HLE
226 constexpr u32 block_height_log2 = 4; 227 constexpr u32 block_height_log2 = 4;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 78b32416d..cb941a956 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -8,15 +8,12 @@
8#include "core/frontend/framebuffer_layout.h" 8#include "core/frontend/framebuffer_layout.h"
9#include "video_core/vulkan_common/vulkan_memory_allocator.h" 9#include "video_core/vulkan_common/vulkan_memory_allocator.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11#include "video_core/host1x/gpu_device_memory_manager.h"
11 12
12namespace Core { 13namespace Core {
13class System; 14class System;
14} 15}
15 16
16namespace Core::Memory {
17class Memory;
18}
19
20namespace Core::Frontend { 17namespace Core::Frontend {
21class EmuWindow; 18class EmuWindow;
22} 19}
@@ -56,7 +53,7 @@ struct ScreenInfo {
56 53
57class BlitScreen { 54class BlitScreen {
58public: 55public:
59 explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, 56 explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, Core::Frontend::EmuWindow& render_window,
60 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, 57 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
61 PresentManager& present_manager, Scheduler& scheduler, 58 PresentManager& present_manager, Scheduler& scheduler,
62 const ScreenInfo& screen_info); 59 const ScreenInfo& screen_info);
@@ -109,7 +106,7 @@ private:
109 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; 106 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
110 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; 107 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
111 108
112 Core::Memory::Memory& cpu_memory; 109 Tegra::MaxwellDeviceMemoryManager& device_memory;
113 Core::Frontend::EmuWindow& render_window; 110 Core::Frontend::EmuWindow& render_window;
114 const Device& device; 111 const Device& device;
115 MemoryAllocator& memory_allocator; 112 MemoryAllocator& memory_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 3c61799fa..31001d142 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
79} // Anonymous namespace 79} // Anonymous namespace
80 80
81Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) 81Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params)
82 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { 82 : VideoCommon::BufferBase(null_params), tracker{4096} {
83 if (runtime.device.HasNullDescriptor()) { 83 if (runtime.device.HasNullDescriptor()) {
84 return; 84 return;
85 } 85 }
@@ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p
88 is_null = true; 88 is_null = true;
89} 89}
90 90
91Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 91Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_)
92 VAddr cpu_addr_, u64 size_bytes_) 92 : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device},
93 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), 93 buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} {
94 device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
95 tracker{SizeBytes()} {
96 if (runtime.device.HasDebuggingToolAttached()) { 94 if (runtime.device.HasDebuggingToolAttached()) {
97 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); 95 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
98 } 96 }
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index dc300d7cb..e273f4988 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -23,11 +23,10 @@ struct HostVertexBinding;
23 23
24class BufferCacheRuntime; 24class BufferCacheRuntime;
25 25
26class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { 26class Buffer : public VideoCommon::BufferBase {
27public: 27public:
28 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); 28 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
29 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 29 explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_);
30 VAddr cpu_addr_, u64 size_bytes_);
31 30
32 [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); 31 [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
33 32
@@ -173,7 +172,7 @@ struct BufferCacheParams {
173 using Runtime = Vulkan::BufferCacheRuntime; 172 using Runtime = Vulkan::BufferCacheRuntime;
174 using Buffer = Vulkan::Buffer; 173 using Buffer = Vulkan::Buffer;
175 using Async_Buffer = Vulkan::StagingBufferRef; 174 using Async_Buffer = Vulkan::StagingBufferRef;
176 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; 175 using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>;
177 176
178 static constexpr bool IS_OPENGL = false; 177 static constexpr bool IS_OPENGL = false;
179 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; 178 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
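
In the buffer cache changes above, Buffer no longer derives from a BufferBase templated on VideoCore::RasterizerInterface and no longer takes a rasterizer reference, and the MemoryTracker is now parameterized on Tegra::MaxwellDeviceMemoryManager. A reduced sketch of that kind of dependency swap follows; the class names echo the diff but the bodies are illustrative only:

    #include <cstdint>

    using DAddr = std::uint64_t;
    using u64 = std::uint64_t;

    // Illustrative stand-in for the device memory manager.
    struct StubDeviceMemoryManager {
        void TrackWrite(DAddr, u64) {}
    };

    // The tracker is generic over the memory provider rather than the
    // rasterizer interface, so write tracking talks to device memory directly.
    template <typename DeviceMemory>
    class MemoryTrackerSketch {
    public:
        explicit MemoryTrackerSketch(DeviceMemory& memory_) : memory{memory_} {}
        void MarkRegionAsCpuModified(DAddr addr, u64 size) {
            memory.TrackWrite(addr, size);
        }

    private:
        DeviceMemory& memory;
    };

    // Buffers are keyed purely by a device address and size; no rasterizer
    // reference is threaded through the constructor any more.
    class BufferSketch {
    public:
        BufferSketch(DAddr cpu_addr_, u64 size_bytes_)
            : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
        DAddr CpuAddr() const { return cpu_addr; }
        u64 SizeBytes() const { return size_bytes; }

    private:
        DAddr cpu_addr;
        u64 size_bytes;
    };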
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d1841198d..bec20c21a 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -30,7 +30,6 @@
30#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 30#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
31#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 31#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
32#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
33#include "video_core/renderer_vulkan/vk_rasterizer.h"
34#include "video_core/renderer_vulkan/vk_scheduler.h" 33#include "video_core/renderer_vulkan/vk_scheduler.h"
35#include "video_core/renderer_vulkan/vk_shader_util.h" 34#include "video_core/renderer_vulkan/vk_shader_util.h"
36#include "video_core/renderer_vulkan/vk_update_descriptor.h" 35#include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -299,12 +298,12 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
299 return std::memcmp(&rhs, this, Size()) == 0; 298 return std::memcmp(&rhs, this, Size()) == 0;
300} 299}
301 300
302PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, 301PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_,
303 Scheduler& scheduler_, DescriptorPool& descriptor_pool_, 302 Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
304 GuestDescriptorQueue& guest_descriptor_queue_, 303 GuestDescriptorQueue& guest_descriptor_queue_,
305 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, 304 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
306 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) 305 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
307 : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, 306 : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_},
308 descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, 307 descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
309 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, 308 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
310 texture_cache{texture_cache_}, shader_notify{shader_notify_}, 309 texture_cache{texture_cache_}, shader_notify{shader_notify_},
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e323ea0fd..354fdc8ed 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -26,6 +26,7 @@
26#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 26#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
27#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
28#include "video_core/shader_cache.h" 28#include "video_core/shader_cache.h"
29#include "video_core/host1x/gpu_device_memory_manager.h"
29 30
30namespace Core { 31namespace Core {
31class System; 32class System;
@@ -79,7 +80,6 @@ class ComputePipeline;
79class DescriptorPool; 80class DescriptorPool;
80class Device; 81class Device;
81class PipelineStatistics; 82class PipelineStatistics;
82class RasterizerVulkan;
83class RenderPassCache; 83class RenderPassCache;
84class Scheduler; 84class Scheduler;
85 85
@@ -99,7 +99,7 @@ struct ShaderPools {
99 99
100class PipelineCache : public VideoCommon::ShaderCache { 100class PipelineCache : public VideoCommon::ShaderCache {
101public: 101public:
102 explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, 102 explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device, Scheduler& scheduler,
103 DescriptorPool& descriptor_pool, 103 DescriptorPool& descriptor_pool,
104 GuestDescriptorQueue& guest_descriptor_queue, 104 GuestDescriptorQueue& guest_descriptor_queue,
105 RenderPassCache& render_pass_cache, BufferCache& buffer_cache, 105 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ad4caf688..d59fe698c 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -14,7 +14,9 @@
14#include "common/bit_util.h" 14#include "common/bit_util.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "core/memory.h" 16#include "core/memory.h"
17#include "video_core/rasterizer_interface.h"
17#include "video_core/engines/draw_manager.h" 18#include "video_core/engines/draw_manager.h"
19#include "video_core/host1x/gpu_device_memory_manager.h"
18#include "video_core/query_cache/query_cache.h" 20#include "video_core/query_cache/query_cache.h"
19#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
20#include "video_core/renderer_vulkan/vk_compute_pass.h" 22#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -102,7 +104,7 @@ private:
102using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; 104using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>;
103 105
104struct HostSyncValues { 106struct HostSyncValues {
105 VAddr address; 107 DAddr address;
106 size_t size; 108 size_t size;
107 size_t offset; 109 size_t offset;
108 110
@@ -317,7 +319,7 @@ public:
317 pending_sync.clear(); 319 pending_sync.clear();
318 } 320 }
319 321
320 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 322 size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
321 [[maybe_unused]] std::optional<u32> subreport) override { 323 [[maybe_unused]] std::optional<u32> subreport) override {
322 PauseCounter(); 324 PauseCounter();
323 auto index = BuildQuery(); 325 auto index = BuildQuery();
@@ -738,7 +740,7 @@ public:
738 pending_sync.clear(); 740 pending_sync.clear();
739 } 741 }
740 742
741 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 743 size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
742 std::optional<u32> subreport_) override { 744 std::optional<u32> subreport_) override {
743 auto index = BuildQuery(); 745 auto index = BuildQuery();
744 auto* new_query = GetQuery(index); 746 auto* new_query = GetQuery(index);
@@ -769,9 +771,9 @@ public:
769 return index; 771 return index;
770 } 772 }
771 773
772 std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { 774 std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) {
773 if (last_queries[stream] != 0) { 775 if (last_queries[stream] != 0) {
774 std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); 776 std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
775 return result; 777 return result;
776 } 778 }
777 return std::nullopt; 779 return std::nullopt;
@@ -974,7 +976,7 @@ private:
974 size_t buffers_count{}; 976 size_t buffers_count{};
975 std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; 977 std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
976 std::array<VkDeviceSize, NUM_STREAMS> offsets{}; 978 std::array<VkDeviceSize, NUM_STREAMS> offsets{};
977 std::array<VAddr, NUM_STREAMS> last_queries; 979 std::array<DAddr, NUM_STREAMS> last_queries;
978 std::array<size_t, NUM_STREAMS> last_queries_stride; 980 std::array<size_t, NUM_STREAMS> last_queries_stride;
979 Maxwell3D::Regs::PrimitiveTopology out_topology; 981 Maxwell3D::Regs::PrimitiveTopology out_topology;
980 u64 streams_mask; 982 u64 streams_mask;
@@ -987,7 +989,7 @@ public:
987 : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} 989 : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
988 990
989 // Parameterized constructor 991 // Parameterized constructor
990 PrimitivesQueryBase(bool has_timestamp, VAddr address) 992 PrimitivesQueryBase(bool has_timestamp, DAddr address)
991 : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { 993 : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
992 if (has_timestamp) { 994 if (has_timestamp) {
993 flags |= VideoCommon::QueryFlagBits::HasTimestamp; 995 flags |= VideoCommon::QueryFlagBits::HasTimestamp;
@@ -995,7 +997,7 @@ public:
995 } 997 }
996 998
997 u64 stride{}; 999 u64 stride{};
998 VAddr dependant_address{}; 1000 DAddr dependant_address{};
999 Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; 1001 Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
1000 size_t dependant_index{}; 1002 size_t dependant_index{};
1001 bool dependant_manage{}; 1003 bool dependant_manage{};
@@ -1005,15 +1007,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive
1005public: 1007public:
1006 explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, 1008 explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
1007 TFBCounterStreamer& tfb_streamer_, 1009 TFBCounterStreamer& tfb_streamer_,
1008 Core::Memory::Memory& cpu_memory_) 1010 Tegra::MaxwellDeviceMemoryManager& device_memory_)
1009 : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, 1011 : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
1010 tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { 1012 tfb_streamer{tfb_streamer_}, device_memory{device_memory_} {
1011 MakeDependent(&tfb_streamer); 1013 MakeDependent(&tfb_streamer);
1012 } 1014 }
1013 1015
1014 ~PrimitivesSucceededStreamer() = default; 1016 ~PrimitivesSucceededStreamer() = default;
1015 1017
1016 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 1018 size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
1017 std::optional<u32> subreport_) override { 1019 std::optional<u32> subreport_) override {
1018 auto index = BuildQuery(); 1020 auto index = BuildQuery();
1019 auto* new_query = GetQuery(index); 1021 auto* new_query = GetQuery(index);
@@ -1063,6 +1065,8 @@ public:
1063 } 1065 }
1064 }); 1066 });
1065 } 1067 }
1068 auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
1069 ASSERT(ptr != nullptr);
1066 1070
1067 new_query->dependant_manage = must_manage_dependance; 1071 new_query->dependant_manage = must_manage_dependance;
1068 pending_flush_queries.push_back(index); 1072 pending_flush_queries.push_back(index);
@@ -1100,7 +1104,7 @@ public:
1100 num_vertices = dependant_query->value / query->stride; 1104 num_vertices = dependant_query->value / query->stride;
1101 tfb_streamer.Free(query->dependant_index); 1105 tfb_streamer.Free(query->dependant_index);
1102 } else { 1106 } else {
1103 u8* pointer = cpu_memory.GetPointer(query->dependant_address); 1107 u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
1104 u32 result; 1108 u32 result;
1105 std::memcpy(&result, pointer, sizeof(u32)); 1109 std::memcpy(&result, pointer, sizeof(u32));
1106 num_vertices = static_cast<u64>(result) / query->stride; 1110 num_vertices = static_cast<u64>(result) / query->stride;
@@ -1137,7 +1141,7 @@ public:
1137private: 1141private:
1138 QueryCacheRuntime& runtime; 1142 QueryCacheRuntime& runtime;
1139 TFBCounterStreamer& tfb_streamer; 1143 TFBCounterStreamer& tfb_streamer;
1140 Core::Memory::Memory& cpu_memory; 1144 Tegra::MaxwellDeviceMemoryManager& device_memory;
1141 1145
1142 // syncing queue 1146 // syncing queue
1143 std::vector<size_t> pending_sync; 1147 std::vector<size_t> pending_sync;
@@ -1152,12 +1156,12 @@ private:
1152 1156
1153struct QueryCacheRuntimeImpl { 1157struct QueryCacheRuntimeImpl {
1154 QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, 1158 QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_,
1155 Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, 1159 Tegra::MaxwellDeviceMemoryManager& device_memory_, Vulkan::BufferCache& buffer_cache_,
1156 const Device& device_, const MemoryAllocator& memory_allocator_, 1160 const Device& device_, const MemoryAllocator& memory_allocator_,
1157 Scheduler& scheduler_, StagingBufferPool& staging_pool_, 1161 Scheduler& scheduler_, StagingBufferPool& staging_pool_,
1158 ComputePassDescriptorQueue& compute_pass_descriptor_queue, 1162 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
1159 DescriptorPool& descriptor_pool) 1163 DescriptorPool& descriptor_pool)
1160 : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, 1164 : rasterizer{rasterizer_}, device_memory{device_memory_},
1161 buffer_cache{buffer_cache_}, device{device_}, 1165 buffer_cache{buffer_cache_}, device{device_},
1162 memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, 1166 memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
1163 guest_streamer(0, runtime), 1167 guest_streamer(0, runtime),
@@ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl {
1168 scheduler, memory_allocator, staging_pool), 1172 scheduler, memory_allocator, staging_pool),
1169 primitives_succeeded_streamer( 1173 primitives_succeeded_streamer(
1170 static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, 1174 static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
1171 cpu_memory_), 1175 device_memory_),
1172 primitives_needed_minus_succeeded_streamer( 1176 primitives_needed_minus_succeeded_streamer(
1173 static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), 1177 static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
1174 hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { 1178 hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} {
@@ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl {
1195 } 1199 }
1196 1200
1197 VideoCore::RasterizerInterface* rasterizer; 1201 VideoCore::RasterizerInterface* rasterizer;
1198 Core::Memory::Memory& cpu_memory; 1202 Tegra::MaxwellDeviceMemoryManager& device_memory;
1199 Vulkan::BufferCache& buffer_cache; 1203 Vulkan::BufferCache& buffer_cache;
1200 1204
1201 const Device& device; 1205 const Device& device;
@@ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl {
1210 PrimitivesSucceededStreamer primitives_succeeded_streamer; 1214 PrimitivesSucceededStreamer primitives_succeeded_streamer;
1211 VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; 1215 VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer;
1212 1216
1213 std::vector<std::pair<VAddr, VAddr>> little_cache; 1217 std::vector<std::pair<DAddr, DAddr>> little_cache;
1214 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; 1218 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
1215 std::vector<size_t> redirect_cache; 1219 std::vector<size_t> redirect_cache;
1216 std::vector<std::vector<VkBufferCopy>> copies_setup; 1220 std::vector<std::vector<VkBufferCopy>> copies_setup;
@@ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl {
1229}; 1233};
1230 1234
1231QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, 1235QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
1232 Core::Memory::Memory& cpu_memory_, 1236 Tegra::MaxwellDeviceMemoryManager& device_memory_,
1233 Vulkan::BufferCache& buffer_cache_, const Device& device_, 1237 Vulkan::BufferCache& buffer_cache_, const Device& device_,
1234 const MemoryAllocator& memory_allocator_, 1238 const MemoryAllocator& memory_allocator_,
1235 Scheduler& scheduler_, StagingBufferPool& staging_pool_, 1239 Scheduler& scheduler_, StagingBufferPool& staging_pool_,
1236 ComputePassDescriptorQueue& compute_pass_descriptor_queue, 1240 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
1237 DescriptorPool& descriptor_pool) { 1241 DescriptorPool& descriptor_pool) {
1238 impl = std::make_unique<QueryCacheRuntimeImpl>( 1242 impl = std::make_unique<QueryCacheRuntimeImpl>(
1239 *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, 1243 *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
1240 staging_pool_, compute_pass_descriptor_queue, descriptor_pool); 1244 staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
1241} 1245}
1242 1246
@@ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
1309 ResumeHostConditionalRendering(); 1313 ResumeHostConditionalRendering();
1310} 1314}
1311 1315
1312void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { 1316void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
1313 VkBuffer to_resolve; 1317 VkBuffer to_resolve;
1314 u32 to_resolve_offset; 1318 u32 to_resolve_offset;
1315 { 1319 {
@@ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
1350 return false; 1354 return false;
1351 } 1355 }
1352 1356
1353 const auto check_in_bc = [&](VAddr address) { 1357 const auto check_in_bc = [&](DAddr address) {
1354 return impl->buffer_cache.IsRegionGpuModified(address, 8); 1358 return impl->buffer_cache.IsRegionGpuModified(address, 8);
1355 }; 1359 };
1356 const auto check_value = [&](VAddr address) { 1360 const auto check_value = [&](DAddr address) {
1357 u8* ptr = impl->cpu_memory.GetPointer(address); 1361 u8* ptr = impl->device_memory.GetPointer<u8>(address);
1358 u64 value{}; 1362 u64 value{};
1359 std::memcpy(&value, ptr, sizeof(value)); 1363 std::memcpy(&value, ptr, sizeof(value));
1360 return value == 0; 1364 return value == 0;
@@ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
1477 for (auto& sync_val : values) { 1481 for (auto& sync_val : values) {
1478 total_size += sync_val.size; 1482 total_size += sync_val.size;
1479 bool found = false; 1483 bool found = false;
1480 VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); 1484 DAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE);
1481 VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; 1485 DAddr base_end = base + Core::Memory::YUZU_PAGESIZE;
1482 for (size_t i = 0; i < impl->little_cache.size(); i++) { 1486 for (size_t i = 0; i < impl->little_cache.size(); i++) {
1483 const auto set_found = [&] { 1487 const auto set_found = [&] {
1484 impl->redirect_cache.push_back(i); 1488 impl->redirect_cache.push_back(i);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index e9a1ea169..f6151123e 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl;
27class QueryCacheRuntime { 27class QueryCacheRuntime {
28public: 28public:
29 explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, 29 explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
30 Core::Memory::Memory& cpu_memory_, 30 Tegra::MaxwellDeviceMemoryManager& device_memory_,
31 Vulkan::BufferCache& buffer_cache_, const Device& device_, 31 Vulkan::BufferCache& buffer_cache_, const Device& device_,
32 const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, 32 const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
33 StagingBufferPool& staging_pool_, 33 StagingBufferPool& staging_pool_,
@@ -61,7 +61,7 @@ public:
61 61
62private: 62private:
63 void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); 63 void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal);
64 void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); 64 void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal);
65 friend struct QueryCacheRuntimeImpl; 65 friend struct QueryCacheRuntimeImpl;
66 std::unique_ptr<QueryCacheRuntimeImpl> impl; 66 std::unique_ptr<QueryCacheRuntimeImpl> impl;
67}; 67};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 241fc34be..efcc349a0 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -18,6 +18,7 @@
18#include "video_core/engines/draw_manager.h" 18#include "video_core/engines/draw_manager.h"
19#include "video_core/engines/kepler_compute.h" 19#include "video_core/engines/kepler_compute.h"
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/host1x/gpu_device_memory_manager.h"
21#include "video_core/renderer_vulkan/blit_image.h" 22#include "video_core/renderer_vulkan/blit_image.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
23#include "video_core/renderer_vulkan/maxwell_to_vk.h" 24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -37,6 +38,7 @@
37#include "video_core/vulkan_common/vulkan_device.h" 38#include "video_core/vulkan_common/vulkan_device.h"
38#include "video_core/vulkan_common/vulkan_wrapper.h" 39#include "video_core/vulkan_common/vulkan_wrapper.h"
39 40
41
40namespace Vulkan { 42namespace Vulkan {
41 43
42using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -163,10 +165,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
163} // Anonymous namespace 165} // Anonymous namespace
164 166
165RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 167RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                   Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
-                                   const Device& device_, MemoryAllocator& memory_allocator_,
-                                   StateTracker& state_tracker_, Scheduler& scheduler_)
-    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+                                   Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                                   ScreenInfo& screen_info_, const Device& device_,
+                                   MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
+                                   Scheduler& scheduler_)
+    : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_},
       memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
@@ -174,14 +177,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       texture_cache_runtime{
          device, scheduler, memory_allocator, staging_pool,
          blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue},
-      texture_cache(texture_cache_runtime, *this),
+      texture_cache(texture_cache_runtime, device_memory),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
-      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
-      query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
+      buffer_cache(device_memory, buffer_cache_runtime),
+      query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler,
                           staging_pool, compute_pass_descriptor_queue, descriptor_pool),
-      query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
-      pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
+      query_cache(gpu, *this, device_memory, query_cache_runtime),
+      pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue,
                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
       accelerate_dma(buffer_cache, texture_cache, scheduler),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
@@ -508,7 +511,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in

 void RasterizerVulkan::FlushAll() {}

-void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
     if (addr == 0 || size == 0) {
         return;
     }
@@ -525,7 +528,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
     }
 }

-bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
     if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         if (buffer_cache.IsRegionGpuModified(addr, size)) {
@@ -542,7 +545,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
     return false;
 }

-VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) {
+VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
         auto area = texture_cache.GetFlushArea(addr, size);
@@ -558,7 +561,7 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64
     return new_area;
 }

-void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
     if (addr == 0 || size == 0) {
         return;
     }
@@ -578,7 +581,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
     }
 }

-void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
+void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
     {
         std::scoped_lock lock{texture_cache.mutex};
         for (const auto& [addr, size] : sequences) {
@@ -599,7 +602,8 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
     }
 }

-bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
+    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
     if (addr == 0 || size == 0) {
         return false;
     }
@@ -620,7 +624,8 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     return false;
 }

-void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
+void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) {
+    const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
     if (addr == 0 || size == 0) {
         return;
     }
@@ -640,7 +645,7 @@ void RasterizerVulkan::InvalidateGPUCache() {
     gpu.InvalidateGPUCache();
 }

-void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
+void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.UnmapMemory(addr, size);
@@ -679,7 +684,7 @@ void RasterizerVulkan::ReleaseFences(bool force) {
     fence_manager.WaitPendingFences(force);
 }

-void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
+void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size,
                                                 VideoCommon::CacheType which) {
     if (Settings::IsGPULevelExtreme()) {
         FlushRegion(addr, size, which);
@@ -782,7 +787,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
 }

 bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
-                                         VAddr framebuffer_addr, u32 pixel_stride) {
+                                         DAddr framebuffer_addr, u32 pixel_stride) {
     if (!framebuffer_addr) {
         return false;
     }
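The hunks above move the Vulkan rasterizer from guest virtual addresses (VAddr) to SMMU device addresses (DAddr), with CPU write notifications arriving as physical addresses (PAddr) that get translated first. A minimal sketch of that translation step, using only the names shown in the diff; the free function itself is hypothetical:

// Hypothetical helper mirroring the first line of OnCPUWrite()/OnCacheInvalidation() above:
// the SMMU-backed device memory manager maps a host-physical page into the device
// address space that the buffer/texture/query caches are keyed on.
DAddr TranslateForCaches(Tegra::MaxwellDeviceMemoryManager& device_memory, PAddr p_addr) {
    return device_memory.GetAddressFromPAddr(p_addr);
}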
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ad069556c..d593f35df 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -12,7 +12,6 @@
 #include "common/common_types.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_dma.h"
-#include "video_core/rasterizer_accelerated.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@@ -25,6 +24,7 @@
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"

 namespace Core {
 class System;
@@ -34,10 +34,14 @@ namespace Core::Frontend {
 class EmuWindow;
 }

-namespace Tegra::Engines {
+namespace Tegra {
+
+namespace Engines {
 class Maxwell3D;
 }

+} // namespace Tegra
+
 namespace Vulkan {

 struct ScreenInfo;
@@ -70,13 +74,14 @@ private:
     Scheduler& scheduler;
 };

-class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
+class RasterizerVulkan final : public VideoCore::RasterizerInterface,
                                protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                              Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
-                              const Device& device_, MemoryAllocator& memory_allocator_,
-                              StateTracker& state_tracker_, Scheduler& scheduler_);
+                              Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                              ScreenInfo& screen_info_, const Device& device_,
+                              MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
+                              Scheduler& scheduler_);
     ~RasterizerVulkan() override;

     void Draw(bool is_indexed, u32 instance_count) override;
@@ -90,18 +95,18 @@ public:
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size,
+    void FlushRegion(DAddr addr, u64 size,
                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    bool MustFlushRegion(VAddr addr, u64 size,
+    bool MustFlushRegion(DAddr addr, u64 size,
                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size,
+    VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override;
+    void InvalidateRegion(DAddr addr, u64 size,
                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
-    void OnCacheInvalidation(VAddr addr, u64 size) override;
-    bool OnCPUWrite(VAddr addr, u64 size) override;
+    void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override;
+    void OnCacheInvalidation(DAddr addr, u64 size) override;
+    bool OnCPUWrite(DAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
-    void UnmapMemory(VAddr addr, u64 size) override;
+    void UnmapMemory(DAddr addr, u64 size) override;
     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalFence(std::function<void()>&& func) override;
     void SyncOperation(std::function<void()>&& func) override;
@@ -109,7 +114,7 @@ public:
     void SignalReference() override;
     void ReleaseFences(bool force = true) override;
     void FlushAndInvalidateRegion(
-        VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+        DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void WaitForIdle() override;
     void FragmentBarrier() override;
     void TiledCacheBarrier() override;
@@ -122,7 +127,7 @@ public:
     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
                                   std::span<const u8> memory) override;
-    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
+    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
                            u32 pixel_stride) override;
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -176,6 +181,7 @@ private:
     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);

     Tegra::GPU& gpu;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;

     ScreenInfo& screen_info;
     const Device& device;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index e81cd031b..86fd62428 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -12,6 +12,7 @@
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_environment.h"
@@ -34,7 +35,7 @@ void ShaderCache::SyncGuestHost() {
     RemovePendingShaders();
 }

-ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_) : device_memory{device_memory_} {}

 bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
     auto& dirty{maxwell3d->dirty.flags};
@@ -132,7 +133,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t

     storage.push_back(std::move(data));

-    rasterizer.UpdatePagesCachedCount(addr, size, 1);
+    device_memory.UpdatePagesCachedCount(addr, size, 1);
 }

 void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
@@ -209,7 +210,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {

     const VAddr addr = entry->addr_start;
     const size_t size = entry->addr_end - addr;
-    rasterizer.UpdatePagesCachedCount(addr, size, -1);
+    device_memory.UpdatePagesCachedCount(addr, size, -1);
 }

 void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
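With this change the shader cache pins and unpins guest pages directly on the device memory manager instead of going through the rasterizer. A minimal sketch of that pairing, assuming only UpdatePagesCachedCount as used in the hunks above; the guard class itself is hypothetical, not part of the commit:

// Hypothetical RAII helper mirroring Register()/UnmarkMemory() above:
// +1 pins the pages while an entry is cached, -1 releases them when it is dropped.
class ScopedCachedPages {
public:
    ScopedCachedPages(Tegra::MaxwellDeviceMemoryManager& device_memory_, VAddr addr_, size_t size_)
        : device_memory{device_memory_}, addr{addr_}, size{size_} {
        device_memory.UpdatePagesCachedCount(addr, size, 1);
    }
    ~ScopedCachedPages() {
        device_memory.UpdatePagesCachedCount(addr, size, -1);
    }

private:
    Tegra::MaxwellDeviceMemoryManager& device_memory;
    VAddr addr;
    size_t size;
};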
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index a76896620..02ef39483 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -16,6 +16,7 @@
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/shader_environment.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"

 namespace Tegra {
 class MemoryManager;
@@ -77,7 +78,7 @@ protected:
         }
     };

-    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
+    explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory);

     /// @brief Update the hashes and information of shader stages
     /// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -145,7 +146,7 @@ private:
     /// @brief Create a new shader entry and register it
     const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);

-    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;

     mutable std::mutex lookup_mutex;
     std::mutex invalidation_mutex;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0d5a1709f..7398ed2ec 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -8,10 +8,11 @@

 #include "common/alignment.h"
 #include "common/settings.h"
-#include "core/memory.h"
 #include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
+#include "video_core/guest_memory.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/texture_cache/image_view_base.h"
 #include "video_core/texture_cache/samples_helper.h"
 #include "video_core/texture_cache/texture_cache_base.h"
@@ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;

 template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
-    : runtime{runtime_}, rasterizer{rasterizer_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
+    : runtime{runtime_}, device_memory{device_memory_} {
     // Configure null sampler
     TSCEntry sampler_descriptor{};
     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
     void(slot_samplers.insert(runtime, sampler_descriptor));

     if constexpr (HAS_DEVICE_MEMORY_INFO) {
-        const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
-        const s64 min_spacing_expected = device_memory - 1_GiB;
-        const s64 min_spacing_critical = device_memory - 512_MiB;
-        const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+        const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+        const s64 min_spacing_expected = device_local_memory - 1_GiB;
+        const s64 min_spacing_critical = device_local_memory - 512_MiB;
+        const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
         const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
         const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
         expected_memory = static_cast<u64>(
-            std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+            std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
                      DEFAULT_EXPECTED_MEMORY));
         critical_memory = static_cast<u64>(
-            std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+            std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
                      DEFAULT_CRITICAL_MEMORY));
-        minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
+        minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
     } else {
         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
@@ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
 }

 template <class P>
-void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
     ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
         if (True(image.flags & ImageFlagBits::CpuModified)) {
             return;
@@ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
 }

 template <class P>
-void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
     boost::container::small_vector<ImageId, 16> images;
     ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
         if (!image.IsSafeDownload()) {
@@ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
 }

 template <class P>
-std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr,
+std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr,
                                                                                u64 size) {
     std::optional<VideoCore::RasterizerDownloadArea> area{};
     ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) {
@@ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
 }

 template <class P>
-void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
     boost::container::small_vector<ImageId, 16> deleted_images;
     ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
     for (const ImageId id : deleted_images) {
@@ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,

 template <class P>
 typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(
-    const Tegra::FramebufferConfig& config, VAddr cpu_addr) {
+    const Tegra::FramebufferConfig& config, DAddr cpu_addr) {
     // TODO: Properly implement this
     const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS);
     if (it == page_table.end()) {
@@ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep
 }

 template <class P>
-bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     bool is_modified = false;
     ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
         if (False(image.flags & ImageFlagBits::GpuModified)) {
@@ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         return;
     }

-    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
+    Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
         *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);

     if (True(image.flags & ImageFlagBits::Converted)) {
@@ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
         if (!cpu_addr) {
@@ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {

     static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
     local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
-    Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
+    Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
         *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);

     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
@@ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) {
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         const auto size = CalculateGuestSizeInBytes(info);
         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
         if (!cpu_addr) {
-            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+            const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
             virtual_invalid_space += Common::AlignUp(size, 32);
-            cpu_addr = std::optional<VAddr>(fake_addr);
+            cpu_addr = std::optional<DAddr>(fake_addr);
         }
     }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
@@ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
 }

 template <class P>
-ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
     ImageInfo new_info = info;
     const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
@@ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag

 template <class P>
 ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) {
-    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
         if (!cpu_addr) {
@@ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd

 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 32> images;
@@ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size,
 template <class P>
 template <typename Func>
 void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
-    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
+    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type;
     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
     const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
     for (const auto& [gpu_addr, size] : segments) {
-        std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+        std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
         if constexpr (RETURNS_BOOL) {
             if (func(gpu_addr, *cpu_addr, size)) {
@@ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     }
     boost::container::small_vector<ImageViewId, 16> sparse_maps;
     ForEachSparseSegment(
-        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
+        image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) {
             auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
             ForEachCPUPage(cpu_addr, size,
                            [this, map_id](u64 page) { page_table[page].push_back(map_id); });
@@ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
     auto& sparse_maps = it->second;
     for (auto& map_view_id : sparse_maps) {
         const auto& map_range = slot_map_views[map_view_id];
-        const VAddr cpu_addr = map_range.cpu_addr;
+        const DAddr cpu_addr = map_range.cpu_addr;
         const std::size_t size = map_range.size;
         ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
             const auto page_it = page_table.find(page);
@@ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(False(image.flags & ImageFlagBits::Tracked));
     image.flags |= ImageFlagBits::Tracked;
     if (False(image.flags & ImageFlagBits::Sparse)) {
-        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+        device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
         return;
     }
     if (True(image.flags & ImageFlagBits::Registered)) {
@@ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
         auto& sparse_maps = it->second;
         for (auto& map_view_id : sparse_maps) {
             const auto& map = slot_map_views[map_view_id];
-            const VAddr cpu_addr = map.cpu_addr;
+            const DAddr cpu_addr = map.cpu_addr;
             const std::size_t size = map.size;
-            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+            device_memory.UpdatePagesCachedCount(cpu_addr, size, 1);
         }
         return;
     }
     ForEachSparseSegment(image,
-                         [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
-                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+                         [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) {
+                             device_memory.UpdatePagesCachedCount(cpu_addr, size, 1);
                          });
 }

@@ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
     ASSERT(True(image.flags & ImageFlagBits::Tracked));
     image.flags &= ~ImageFlagBits::Tracked;
     if (False(image.flags & ImageFlagBits::Sparse)) {
-        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+        device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
         return;
     }
     ASSERT(True(image.flags & ImageFlagBits::Registered));
@@ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
     auto& sparse_maps = it->second;
     for (auto& map_view_id : sparse_maps) {
         const auto& map = slot_map_views[map_view_id];
-        const VAddr cpu_addr = map.cpu_addr;
+        const DAddr cpu_addr = map.cpu_addr;
         const std::size_t size = map.size;
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+        device_memory.UpdatePagesCachedCount(cpu_addr, size, -1);
     }
 }

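The rename to device_local_memory in the constructor hunk above only avoids shadowing the new device_memory member; the budget arithmetic itself is unchanged. A standalone sketch of that arithmetic with illustrative placeholder constants (the real TARGET_THRESHOLD and DEFAULT_EXPECTED_MEMORY values are not shown in this diff):

#include <algorithm>
#include <cstdint>

// Placeholder constants for illustration only; not the values used by yuzu.
constexpr int64_t GiB = int64_t{1} << 30;
constexpr int64_t TARGET_THRESHOLD = 4 * GiB;
constexpr int64_t DEFAULT_EXPECTED_MEMORY = 1 * GiB;

int64_t ExpectedMemory(int64_t device_local_memory) {
    const int64_t min_spacing_expected = device_local_memory - 1 * GiB;
    const int64_t mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
    const int64_t min_vacancy_expected = (6 * mem_threshold) / 10;
    // Keep at least min_vacancy_expected free, never exceed min_spacing_expected,
    // and never budget below the default floor.
    return std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
                    DEFAULT_EXPECTED_MEMORY);
}

// e.g. with an 8 GiB device-local heap: mem_threshold = 4 GiB, min_vacancy_expected = 2.4 GiB,
// so expected_memory = min(8 - 2.4, 8 - 1) GiB = 5.6 GiB under these placeholder constants.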
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 6caf75b46..8699d40d4 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -36,9 +36,11 @@
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"

-namespace Tegra::Control {
+namespace Tegra {
+namespace Control {
 struct ChannelState;
 }
+} // namespace Tegra

 namespace VideoCommon {

@@ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
     };

 public:
-    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
+    explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&);

     /// Notify the cache that a new frame has been queued
     void TickFrame();
@@ -190,15 +192,15 @@ public:
     Framebuffer* GetFramebuffer();

     /// Mark images in a range as modified from the CPU
-    void WriteMemory(VAddr cpu_addr, size_t size);
+    void WriteMemory(DAddr cpu_addr, size_t size);

     /// Download contents of host images to guest memory in a region
-    void DownloadMemory(VAddr cpu_addr, size_t size);
+    void DownloadMemory(DAddr cpu_addr, size_t size);

-    std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
+    std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size);

     /// Remove images in a region
-    void UnmapMemory(VAddr cpu_addr, size_t size);
+    void UnmapMemory(DAddr cpu_addr, size_t size);

     /// Remove images in a region
     void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
@@ -210,7 +212,7 @@ public:

     /// Try to find a cached image view in the given CPU address
     [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config,
-                                                         VAddr cpu_addr);
+                                                         DAddr cpu_addr);

     /// Return true when there are uncommitted images to be downloaded
     [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
@@ -235,7 +237,7 @@ public:
                                  GPUVAddr address = 0, size_t size = 0);

     /// Return true when a CPU region is modified from the GPU
-    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+    [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size);

     [[nodiscard]] bool IsRescaling() const noexcept;

@@ -252,7 +254,7 @@ public:
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
-    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+    static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
         const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
         for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
@@ -326,7 +328,7 @@ private:

     /// Create a new image and join perfectly matching existing images
     /// Remove joined images from the cache
-    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
+    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr);

     [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr);

@@ -349,7 +351,7 @@ private:

     /// Iterates over all the images in a region calling func
     template <typename Func>
-    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
+    void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func);

     template <typename Func>
     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
@@ -421,7 +423,7 @@ private:

     Runtime& runtime;

-    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
     std::deque<TextureCacheGPUMap> gpu_page_table_storage;

     RenderTargets render_targets;
@@ -432,7 +434,7 @@ private:
     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;

-    VAddr virtual_invalid_space{};
+    DAddr virtual_invalid_space{};

     bool has_deleted_images = false;
     bool is_rescaling = false;
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index fcf70068e..96f04b6c8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -23,6 +23,7 @@
 #include "core/memory.h"
 #include "video_core/compatible_formats.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/guest_memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/surface.h"
 #include "video_core/texture_cache/decode_bc.h"
@@ -552,7 +553,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
     for (s32 layer = 0; layer < info.resources.layers; ++layer) {
         const std::span<const u8> src = input.subspan(host_offset);
         {
-            Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
+            Tegra::Memory::GpuGuestMemoryScoped<u8,
+                                                Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite>
                 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);

             SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index b42d48416..0efb7b49d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -6,6 +6,8 @@
 #include "common/logging/log.h"
 #include "common/settings.h"
 #include "core/core.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
+#include "video_core/host1x/host1x.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_null/renderer_null.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
@@ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
     std::unique_ptr<Core::Frontend::GraphicsContext> context) {
     auto& telemetry_session = system.TelemetrySession();
-    auto& cpu_memory = system.ApplicationMemory();
+    auto& device_memory = system.Host1x().MemoryManager();

     switch (Settings::values.renderer_backend.GetValue()) {
     case Settings::RendererBackend::OpenGL:
-        return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
-                                                        gpu, std::move(context));
+        return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window,
+                                                        device_memory, gpu, std::move(context));
     case Settings::RendererBackend::Vulkan:
-        return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
-                                                        gpu, std::move(context));
+        return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window,
+                                                        device_memory, gpu, std::move(context));
     case Settings::RendererBackend::Null:
-        return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu,
-                                                    std::move(context));
+        return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context));
     default:
         return nullptr;
     }
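The renderer factory above now sources the device memory manager from Host1x instead of passing the application's CPU memory to every backend. A hedged sketch of the same wiring, using only the accessors shown in the hunk:

// Sketch only: mirrors the Vulkan branch of CreateRenderer() above.
auto& device_memory = system.Host1x().MemoryManager(); // Tegra::MaxwellDeviceMemoryManager&
auto renderer = std::make_unique<Vulkan::RendererVulkan>(system.TelemetrySession(), emu_window,
                                                         device_memory, gpu, std::move(context));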