Diffstat (limited to 'src')
-rw-r--r--  src/audio_core/device/device_session.cpp | 1
-rw-r--r--  src/audio_core/renderer/command/data_source/decode.cpp | 1
-rw-r--r--  src/common/common_types.h | 1
-rw-r--r--  src/core/CMakeLists.txt | 4
-rw-r--r--  src/core/core.cpp | 14
-rw-r--r--  src/core/core.h | 6
-rw-r--r--  src/core/device_memory.h | 16
-rw-r--r--  src/core/device_memory_manager.h | 211
-rw-r--r--  src/core/device_memory_manager.inc | 582
-rw-r--r--  src/core/gpu_dirty_memory_manager.h | 14
-rw-r--r--  src/core/guest_memory.h | 214
-rw-r--r--  src/core/hle/kernel/k_process.cpp | 14
-rw-r--r--  src/core/hle/kernel/k_process.h | 4
-rw-r--r--  src/core/hle/service/hle_ipc.cpp | 61
-rw-r--r--  src/core/hle/service/hle_ipc.h | 5
-rw-r--r--  src/core/hle/service/nvdrv/core/container.cpp | 114
-rw-r--r--  src/core/hle/service/nvdrv/core/container.h | 32
-rw-r--r--  src/core/hle/service/nvdrv/core/heap_mapper.cpp | 175
-rw-r--r--  src/core/hle/service/nvdrv/core/heap_mapper.h | 49
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.cpp | 120
-rw-r--r--  src/core/hle/service/nvdrv/core/nvmap.h | 25
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdevice.h | 3
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 36
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 15
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 13
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 5
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 7
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.h | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp | 31
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.h | 7
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp | 27
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.h | 6
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv_interface.cpp | 34
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv_interface.h | 1
-rw-r--r--  src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp | 25
-rw-r--r--  src/core/hle/service/nvnflinger/fb_share_buffer_manager.h | 5
-rw-r--r--  src/core/hle/service/nvnflinger/nvnflinger.cpp | 2
-rw-r--r--  src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp | 2
-rw-r--r--  src/core/memory.cpp | 108
-rw-r--r--  src/core/memory.h | 211
-rw-r--r--  src/tests/video_core/memory_tracker.cpp | 7
-rw-r--r--  src/video_core/CMakeLists.txt | 5
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h | 3
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 451
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h | 98
-rw-r--r--  src/video_core/buffer_cache/memory_tracker_base.h | 18
-rw-r--r--  src/video_core/buffer_cache/word_manager.h | 28
-rw-r--r--  src/video_core/dma_pusher.cpp | 10
-rw-r--r--  src/video_core/engines/engine_upload.cpp | 5
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 1
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 26
-rw-r--r--  src/video_core/engines/sw_blitter/blitter.cpp | 5
-rw-r--r--  src/video_core/framebuffer_config.h | 2
-rw-r--r--  src/video_core/gpu.cpp | 29
-rw-r--r--  src/video_core/gpu.h | 12
-rw-r--r--  src/video_core/gpu_thread.cpp | 6
-rw-r--r--  src/video_core/gpu_thread.h | 18
-rw-r--r--  src/video_core/guest_memory.h | 30
-rw-r--r--  src/video_core/host1x/codecs/h264.cpp | 9
-rw-r--r--  src/video_core/host1x/codecs/vp8.cpp | 4
-rw-r--r--  src/video_core/host1x/codecs/vp9.cpp | 9
-rw-r--r--  src/video_core/host1x/gpu_device_memory_manager.cpp | 32
-rw-r--r--  src/video_core/host1x/gpu_device_memory_manager.h | 24
-rw-r--r--  src/video_core/host1x/host1x.cpp | 5
-rw-r--r--  src/video_core/host1x/host1x.h | 17
-rw-r--r--  src/video_core/host1x/vic.cpp | 15
-rw-r--r--  src/video_core/memory_manager.cpp | 229
-rw-r--r--  src/video_core/memory_manager.h | 36
-rw-r--r--  src/video_core/query_cache.h | 30
-rw-r--r--  src/video_core/query_cache/query_base.h | 4
-rw-r--r--  src/video_core/query_cache/query_cache.h | 37
-rw-r--r--  src/video_core/query_cache/query_cache_base.h | 15
-rw-r--r--  src/video_core/rasterizer_accelerated.cpp | 72
-rw-r--r--  src/video_core/rasterizer_accelerated.h | 49
-rw-r--r--  src/video_core/rasterizer_interface.h | 23
-rw-r--r--  src/video_core/renderer_null/null_rasterizer.cpp | 26
-rw-r--r--  src/video_core/renderer_null/null_rasterizer.h | 23
-rw-r--r--  src/video_core/renderer_null/renderer_null.cpp | 5
-rw-r--r--  src/video_core/renderer_null/renderer_null.h | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.cpp | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 41
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 28
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 11
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 11
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 10
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/pipeline_helper.h | 1
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 12
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 17
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 1
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 62
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 44
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 41
-rw-r--r--  src/video_core/shader_cache.cpp | 8
-rw-r--r--  src/video_core/shader_cache.h | 5
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 75
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h | 28
-rw-r--r--  src/video_core/texture_cache/util.cpp | 5
-rw-r--r--  src/video_core/video_core.cpp | 15
121 files changed, 2741 insertions(+), 1414 deletions(-)
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index 3c214ec00..2a1ae1bb3 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/sink/sink_stream.h" 8#include "audio_core/sink/sink_stream.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/core_timing.h" 10#include "core/core_timing.h"
11#include "core/guest_memory.h"
11#include "core/memory.h" 12#include "core/memory.h"
12 13
13#include "core/hle/kernel/k_process.h" 14#include "core/hle/kernel/k_process.h"
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index 911dae3c1..905613a5a 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -9,6 +9,7 @@
9#include "common/fixed_point.h" 9#include "common/fixed_point.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/scratch_buffer.h" 11#include "common/scratch_buffer.h"
12#include "core/guest_memory.h"
12#include "core/memory.h" 13#include "core/memory.h"
13 14
14namespace AudioCore::Renderer { 15namespace AudioCore::Renderer {
diff --git a/src/common/common_types.h b/src/common/common_types.h
index 0fc225aff..ae04c4d60 100644
--- a/src/common/common_types.h
+++ b/src/common/common_types.h
@@ -45,6 +45,7 @@ using f32 = float; ///< 32-bit floating point
 using f64 = double; ///< 64-bit floating point
 
 using VAddr = u64; ///< Represents a pointer in the userspace virtual address space.
+using DAddr = u64; ///< Represents a pointer in the device specific virtual address space.
 using PAddr = u64; ///< Represents a pointer in the ARM11 physical address space.
 using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space.
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 16ddb5e90..4ff2c1bb7 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(core STATIC
     debugger/gdbstub_arch.h
     debugger/gdbstub.cpp
     debugger/gdbstub.h
+    device_memory_manager.h
+    device_memory_manager.inc
     device_memory.cpp
     device_memory.h
     file_sys/fssystem/fs_i_storage.h
@@ -609,6 +611,8 @@ add_library(core STATIC
     hle/service/ns/pdm_qry.h
     hle/service/nvdrv/core/container.cpp
     hle/service/nvdrv/core/container.h
+    hle/service/nvdrv/core/heap_mapper.cpp
+    hle/service/nvdrv/core/heap_mapper.h
     hle/service/nvdrv/core/nvmap.cpp
     hle/service/nvdrv/core/nvmap.h
     hle/service/nvdrv/core/syncpoint_manager.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 461eea9c8..2392fe136 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -28,6 +28,7 @@
28#include "core/file_sys/savedata_factory.h" 28#include "core/file_sys/savedata_factory.h"
29#include "core/file_sys/vfs_concat.h" 29#include "core/file_sys/vfs_concat.h"
30#include "core/file_sys/vfs_real.h" 30#include "core/file_sys/vfs_real.h"
31#include "core/gpu_dirty_memory_manager.h"
31#include "core/hle/kernel/k_memory_manager.h" 32#include "core/hle/kernel/k_memory_manager.h"
32#include "core/hle/kernel/k_process.h" 33#include "core/hle/kernel/k_process.h"
33#include "core/hle/kernel/k_resource_limit.h" 34#include "core/hle/kernel/k_resource_limit.h"
@@ -565,6 +566,9 @@ struct System::Impl {
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
 
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+        gpu_dirty_memory_managers;
+
     std::deque<std::vector<u8>> user_channel;
 };
 
@@ -651,8 +655,14 @@ size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
-void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    return this->ApplicationProcess()->GatherGPUDirtyMemory(callback);
+std::span<GPUDirtyMemoryManager> System::GetGPUDirtyMemoryManager() {
+    return impl->gpu_dirty_memory_managers;
+}
+
+void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) {
+    for (auto& manager : impl->gpu_dirty_memory_managers) {
+        manager.Gather(callback);
+    }
 }
 
 PerfStatsResults System::GetAndResetPerfStats() {
diff --git a/src/core/core.h b/src/core/core.h
index ba5add0dc..80446f385 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -8,6 +8,7 @@
 #include <functional>
 #include <memory>
 #include <mutex>
+#include <span>
 #include <string>
 #include <vector>
 
@@ -116,6 +117,7 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
+class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
 class SpeedLimiter;
@@ -224,7 +226,9 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+    std::span<GPUDirtyMemoryManager> GetGPUDirtyMemoryManager();
+
+    void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback);
 
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
 
diff --git a/src/core/device_memory.h b/src/core/device_memory.h
index 13388b73e..11bf0e326 100644
--- a/src/core/device_memory.h
+++ b/src/core/device_memory.h
@@ -32,6 +32,12 @@ public:
     }
 
     template <typename T>
+    PAddr GetRawPhysicalAddr(const T* ptr) const {
+        return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) -
+                                  reinterpret_cast<uintptr_t>(buffer.BackingBasePointer()));
+    }
+
+    template <typename T>
     T* GetPointer(Common::PhysicalAddress addr) {
         return reinterpret_cast<T*>(buffer.BackingBasePointer() +
                                     (GetInteger(addr) - DramMemoryMap::Base));
@@ -43,6 +49,16 @@ public:
                                     (GetInteger(addr) - DramMemoryMap::Base));
     }
 
+    template <typename T>
+    T* GetPointerFromRaw(PAddr addr) {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
+    template <typename T>
+    const T* GetPointerFromRaw(PAddr addr) const {
+        return reinterpret_cast<T*>(buffer.BackingBasePointer() + addr);
+    }
+
     Common::HostMemory buffer;
 };
 
diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
new file mode 100644
index 000000000..ffeed46cc
--- /dev/null
+++ b/src/core/device_memory_manager.h
@@ -0,0 +1,211 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <atomic>
8#include <deque>
9#include <memory>
10#include <mutex>
11
12#include "common/common_types.h"
13#include "common/scratch_buffer.h"
14#include "common/virtual_buffer.h"
15
16namespace Core {
17
18constexpr size_t DEVICE_PAGEBITS = 12ULL;
19constexpr size_t DEVICE_PAGESIZE = 1ULL << DEVICE_PAGEBITS;
20constexpr size_t DEVICE_PAGEMASK = DEVICE_PAGESIZE - 1ULL;
21
22class DeviceMemory;
23
24namespace Memory {
25class Memory;
26}
27
28template <typename DTraits>
29struct DeviceMemoryManagerAllocator;
30
31struct Asid {
32 size_t id;
33};
34
35template <typename Traits>
36class DeviceMemoryManager {
37 using DeviceInterface = typename Traits::DeviceInterface;
38 using DeviceMethods = typename Traits::DeviceMethods;
39
40public:
41 DeviceMemoryManager(const DeviceMemory& device_memory);
42 ~DeviceMemoryManager();
43
44 void BindInterface(DeviceInterface* device_inter);
45
46 DAddr Allocate(size_t size);
47 void AllocateFixed(DAddr start, size_t size);
48 void Free(DAddr start, size_t size);
49
50 void Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, bool track = false);
51
52 void Unmap(DAddr address, size_t size);
53
54 void TrackContinuityImpl(DAddr address, VAddr virtual_address, size_t size, Asid asid);
55 void TrackContinuity(DAddr address, VAddr virtual_address, size_t size, Asid asid) {
56 std::scoped_lock lk(mapping_guard);
57 TrackContinuityImpl(address, virtual_address, size, asid);
58 }
59
60 // Write / Read
61 template <typename T>
62 T* GetPointer(DAddr address);
63
64 template <typename T>
65 const T* GetPointer(DAddr address) const;
66
67 template <typename Func>
68 void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
69 DAddr subbits = static_cast<DAddr>(address & page_mask);
70 const u32 base = compressed_device_addr[(address >> page_bits)];
71 if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
72 const DAddr d_address = (static_cast<DAddr>(base) << page_bits) + subbits;
73 operation(d_address);
74 return;
75 }
76 InnerGatherDeviceAddresses(buffer, address);
77 for (u32 value : buffer) {
78 operation((static_cast<DAddr>(value) << page_bits) + subbits);
79 }
80 }
81
82 template <typename Func>
83 void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
84 PAddr address = GetRawPhysicalAddr<u8>(p);
85 ApplyOpOnPAddr(address, buffer, operation);
86 }
87
88 PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
89 PAddr subbits = static_cast<PAddr>(address & page_mask);
90 auto paddr = compressed_physical_ptr[(address >> page_bits)];
91 if (paddr == 0) {
92 return 0;
93 }
94 return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits;
95 }
96
97 template <typename T>
98 void Write(DAddr address, T value);
99
100 template <typename T>
101 T Read(DAddr address) const;
102
103 u8* GetSpan(const DAddr src_addr, const std::size_t size);
104 const u8* GetSpan(const DAddr src_addr, const std::size_t size) const;
105
106 void ReadBlock(DAddr address, void* dest_pointer, size_t size);
107 void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
108 void WriteBlock(DAddr address, const void* src_pointer, size_t size);
109 void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
110
111 Asid RegisterProcess(Memory::Memory* memory);
112 void UnregisterProcess(Asid id);
113
114 void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
115
116 static constexpr size_t AS_BITS = Traits::device_virtual_bits;
117
118private:
119 static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
120 static constexpr size_t device_as_size = 1ULL << device_virtual_bits;
121 static constexpr size_t physical_min_bits = 32;
122 static constexpr size_t physical_max_bits = 33;
123 static constexpr size_t page_bits = 12;
124 static constexpr size_t page_size = 1ULL << page_bits;
125 static constexpr size_t page_mask = page_size - 1ULL;
126 static constexpr u32 physical_address_base = 1U << page_bits;
127 static constexpr u32 MULTI_FLAG_BITS = 31;
128 static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
129 static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
130
131 template <typename T>
132 T* GetPointerFromRaw(PAddr addr) {
133 return reinterpret_cast<T*>(physical_base + addr);
134 }
135
136 template <typename T>
137 const T* GetPointerFromRaw(PAddr addr) const {
138 return reinterpret_cast<T*>(physical_base + addr);
139 }
140
141 template <typename T>
142 PAddr GetRawPhysicalAddr(const T* ptr) const {
143 return static_cast<PAddr>(reinterpret_cast<uintptr_t>(ptr) - physical_base);
144 }
145
146 void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
147 auto increment);
148
149 void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
150
151 std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
152
153 const uintptr_t physical_base;
154 DeviceInterface* device_inter;
155 Common::VirtualBuffer<u32> compressed_physical_ptr;
156 Common::VirtualBuffer<u32> compressed_device_addr;
157 Common::VirtualBuffer<u32> continuity_tracker;
158
159 // Process memory interfaces
160
161 std::deque<size_t> id_pool;
162 std::deque<Memory::Memory*> registered_processes;
163
164 // Memory protection management
165
166 static constexpr size_t guest_max_as_bits = 39;
167 static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
168 static constexpr size_t guest_mask = guest_as_size - 1ULL;
169 static constexpr size_t asid_start_bit = guest_max_as_bits;
170
171 std::pair<Asid, VAddr> ExtractCPUBacking(size_t page_index) {
172 auto content = cpu_backing_address[page_index];
173 const VAddr address = content & guest_mask;
174 const Asid asid{static_cast<size_t>(content >> asid_start_bit)};
175 return std::make_pair(asid, address);
176 }
177
178 void InsertCPUBacking(size_t page_index, VAddr address, Asid asid) {
179 cpu_backing_address[page_index] = address | (asid.id << asid_start_bit);
180 }
181
182 Common::VirtualBuffer<VAddr> cpu_backing_address;
183 static constexpr size_t subentries = 8 / sizeof(u8);
184 static constexpr size_t subentries_mask = subentries - 1;
185 class CounterEntry final {
186 public:
187 CounterEntry() = default;
188
189 std::atomic_uint8_t& Count(std::size_t page) {
190 return values[page & subentries_mask];
191 }
192
193 const std::atomic_uint8_t& Count(std::size_t page) const {
194 return values[page & subentries_mask];
195 }
196
197 private:
198 std::array<std::atomic_uint8_t, subentries> values{};
199 };
200 static_assert(sizeof(CounterEntry) == subentries * sizeof(u8),
201 "CounterEntry should be 8 bytes!");
202
203 static constexpr size_t num_counter_entries =
204 (1ULL << (device_virtual_bits - page_bits)) / subentries;
205 using CachedPages = std::array<CounterEntry, num_counter_entries>;
206 std::unique_ptr<CachedPages> cached_pages;
207 std::mutex counter_guard;
208 std::mutex mapping_guard;
209};
210
211} // namespace Core
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
new file mode 100644
index 000000000..8ce122872
--- /dev/null
+++ b/src/core/device_memory_manager.inc
@@ -0,0 +1,582 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <atomic>
5#include <limits>
6#include <memory>
7#include <type_traits>
8
9#include "common/address_space.h"
10#include "common/address_space.inc"
11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/div_ceil.h"
14#include "common/scope_exit.h"
15#include "common/settings.h"
16#include "core/device_memory.h"
17#include "core/device_memory_manager.h"
18#include "core/memory.h"
19
20namespace Core {
21
22namespace {
23
24class MultiAddressContainer {
25public:
26 MultiAddressContainer() = default;
27 ~MultiAddressContainer() = default;
28
29 void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
30 buffer.resize(8);
31 buffer.resize(0);
32 size_t index = 0;
33 const auto add_value = [&](u32 value) {
34 buffer[index] = value;
35 index++;
36 buffer.resize(index);
37 };
38
39 u32 iter_entry = start_entry;
40 Entry* current = &storage[iter_entry - 1];
41 add_value(current->value);
42 while (current->next_entry != 0) {
43 iter_entry = current->next_entry;
44 current = &storage[iter_entry - 1];
45 add_value(current->value);
46 }
47 }
48
49 u32 Register(u32 value) {
50 return RegisterImplementation(value);
51 }
52
53 void Register(u32 value, u32 start_entry) {
54 auto entry_id = RegisterImplementation(value);
55 u32 iter_entry = start_entry;
56 Entry* current = &storage[iter_entry - 1];
57 while (current->next_entry != 0) {
58 iter_entry = current->next_entry;
59 current = &storage[iter_entry - 1];
60 }
61 current->next_entry = entry_id;
62 }
63
64 std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
65 u32 iter_entry = start_entry;
66 Entry* previous{};
67 Entry* current = &storage[iter_entry - 1];
68 Entry* next{};
69 bool more_than_one_remaining = false;
70 u32 result_start{start_entry};
71 size_t count = 0;
72 while (current->value != value) {
73 count++;
74 previous = current;
75 iter_entry = current->next_entry;
76 current = &storage[iter_entry - 1];
77 }
78 // Find next
79 u32 next_entry = current->next_entry;
80 if (next_entry != 0) {
81 next = &storage[next_entry - 1];
82 more_than_one_remaining = next->next_entry != 0 || previous != nullptr;
83 }
84 if (previous) {
85 previous->next_entry = next_entry;
86 } else {
87 result_start = next_entry;
88 }
89 free_entries.emplace_back(iter_entry);
90 return std::make_pair(more_than_one_remaining || count > 1, result_start);
91 }
92
93 u32 ReleaseEntry(u32 start_entry) {
94 Entry* current = &storage[start_entry - 1];
95 free_entries.emplace_back(start_entry);
96 return current->value;
97 }
98
99private:
100 u32 RegisterImplementation(u32 value) {
101 auto entry_id = GetNewEntry();
102 auto& entry = storage[entry_id - 1];
103 entry.next_entry = 0;
104 entry.value = value;
105 return entry_id;
106 }
107 u32 GetNewEntry() {
108 if (!free_entries.empty()) {
109 u32 result = free_entries.front();
110 free_entries.pop_front();
111 return result;
112 }
113 storage.emplace_back();
114 u32 new_entry = static_cast<u32>(storage.size());
115 return new_entry;
116 }
117
118 struct Entry {
119 u32 next_entry{};
120 u32 value{};
121 };
122
123 std::deque<Entry> storage;
124 std::deque<u32> free_entries;
125};
126
127struct EmptyAllocator {
128 EmptyAllocator([[maybe_unused]] DAddr address) {}
129};
130
131} // namespace
132
133template <typename DTraits>
134struct DeviceMemoryManagerAllocator {
135 static constexpr size_t device_virtual_bits = DTraits::device_virtual_bits;
136 static constexpr DAddr first_address = 1ULL << Memory::YUZU_PAGEBITS;
137 static constexpr DAddr max_device_area = 1ULL << device_virtual_bits;
138
139 DeviceMemoryManagerAllocator() : main_allocator(first_address) {}
140
141 Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
142 MultiAddressContainer multi_dev_address;
143
144 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
145 template <bool pin_area>
146 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
147 return addr >= 0 && addr + size <= max_device_area;
148 }
149
150 DAddr Allocate(size_t size) {
151 return main_allocator.Allocate(size);
152 }
153
154 void AllocateFixed(DAddr b_address, size_t b_size) {
155 main_allocator.AllocateFixed(b_address, b_size);
156 }
157
158 void Free(DAddr b_address, size_t b_size) {
159 main_allocator.Free(b_address, b_size);
160 }
161};
162
163template <typename Traits>
164DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_)
165 : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())},
166 device_inter{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
167 compressed_device_addr(1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
168 Settings::MemoryLayout::Memory_4Gb
169 ? physical_min_bits
170 : physical_max_bits) -
171 Memory::YUZU_PAGEBITS)),
172 continuity_tracker(device_as_size >> Memory::YUZU_PAGEBITS),
173 cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
174 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
175 cached_pages = std::make_unique<CachedPages>();
176
177 const size_t total_virtual = device_as_size >> Memory::YUZU_PAGEBITS;
178 for (size_t i = 0; i < total_virtual; i++) {
179 compressed_physical_ptr[i] = 0;
180 continuity_tracker[i] = 1;
181 cpu_backing_address[i] = 0;
182 }
183 const size_t total_phys = 1ULL << ((Settings::values.memory_layout_mode.GetValue() ==
184 Settings::MemoryLayout::Memory_4Gb
185 ? physical_min_bits
186 : physical_max_bits) -
187 Memory::YUZU_PAGEBITS);
188 for (size_t i = 0; i < total_phys; i++) {
189 compressed_device_addr[i] = 0;
190 }
191}
192
193template <typename Traits>
194DeviceMemoryManager<Traits>::~DeviceMemoryManager() = default;
195
196template <typename Traits>
197void DeviceMemoryManager<Traits>::BindInterface(DeviceInterface* device_inter_) {
198 device_inter = device_inter_;
199}
200
201template <typename Traits>
202DAddr DeviceMemoryManager<Traits>::Allocate(size_t size) {
203 return impl->Allocate(size);
204}
205
206template <typename Traits>
207void DeviceMemoryManager<Traits>::AllocateFixed(DAddr start, size_t size) {
208 return impl->AllocateFixed(start, size);
209}
210
211template <typename Traits>
212void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
213 impl->Free(start, size);
214}
215
216template <typename Traits>
217void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
218 Asid asid, bool track) {
219 Core::Memory::Memory* process_memory = registered_processes[asid.id];
220 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
221 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
222 std::scoped_lock lk(mapping_guard);
223 for (size_t i = 0; i < num_pages; i++) {
224 const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
225 auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress));
226 if (ptr == nullptr) [[unlikely]] {
227 compressed_physical_ptr[start_page_d + i] = 0;
228 continue;
229 }
230 auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
231 compressed_physical_ptr[start_page_d + i] = phys_addr;
232 InsertCPUBacking(start_page_d + i, new_vaddress, asid);
233 const u32 base_dev = compressed_device_addr[phys_addr - 1U];
234 const u32 new_dev = static_cast<u32>(start_page_d + i);
235 if (base_dev == 0) [[likely]] {
236 compressed_device_addr[phys_addr - 1U] = new_dev;
237 continue;
238 }
239 u32 start_id = base_dev & MULTI_MASK;
240 if ((base_dev >> MULTI_FLAG_BITS) == 0) {
241 start_id = impl->multi_dev_address.Register(base_dev);
242 compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
243 }
244 impl->multi_dev_address.Register(new_dev, start_id);
245 }
246 if (track) {
247 TrackContinuityImpl(address, virtual_address, size, asid);
248 }
249}
250
251template <typename Traits>
252void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
253 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
254 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
255 device_inter->InvalidateRegion(address, size);
256 std::scoped_lock lk(mapping_guard);
257 for (size_t i = 0; i < num_pages; i++) {
258 auto phys_addr = compressed_physical_ptr[start_page_d + i];
259 compressed_physical_ptr[start_page_d + i] = 0;
260 cpu_backing_address[start_page_d + i] = 0;
261 if (phys_addr != 0) [[likely]] {
262 const u32 base_dev = compressed_device_addr[phys_addr - 1U];
263 if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
264 compressed_device_addr[phys_addr - 1] = 0;
265 continue;
266 }
267 const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
268 static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
269 if (!more_entries) {
270 compressed_device_addr[phys_addr - 1] =
271 impl->multi_dev_address.ReleaseEntry(new_start);
272 continue;
273 }
274 compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
275 }
276 }
277}
278template <typename Traits>
279void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
280 size_t size, Asid asid) {
281 Core::Memory::Memory* process_memory = registered_processes[asid.id];
282 size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
283 size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
284 uintptr_t last_ptr = 0;
285 size_t page_count = 1;
286 for (size_t i = num_pages; i > 0; i--) {
287 size_t index = i - 1;
288 const VAddr new_vaddress = virtual_address + index * Memory::YUZU_PAGESIZE;
289 const uintptr_t new_ptr = reinterpret_cast<uintptr_t>(
290 process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)));
291 if (new_ptr + page_size == last_ptr) {
292 page_count++;
293 } else {
294 page_count = 1;
295 }
296 last_ptr = new_ptr;
297 continuity_tracker[start_page_d + index] = static_cast<u32>(page_count);
298 }
299}
300template <typename Traits>
301u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) {
302 size_t page_index = src_addr >> page_bits;
303 size_t subbits = src_addr & page_mask;
304 if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
305 return GetPointer<u8>(src_addr);
306 }
307 return nullptr;
308}
309
310template <typename Traits>
311const u8* DeviceMemoryManager<Traits>::GetSpan(const DAddr src_addr, const std::size_t size) const {
312 size_t page_index = src_addr >> page_bits;
313 size_t subbits = src_addr & page_mask;
314 if ((static_cast<size_t>(continuity_tracker[page_index]) << page_bits) >= size + subbits) {
315 return GetPointer<u8>(src_addr);
316 }
317 return nullptr;
318}
319
320template <typename Traits>
321void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
322 PAddr address) {
323 size_t phys_addr = address >> page_bits;
324 std::scoped_lock lk(mapping_guard);
325 u32 backing = compressed_device_addr[phys_addr];
326 if ((backing >> MULTI_FLAG_BITS) != 0) {
327 impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
328 return;
329 }
330 buffer.resize(1);
331 buffer[0] = backing;
332}
333
334template <typename Traits>
335template <typename T>
336T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
337 const size_t index = address >> Memory::YUZU_PAGEBITS;
338 const size_t offset = address & Memory::YUZU_PAGEMASK;
339 auto phys_addr = compressed_physical_ptr[index];
340 if (phys_addr == 0) [[unlikely]] {
341 return nullptr;
342 }
343 return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
344 offset);
345}
346
347template <typename Traits>
348template <typename T>
349const T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) const {
350 const size_t index = address >> Memory::YUZU_PAGEBITS;
351 const size_t offset = address & Memory::YUZU_PAGEMASK;
352 auto phys_addr = compressed_physical_ptr[index];
353 if (phys_addr == 0) [[unlikely]] {
354 return nullptr;
355 }
356 return GetPointerFromRaw<T>((static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) +
357 offset);
358}
359
360template <typename Traits>
361template <typename T>
362void DeviceMemoryManager<Traits>::Write(DAddr address, T value) {
363 T* ptr = GetPointer<T>(address);
364 if (!ptr) [[unlikely]] {
365 return;
366 }
367 std::memcpy(ptr, &value, sizeof(T));
368}
369
370template <typename Traits>
371template <typename T>
372T DeviceMemoryManager<Traits>::Read(DAddr address) const {
373 const T* ptr = GetPointer<T>(address);
374 T result{};
375 if (!ptr) [[unlikely]] {
376 return result;
377 }
378 std::memcpy(&result, ptr, sizeof(T));
379 return result;
380}
381
382template <typename Traits>
383void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto on_unmapped,
384 auto on_memory, auto increment) {
385 std::size_t remaining_size = size;
386 std::size_t page_index = addr >> Memory::YUZU_PAGEBITS;
387 std::size_t page_offset = addr & Memory::YUZU_PAGEMASK;
388
389 while (remaining_size) {
390 const size_t next_pages = static_cast<std::size_t>(continuity_tracker[page_index]);
391 const std::size_t copy_amount =
392 std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size);
393 const auto current_vaddr =
394 static_cast<u64>((page_index << Memory::YUZU_PAGEBITS) + page_offset);
395 SCOPE_EXIT({
396 page_index += next_pages;
397 page_offset = 0;
398 increment(copy_amount);
399 remaining_size -= copy_amount;
400 });
401
402 auto phys_addr = compressed_physical_ptr[page_index];
403 if (phys_addr == 0) {
404 on_unmapped(copy_amount, current_vaddr);
405 continue;
406 }
407 auto* mem_ptr = GetPointerFromRaw<u8>(
408 (static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS) + page_offset);
409 on_memory(copy_amount, mem_ptr);
410 }
411}
412
413template <typename Traits>
414void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
415 device_inter->FlushRegion(address, size);
416 WalkBlock(
417 address, size,
418 [&](size_t copy_amount, DAddr current_vaddr) {
419 LOG_ERROR(
420 HW_Memory,
421 "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
422 current_vaddr, address, size);
423 std::memset(dest_pointer, 0, copy_amount);
424 },
425 [&](size_t copy_amount, const u8* const src_ptr) {
426 std::memcpy(dest_pointer, src_ptr, copy_amount);
427 },
428 [&](const std::size_t copy_amount) {
429 dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
430 });
431}
432
433template <typename Traits>
434void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) {
435 WalkBlock(
436 address, size,
437 [&](size_t copy_amount, DAddr current_vaddr) {
438 LOG_ERROR(
439 HW_Memory,
440 "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
441 current_vaddr, address, size);
442 },
443 [&](size_t copy_amount, u8* const dst_ptr) {
444 std::memcpy(dst_ptr, src_pointer, copy_amount);
445 },
446 [&](const std::size_t copy_amount) {
447 src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
448 });
449 device_inter->InvalidateRegion(address, size);
450}
451
452template <typename Traits>
453void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
454 WalkBlock(
455 address, size,
456 [&](size_t copy_amount, DAddr current_vaddr) {
457 LOG_ERROR(
458 HW_Memory,
459 "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
460 current_vaddr, address, size);
461 std::memset(dest_pointer, 0, copy_amount);
462 },
463 [&](size_t copy_amount, const u8* const src_ptr) {
464 std::memcpy(dest_pointer, src_ptr, copy_amount);
465 },
466 [&](const std::size_t copy_amount) {
467 dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
468 });
469}
470
471template <typename Traits>
472void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
473 size_t size) {
474 WalkBlock(
475 address, size,
476 [&](size_t copy_amount, DAddr current_vaddr) {
477 LOG_ERROR(
478 HW_Memory,
479 "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
480 current_vaddr, address, size);
481 },
482 [&](size_t copy_amount, u8* const dst_ptr) {
483 std::memcpy(dst_ptr, src_pointer, copy_amount);
484 },
485 [&](const std::size_t copy_amount) {
486 src_pointer = static_cast<const u8*>(src_pointer) + copy_amount;
487 });
488}
489
490template <typename Traits>
491Asid DeviceMemoryManager<Traits>::RegisterProcess(Memory::Memory* memory_device_inter) {
492 size_t new_id{};
493 if (!id_pool.empty()) {
494 new_id = id_pool.front();
495 id_pool.pop_front();
496 registered_processes[new_id] = memory_device_inter;
497 } else {
498 registered_processes.emplace_back(memory_device_inter);
499 new_id = registered_processes.size() - 1U;
500 }
501 return Asid{new_id};
502}
503
504template <typename Traits>
505void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) {
506 registered_processes[asid.id] = nullptr;
507 id_pool.push_front(asid.id);
508}
509
510template <typename Traits>
511void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
512 std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock);
513 const auto Lock = [&] {
514 if (!lk) {
515 lk.lock();
516 }
517 };
518 u64 uncache_begin = 0;
519 u64 cache_begin = 0;
520 u64 uncache_bytes = 0;
521 u64 cache_bytes = 0;
522 const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
523
524 std::atomic_thread_fence(std::memory_order_acquire);
525 const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
526 size_t page = addr >> Memory::YUZU_PAGEBITS;
527 auto [asid, base_vaddress] = ExtractCPUBacking(page);
528 size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
529 auto* memory_device_inter = registered_processes[asid.id];
530 for (; page != page_end; ++page) {
531 std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page);
532
533 if (delta > 0) {
534 ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(),
535 "Count may overflow!");
536 } else if (delta < 0) {
537 ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
538 } else {
539 ASSERT_MSG(false, "Delta must be non-zero!");
540 }
541
542 // Adds or subtracts 1, as count is a unsigned 8-bit value
543 count.fetch_add(static_cast<u8>(delta), std::memory_order_release);
544
545 // Assume delta is either -1 or 1
546 if (count.load(std::memory_order::relaxed) == 0) {
547 if (uncache_bytes == 0) {
548 uncache_begin = vpage;
549 }
550 uncache_bytes += Memory::YUZU_PAGESIZE;
551 } else if (uncache_bytes > 0) {
552 Lock();
553 MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS,
554 uncache_bytes, false);
555 uncache_bytes = 0;
556 }
557 if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
558 if (cache_bytes == 0) {
559 cache_begin = vpage;
560 }
561 cache_bytes += Memory::YUZU_PAGESIZE;
562 } else if (cache_bytes > 0) {
563 Lock();
564 MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
565 true);
566 cache_bytes = 0;
567 }
568 vpage++;
569 }
570 if (uncache_bytes > 0) {
571 Lock();
572 MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
573 false);
574 }
575 if (cache_bytes > 0) {
576 Lock();
577 MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
578 true);
579 }
580}
581
582} // namespace Core
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9687531e8..cc8fc176f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -10,7 +10,7 @@
 #include <utility>
 #include <vector>
 
-#include "core/memory.h"
+#include "core/device_memory_manager.h"
 
 namespace Core {
 
@@ -23,7 +23,7 @@ public:
 
     ~GPUDirtyMemoryManager() = default;
 
-    void Collect(VAddr address, size_t size) {
+    void Collect(PAddr address, size_t size) {
         TransformAddress t = BuildTransform(address, size);
         TransformAddress tmp, original;
         do {
@@ -47,7 +47,7 @@ public:
                                              std::memory_order_relaxed));
     }
 
-    void Gather(std::function<void(VAddr, size_t)>& callback) {
+    void Gather(std::function<void(PAddr, size_t)>& callback) {
         {
             std::scoped_lock lk(guard);
             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
@@ -65,7 +65,7 @@ public:
             mask = mask >> empty_bits;
 
             const size_t continuous_bits = std::countr_one(mask);
-            callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
+            callback((static_cast<PAddr>(transform.address) << page_bits) + offset,
                      continuous_bits << align_bits);
             mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
             offset += continuous_bits << align_bits;
@@ -80,7 +80,7 @@ private:
         u32 mask;
     };
 
-    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
+    constexpr static size_t page_bits = DEVICE_PAGEBITS - 1;
     constexpr static size_t page_size = 1ULL << page_bits;
     constexpr static size_t page_mask = page_size - 1;
 
@@ -89,7 +89,7 @@ private:
     constexpr static size_t align_mask = align_size - 1;
     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
 
-    bool IsValid(VAddr address) {
+    bool IsValid(PAddr address) {
         return address < (1ULL << 39);
     }
 
@@ -103,7 +103,7 @@ private:
         return mask;
     }
 
-    TransformAddress BuildTransform(VAddr address, size_t size) {
+    TransformAddress BuildTransform(PAddr address, size_t size) {
         const size_t minor_address = address & page_mask;
         const size_t minor_bit = minor_address >> align_bits;
         const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
diff --git a/src/core/guest_memory.h b/src/core/guest_memory.h
new file mode 100644
index 000000000..7ee18c126
--- /dev/null
+++ b/src/core/guest_memory.h
@@ -0,0 +1,214 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <iterator>
7#include <memory>
8#include <optional>
9#include <span>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/scratch_buffer.h"
14
15namespace Core::Memory {
16
17enum GuestMemoryFlags : u32 {
18 Read = 1 << 0,
19 Write = 1 << 1,
20 Safe = 1 << 2,
21 Cached = 1 << 3,
22
23 SafeRead = Read | Safe,
24 SafeWrite = Write | Safe,
25 SafeReadWrite = SafeRead | SafeWrite,
26 SafeReadCachedWrite = SafeReadWrite | Cached,
27
28 UnsafeRead = Read,
29 UnsafeWrite = Write,
30 UnsafeReadWrite = UnsafeRead | UnsafeWrite,
31 UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
32};
33
34namespace {
35template <typename M, typename T, GuestMemoryFlags FLAGS>
36class GuestMemory {
37 using iterator = T*;
38 using const_iterator = const T*;
39 using value_type = T;
40 using element_type = T;
41 using iterator_category = std::contiguous_iterator_tag;
42
43public:
44 GuestMemory() = delete;
45 explicit GuestMemory(M& memory, u64 addr, std::size_t size,
46 Common::ScratchBuffer<T>* backup = nullptr)
47 : m_memory{memory}, m_addr{addr}, m_size{size} {
48 static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
49 if constexpr (FLAGS & GuestMemoryFlags::Read) {
50 Read(addr, size, backup);
51 }
52 }
53
54 ~GuestMemory() = default;
55
56 T* data() noexcept {
57 return m_data_span.data();
58 }
59
60 const T* data() const noexcept {
61 return m_data_span.data();
62 }
63
64 size_t size() const noexcept {
65 return m_size;
66 }
67
68 size_t size_bytes() const noexcept {
69 return this->size() * sizeof(T);
70 }
71
72 [[nodiscard]] T* begin() noexcept {
73 return this->data();
74 }
75
76 [[nodiscard]] const T* begin() const noexcept {
77 return this->data();
78 }
79
80 [[nodiscard]] T* end() noexcept {
81 return this->data() + this->size();
82 }
83
84 [[nodiscard]] const T* end() const noexcept {
85 return this->data() + this->size();
86 }
87
88 T& operator[](size_t index) noexcept {
89 return m_data_span[index];
90 }
91
92 const T& operator[](size_t index) const noexcept {
93 return m_data_span[index];
94 }
95
96 void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
97 m_addr = addr;
98 m_size = size;
99 m_addr_changed = true;
100 }
101
102 std::span<T> Read(u64 addr, std::size_t size,
103 Common::ScratchBuffer<T>* backup = nullptr) noexcept {
104 m_addr = addr;
105 m_size = size;
106 if (m_size == 0) {
107 m_is_data_copy = true;
108 return {};
109 }
110
111 if (this->TrySetSpan()) {
112 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
113 m_memory.FlushRegion(m_addr, this->size_bytes());
114 }
115 } else {
116 if (backup) {
117 backup->resize_destructive(this->size());
118 m_data_span = *backup;
119 } else {
120 m_data_copy.resize(this->size());
121 m_data_span = std::span(m_data_copy);
122 }
123 m_is_data_copy = true;
124 m_span_valid = true;
125 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
126 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
127 } else {
128 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
129 }
130 }
131 return m_data_span;
132 }
133
134 void Write(std::span<T> write_data) noexcept {
135 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
136 m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
137 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
138 m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
139 } else {
140 m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
141 }
142 }
143
144 bool TrySetSpan() noexcept {
145 if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
146 m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
147 m_span_valid = true;
148 return true;
149 }
150 return false;
151 }
152
153protected:
154 bool IsDataCopy() const noexcept {
155 return m_is_data_copy;
156 }
157
158 bool AddressChanged() const noexcept {
159 return m_addr_changed;
160 }
161
162 M& m_memory;
163 u64 m_addr{};
164 size_t m_size{};
165 std::span<T> m_data_span{};
166 std::vector<T> m_data_copy{};
167 bool m_span_valid{false};
168 bool m_is_data_copy{false};
169 bool m_addr_changed{false};
170};
171
172template <typename M, typename T, GuestMemoryFlags FLAGS>
173class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
174public:
175 GuestMemoryScoped() = delete;
176 explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
177 Common::ScratchBuffer<T>* backup = nullptr)
178 : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
179 if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
180 if (!this->TrySetSpan()) {
181 if (backup) {
182 this->m_data_span = *backup;
183 this->m_span_valid = true;
184 this->m_is_data_copy = true;
185 }
186 }
187 }
188 }
189
190 ~GuestMemoryScoped() {
191 if constexpr (FLAGS & GuestMemoryFlags::Write) {
192 if (this->size() == 0) [[unlikely]] {
193 return;
194 }
195
196 if (this->AddressChanged() || this->IsDataCopy()) {
197 ASSERT(this->m_span_valid);
198 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
199 this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
200 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
201 this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
202 } else {
203 this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
204 }
205 } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
206 (FLAGS & GuestMemoryFlags::Cached)) {
207 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
208 }
209 }
210 }
211};
212} // namespace
213
214} // namespace Core::Memory
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 53735a225..0b08e877e 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -5,6 +5,7 @@
5#include "common/scope_exit.h" 5#include "common/scope_exit.h"
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/gpu_dirty_memory_manager.h"
8#include "core/hle/kernel/k_process.h" 9#include "core/hle/kernel/k_process.h"
9#include "core/hle/kernel/k_scoped_resource_reservation.h" 10#include "core/hle/kernel/k_scoped_resource_reservation.h"
10#include "core/hle/kernel/k_shared_memory.h" 11#include "core/hle/kernel/k_shared_memory.h"
@@ -320,7 +321,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
 
     // Ensure our memory is initialized.
     m_memory.SetCurrentPageTable(*this);
-    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+    m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager());
 
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, params.code_num_pages * PageSize,
@@ -417,7 +418,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
 
     // Ensure our memory is initialized.
     m_memory.SetCurrentPageTable(*this);
-    m_memory.SetGPUDirtyManagers(m_dirty_memory_managers);
+    m_memory.SetGPUDirtyManagers(m_kernel.System().GetGPUDirtyMemoryManager());
 
     // Ensure we can insert the code region.
     R_UNLESS(m_page_table.CanContain(params.code_address, code_size, KMemoryState::Code),
@@ -1141,8 +1142,7 @@ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {}
 KProcess::KProcess(KernelCore& kernel)
     : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel},
       m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()},
-      m_handle_table{kernel}, m_dirty_memory_managers{},
-      m_exclusive_monitor{}, m_memory{kernel.System()} {}
+      m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {}
 KProcess::~KProcess() = default;
 
 Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
@@ -1324,10 +1324,4 @@ bool KProcess::RemoveWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointT
     return true;
 }
 
-void KProcess::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
-    for (auto& manager : m_dirty_memory_managers) {
-        manager.Gather(callback);
-    }
-}
-
 } // namespace Kernel
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index 53c0e3316..ab1358a12 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -7,7 +7,6 @@
 
 #include "core/arm/arm_interface.h"
 #include "core/file_sys/program_metadata.h"
-#include "core/gpu_dirty_memory_manager.h"
 #include "core/hle/kernel/code_set.h"
 #include "core/hle/kernel/k_address_arbiter.h"
 #include "core/hle/kernel/k_capabilities.h"
@@ -128,7 +127,6 @@ private:
 #ifdef HAS_NCE
     std::unordered_map<u64, u64> m_post_handlers{};
 #endif
-    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> m_dirty_memory_managers;
     std::unique_ptr<Core::ExclusiveMonitor> m_exclusive_monitor;
     Core::Memory::Memory m_memory;
 
@@ -511,8 +509,6 @@ public:
         return m_memory;
     }
 
-    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
-
     Core::ExclusiveMonitor& GetExclusiveMonitor() const {
         return *m_exclusive_monitor;
     }
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp
index 3f38ceb03..e491dd260 100644
--- a/src/core/hle/service/hle_ipc.cpp
+++ b/src/core/hle/service/hle_ipc.cpp
@@ -12,6 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "common/scratch_buffer.h" 14#include "common/scratch_buffer.h"
15#include "core/guest_memory.h"
15#include "core/hle/kernel/k_auto_object.h" 16#include "core/hle/kernel/k_auto_object.h"
16#include "core/hle/kernel/k_handle_table.h" 17#include "core/hle/kernel/k_handle_table.h"
17#include "core/hle/kernel/k_process.h" 18#include "core/hle/kernel/k_process.h"
@@ -23,19 +24,6 @@
23#include "core/hle/service/ipc_helpers.h" 24#include "core/hle/service/ipc_helpers.h"
24#include "core/memory.h" 25#include "core/memory.h"
25 26
26namespace {
27static thread_local std::array read_buffer_data_a{
28 Common::ScratchBuffer<u8>(),
29 Common::ScratchBuffer<u8>(),
30 Common::ScratchBuffer<u8>(),
31};
32static thread_local std::array read_buffer_data_x{
33 Common::ScratchBuffer<u8>(),
34 Common::ScratchBuffer<u8>(),
35 Common::ScratchBuffer<u8>(),
36};
37} // Anonymous namespace
38
39namespace Service { 27namespace Service {
40 28
41SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_) 29SessionRequestHandler::SessionRequestHandler(Kernel::KernelCore& kernel_, const char* service_name_)
@@ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons
343} 331}
344 332
345std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { 333std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const {
346 static thread_local std::array read_buffer_a{ 334 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
347 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
348 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
349 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
350 };
351 335
352 ASSERT_OR_EXECUTE_MSG( 336 ASSERT_OR_EXECUTE_MSG(
353 BufferDescriptorA().size() > buffer_index, { return {}; }, 337 BufferDescriptorA().size() > buffer_index, { return {}; },
354 "BufferDescriptorA invalid buffer_index {}", buffer_index); 338 "BufferDescriptorA invalid buffer_index {}", buffer_index);
355 auto& read_buffer = read_buffer_a[buffer_index]; 339 return gm.Read(BufferDescriptorA()[buffer_index].Address(),
356 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), 340 BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
357 BufferDescriptorA()[buffer_index].Size(),
358 &read_buffer_data_a[buffer_index]);
359} 341}
360 342
361std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { 343std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const {
362 static thread_local std::array read_buffer_x{ 344 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
363 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
364 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
365 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
366 };
367 345
368 ASSERT_OR_EXECUTE_MSG( 346 ASSERT_OR_EXECUTE_MSG(
369 BufferDescriptorX().size() > buffer_index, { return {}; }, 347 BufferDescriptorX().size() > buffer_index, { return {}; },
370 "BufferDescriptorX invalid buffer_index {}", buffer_index); 348 "BufferDescriptorX invalid buffer_index {}", buffer_index);
371 auto& read_buffer = read_buffer_x[buffer_index]; 349 return gm.Read(BufferDescriptorX()[buffer_index].Address(),
372 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), 350 BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
373 BufferDescriptorX()[buffer_index].Size(),
374 &read_buffer_data_x[buffer_index]);
375} 351}
376 352
377std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { 353std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
378 static thread_local std::array read_buffer_a{ 354 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0);
379 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
380 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
381 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
382 };
383 static thread_local std::array read_buffer_x{
384 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
385 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
386 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
387 };
388 355
389 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 356 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
390 BufferDescriptorA()[buffer_index].Size()}; 357 BufferDescriptorA()[buffer_index].Size()};
@@ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons
401 ASSERT_OR_EXECUTE_MSG( 368 ASSERT_OR_EXECUTE_MSG(
402 BufferDescriptorA().size() > buffer_index, { return {}; }, 369 BufferDescriptorA().size() > buffer_index, { return {}; },
403 "BufferDescriptorA invalid buffer_index {}", buffer_index); 370 "BufferDescriptorA invalid buffer_index {}", buffer_index);
404 auto& read_buffer = read_buffer_a[buffer_index]; 371 return gm.Read(BufferDescriptorA()[buffer_index].Address(),
405 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), 372 BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]);
406 BufferDescriptorA()[buffer_index].Size(),
407 &read_buffer_data_a[buffer_index]);
408 } else { 373 } else {
409 ASSERT_OR_EXECUTE_MSG( 374 ASSERT_OR_EXECUTE_MSG(
410 BufferDescriptorX().size() > buffer_index, { return {}; }, 375 BufferDescriptorX().size() > buffer_index, { return {}; },
411 "BufferDescriptorX invalid buffer_index {}", buffer_index); 376 "BufferDescriptorX invalid buffer_index {}", buffer_index);
412 auto& read_buffer = read_buffer_x[buffer_index]; 377 return gm.Read(BufferDescriptorX()[buffer_index].Address(),
413 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), 378 BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]);
414 BufferDescriptorX()[buffer_index].Size(),
415 &read_buffer_data_x[buffer_index]);
416 } 379 }
417} 380}
418 381
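
Note: the rewritten ReadBuffer* helpers build a throwaway guest-memory reader per call and stage the bytes in the context-owned scratch buffers declared in hle_ipc.h, instead of keeping thread_local SafeRead objects alive. A minimal sketch of that "read through a temporary view into a caller-owned scratch buffer" shape, with GuestView and ScratchBuffer as invented simplifications of the yuzu types:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

// Hypothetical stand-ins for Core::Memory::CpuGuestMemory and Common::ScratchBuffer.
struct ScratchBuffer {
    std::vector<std::uint8_t> storage;
    std::uint8_t* resize(std::size_t size) {
        storage.resize(size);
        return storage.data();
    }
};

struct GuestView {
    const std::uint8_t* base; // pretend this is guest memory already mapped in

    // Copy [addr, addr + size) into the caller's scratch buffer and return a span over it.
    std::span<const std::uint8_t> Read(std::size_t addr, std::size_t size, ScratchBuffer* out) const {
        std::uint8_t* dst = out->resize(size);
        std::memcpy(dst, base + addr, size);
        return {dst, size};
    }
};

int main() {
    std::vector<std::uint8_t> fake_guest(0x1000, 0xAB);
    ScratchBuffer scratch;             // lives as long as the request context does
    GuestView view{fake_guest.data()}; // constructed per call, cheap to create
    std::span<const std::uint8_t> payload = view.Read(0x10, 0x20, &scratch);
    return payload.size() == 0x20 ? 0 : 1;
}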
diff --git a/src/core/hle/service/hle_ipc.h b/src/core/hle/service/hle_ipc.h
index 440737db5..8329d7265 100644
--- a/src/core/hle/service/hle_ipc.h
+++ b/src/core/hle/service/hle_ipc.h
@@ -41,6 +41,8 @@ class KernelCore;
41class KHandleTable; 41class KHandleTable;
42class KProcess; 42class KProcess;
43class KServerSession; 43class KServerSession;
44template <typename T>
45class KScopedAutoObject;
44class KThread; 46class KThread;
45} // namespace Kernel 47} // namespace Kernel
46 48
@@ -424,6 +426,9 @@ private:
424 426
425 Kernel::KernelCore& kernel; 427 Kernel::KernelCore& kernel;
426 Core::Memory::Memory& memory; 428 Core::Memory::Memory& memory;
429
430 mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{};
431 mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{};
427}; 432};
428 433
429} // namespace Service 434} // namespace Service
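
Note: the two scratch arrays are declared mutable because the ReadBuffer* accessors of HLERequestContext are const member functions; the buffers are a private cache, not observable state. A tiny illustration of that idiom, with all names invented:

#include <cstddef>
#include <string>
#include <vector>

class Decoder {
public:
    // Logically read-only, but allowed to grow its private scratch storage.
    std::string Peek(std::size_t n) const {
        scratch.assign(n, 'x');
        return std::string(scratch.begin(), scratch.end());
    }

private:
    mutable std::vector<char> scratch; // cache only; not part of the observable state
};

int main() {
    const Decoder d;
    return d.Peek(3) == "xxx" ? 0 : 1;
}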
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index 37ca24f5d..21ef57d27 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -2,27 +2,135 @@
2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors 2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
3// SPDX-License-Identifier: GPL-3.0-or-later 3// SPDX-License-Identifier: GPL-3.0-or-later
4 4
5#include <atomic>
6#include <deque>
7#include <mutex>
8
9#include "core/hle/kernel/k_process.h"
5#include "core/hle/service/nvdrv/core/container.h" 10#include "core/hle/service/nvdrv/core/container.h"
11#include "core/hle/service/nvdrv/core/heap_mapper.h"
6#include "core/hle/service/nvdrv/core/nvmap.h" 12#include "core/hle/service/nvdrv/core/nvmap.h"
7#include "core/hle/service/nvdrv/core/syncpoint_manager.h" 13#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
14#include "core/memory.h"
8#include "video_core/host1x/host1x.h" 15#include "video_core/host1x/host1x.h"
9 16
10namespace Service::Nvidia::NvCore { 17namespace Service::Nvidia::NvCore {
11 18
19Session::Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_)
20 : id{id_}, process{process_}, asid{asid_}, has_preallocated_area{}, mapper{}, is_active{} {}
21
22Session::~Session() = default;
23
12struct ContainerImpl { 24struct ContainerImpl {
13 explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_) 25 explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
14 : file{host1x_}, manager{host1x_}, device_file_data{} {} 26 : host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
27 Tegra::Host1x::Host1x& host1x;
15 NvMap file; 28 NvMap file;
16 SyncpointManager manager; 29 SyncpointManager manager;
17 Container::Host1xDeviceFileData device_file_data; 30 Container::Host1xDeviceFileData device_file_data;
31 std::deque<Session> sessions;
32 size_t new_ids{};
33 std::deque<size_t> id_pool;
34 std::mutex session_guard;
18}; 35};
19 36
20Container::Container(Tegra::Host1x::Host1x& host1x_) { 37Container::Container(Tegra::Host1x::Host1x& host1x_) {
21 impl = std::make_unique<ContainerImpl>(host1x_); 38 impl = std::make_unique<ContainerImpl>(*this, host1x_);
22} 39}
23 40
24Container::~Container() = default; 41Container::~Container() = default;
25 42
43SessionId Container::OpenSession(Kernel::KProcess* process) {
44 using namespace Common::Literals;
45
46 std::scoped_lock lk(impl->session_guard);
47 for (auto& session : impl->sessions) {
48 if (!session.is_active) {
49 continue;
50 }
51 if (session.process == process) {
52 return session.id;
53 }
54 }
55 size_t new_id{};
56 auto* memory_interface = &process->GetMemory();
57 auto& smmu = impl->host1x.MemoryManager();
58 auto asid = smmu.RegisterProcess(memory_interface);
59 if (!impl->id_pool.empty()) {
60 new_id = impl->id_pool.front();
61 impl->id_pool.pop_front();
62 impl->sessions[new_id] = Session{SessionId{new_id}, process, asid};
63 } else {
64 new_id = impl->new_ids++;
65 impl->sessions.emplace_back(SessionId{new_id}, process, asid);
66 }
67 auto& session = impl->sessions[new_id];
68 session.is_active = true;
69 // Optimization
70 if (process->IsApplication()) {
71 auto& page_table = process->GetPageTable().GetBasePageTable();
72 auto heap_start = page_table.GetHeapRegionStart();
73
74 Kernel::KProcessAddress cur_addr = heap_start;
75 size_t region_size = 0;
76 VAddr region_start = 0;
77 while (true) {
78 Kernel::KMemoryInfo mem_info{};
79 Kernel::Svc::PageInfo page_info{};
80 R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
81 cur_addr));
82 auto svc_mem_info = mem_info.GetSvcMemoryInfo();
83
84 // Check if this memory block is heap.
85 if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
86 if (svc_mem_info.size > region_size) {
87 region_size = svc_mem_info.size;
88 region_start = svc_mem_info.base_address;
89 }
90 }
91
92 // Check if we're done.
93 const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
94 if (next_address <= GetInteger(cur_addr)) {
95 break;
96 }
97
98 cur_addr = next_address;
99 }
100 session.has_preallocated_area = false;
101 auto start_region = region_size >= 32_MiB ? smmu.Allocate(region_size) : 0;
102 if (start_region != 0) {
103 session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
104 asid, impl->host1x);
105 smmu.TrackContinuity(start_region, region_start, region_size, asid);
106 session.has_preallocated_area = true;
107 LOG_DEBUG(Debug, "Preallocation created!");
108 }
109 }
110 return SessionId{new_id};
111}
112
113void Container::CloseSession(SessionId session_id) {
114 std::scoped_lock lk(impl->session_guard);
115 auto& session = impl->sessions[session_id.id];
116 auto& smmu = impl->host1x.MemoryManager();
117 if (session.has_preallocated_area) {
118 const DAddr region_start = session.mapper->GetRegionStart();
119 const size_t region_size = session.mapper->GetRegionSize();
120 session.mapper.reset();
121 smmu.Free(region_start, region_size);
122 session.has_preallocated_area = false;
123 }
124 session.is_active = false;
125 smmu.UnregisterProcess(impl->sessions[session_id.id].asid);
126 impl->id_pool.emplace_front(session_id.id);
127}
128
129Session* Container::GetSession(SessionId session_id) {
130 std::atomic_thread_fence(std::memory_order_acquire);
131 return &impl->sessions[session_id.id];
132}
133
26NvMap& Container::GetNvMapFile() { 134NvMap& Container::GetNvMapFile() {
27 return impl->file; 135 return impl->file;
28} 136}
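
Note: OpenSession walks the guest heap with QueryInfo, remembers the largest block in the Normal (heap) state, and only preallocates an SMMU region when that block is at least 32 MiB. A condensed sketch of the same "find the largest heap block, then decide" scan over a fake block list; only the 32 MiB threshold is taken from the hunk, everything else is simplified:

#include <cstdint>
#include <vector>

struct Block {
    std::uint64_t base;
    std::uint64_t size;
    bool is_heap;
};

// Mirrors the QueryInfo loop in Container::OpenSession at a very high level.
static Block LargestHeapBlock(const std::vector<Block>& blocks) {
    Block best{0, 0, false};
    for (const Block& b : blocks) {
        if (b.is_heap && b.size > best.size) {
            best = b;
        }
    }
    return best;
}

int main() {
    constexpr std::uint64_t MiB = 1024ull * 1024ull;
    const std::vector<Block> address_space{
        {0x0800'0000, 4 * MiB, false},
        {0x0840'0000, 64 * MiB, true}, // heap block large enough to preallocate
        {0x4840'0000, 8 * MiB, true},
    };
    const Block best = LargestHeapBlock(address_space);
    const bool preallocate = best.size >= 32 * MiB; // same threshold as the diff
    return preallocate ? 0 : 1;
}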
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index b4b63ac90..b4d3938a8 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -8,24 +8,56 @@
8#include <memory> 8#include <memory>
9#include <unordered_map> 9#include <unordered_map>
10 10
11#include "core/device_memory_manager.h"
11#include "core/hle/service/nvdrv/nvdata.h" 12#include "core/hle/service/nvdrv/nvdata.h"
12 13
14namespace Kernel {
15class KProcess;
16}
17
13namespace Tegra::Host1x { 18namespace Tegra::Host1x {
14class Host1x; 19class Host1x;
15} // namespace Tegra::Host1x 20} // namespace Tegra::Host1x
16 21
17namespace Service::Nvidia::NvCore { 22namespace Service::Nvidia::NvCore {
18 23
24class HeapMapper;
19class NvMap; 25class NvMap;
20class SyncpointManager; 26class SyncpointManager;
21 27
22struct ContainerImpl; 28struct ContainerImpl;
23 29
30struct SessionId {
31 size_t id;
32};
33
34struct Session {
35 Session(SessionId id_, Kernel::KProcess* process_, Core::Asid asid_);
36 ~Session();
37
38 Session(const Session&) = delete;
39 Session& operator=(const Session&) = delete;
40 Session(Session&&) = default;
41 Session& operator=(Session&&) = default;
42
43 SessionId id;
44 Kernel::KProcess* process;
45 Core::Asid asid;
46 bool has_preallocated_area{};
47 std::unique_ptr<HeapMapper> mapper{};
48 bool is_active{};
49};
50
24class Container { 51class Container {
25public: 52public:
26 explicit Container(Tegra::Host1x::Host1x& host1x); 53 explicit Container(Tegra::Host1x::Host1x& host1x);
27 ~Container(); 54 ~Container();
28 55
56 SessionId OpenSession(Kernel::KProcess* process);
57 void CloseSession(SessionId id);
58
59 Session* GetSession(SessionId id);
60
29 NvMap& GetNvMapFile(); 61 NvMap& GetNvMapFile();
30 62
31 const NvMap& GetNvMapFile() const; 63 const NvMap& GetNvMapFile() const;
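
Note: sessions live in a deque indexed by SessionId, and closed ids go back into id_pool so they are reused before new_ids grows. A standalone sketch of that id-recycling scheme; SlotPool is an invented name:

#include <cstddef>
#include <deque>

// Invented helper that mirrors ContainerImpl's new_ids / id_pool bookkeeping.
class SlotPool {
public:
    std::size_t Acquire() {
        if (!free_ids.empty()) {
            const std::size_t id = free_ids.front();
            free_ids.pop_front();
            return id;            // reuse a previously closed slot
        }
        return next_id++;         // otherwise grow the slot space
    }

    void Release(std::size_t id) {
        free_ids.push_front(id);  // closed ids are handed out again first
    }

private:
    std::size_t next_id{};
    std::deque<std::size_t> free_ids;
};

int main() {
    SlotPool pool;
    const std::size_t a = pool.Acquire(); // 0
    const std::size_t b = pool.Acquire(); // 1
    pool.Release(a);
    return (pool.Acquire() == a && b == 1) ? 0 : 1;
}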
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
new file mode 100644
index 000000000..096dc5deb
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
@@ -0,0 +1,175 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <mutex>
5
6#include <boost/container/small_vector.hpp>
7#define BOOST_NO_MT
8#include <boost/pool/detail/mutex.hpp>
9#undef BOOST_NO_MT
10#include <boost/icl/interval.hpp>
11#include <boost/icl/interval_base_set.hpp>
12#include <boost/icl/interval_set.hpp>
13#include <boost/icl/split_interval_map.hpp>
14#include <boost/pool/pool.hpp>
15#include <boost/pool/pool_alloc.hpp>
16#include <boost/pool/poolfwd.hpp>
17
18#include "core/hle/service/nvdrv/core/heap_mapper.h"
19#include "video_core/host1x/host1x.h"
20
21namespace boost {
22template <typename T>
23class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
24}
25
26namespace Service::Nvidia::NvCore {
27
28using IntervalCompare = std::less<DAddr>;
29using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
30using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
31using IntervalSet = boost::icl::interval_set<DAddr>;
32using IntervalType = typename IntervalSet::interval_type;
33
34template <typename Type>
35struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
36 // types
37 typedef counter_add_functor<Type> type;
38 typedef boost::icl::identity_based_inplace_combine<Type> base_type;
39
40 // public member functions
41 void operator()(Type& current, const Type& added) const {
42 current += added;
43 if (current < base_type::identity_element()) {
44 current = base_type::identity_element();
45 }
46 }
47
48 // public static functions
49 static void version(Type&){};
50};
51
52using OverlapCombine = counter_add_functor<int>;
53using OverlapSection = boost::icl::inter_section<int>;
54using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
55
56struct HeapMapper::HeapMapperInternal {
57 HeapMapperInternal(Tegra::Host1x::Host1x& host1x) : device_memory{host1x.MemoryManager()} {}
58 ~HeapMapperInternal() = default;
59
60 template <typename Func>
61 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
62 Func&& func) {
63 const DAddr start_address = cpu_addr;
64 const DAddr end_address = start_address + size;
65 const IntervalType search_interval{start_address, end_address};
66 auto it = current_range.lower_bound(search_interval);
67 if (it == current_range.end()) {
68 return;
69 }
70 auto end_it = current_range.upper_bound(search_interval);
71 for (; it != end_it; it++) {
72 auto& inter = it->first;
73 DAddr inter_addr_end = inter.upper();
74 DAddr inter_addr = inter.lower();
75 if (inter_addr_end > end_address) {
76 inter_addr_end = end_address;
77 }
78 if (inter_addr < start_address) {
79 inter_addr = start_address;
80 }
81 func(inter_addr, inter_addr_end, it->second);
82 }
83 }
84
85 void RemoveEachInOverlapCounter(OverlapCounter& current_range,
86 const IntervalType search_interval, int subtract_value) {
87 bool any_removals = false;
88 current_range.add(std::make_pair(search_interval, subtract_value));
89 do {
90 any_removals = false;
91 auto it = current_range.lower_bound(search_interval);
92 if (it == current_range.end()) {
93 return;
94 }
95 auto end_it = current_range.upper_bound(search_interval);
96 for (; it != end_it; it++) {
97 if (it->second <= 0) {
98 any_removals = true;
99 current_range.erase(it);
100 break;
101 }
102 }
103 } while (any_removals);
104 }
105
106 IntervalSet base_set;
107 OverlapCounter mapping_overlaps;
108 Tegra::MaxwellDeviceMemoryManager& device_memory;
109 std::mutex guard;
110};
111
112HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
113 Tegra::Host1x::Host1x& host1x)
114 : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_asid{asid} {
115 m_internal = std::make_unique<HeapMapperInternal>(host1x);
116}
117
118HeapMapper::~HeapMapper() {
119 m_internal->device_memory.Unmap(m_daddress, m_size);
120}
121
122DAddr HeapMapper::Map(VAddr start, size_t size) {
123 std::scoped_lock lk(m_internal->guard);
124 m_internal->base_set.clear();
125 const IntervalType interval{start, start + size};
126 m_internal->base_set.insert(interval);
127 m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
128 [this](VAddr start_addr, VAddr end_addr, int) {
129 const IntervalType other{start_addr, end_addr};
130 m_internal->base_set.subtract(other);
131 });
132 if (!m_internal->base_set.empty()) {
133 auto it = m_internal->base_set.begin();
134 auto end_it = m_internal->base_set.end();
135 for (; it != end_it; it++) {
136 const VAddr inter_addr_end = it->upper();
137 const VAddr inter_addr = it->lower();
138 const size_t offset = inter_addr - m_vaddress;
139 const size_t sub_size = inter_addr_end - inter_addr;
140 m_internal->device_memory.Map(m_daddress + offset, m_vaddress + offset, sub_size,
141 m_asid);
142 }
143 }
144 m_internal->mapping_overlaps += std::make_pair(interval, 1);
145 m_internal->base_set.clear();
146 return m_daddress + (start - m_vaddress);
147}
148
149void HeapMapper::Unmap(VAddr start, size_t size) {
150 std::scoped_lock lk(m_internal->guard);
151 m_internal->base_set.clear();
152 m_internal->ForEachInOverlapCounter(m_internal->mapping_overlaps, start, size,
153 [this](VAddr start_addr, VAddr end_addr, int value) {
154 if (value <= 1) {
155 const IntervalType other{start_addr, end_addr};
156 m_internal->base_set.insert(other);
157 }
158 });
159 if (!m_internal->base_set.empty()) {
160 auto it = m_internal->base_set.begin();
161 auto end_it = m_internal->base_set.end();
162 for (; it != end_it; it++) {
163 const VAddr inter_addr_end = it->upper();
164 const VAddr inter_addr = it->lower();
165 const size_t offset = inter_addr - m_vaddress;
166 const size_t sub_size = inter_addr_end - inter_addr;
167 m_internal->device_memory.Unmap(m_daddress + offset, sub_size);
168 }
169 }
170 const IntervalType to_remove{start, start + size};
171 m_internal->RemoveEachInOverlapCounter(m_internal->mapping_overlaps, to_remove, -1);
172 m_internal->base_set.clear();
173}
174
175} // namespace Service::Nvidia::NvCore
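
Note: HeapMapper keeps a count of how many times each sub-range of the preallocated heap window has been pinned; Map only touches the device memory manager for sub-ranges whose count was previously zero, and Unmap only releases sub-ranges whose count falls back to zero. The boost::icl machinery above does this for arbitrary intervals; the sketch below shows the same reference-counting idea at a fixed page granularity with a plain std::map, so it is a simplification rather than the yuzu implementation:

#include <cstdint>
#include <map>
#include <vector>

constexpr std::uint64_t PAGE = 0x1000; // simplification: fixed 4 KiB granularity

class PageRefMapper {
public:
    // Returns the pages that became mapped because of this call.
    std::vector<std::uint64_t> Map(std::uint64_t addr, std::uint64_t size) {
        std::vector<std::uint64_t> newly_mapped;
        for (std::uint64_t page = addr / PAGE; page < (addr + size) / PAGE; ++page) {
            if (ref_counts[page]++ == 0) {
                newly_mapped.push_back(page * PAGE);
            }
        }
        return newly_mapped;
    }

    // Returns the pages that became unmapped because of this call.
    std::vector<std::uint64_t> Unmap(std::uint64_t addr, std::uint64_t size) {
        std::vector<std::uint64_t> newly_unmapped;
        for (std::uint64_t page = addr / PAGE; page < (addr + size) / PAGE; ++page) {
            auto it = ref_counts.find(page);
            if (it != ref_counts.end() && --it->second == 0) {
                ref_counts.erase(it);
                newly_unmapped.push_back(page * PAGE);
            }
        }
        return newly_unmapped;
    }

private:
    std::map<std::uint64_t, int> ref_counts;
};

int main() {
    PageRefMapper mapper;
    const auto first = mapper.Map(0x0, 0x3000);     // maps three pages
    const auto second = mapper.Map(0x2000, 0x2000); // only 0x3000 is newly mapped
    const auto freed = mapper.Unmap(0x0, 0x3000);   // 0x2000 stays pinned by the second map
    return (first.size() == 3 && second.size() == 1 && freed.size() == 2) ? 0 : 1;
}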
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h
new file mode 100644
index 000000000..491a12e4f
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.h
@@ -0,0 +1,49 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <memory>
7
8#include "common/common_types.h"
9#include "core/device_memory_manager.h"
10
11namespace Tegra::Host1x {
12class Host1x;
13} // namespace Tegra::Host1x
14
15namespace Service::Nvidia::NvCore {
16
17class HeapMapper {
18public:
19 HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, Core::Asid asid,
20 Tegra::Host1x::Host1x& host1x);
21 ~HeapMapper();
22
23 bool IsInBounds(VAddr start, size_t size) const {
24 VAddr end = start + size;
25 return start >= m_vaddress && end <= (m_vaddress + m_size);
26 }
27
28 DAddr Map(VAddr start, size_t size);
29
30 void Unmap(VAddr start, size_t size);
31
32 DAddr GetRegionStart() const {
33 return m_daddress;
34 }
35
36 size_t GetRegionSize() const {
37 return m_size;
38 }
39
40private:
41 struct HeapMapperInternal;
42 VAddr m_vaddress;
43 DAddr m_daddress;
44 size_t m_size;
45 Core::Asid m_asid;
46 std::unique_ptr<HeapMapperInternal> m_internal;
47};
48
49} // namespace Service::Nvidia::NvCore
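
Note: the header makes the contract visible: one HeapMapper covers a contiguous guest window [m_vaddress, m_vaddress + m_size) backed by an equally sized device window starting at m_daddress, and Map translates guest addresses to device addresses by a fixed offset. A tiny arithmetic sketch of that translation, with all values invented:

#include <cstdint>

int main() {
    // Invented example values for the preallocated heap window.
    const std::uint64_t heap_vaddr = 0x0840'0000; // guest start of the window
    const std::uint64_t heap_daddr = 0x2000'0000; // device start of the window
    const std::uint64_t heap_size  = 64ull << 20; // 64 MiB

    const std::uint64_t pin_vaddr = 0x0841'2000;  // address being pinned
    const bool in_bounds = pin_vaddr >= heap_vaddr && pin_vaddr < heap_vaddr + heap_size;

    // Same translation HeapMapper::Map performs once the range is mapped.
    const std::uint64_t pin_daddr = heap_daddr + (pin_vaddr - heap_vaddr);
    return (in_bounds && pin_daddr == 0x2001'2000) ? 0 : 1;
}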
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 0ca05257e..1b59c6b15 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -2,14 +2,19 @@
2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors 2// SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
3// SPDX-License-Identifier: GPL-3.0-or-later 3// SPDX-License-Identifier: GPL-3.0-or-later
4 4
5#include <functional>
6
5#include "common/alignment.h" 7#include "common/alignment.h"
6#include "common/assert.h" 8#include "common/assert.h"
7#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/hle/service/nvdrv/core/container.h"
11#include "core/hle/service/nvdrv/core/heap_mapper.h"
8#include "core/hle/service/nvdrv/core/nvmap.h" 12#include "core/hle/service/nvdrv/core/nvmap.h"
9#include "core/memory.h" 13#include "core/memory.h"
10#include "video_core/host1x/host1x.h" 14#include "video_core/host1x/host1x.h"
11 15
12using Core::Memory::YUZU_PAGESIZE; 16using Core::Memory::YUZU_PAGESIZE;
17constexpr size_t BIG_PAGE_SIZE = YUZU_PAGESIZE * 16;
13 18
14namespace Service::Nvidia::NvCore { 19namespace Service::Nvidia::NvCore {
15NvMap::Handle::Handle(u64 size_, Id id_) 20NvMap::Handle::Handle(u64 size_, Id id_)
@@ -17,9 +22,9 @@ NvMap::Handle::Handle(u64 size_, Id id_)
17 flags.raw = 0; 22 flags.raw = 0;
18} 23}
19 24
20NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { 25NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress,
26 NvCore::SessionId pSessionId) {
21 std::scoped_lock lock(mutex); 27 std::scoped_lock lock(mutex);
22
23 // Handles cannot be allocated twice 28 // Handles cannot be allocated twice
24 if (allocated) { 29 if (allocated) {
25 return NvResult::AccessDenied; 30 return NvResult::AccessDenied;
@@ -28,6 +33,7 @@ NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress)
28 flags = pFlags; 33 flags = pFlags;
29 kind = pKind; 34 kind = pKind;
30 align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign; 35 align = pAlign < YUZU_PAGESIZE ? YUZU_PAGESIZE : pAlign;
36 session_id = pSessionId;
31 37
32 // This flag is only applicable for handles with an address passed 38 // This flag is only applicable for handles with an address passed
33 if (pAddress) { 39 if (pAddress) {
@@ -63,7 +69,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
63 return NvResult::Success; 69 return NvResult::Success;
64} 70}
65 71
66NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {} 72NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {}
67 73
68void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) { 74void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
69 std::scoped_lock lock(handles_lock); 75 std::scoped_lock lock(handles_lock);
@@ -78,12 +84,30 @@ void NvMap::UnmapHandle(Handle& handle_description) {
78 handle_description.unmap_queue_entry.reset(); 84 handle_description.unmap_queue_entry.reset();
79 } 85 }
80 86
87 // Free and unmap the handle from Host1x GMMU
88 if (handle_description.pin_virt_address) {
89 host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
90 handle_description.aligned_size);
91 host1x.Allocator().Free(handle_description.pin_virt_address,
92 static_cast<u32>(handle_description.aligned_size));
93 handle_description.pin_virt_address = 0;
94 }
95
81 // Free and unmap the handle from the SMMU 96 // Free and unmap the handle from the SMMU
82 host1x.MemoryManager().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address), 97 const size_t map_size = handle_description.aligned_size;
83 handle_description.aligned_size); 98 if (!handle_description.in_heap) {
84 host1x.Allocator().Free(handle_description.pin_virt_address, 99 auto& smmu = host1x.MemoryManager();
85 static_cast<u32>(handle_description.aligned_size)); 100 size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE);
86 handle_description.pin_virt_address = 0; 101 smmu.Unmap(handle_description.d_address, map_size);
102 smmu.Free(handle_description.d_address, static_cast<size_t>(aligned_up));
103 handle_description.d_address = 0;
104 return;
105 }
106 const VAddr vaddress = handle_description.address;
107 auto* session = core.GetSession(handle_description.session_id);
108 session->mapper->Unmap(vaddress, map_size);
109 handle_description.d_address = 0;
110 handle_description.in_heap = false;
87} 111}
88 112
89bool NvMap::TryRemoveHandle(const Handle& handle_description) { 113bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -124,22 +148,33 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) {
124 } 148 }
125} 149}
126 150
127VAddr NvMap::GetHandleAddress(Handle::Id handle) { 151DAddr NvMap::GetHandleAddress(Handle::Id handle) {
128 std::scoped_lock lock(handles_lock); 152 std::scoped_lock lock(handles_lock);
129 try { 153 try {
130 return handles.at(handle)->address; 154 return handles.at(handle)->d_address;
131 } catch (std::out_of_range&) { 155 } catch (std::out_of_range&) {
132 return 0; 156 return 0;
133 } 157 }
134} 158}
135 159
136u32 NvMap::PinHandle(NvMap::Handle::Id handle) { 160DAddr NvMap::PinHandle(NvMap::Handle::Id handle, bool low_area_pin) {
137 auto handle_description{GetHandle(handle)}; 161 auto handle_description{GetHandle(handle)};
138 if (!handle_description) [[unlikely]] { 162 if (!handle_description) [[unlikely]] {
139 return 0; 163 return 0;
140 } 164 }
141 165
142 std::scoped_lock lock(handle_description->mutex); 166 std::scoped_lock lock(handle_description->mutex);
167 const auto map_low_area = [&] {
168 if (handle_description->pin_virt_address == 0) {
169 auto& gmmu_allocator = host1x.Allocator();
170 auto& gmmu = host1x.GMMU();
171 u32 address =
172 gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size));
173 gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address,
174 handle_description->aligned_size);
175 handle_description->pin_virt_address = address;
176 }
177 };
143 if (!handle_description->pins) { 178 if (!handle_description->pins) {
144 // If we're in the unmap queue we can just remove ourselves and return since we're already 179 // If we're in the unmap queue we can just remove ourselves and return since we're already
145 // mapped 180 // mapped
@@ -151,37 +186,58 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle) {
151 unmap_queue.erase(*handle_description->unmap_queue_entry); 186 unmap_queue.erase(*handle_description->unmap_queue_entry);
152 handle_description->unmap_queue_entry.reset(); 187 handle_description->unmap_queue_entry.reset();
153 188
189 if (low_area_pin) {
190 map_low_area();
191 handle_description->pins++;
192 return static_cast<DAddr>(handle_description->pin_virt_address);
193 }
194
154 handle_description->pins++; 195 handle_description->pins++;
155 return handle_description->pin_virt_address; 196 return handle_description->d_address;
156 } 197 }
157 } 198 }
158 199
200 using namespace std::placeholders;
159 // If not then allocate some space and map it 201 // If not then allocate some space and map it
160 u32 address{}; 202 DAddr address{};
161 auto& smmu_allocator = host1x.Allocator(); 203 auto& smmu = host1x.MemoryManager();
162 auto& smmu_memory_manager = host1x.MemoryManager(); 204 auto* session = core.GetSession(handle_description->session_id);
163 while ((address = smmu_allocator.Allocate( 205 const VAddr vaddress = handle_description->address;
164 static_cast<u32>(handle_description->aligned_size))) == 0) { 206 const size_t map_size = handle_description->aligned_size;
165 // Free handles until the allocation succeeds 207 if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
166 std::scoped_lock queueLock(unmap_queue_lock); 208 handle_description->d_address = session->mapper->Map(vaddress, map_size);
167 if (auto freeHandleDesc{unmap_queue.front()}) { 209 handle_description->in_heap = true;
168 // Handles in the unmap queue are guaranteed not to be pinned so don't bother 210 } else {
169 // checking if they are before unmapping 211 size_t aligned_up = Common::AlignUp(map_size, BIG_PAGE_SIZE);
170 std::scoped_lock freeLock(freeHandleDesc->mutex); 212 while ((address = smmu.Allocate(aligned_up)) == 0) {
171 if (handle_description->pin_virt_address) 213 // Free handles until the allocation succeeds
172 UnmapHandle(*freeHandleDesc); 214 std::scoped_lock queueLock(unmap_queue_lock);
173 } else { 215 if (auto freeHandleDesc{unmap_queue.front()}) {
174 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); 216 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
217 // checking if they are before unmapping
218 std::scoped_lock freeLock(freeHandleDesc->mutex);
219 if (handle_description->d_address)
220 UnmapHandle(*freeHandleDesc);
221 } else {
222 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
223 }
175 } 224 }
225
226 handle_description->d_address = address;
227 smmu.Map(address, vaddress, map_size, session->asid, true);
228 handle_description->in_heap = false;
176 } 229 }
230 }
177 231
178 smmu_memory_manager.Map(static_cast<GPUVAddr>(address), handle_description->address, 232 if (low_area_pin) {
179 handle_description->aligned_size); 233 map_low_area();
180 handle_description->pin_virt_address = address;
181 } 234 }
182 235
183 handle_description->pins++; 236 handle_description->pins++;
184 return handle_description->pin_virt_address; 237 if (low_area_pin) {
238 return static_cast<DAddr>(handle_description->pin_virt_address);
239 }
240 return handle_description->d_address;
185} 241}
186 242
187void NvMap::UnpinHandle(Handle::Id handle) { 243void NvMap::UnpinHandle(Handle::Id handle) {
@@ -232,7 +288,7 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna
232 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); 288 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!");
233 } else if (handle_description->dupes == 0) { 289 } else if (handle_description->dupes == 0) {
234 // Force unmap the handle 290 // Force unmap the handle
235 if (handle_description->pin_virt_address) { 291 if (handle_description->d_address) {
236 std::scoped_lock queueLock(unmap_queue_lock); 292 std::scoped_lock queueLock(unmap_queue_lock);
237 UnmapHandle(*handle_description); 293 UnmapHandle(*handle_description);
238 } 294 }
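
Note: PinHandle now has two backing paths: handles whose guest range falls inside the session's preallocated heap window go through the HeapMapper, everything else gets an SMMU allocation aligned up to the 16-page big page size; a separate low-area pin additionally maps the result into a 32-bit Host1x window. A rough sketch of just the "align and choose a path" decision; PinPlan and the helpers are invented, only the big-page constant matches the hunk:

#include <cstdint>

constexpr std::uint64_t PAGE_SIZE = 0x1000;
constexpr std::uint64_t BIG_PAGE_SIZE = PAGE_SIZE * 16; // as in nvmap.cpp

struct PinPlan {
    bool use_heap_mapper;          // inside the session's preallocated window?
    std::uint64_t smmu_alloc_size; // 0 when the heap mapper path is taken
};

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

// Decide how a pinned handle would be backed, mirroring NvMap::PinHandle's branch.
constexpr PinPlan PlanPin(std::uint64_t vaddr, std::uint64_t size, std::uint64_t heap_start,
                          std::uint64_t heap_size) {
    const bool in_heap = vaddr >= heap_start && vaddr + size <= heap_start + heap_size;
    if (in_heap) {
        return {true, 0};
    }
    return {false, AlignUp(size, BIG_PAGE_SIZE)};
}

int main() {
    const PinPlan heap_pin = PlanPin(0x0841'0000, 0x5000, 0x0840'0000, 64ull << 20);
    const PinPlan smmu_pin = PlanPin(0x7000'0000, 0x5000, 0x0840'0000, 64ull << 20);
    return (heap_pin.use_heap_mapper && smmu_pin.smmu_alloc_size == 0x10000) ? 0 : 1;
}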
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index a8e573890..d7f695845 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -14,6 +14,7 @@
14 14
15#include "common/bit_field.h" 15#include "common/bit_field.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "core/hle/service/nvdrv/core/container.h"
17#include "core/hle/service/nvdrv/nvdata.h" 18#include "core/hle/service/nvdrv/nvdata.h"
18 19
19namespace Tegra { 20namespace Tegra {
@@ -25,6 +26,8 @@ class Host1x;
25} // namespace Tegra 26} // namespace Tegra
26 27
27namespace Service::Nvidia::NvCore { 28namespace Service::Nvidia::NvCore {
29
30class Container;
28/** 31/**
29 * @brief The nvmap core class holds the global state for nvmap and provides methods to manage 32 * @brief The nvmap core class holds the global state for nvmap and provides methods to manage
30 * handles 33 * handles
@@ -48,7 +51,7 @@ public:
48 using Id = u32; 51 using Id = u32;
49 Id id; //!< A globally unique identifier for this handle 52 Id id; //!< A globally unique identifier for this handle
50 53
51 s32 pins{}; 54 s64 pins{};
52 u32 pin_virt_address{}; 55 u32 pin_virt_address{};
53 std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; 56 std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{};
54 57
@@ -61,15 +64,18 @@ public:
61 } flags{}; 64 } flags{};
62 static_assert(sizeof(Flags) == sizeof(u32)); 65 static_assert(sizeof(Flags) == sizeof(u32));
63 66
64 u64 address{}; //!< The memory location in the guest's AS that this handle corresponds to, 67 VAddr address{}; //!< The memory location in the guest's AS that this handle corresponds to,
65 //!< this can also be in the nvdrv tmem 68 //!< this can also be in the nvdrv tmem
66 bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC 69 bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
67 //!< call 70 //!< call
68 71
69 u8 kind{}; //!< Used for memory compression 72 u8 kind{}; //!< Used for memory compression
70 bool allocated{}; //!< If the handle has been allocated with `Alloc` 73 bool allocated{}; //!< If the handle has been allocated with `Alloc`
74 bool in_heap{};
75 NvCore::SessionId session_id{};
71 76
72 u64 dma_map_addr{}; //! remove me after implementing pinning. 77 DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds
78 //!< to, this can also be in the nvdrv tmem
73 79
74 Handle(u64 size, Id id); 80 Handle(u64 size, Id id);
75 81
@@ -77,7 +83,8 @@ public:
77 * @brief Sets up the handle with the given memory config, can allocate memory from the tmem 83 * @brief Sets up the handle with the given memory config, can allocate memory from the tmem
78 * if a 0 address is passed 84 * if a 0 address is passed
79 */ 85 */
80 [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress); 86 [[nodiscard]] NvResult Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress,
87 NvCore::SessionId pSessionId);
81 88
82 /** 89 /**
83 * @brief Increases the dupe counter of the handle for the given session 90 * @brief Increases the dupe counter of the handle for the given session
@@ -108,7 +115,7 @@ public:
108 bool can_unlock; //!< If the address region is ready to be unlocked 115 bool can_unlock; //!< If the address region is ready to be unlocked
109 }; 116 };
110 117
111 explicit NvMap(Tegra::Host1x::Host1x& host1x); 118 explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
112 119
113 /** 120 /**
114 * @brief Creates an unallocated handle of the given size 121 * @brief Creates an unallocated handle of the given size
@@ -117,7 +124,7 @@ public:
117 124
118 std::shared_ptr<Handle> GetHandle(Handle::Id handle); 125 std::shared_ptr<Handle> GetHandle(Handle::Id handle);
119 126
120 VAddr GetHandleAddress(Handle::Id handle); 127 DAddr GetHandleAddress(Handle::Id handle);
121 128
122 /** 129 /**
123 * @brief Maps a handle into the SMMU address space 130 * @brief Maps a handle into the SMMU address space
@@ -125,7 +132,7 @@ public:
125 * number of calls to `UnpinHandle` 132 * number of calls to `UnpinHandle`
126 * @return The SMMU virtual address that the handle has been mapped to 133 * @return The SMMU virtual address that the handle has been mapped to
127 */ 134 */
128 u32 PinHandle(Handle::Id handle); 135 DAddr PinHandle(Handle::Id handle, bool low_area_pin);
129 136
130 /** 137 /**
131 * @brief When this has been called an equal number of times to `PinHandle` for the supplied 138 * @brief When this has been called an equal number of times to `PinHandle` for the supplied
@@ -172,5 +179,7 @@ private:
172 * @return If the handle was removed from the map 179 * @return If the handle was removed from the map
173 */ 180 */
174 bool TryRemoveHandle(const Handle& handle_description); 181 bool TryRemoveHandle(const Handle& handle_description);
182
183 Container& core;
175}; 184};
176} // namespace Service::Nvidia::NvCore 185} // namespace Service::Nvidia::NvCore
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index a04538d5d..8adaddc60 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -7,6 +7,7 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/service/nvdrv/core/container.h"
10#include "core/hle/service/nvdrv/nvdata.h" 11#include "core/hle/service/nvdrv/nvdata.h"
11 12
12namespace Core { 13namespace Core {
@@ -62,7 +63,7 @@ public:
62 * Called once a device is opened 63 * Called once a device is opened
63 * @param fd The device fd 64 * @param fd The device fd
64 */ 65 */
65 virtual void OnOpen(DeviceFD fd) = 0; 66 virtual void OnOpen(NvCore::SessionId session_id, DeviceFD fd) = 0;
66 67
67 /** 68 /**
68 * Called once a device is closed 69 * Called once a device is closed
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 05a43d8dc..c1ebbd62d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -35,14 +35,14 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
35 return NvResult::NotImplemented; 35 return NvResult::NotImplemented;
36} 36}
37 37
38void nvdisp_disp0::OnOpen(DeviceFD fd) {} 38void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
39void nvdisp_disp0::OnClose(DeviceFD fd) {} 39void nvdisp_disp0::OnClose(DeviceFD fd) {}
40 40
41void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, 41void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
42 u32 height, u32 stride, android::BufferTransformFlags transform, 42 u32 height, u32 stride, android::BufferTransformFlags transform,
43 const Common::Rectangle<int>& crop_rect, 43 const Common::Rectangle<int>& crop_rect,
44 std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { 44 std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
45 const VAddr addr = nvmap.GetHandleAddress(buffer_handle); 45 const DAddr addr = nvmap.GetHandleAddress(buffer_handle);
46 LOG_TRACE(Service, 46 LOG_TRACE(Service,
47 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 47 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
48 addr, offset, width, height, stride, format); 48 addr, offset, width, height, stride, format);
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index daee05fe8..5f13a50a2 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -32,7 +32,7 @@ public:
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::span<u8> inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
37 37
38 /// Performs a screen flip, drawing the buffer pointed to by the handle. 38 /// Performs a screen flip, drawing the buffer pointed to by the handle.
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 6b3639008..e6646ba04 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -86,7 +86,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
86 return NvResult::NotImplemented; 86 return NvResult::NotImplemented;
87} 87}
88 88
89void nvhost_as_gpu::OnOpen(DeviceFD fd) {} 89void nvhost_as_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
90void nvhost_as_gpu::OnClose(DeviceFD fd) {} 90void nvhost_as_gpu::OnClose(DeviceFD fd) {}
91 91
92NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { 92NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) {
@@ -206,6 +206,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
206 static_cast<u32>(aligned_size >> page_size_bits)); 206 static_cast<u32>(aligned_size >> page_size_bits));
207 } 207 }
208 208
209 nvmap.UnpinHandle(mapping->handle);
210
209 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state 211 // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
210 // Only FreeSpace can unmap them fully 212 // Only FreeSpace can unmap them fully
211 if (mapping->sparse_alloc) { 213 if (mapping->sparse_alloc) {
@@ -293,12 +295,12 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) {
293 return NvResult::BadValue; 295 return NvResult::BadValue;
294 } 296 }
295 297
296 VAddr cpu_address{static_cast<VAddr>( 298 DAddr base = nvmap.PinHandle(entry.handle, false);
297 handle->address + 299 DAddr device_address{static_cast<DAddr>(
298 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; 300 base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
299 301
300 gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), 302 gmmu->Map(virtual_address, device_address, size,
301 use_big_pages); 303 static_cast<Tegra::PTEKind>(entry.kind), use_big_pages);
302 } 304 }
303 } 305 }
304 306
@@ -331,9 +333,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
331 } 333 }
332 334
333 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; 335 u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
334 VAddr cpu_address{mapping->ptr + params.buffer_offset}; 336 VAddr device_address{mapping->ptr + params.buffer_offset};
335 337
336 gmmu->Map(gpu_address, cpu_address, params.mapping_size, 338 gmmu->Map(gpu_address, device_address, params.mapping_size,
337 static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); 339 static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
338 340
339 return NvResult::Success; 341 return NvResult::Success;
@@ -349,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
349 return NvResult::BadValue; 351 return NvResult::BadValue;
350 } 352 }
351 353
352 VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; 354 DAddr device_address{
355 static_cast<DAddr>(nvmap.PinHandle(params.handle, false) + params.buffer_offset)};
353 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; 356 u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};
354 357
355 bool big_page{[&]() { 358 bool big_page{[&]() {
@@ -373,15 +376,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
373 } 376 }
374 377
375 const bool use_big_pages = alloc->second.big_pages && big_page; 378 const bool use_big_pages = alloc->second.big_pages && big_page;
376 gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), 379 gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind),
377 use_big_pages); 380 use_big_pages);
378 381
379 auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, 382 auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
380 use_big_pages, alloc->second.sparse)}; 383 true, use_big_pages, alloc->second.sparse)};
381 alloc->second.mappings.push_back(mapping); 384 alloc->second.mappings.push_back(mapping);
382 mapping_map[params.offset] = mapping; 385 mapping_map[params.offset] = mapping;
383 } else { 386 } else {
384
385 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; 387 auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
386 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; 388 u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
387 u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; 389 u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
@@ -394,11 +396,11 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) {
394 return NvResult::InsufficientMemory; 396 return NvResult::InsufficientMemory;
395 } 397 }
396 398
397 gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), 399 gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size),
398 static_cast<Tegra::PTEKind>(params.kind), big_page); 400 static_cast<Tegra::PTEKind>(params.kind), big_page);
399 401
400 auto mapping{ 402 auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size,
401 std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; 403 false, big_page, false)};
402 mapping_map[params.offset] = mapping; 404 mapping_map[params.offset] = mapping;
403 } 405 }
404 406
@@ -433,6 +435,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) {
433 gmmu->Unmap(params.offset, mapping->size); 435 gmmu->Unmap(params.offset, mapping->size);
434 } 436 }
435 437
438 nvmap.UnpinHandle(mapping->handle);
439
436 mapping_map.erase(params.offset); 440 mapping_map.erase(params.offset);
437 } catch (const std::out_of_range&) { 441 } catch (const std::out_of_range&) {
438 LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); 442 LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset);
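
Note: GPU AS mappings now record which nvmap handle backs them, so FreeMappingLocked and UnmapBuffer can call UnpinHandle and keep pin counts balanced with the PinHandle calls made while mapping. A small sketch of that pin-on-map / unpin-on-unmap pairing; MapTracker and its counters are hypothetical:

#include <cstdint>
#include <map>
#include <unordered_map>

// Hypothetical bookkeeping that pairs every map with a later unpin, as the hunks above do.
class MapTracker {
public:
    void MapBuffer(std::uint64_t gpu_va, std::uint32_t handle) {
        ++pins[handle];            // stands in for nvmap.PinHandle(handle, false)
        mappings[gpu_va] = handle; // remember the backing handle in the Mapping struct
    }

    void UnmapBuffer(std::uint64_t gpu_va) {
        const auto it = mappings.find(gpu_va);
        if (it == mappings.end()) {
            return;                // nothing mapped at this offset
        }
        --pins[it->second];        // stands in for nvmap.UnpinHandle(mapping->handle)
        mappings.erase(it);
    }

    int PinCount(std::uint32_t handle) const {
        const auto it = pins.find(handle);
        return it == pins.end() ? 0 : it->second;
    }

private:
    std::map<std::uint64_t, std::uint32_t> mappings;
    std::unordered_map<std::uint32_t, int> pins;
};

int main() {
    MapTracker as;
    as.MapBuffer(0x1'0000'0000, 42);
    as.UnmapBuffer(0x1'0000'0000);
    return as.PinCount(42) == 0 ? 0 : 1;
}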
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 79a21683d..7d0a99988 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -55,7 +55,7 @@ public:
55 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 55 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
56 std::span<u8> inline_output) override; 56 std::span<u8> inline_output) override;
57 57
58 void OnOpen(DeviceFD fd) override; 58 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
59 void OnClose(DeviceFD fd) override; 59 void OnClose(DeviceFD fd) override;
60 60
61 Kernel::KEvent* QueryEvent(u32 event_id) override; 61 Kernel::KEvent* QueryEvent(u32 event_id) override;
@@ -159,16 +159,18 @@ private:
159 NvCore::NvMap& nvmap; 159 NvCore::NvMap& nvmap;
160 160
161 struct Mapping { 161 struct Mapping {
162 VAddr ptr; 162 NvCore::NvMap::Handle::Id handle;
163 DAddr ptr;
163 u64 offset; 164 u64 offset;
164 u64 size; 165 u64 size;
165 bool fixed; 166 bool fixed;
166 bool big_page; // Only valid if fixed == false 167 bool big_page; // Only valid if fixed == false
167 bool sparse_alloc; 168 bool sparse_alloc;
168 169
169 Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) 170 Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_,
170 : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), 171 bool big_page_, bool sparse_alloc_)
171 sparse_alloc(sparse_alloc_) {} 172 : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_),
173 big_page(big_page_), sparse_alloc(sparse_alloc_) {}
172 }; 174 };
173 175
174 struct Allocation { 176 struct Allocation {
@@ -212,9 +214,6 @@ private:
212 bool initialised{}; 214 bool initialised{};
213 } vm; 215 } vm;
214 std::shared_ptr<Tegra::MemoryManager> gmmu; 216 std::shared_ptr<Tegra::MemoryManager> gmmu;
215
216 // s32 channel{};
217 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
218}; 217};
219 218
220} // namespace Service::Nvidia::Devices 219} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b8dd34e24..250d01de3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -76,7 +76,7 @@ NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inp
76 return NvResult::NotImplemented; 76 return NvResult::NotImplemented;
77} 77}
78 78
79void nvhost_ctrl::OnOpen(DeviceFD fd) {} 79void nvhost_ctrl::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
80 80
81void nvhost_ctrl::OnClose(DeviceFD fd) {} 81void nvhost_ctrl::OnClose(DeviceFD fd) {}
82 82
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 992124b60..403f1a746 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -32,7 +32,7 @@ public:
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::span<u8> inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
37 37
38 Kernel::KEvent* QueryEvent(u32 event_id) override; 38 Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 3e0c96456..ddd85678b 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
82 return NvResult::NotImplemented; 82 return NvResult::NotImplemented;
83} 83}
84 84
85void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} 85void nvhost_ctrl_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
86void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} 86void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
87 87
88NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) { 88NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index d170299bd..d2ab05b21 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -28,7 +28,7 @@ public:
28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
29 std::span<u8> inline_output) override; 29 std::span<u8> inline_output) override;
30 30
31 void OnOpen(DeviceFD fd) override; 31 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
32 void OnClose(DeviceFD fd) override; 32 void OnClose(DeviceFD fd) override;
33 33
34 Kernel::KEvent* QueryEvent(u32 event_id) override; 34 Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index b0395c2f0..bf12d69a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -120,7 +120,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
120 return NvResult::NotImplemented; 120 return NvResult::NotImplemented;
121} 121}
122 122
123void nvhost_gpu::OnOpen(DeviceFD fd) {} 123void nvhost_gpu::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
124void nvhost_gpu::OnClose(DeviceFD fd) {} 124void nvhost_gpu::OnClose(DeviceFD fd) {}
125 125
126NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) { 126NvResult nvhost_gpu::SetNVMAPfd(IoctlSetNvmapFD& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 88fd228ff..e34a978db 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -47,7 +47,7 @@ public:
47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
48 std::span<u8> inline_output) override; 48 std::span<u8> inline_output) override;
49 49
50 void OnOpen(DeviceFD fd) override; 50 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
51 void OnClose(DeviceFD fd) override; 51 void OnClose(DeviceFD fd) override;
52 52
53 Kernel::KEvent* QueryEvent(u32 event_id) override; 53 Kernel::KEvent* QueryEvent(u32 event_id) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index f43914e1b..2c0ac2a46 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -35,7 +35,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
35 case 0x7: 35 case 0x7:
36 return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output); 36 return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output);
37 case 0x9: 37 case 0x9:
38 return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output); 38 return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output, fd);
39 case 0xa: 39 case 0xa:
40 return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output); 40 return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output);
41 default: 41 default:
@@ -68,9 +68,10 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
68 return NvResult::NotImplemented; 68 return NvResult::NotImplemented;
69} 69}
70 70
71void nvhost_nvdec::OnOpen(DeviceFD fd) { 71void nvhost_nvdec::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
72 LOG_INFO(Service_NVDRV, "NVDEC video stream started"); 72 LOG_INFO(Service_NVDRV, "NVDEC video stream started");
73 system.SetNVDECActive(true); 73 system.SetNVDECActive(true);
74 sessions[fd] = session_id;
74} 75}
75 76
76void nvhost_nvdec::OnClose(DeviceFD fd) { 77void nvhost_nvdec::OnClose(DeviceFD fd) {
@@ -81,6 +82,10 @@ void nvhost_nvdec::OnClose(DeviceFD fd) {
81 system.GPU().ClearCdmaInstance(iter->second); 82 system.GPU().ClearCdmaInstance(iter->second);
82 } 83 }
83 system.SetNVDECActive(false); 84 system.SetNVDECActive(false);
85 auto it = sessions.find(fd);
86 if (it != sessions.end()) {
87 sessions.erase(it);
88 }
84} 89}
85 90
86} // namespace Service::Nvidia::Devices 91} // namespace Service::Nvidia::Devices
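The nvdec hunks above show the per-fd bookkeeping this commit adds to the host1x device nodes: OnOpen records which session opened the fd and OnClose drops the entry again. A minimal, standalone sketch of that pattern; DeviceFD and SessionId below are illustrative stand-ins, not yuzu's actual declarations:

#include <cstddef>
#include <cstdint>
#include <unordered_map>

using DeviceFD = std::int32_t;
struct SessionId {
    std::size_t id;
};

class DeviceNode {
public:
    void OnOpen(SessionId session_id, DeviceFD fd) {
        // Remember which session opened this fd so later ioctls can resolve
        // the owning process through it.
        sessions[fd] = session_id;
    }

    void OnClose(DeviceFD fd) {
        // erase() on a missing key is a no-op, so a stray double close is harmless.
        sessions.erase(fd);
    }

    SessionId SessionOf(DeviceFD fd) const {
        return sessions.at(fd);
    }

private:
    std::unordered_map<DeviceFD, SessionId> sessions;
};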
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index ad2233c49..627686757 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -20,7 +20,7 @@ public:
20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
21 std::span<u8> inline_output) override; 21 std::span<u8> inline_output) override;
22 22
23 void OnOpen(DeviceFD fd) override; 23 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
24 void OnClose(DeviceFD fd) override; 24 void OnClose(DeviceFD fd) override;
25}; 25};
26 26
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 74c701b95..a0a7bfa40 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -8,6 +8,7 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/kernel/k_process.h"
11#include "core/hle/service/nvdrv/core/container.h" 12#include "core/hle/service/nvdrv/core/container.h"
12#include "core/hle/service/nvdrv/core/nvmap.h" 13#include "core/hle/service/nvdrv/core/nvmap.h"
13#include "core/hle/service/nvdrv/core/syncpoint_manager.h" 14#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@@ -95,6 +96,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
95 offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); 96 offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
96 97
97 auto& gpu = system.GPU(); 98 auto& gpu = system.GPU();
99 auto* session = core.GetSession(sessions[fd]);
100
98 if (gpu.UseNvdec()) { 101 if (gpu.UseNvdec()) {
99 for (std::size_t i = 0; i < syncpt_increments.size(); i++) { 102 for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
100 const SyncptIncr& syncpt_incr = syncpt_increments[i]; 103 const SyncptIncr& syncpt_incr = syncpt_increments[i];
@@ -106,8 +109,8 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
106 const auto object = nvmap.GetHandle(cmd_buffer.memory_id); 109 const auto object = nvmap.GetHandle(cmd_buffer.memory_id);
107 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 110 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
108 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); 111 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
109 system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), 112 session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
110 cmdlist.size() * sizeof(u32)); 113 cmdlist.size() * sizeof(u32));
111 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); 114 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
112 } 115 }
113 // Some games expect command_buffers to be written back 116 // Some games expect command_buffers to be written back
@@ -133,10 +136,12 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
133 return NvResult::Success; 136 return NvResult::Success;
134} 137}
135 138
136NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries) { 139NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
140 DeviceFD fd) {
137 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); 141 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
138 for (size_t i = 0; i < num_entries; i++) { 142 for (size_t i = 0; i < num_entries; i++) {
139 entries[i].map_address = nvmap.PinHandle(entries[i].map_handle); 143 DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, true);
144 entries[i].map_address = static_cast<u32>(pin_address);
140 } 145 }
141 146
142 return NvResult::Success; 147 return NvResult::Success;
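With a session recorded per fd, Submit now reads the command list through the memory of the process that owns that session instead of a global application-memory accessor. A rough sketch of that read path, using assumed GuestMemory/Process/Session stand-ins rather than the real NvCore types:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Stand-ins only: the point is that the command list is read through the
// memory of the process bound to the session, not through a global accessor.
struct GuestMemory {
    std::vector<std::uint8_t> backing;
    void ReadBlock(std::uint64_t addr, void* dest, std::size_t size) const {
        std::memcpy(dest, backing.data() + addr, size);
    }
};
struct Process {
    GuestMemory memory;
    GuestMemory& GetMemory() { return memory; }
};
struct Session {
    Process* process;
};

std::vector<std::uint32_t> ReadCommandList(Session& session, std::uint64_t address,
                                           std::size_t word_count) {
    std::vector<std::uint32_t> cmdlist(word_count);
    session.process->GetMemory().ReadBlock(address, cmdlist.data(),
                                           cmdlist.size() * sizeof(std::uint32_t));
    return cmdlist;
}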
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 7ce748e18..900db81d2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -4,7 +4,9 @@
4#pragma once 4#pragma once
5 5
6#include <deque> 6#include <deque>
7#include <unordered_map>
7#include <vector> 8#include <vector>
9
8#include "common/common_types.h" 10#include "common/common_types.h"
9#include "common/swap.h" 11#include "common/swap.h"
10#include "core/hle/service/nvdrv/core/syncpoint_manager.h" 12#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@@ -111,7 +113,7 @@ protected:
111 NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd); 113 NvResult Submit(IoctlSubmit& params, std::span<u8> input, DeviceFD fd);
112 NvResult GetSyncpoint(IoctlGetSyncpoint& params); 114 NvResult GetSyncpoint(IoctlGetSyncpoint& params);
113 NvResult GetWaitbase(IoctlGetWaitbase& params); 115 NvResult GetWaitbase(IoctlGetWaitbase& params);
114 NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); 116 NvResult MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd);
115 NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries); 117 NvResult UnmapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries);
116 NvResult SetSubmitTimeout(u32 timeout); 118 NvResult SetSubmitTimeout(u32 timeout);
117 119
@@ -125,6 +127,7 @@ protected:
125 NvCore::NvMap& nvmap; 127 NvCore::NvMap& nvmap;
126 NvCore::ChannelType channel_type; 128 NvCore::ChannelType channel_type;
127 std::array<u32, MaxSyncPoints> device_syncpoints{}; 129 std::array<u32, MaxSyncPoints> device_syncpoints{};
130 std::unordered_map<DeviceFD, NvCore::SessionId> sessions;
128}; 131};
129}; // namespace Devices 132}; // namespace Devices
130} // namespace Service::Nvidia 133} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 9e6b86458..f87d53f12 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -44,7 +44,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
44 return NvResult::NotImplemented; 44 return NvResult::NotImplemented;
45} 45}
46 46
47void nvhost_nvjpg::OnOpen(DeviceFD fd) {} 47void nvhost_nvjpg::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {}
48void nvhost_nvjpg::OnClose(DeviceFD fd) {} 48void nvhost_nvjpg::OnClose(DeviceFD fd) {}
49 49
50NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) { 50NvResult nvhost_nvjpg::SetNVMAPfd(IoctlSetNvmapFD& params) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 790c97f6a..def9c254d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -22,7 +22,7 @@ public:
22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
23 std::span<u8> inline_output) override; 23 std::span<u8> inline_output) override;
24 24
25 void OnOpen(DeviceFD fd) override; 25 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
26 void OnClose(DeviceFD fd) override; 26 void OnClose(DeviceFD fd) override;
27 27
28private: 28private:
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 87f8d7c22..bf090f5eb 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -33,7 +33,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
33 case 0x3: 33 case 0x3:
34 return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output); 34 return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output);
35 case 0x9: 35 case 0x9:
36 return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output); 36 return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output, fd);
37 case 0xa: 37 case 0xa:
38 return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output); 38 return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output);
39 default: 39 default:
@@ -68,7 +68,9 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
68 return NvResult::NotImplemented; 68 return NvResult::NotImplemented;
69} 69}
70 70
71void nvhost_vic::OnOpen(DeviceFD fd) {} 71void nvhost_vic::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
72 sessions[fd] = session_id;
73}
72 74
73void nvhost_vic::OnClose(DeviceFD fd) { 75void nvhost_vic::OnClose(DeviceFD fd) {
74 auto& host1x_file = core.Host1xDeviceFile(); 76 auto& host1x_file = core.Host1xDeviceFile();
@@ -76,6 +78,7 @@ void nvhost_vic::OnClose(DeviceFD fd) {
76 if (iter != host1x_file.fd_to_id.end()) { 78 if (iter != host1x_file.fd_to_id.end()) {
77 system.GPU().ClearCdmaInstance(iter->second); 79 system.GPU().ClearCdmaInstance(iter->second);
78 } 80 }
81 sessions.erase(fd);
79} 82}
80 83
81} // namespace Service::Nvidia::Devices 84} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index cadbcb0a5..0cc04354a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -19,7 +19,7 @@ public:
19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
20 std::span<u8> inline_output) override; 20 std::span<u8> inline_output) override;
21 21
22 void OnOpen(DeviceFD fd) override; 22 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
23 void OnClose(DeviceFD fd) override; 23 void OnClose(DeviceFD fd) override;
24}; 24};
25} // namespace Service::Nvidia::Devices 25} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 71b2e62ec..da61a3bfe 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -36,9 +36,9 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
36 case 0x3: 36 case 0x3:
37 return WrapFixed(this, &nvmap::IocFromId, input, output); 37 return WrapFixed(this, &nvmap::IocFromId, input, output);
38 case 0x4: 38 case 0x4:
39 return WrapFixed(this, &nvmap::IocAlloc, input, output); 39 return WrapFixed(this, &nvmap::IocAlloc, input, output, fd);
40 case 0x5: 40 case 0x5:
41 return WrapFixed(this, &nvmap::IocFree, input, output); 41 return WrapFixed(this, &nvmap::IocFree, input, output, fd);
42 case 0x9: 42 case 0x9:
43 return WrapFixed(this, &nvmap::IocParam, input, output); 43 return WrapFixed(this, &nvmap::IocParam, input, output);
44 case 0xe: 44 case 0xe:
@@ -67,8 +67,15 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, st
67 return NvResult::NotImplemented; 67 return NvResult::NotImplemented;
68} 68}
69 69
70void nvmap::OnOpen(DeviceFD fd) {} 70void nvmap::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {
71void nvmap::OnClose(DeviceFD fd) {} 71 sessions[fd] = session_id;
72}
73void nvmap::OnClose(DeviceFD fd) {
74 auto it = sessions.find(fd);
75 if (it != sessions.end()) {
76 sessions.erase(it);
77 }
78}
72 79
73NvResult nvmap::IocCreate(IocCreateParams& params) { 80NvResult nvmap::IocCreate(IocCreateParams& params) {
74 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); 81 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@@ -87,7 +94,7 @@ NvResult nvmap::IocCreate(IocCreateParams& params) {
87 return NvResult::Success; 94 return NvResult::Success;
88} 95}
89 96
90NvResult nvmap::IocAlloc(IocAllocParams& params) { 97NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) {
91 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); 98 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
92 99
93 if (!params.handle) { 100 if (!params.handle) {
@@ -116,15 +123,15 @@ NvResult nvmap::IocAlloc(IocAllocParams& params) {
116 return NvResult::InsufficientMemory; 123 return NvResult::InsufficientMemory;
117 } 124 }
118 125
119 const auto result = 126 const auto result = handle_description->Alloc(params.flags, params.align, params.kind,
120 handle_description->Alloc(params.flags, params.align, params.kind, params.address); 127 params.address, sessions[fd]);
121 if (result != NvResult::Success) { 128 if (result != NvResult::Success) {
122 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); 129 LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
123 return result; 130 return result;
124 } 131 }
125 bool is_out_io{}; 132 bool is_out_io{};
126 ASSERT(system.ApplicationProcess() 133 auto process = container.GetSession(sessions[fd])->process;
127 ->GetPageTable() 134 ASSERT(process->GetPageTable()
128 .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address, 135 .LockForMapDeviceAddressSpace(&is_out_io, handle_description->address,
129 handle_description->size, 136 handle_description->size,
130 Kernel::KMemoryPermission::None, true, false) 137 Kernel::KMemoryPermission::None, true, false)
@@ -224,7 +231,7 @@ NvResult nvmap::IocParam(IocParamParams& params) {
224 return NvResult::Success; 231 return NvResult::Success;
225} 232}
226 233
227NvResult nvmap::IocFree(IocFreeParams& params) { 234NvResult nvmap::IocFree(IocFreeParams& params, DeviceFD fd) {
228 LOG_DEBUG(Service_NVDRV, "called"); 235 LOG_DEBUG(Service_NVDRV, "called");
229 236
230 if (!params.handle) { 237 if (!params.handle) {
@@ -233,9 +240,9 @@ NvResult nvmap::IocFree(IocFreeParams& params) {
233 } 240 }
234 241
235 if (auto freeInfo{file.FreeHandle(params.handle, false)}) { 242 if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
243 auto process = container.GetSession(sessions[fd])->process;
236 if (freeInfo->can_unlock) { 244 if (freeInfo->can_unlock) {
237 ASSERT(system.ApplicationProcess() 245 ASSERT(process->GetPageTable()
238 ->GetPageTable()
239 .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size) 246 .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
240 .IsSuccess()); 247 .IsSuccess());
241 } 248 }
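The ioctl dispatch above now forwards the fd into IocAlloc and IocFree so both can look up the caller's session. The WrapFixed helper that carries the extra argument is, in spirit, a small variadic adapter; the version below is an illustrative reconstruction under assumed semantics (trivially copyable parameter structs copied in and out of the ioctl buffers), not yuzu's actual implementation:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <span>

enum class NvResult : std::uint32_t { Success = 0 };

// Copies the fixed-size parameter struct out of the ioctl input buffer, calls
// the member handler (forwarding any extra arguments such as the fd), then
// serializes the possibly-modified struct back into the output buffer.
template <typename Self, typename Params, typename... Extra>
NvResult WrapFixed(Self* self, NvResult (Self::*handler)(Params&, Extra...),
                   std::span<const std::uint8_t> input, std::span<std::uint8_t> output,
                   Extra... extra) {
    Params params{};
    std::memcpy(&params, input.data(), std::min(input.size(), sizeof(Params)));
    const NvResult result = (self->*handler)(params, extra...);
    std::memcpy(output.data(), &params, std::min(output.size(), sizeof(Params)));
    return result;
}

A call shaped like WrapFixed(this, &nvmap::IocAlloc, input, output, fd) then deduces the parameter struct from the handler's signature and threads the fd straight through to it.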
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 049c11028..d07d85f88 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -33,7 +33,7 @@ public:
33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, 33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
34 std::span<u8> inline_output) override; 34 std::span<u8> inline_output) override;
35 35
36 void OnOpen(DeviceFD fd) override; 36 void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override;
37 void OnClose(DeviceFD fd) override; 37 void OnClose(DeviceFD fd) override;
38 38
39 enum class HandleParameterType : u32_le { 39 enum class HandleParameterType : u32_le {
@@ -100,11 +100,11 @@ public:
100 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); 100 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
101 101
102 NvResult IocCreate(IocCreateParams& params); 102 NvResult IocCreate(IocCreateParams& params);
103 NvResult IocAlloc(IocAllocParams& params); 103 NvResult IocAlloc(IocAllocParams& params, DeviceFD fd);
104 NvResult IocGetId(IocGetIdParams& params); 104 NvResult IocGetId(IocGetIdParams& params);
105 NvResult IocFromId(IocFromIdParams& params); 105 NvResult IocFromId(IocFromIdParams& params);
106 NvResult IocParam(IocParamParams& params); 106 NvResult IocParam(IocParamParams& params);
107 NvResult IocFree(IocFreeParams& params); 107 NvResult IocFree(IocFreeParams& params, DeviceFD fd);
108 108
109private: 109private:
110 /// Id to use for the next handle that is created. 110 /// Id to use for the next handle that is created.
@@ -115,6 +115,7 @@ private:
115 115
116 NvCore::Container& container; 116 NvCore::Container& container;
117 NvCore::NvMap& file; 117 NvCore::NvMap& file;
118 std::unordered_map<DeviceFD, NvCore::SessionId> sessions;
118}; 119};
119 120
120} // namespace Service::Nvidia::Devices 121} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 9e46ee8dd..cb256e5b4 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -45,13 +45,22 @@ void EventInterface::FreeEvent(Kernel::KEvent* event) {
45void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) { 45void LoopProcess(Nvnflinger::Nvnflinger& nvnflinger, Core::System& system) {
46 auto server_manager = std::make_unique<ServerManager>(system); 46 auto server_manager = std::make_unique<ServerManager>(system);
47 auto module = std::make_shared<Module>(system); 47 auto module = std::make_shared<Module>(system);
48 server_manager->RegisterNamedService("nvdrv", std::make_shared<NVDRV>(system, module, "nvdrv")); 48 const auto NvdrvInterfaceFactoryForApplication = [&, module] {
49 server_manager->RegisterNamedService("nvdrv:a", 49 return std::make_shared<NVDRV>(system, module, "nvdrv");
50 std::make_shared<NVDRV>(system, module, "nvdrv:a")); 50 };
51 server_manager->RegisterNamedService("nvdrv:s", 51 const auto NvdrvInterfaceFactoryForApplets = [&, module] {
52 std::make_shared<NVDRV>(system, module, "nvdrv:s")); 52 return std::make_shared<NVDRV>(system, module, "nvdrv:a");
53 server_manager->RegisterNamedService("nvdrv:t", 53 };
54 std::make_shared<NVDRV>(system, module, "nvdrv:t")); 54 const auto NvdrvInterfaceFactoryForSysmodules = [&, module] {
55 return std::make_shared<NVDRV>(system, module, "nvdrv:s");
56 };
57 const auto NvdrvInterfaceFactoryForTesting = [&, module] {
58 return std::make_shared<NVDRV>(system, module, "nvdrv:t");
59 };
60 server_manager->RegisterNamedService("nvdrv", NvdrvInterfaceFactoryForApplication);
61 server_manager->RegisterNamedService("nvdrv:a", NvdrvInterfaceFactoryForApplets);
62 server_manager->RegisterNamedService("nvdrv:s", NvdrvInterfaceFactoryForSysmodules);
63 server_manager->RegisterNamedService("nvdrv:t", NvdrvInterfaceFactoryForTesting);
55 server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system)); 64 server_manager->RegisterNamedService("nvmemp", std::make_shared<NVMEMP>(system));
56 nvnflinger.SetNVDrvInstance(module); 65 nvnflinger.SetNVDrvInstance(module);
57 ServerManager::RunServer(std::move(server_manager)); 66 ServerManager::RunServer(std::move(server_manager));
@@ -113,7 +122,7 @@ NvResult Module::VerifyFD(DeviceFD fd) const {
113 return NvResult::Success; 122 return NvResult::Success;
114} 123}
115 124
116DeviceFD Module::Open(const std::string& device_name) { 125DeviceFD Module::Open(const std::string& device_name, NvCore::SessionId session_id) {
117 auto it = builders.find(device_name); 126 auto it = builders.find(device_name);
118 if (it == builders.end()) { 127 if (it == builders.end()) {
119 LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name); 128 LOG_ERROR(Service_NVDRV, "Trying to open unknown device {}", device_name);
@@ -124,7 +133,7 @@ DeviceFD Module::Open(const std::string& device_name) {
124 auto& builder = it->second; 133 auto& builder = it->second;
125 auto device = builder(fd)->second; 134 auto device = builder(fd)->second;
126 135
127 device->OnOpen(fd); 136 device->OnOpen(session_id, fd);
128 137
129 return fd; 138 return fd;
130} 139}
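Registering factory lambdas instead of pre-built service objects lets each client connection receive its own NVDRV instance, which in turn can hold its own session id; the four named factories differ only in the service name they pass to the NVDRV constructor. The pattern, reduced to a self-contained sketch (ServerManager and Interface here are assumed minimal types, not the real HLE classes):

#include <functional>
#include <map>
#include <memory>
#include <string>

struct Interface {
    virtual ~Interface() = default;
};

class ServerManager {
public:
    using Factory = std::function<std::shared_ptr<Interface>()>;

    void RegisterNamedService(std::string name, Factory factory) {
        factories.emplace(std::move(name), std::move(factory));
    }

    std::shared_ptr<Interface> Connect(const std::string& name) {
        // A fresh instance per connection, so per-connection state such as a
        // session id never leaks between clients.
        return factories.at(name)();
    }

private:
    std::map<std::string, Factory> factories;
};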
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index d8622b3ca..c594f0e5e 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -77,7 +77,7 @@ public:
77 NvResult VerifyFD(DeviceFD fd) const; 77 NvResult VerifyFD(DeviceFD fd) const;
78 78
79 /// Opens a device node and returns a file descriptor to it. 79 /// Opens a device node and returns a file descriptor to it.
80 DeviceFD Open(const std::string& device_name); 80 DeviceFD Open(const std::string& device_name, NvCore::SessionId session_id);
81 81
82 /// Sends an ioctl command to the specified file descriptor. 82 /// Sends an ioctl command to the specified file descriptor.
83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output); 83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
@@ -93,6 +93,10 @@ public:
93 93
94 NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event); 94 NvResult QueryEvent(DeviceFD fd, u32 event_id, Kernel::KEvent*& event);
95 95
96 NvCore::Container& GetContainer() {
97 return container;
98 }
99
96private: 100private:
97 friend class EventInterface; 101 friend class EventInterface;
98 friend class Service::Nvnflinger::Nvnflinger; 102 friend class Service::Nvnflinger::Nvnflinger;
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index c8a880e84..6e4825313 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -3,8 +3,10 @@
3// SPDX-License-Identifier: GPL-3.0-or-later 3// SPDX-License-Identifier: GPL-3.0-or-later
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "common/scope_exit.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/hle/kernel/k_event.h" 8#include "core/hle/kernel/k_event.h"
9#include "core/hle/kernel/k_process.h"
8#include "core/hle/kernel/k_readable_event.h" 10#include "core/hle/kernel/k_readable_event.h"
9#include "core/hle/service/ipc_helpers.h" 11#include "core/hle/service/ipc_helpers.h"
10#include "core/hle/service/nvdrv/nvdata.h" 12#include "core/hle/service/nvdrv/nvdata.h"
@@ -37,7 +39,7 @@ void NVDRV::Open(HLERequestContext& ctx) {
37 return; 39 return;
38 } 40 }
39 41
40 DeviceFD fd = nvdrv->Open(device_name); 42 DeviceFD fd = nvdrv->Open(device_name, session_id);
41 43
42 rb.Push<DeviceFD>(fd); 44 rb.Push<DeviceFD>(fd);
43 rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed); 45 rb.PushEnum(fd != INVALID_NVDRV_FD ? NvResult::Success : NvResult::FileOperationFailed);
@@ -150,12 +152,29 @@ void NVDRV::Close(HLERequestContext& ctx) {
150 152
151void NVDRV::Initialize(HLERequestContext& ctx) { 153void NVDRV::Initialize(HLERequestContext& ctx) {
152 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 154 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
155 IPC::ResponseBuilder rb{ctx, 3};
156 SCOPE_EXIT({
157 rb.Push(ResultSuccess);
158 rb.PushEnum(NvResult::Success);
159 });
153 160
154 is_initialized = true; 161 if (is_initialized) {
162 // No need to initialize again
163 return;
164 }
155 165
156 IPC::ResponseBuilder rb{ctx, 3}; 166 IPC::RequestParser rp{ctx};
157 rb.Push(ResultSuccess); 167 const auto process_handle{ctx.GetCopyHandle(0)};
158 rb.PushEnum(NvResult::Success); 168 // The transfer memory is lent to nvdrv as a work buffer since nvdrv is
169 // unable to allocate as much memory on its own. For HLE it's unnecessary to handle it
170 [[maybe_unused]] const auto transfer_memory_handle{ctx.GetCopyHandle(1)};
171 [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>();
172
173 auto& container = nvdrv->GetContainer();
174 auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle);
175 session_id = container.OpenSession(process.GetPointerUnsafe());
176
177 is_initialized = true;
159} 178}
160 179
161void NVDRV::QueryEvent(HLERequestContext& ctx) { 180void NVDRV::QueryEvent(HLERequestContext& ctx) {
@@ -242,6 +261,9 @@ NVDRV::NVDRV(Core::System& system_, std::shared_ptr<Module> nvdrv_, const char*
242 RegisterHandlers(functions); 261 RegisterHandlers(functions);
243} 262}
244 263
245NVDRV::~NVDRV() = default; 264NVDRV::~NVDRV() {
265 auto& container = nvdrv->GetContainer();
266 container.CloseSession(session_id);
267}
246 268
247} // namespace Service::Nvidia 269} // namespace Service::Nvidia
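Initialize now builds the response writer up front and relies on SCOPE_EXIT to push it on every path, so the early return for an already-initialized interface still replies with success, while the new destructor closes the session opened here. A tiny stand-in for that scope-exit idiom; the guard below is illustrative, yuzu uses its own SCOPE_EXIT macro from common/scope_exit.h:

#include <utility>

template <typename F>
class ScopeExit {
public:
    explicit ScopeExit(F func) : func_{std::move(func)} {}
    ~ScopeExit() { func_(); }
    ScopeExit(const ScopeExit&) = delete;
    ScopeExit& operator=(const ScopeExit&) = delete;

private:
    F func_;
};

void Initialize(bool& is_initialized, int& responses_pushed) {
    ScopeExit push_response{[&] { ++responses_pushed; }};  // runs on every return path
    if (is_initialized) {
        return;  // the guard still pushes the response
    }
    // ... open the nvdrv session for the calling process here ...
    is_initialized = true;
}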
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 6e98115dc..f2195ae1e 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -35,6 +35,7 @@ private:
35 35
36 u64 pid{}; 36 u64 pid{};
37 bool is_initialized{}; 37 bool is_initialized{};
38 NvCore::SessionId session_id{};
38 Common::ScratchBuffer<u8> output_buffer; 39 Common::ScratchBuffer<u8> output_buffer;
39 Common::ScratchBuffer<u8> inline_output_buffer; 40 Common::ScratchBuffer<u8> inline_output_buffer;
40}; 41};
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
index 2fef6cc1a..86e272b41 100644
--- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
+++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.cpp
@@ -87,19 +87,20 @@ Result CreateNvMapHandle(u32* out_nv_map_handle, Nvidia::Devices::nvmap& nvmap,
87 R_SUCCEED(); 87 R_SUCCEED();
88} 88}
89 89
90Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle) { 90Result FreeNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Nvidia::DeviceFD nvmap_fd) {
91 // Free the handle. 91 // Free the handle.
92 Nvidia::Devices::nvmap::IocFreeParams free_params{ 92 Nvidia::Devices::nvmap::IocFreeParams free_params{
93 .handle = handle, 93 .handle = handle,
94 }; 94 };
95 R_UNLESS(nvmap.IocFree(free_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); 95 R_UNLESS(nvmap.IocFree(free_params, nvmap_fd) == Nvidia::NvResult::Success,
96 VI::ResultOperationFailed);
96 97
97 // We succeeded. 98 // We succeeded.
98 R_SUCCEED(); 99 R_SUCCEED();
99} 100}
100 101
101Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer, 102Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::ProcessAddress buffer,
102 u32 size) { 103 u32 size, Nvidia::DeviceFD nvmap_fd) {
103 // Assign the allocated memory to the handle. 104 // Assign the allocated memory to the handle.
104 Nvidia::Devices::nvmap::IocAllocParams alloc_params{ 105 Nvidia::Devices::nvmap::IocAllocParams alloc_params{
105 .handle = handle, 106 .handle = handle,
@@ -109,16 +110,16 @@ Result AllocNvMapHandle(Nvidia::Devices::nvmap& nvmap, u32 handle, Common::Proce
109 .kind = 0, 110 .kind = 0,
110 .address = GetInteger(buffer), 111 .address = GetInteger(buffer),
111 }; 112 };
112 R_UNLESS(nvmap.IocAlloc(alloc_params) == Nvidia::NvResult::Success, VI::ResultOperationFailed); 113 R_UNLESS(nvmap.IocAlloc(alloc_params, nvmap_fd) == Nvidia::NvResult::Success,
114 VI::ResultOperationFailed);
113 115
114 // We succeeded. 116 // We succeeded.
115 R_SUCCEED(); 117 R_SUCCEED();
116} 118}
117 119
118Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, 120Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv, Nvidia::DeviceFD nvmap_fd,
119 Common::ProcessAddress buffer, u32 size) { 121 Common::ProcessAddress buffer, u32 size) {
120 // Get the nvmap device. 122 // Get the nvmap device.
121 auto nvmap_fd = nvdrv.Open("/dev/nvmap");
122 auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd); 123 auto nvmap = nvdrv.GetDevice<Nvidia::Devices::nvmap>(nvmap_fd);
123 ASSERT(nvmap != nullptr); 124 ASSERT(nvmap != nullptr);
124 125
@@ -127,11 +128,11 @@ Result AllocateHandleForBuffer(u32* out_handle, Nvidia::Module& nvdrv,
127 128
128 // Ensure we maintain a clean state on failure. 129 // Ensure we maintain a clean state on failure.
129 ON_RESULT_FAILURE { 130 ON_RESULT_FAILURE {
130 ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle))); 131 ASSERT(R_SUCCEEDED(FreeNvMapHandle(*nvmap, *out_handle, nvmap_fd)));
131 }; 132 };
132 133
133 // Assign the allocated memory to the handle. 134 // Assign the allocated memory to the handle.
134 R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size)); 135 R_RETURN(AllocNvMapHandle(*nvmap, *out_handle, buffer, size, nvmap_fd));
135} 136}
136 137
137constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888; 138constexpr auto SharedBufferBlockLinearFormat = android::PixelFormat::Rgba8888;
@@ -197,9 +198,13 @@ Result FbShareBufferManager::Initialize(u64* out_buffer_id, u64* out_layer_id, u
197 std::addressof(m_buffer_page_group), m_system, 198 std::addressof(m_buffer_page_group), m_system,
198 SharedBufferSize)); 199 SharedBufferSize));
199 200
201 auto& container = m_nvdrv->GetContainer();
202 m_session_id = container.OpenSession(m_system.ApplicationProcess());
203 m_nvmap_fd = m_nvdrv->Open("/dev/nvmap", m_session_id);
204
200 // Create an nvmap handle for the buffer and assign the memory to it. 205 // Create an nvmap handle for the buffer and assign the memory to it.
201 R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, map_address, 206 R_TRY(AllocateHandleForBuffer(std::addressof(m_buffer_nvmap_handle), *m_nvdrv, m_nvmap_fd,
202 SharedBufferSize)); 207 map_address, SharedBufferSize));
203 208
204 // Record the display id. 209 // Record the display id.
205 m_display_id = display_id; 210 m_display_id = display_id;
diff --git a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
index c809c01b4..033bf4bbe 100644
--- a/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
+++ b/src/core/hle/service/nvnflinger/fb_share_buffer_manager.h
@@ -4,6 +4,8 @@
4#pragma once 4#pragma once
5 5
6#include "common/math_util.h" 6#include "common/math_util.h"
7#include "core/hle/service/nvdrv/core/container.h"
8#include "core/hle/service/nvdrv/nvdata.h"
7#include "core/hle/service/nvnflinger/nvnflinger.h" 9#include "core/hle/service/nvnflinger/nvnflinger.h"
8#include "core/hle/service/nvnflinger/ui/fence.h" 10#include "core/hle/service/nvnflinger/ui/fence.h"
9 11
@@ -53,7 +55,8 @@ private:
53 u64 m_layer_id = 0; 55 u64 m_layer_id = 0;
54 u32 m_buffer_nvmap_handle = 0; 56 u32 m_buffer_nvmap_handle = 0;
55 SharedMemoryPoolLayout m_pool_layout = {}; 57 SharedMemoryPoolLayout m_pool_layout = {};
56 58 Nvidia::DeviceFD m_nvmap_fd = {};
59 Nvidia::NvCore::SessionId m_session_id = {};
57 std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group; 60 std::unique_ptr<Kernel::KPageGroup> m_buffer_page_group;
58 61
59 std::mutex m_guard; 62 std::mutex m_guard;
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp
index af6591370..71d6fdb0c 100644
--- a/src/core/hle/service/nvnflinger/nvnflinger.cpp
+++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp
@@ -124,7 +124,7 @@ void Nvnflinger::ShutdownLayers() {
124 124
125void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { 125void Nvnflinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
126 nvdrv = std::move(instance); 126 nvdrv = std::move(instance);
127 disp_fd = nvdrv->Open("/dev/nvdisp_disp0"); 127 disp_fd = nvdrv->Open("/dev/nvdisp_disp0", {});
128} 128}
129 129
130std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) { 130std::optional<u64> Nvnflinger::OpenDisplay(std::string_view name) {
diff --git a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
index ce70946ec..ede2a1193 100644
--- a/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
+++ b/src/core/hle/service/nvnflinger/ui/graphic_buffer.cpp
@@ -22,11 +22,13 @@ GraphicBuffer::GraphicBuffer(Service::Nvidia::NvCore::NvMap& nvmap,
22 : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) { 22 : NvGraphicBuffer(GetBuffer(buffer)), m_nvmap(std::addressof(nvmap)) {
23 if (this->BufferId() > 0) { 23 if (this->BufferId() > 0) {
24 m_nvmap->DuplicateHandle(this->BufferId(), true); 24 m_nvmap->DuplicateHandle(this->BufferId(), true);
25 m_nvmap->PinHandle(this->BufferId(), false);
25 } 26 }
26} 27}
27 28
28GraphicBuffer::~GraphicBuffer() { 29GraphicBuffer::~GraphicBuffer() {
29 if (m_nvmap != nullptr && this->BufferId() > 0) { 30 if (m_nvmap != nullptr && this->BufferId() > 0) {
31 m_nvmap->UnpinHandle(this->BufferId());
30 m_nvmap->FreeHandle(this->BufferId(), true); 32 m_nvmap->FreeHandle(this->BufferId(), true);
31 } 33 }
32} 34}
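GraphicBuffer now pins the nvmap handle when it is constructed and unpins it in the destructor, pairing the two operations across the object's lifetime. The same idea expressed as a small RAII guard; HandlePool and its Pin/Unpin methods are assumptions for the sketch, not NvCore::NvMap's interface:

#include <cstdint>

struct HandlePool {
    std::uint64_t Pin(std::uint32_t handle) {
        // Map the handle into the device address space and return its address.
        return 0x1000u + handle;
    }
    void Unpin(std::uint32_t handle) {
        // Release the mapping established by Pin.
        static_cast<void>(handle);
    }
};

class PinnedHandle {
public:
    PinnedHandle(HandlePool& pool, std::uint32_t handle)
        : pool_{pool}, handle_{handle}, address_{pool.Pin(handle)} {}
    ~PinnedHandle() {
        pool_.Unpin(handle_);
    }
    PinnedHandle(const PinnedHandle&) = delete;
    PinnedHandle& operator=(const PinnedHandle&) = delete;

    std::uint64_t Address() const {
        return address_;
    }

private:
    HandlePool& pool_;
    std::uint32_t handle_;
    std::uint64_t address_;
};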
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8176a41be..1c218566f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -24,6 +24,8 @@
24#include "core/hle/kernel/k_process.h" 24#include "core/hle/kernel/k_process.h"
25#include "core/memory.h" 25#include "core/memory.h"
26#include "video_core/gpu.h" 26#include "video_core/gpu.h"
27#include "video_core/host1x/gpu_device_memory_manager.h"
28#include "video_core/host1x/host1x.h"
27#include "video_core/rasterizer_download_area.h" 29#include "video_core/rasterizer_download_area.h"
28 30
29namespace Core::Memory { 31namespace Core::Memory {
@@ -637,17 +639,6 @@ struct Memory::Impl {
637 LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), 639 LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
638 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); 640 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
639 641
640 // During boot, current_page_table might not be set yet, in which case we need not flush
641 if (system.IsPoweredOn()) {
642 auto& gpu = system.GPU();
643 for (u64 i = 0; i < size; i++) {
644 const auto page = base + i;
645 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
646 gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
647 }
648 }
649 }
650
651 const auto end = base + size; 642 const auto end = base + size;
652 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", 643 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
653 base + page_table.pointers.size()); 644 base + page_table.pointers.size());
@@ -811,21 +802,33 @@ struct Memory::Impl {
811 return true; 802 return true;
812 } 803 }
813 804
814 void HandleRasterizerDownload(VAddr address, size_t size) { 805 void HandleRasterizerDownload(VAddr v_address, size_t size) {
806 const auto* p = GetPointerImpl(
807 v_address, []() {}, []() {});
808 if (!gpu_device_memory) [[unlikely]] {
809 gpu_device_memory = &system.Host1x().MemoryManager();
810 }
815 const size_t core = system.GetCurrentHostThreadID(); 811 const size_t core = system.GetCurrentHostThreadID();
816 auto& current_area = rasterizer_read_areas[core]; 812 auto& current_area = rasterizer_read_areas[core];
817 const VAddr end_address = address + size; 813 gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
818 if (current_area.start_address <= address && end_address <= current_area.end_address) 814 const DAddr end_address = address + size;
819 [[likely]] { 815 if (current_area.start_address <= address && end_address <= current_area.end_address)
820 return; 816 [[likely]] {
821 } 817 return;
822 current_area = system.GPU().OnCPURead(address, size); 818 }
819 current_area = system.GPU().OnCPURead(address, size);
820 });
823 } 821 }
824 822
825 void HandleRasterizerWrite(VAddr address, size_t size) { 823 void HandleRasterizerWrite(VAddr v_address, size_t size) {
824 const auto* p = GetPointerImpl(
825 v_address, []() {}, []() {});
826 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; 826 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
827 const size_t core = std::min(system.GetCurrentHostThreadID(), 827 const size_t core = std::min(system.GetCurrentHostThreadID(),
828 sys_core); // any other calls threads go to syscore. 828 sys_core); // any other calls threads go to syscore.
829 if (!gpu_device_memory) [[unlikely]] {
830 gpu_device_memory = &system.Host1x().MemoryManager();
831 }
829 // Guard on sys_core; 832 // Guard on sys_core;
830 if (core == sys_core) [[unlikely]] { 833 if (core == sys_core) [[unlikely]] {
831 sys_core_guard.lock(); 834 sys_core_guard.lock();
@@ -835,36 +838,53 @@ struct Memory::Impl {
835 sys_core_guard.unlock(); 838 sys_core_guard.unlock();
836 } 839 }
837 }); 840 });
838 auto& current_area = rasterizer_write_areas[core]; 841 gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
839 VAddr subaddress = address >> YUZU_PAGEBITS; 842 auto& current_area = rasterizer_write_areas[core];
840 bool do_collection = current_area.last_address == subaddress; 843 PAddr subaddress = address >> YUZU_PAGEBITS;
841 if (!do_collection) [[unlikely]] { 844 bool do_collection = current_area.last_address == subaddress;
842 do_collection = system.GPU().OnCPUWrite(address, size); 845 if (!do_collection) [[unlikely]] {
843 if (!do_collection) { 846 do_collection = system.GPU().OnCPUWrite(address, size);
844 return; 847 if (!do_collection) {
848 return;
849 }
850 current_area.last_address = subaddress;
845 } 851 }
846 current_area.last_address = subaddress; 852 gpu_dirty_managers[core].Collect(address, size);
847 } 853 });
848 gpu_dirty_managers[core].Collect(address, size);
849 } 854 }
850 855
851 struct GPUDirtyState { 856 struct GPUDirtyState {
852 VAddr last_address; 857 PAddr last_address;
853 }; 858 };
854 859
855 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { 860 void InvalidateGPUMemory(u8* p, size_t size) {
856 system.GPU().InvalidateRegion(GetInteger(dest_addr), size); 861 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
857 } 862 const size_t core = std::min(system.GetCurrentHostThreadID(),
858 863 sys_core); // any other calls threads go to syscore.
859 void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { 864 if (!gpu_device_memory) [[unlikely]] {
860 system.GPU().FlushRegion(GetInteger(dest_addr), size); 865 gpu_device_memory = &system.Host1x().MemoryManager();
866 }
867 // Guard on sys_core;
868 if (core == sys_core) [[unlikely]] {
869 sys_core_guard.lock();
870 }
871 SCOPE_EXIT({
872 if (core == sys_core) [[unlikely]] {
873 sys_core_guard.unlock();
874 }
875 });
876 auto& gpu = system.GPU();
877 gpu_device_memory->ApplyOpOnPointer(
878 p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
861 } 879 }
862 880
863 Core::System& system; 881 Core::System& system;
882 Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
864 Common::PageTable* current_page_table = nullptr; 883 Common::PageTable* current_page_table = nullptr;
865 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> 884 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
866 rasterizer_read_areas{}; 885 rasterizer_read_areas{};
867 std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; 886 std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
887 std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
868 std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; 888 std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
869 std::mutex sys_core_guard; 889 std::mutex sys_core_guard;
870 890
@@ -1059,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
1059 impl->MarkRegionDebug(GetInteger(vaddr), size, debug); 1079 impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
1060} 1080}
1061 1081
1062void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
1063 impl->InvalidateRegion(dest_addr, size);
1064}
1065
1066void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
1067 impl->FlushRegion(dest_addr, size);
1068}
1069
1070bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { 1082bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
1071 [[maybe_unused]] bool mapped = true; 1083 [[maybe_unused]] bool mapped = true;
1072 [[maybe_unused]] bool rasterizer = false; 1084 [[maybe_unused]] bool rasterizer = false;
@@ -1078,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
1078 GetInteger(vaddr)); 1090 GetInteger(vaddr));
1079 mapped = false; 1091 mapped = false;
1080 }, 1092 },
1081 [&] { 1093 [&] { rasterizer = true; });
1082 impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); 1094 if (rasterizer) {
1083 rasterizer = true; 1095 impl->InvalidateGPUMemory(ptr, size);
1084 }); 1096 }
1085 1097
1086#ifdef __linux__ 1098#ifdef __linux__
1087 if (!rasterizer && mapped) { 1099 if (!rasterizer && mapped) {
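HandleRasterizerWrite keeps its per-core coalescing after the device-address rework: consecutive writes landing on the same page skip the rasterizer query and go straight to dirty collection, and only a page change re-checks OnCPUWrite. That control flow, isolated into a sketch with illustrative names (PageBits, rasterizer_uses, collect_dirty are not yuzu identifiers):

#include <cstddef>
#include <cstdint>
#include <functional>

struct WriteTracker {
    static constexpr std::uint64_t PageBits = 12;
    std::uint64_t last_page = ~0ULL;

    void OnWrite(std::uint64_t device_addr, std::size_t size,
                 const std::function<bool(std::uint64_t, std::size_t)>& rasterizer_uses,
                 const std::function<void(std::uint64_t, std::size_t)>& collect_dirty) {
        const std::uint64_t page = device_addr >> PageBits;
        if (page != last_page) {
            // Only consult the rasterizer when leaving the previously seen page.
            if (!rasterizer_uses(device_addr, size)) {
                return;
            }
            last_page = page;
        }
        collect_dirty(device_addr, size);
    }
};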
diff --git a/src/core/memory.h b/src/core/memory.h
index dddfaf4a4..f7e6b297f 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -12,6 +12,7 @@
12 12
13#include "common/scratch_buffer.h" 13#include "common/scratch_buffer.h"
14#include "common/typed_address.h" 14#include "common/typed_address.h"
15#include "core/guest_memory.h"
15#include "core/hle/result.h" 16#include "core/hle/result.h"
16 17
17namespace Common { 18namespace Common {
@@ -486,10 +487,10 @@ public:
486 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); 487 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
487 488
488 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); 489 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
489 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); 490
490 bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); 491 bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
492
491 bool InvalidateSeparateHeap(void* fault_address); 493 bool InvalidateSeparateHeap(void* fault_address);
492 void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
493 494
494private: 495private:
495 Core::System& system; 496 Core::System& system;
@@ -498,209 +499,9 @@ private:
498 std::unique_ptr<Impl> impl; 499 std::unique_ptr<Impl> impl;
499}; 500};
500 501
501enum GuestMemoryFlags : u32 {
502 Read = 1 << 0,
503 Write = 1 << 1,
504 Safe = 1 << 2,
505 Cached = 1 << 3,
506
507 SafeRead = Read | Safe,
508 SafeWrite = Write | Safe,
509 SafeReadWrite = SafeRead | SafeWrite,
510 SafeReadCachedWrite = SafeReadWrite | Cached,
511
512 UnsafeRead = Read,
513 UnsafeWrite = Write,
514 UnsafeReadWrite = UnsafeRead | UnsafeWrite,
515 UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
516};
517
518namespace {
519template <typename M, typename T, GuestMemoryFlags FLAGS>
520class GuestMemory {
521 using iterator = T*;
522 using const_iterator = const T*;
523 using value_type = T;
524 using element_type = T;
525 using iterator_category = std::contiguous_iterator_tag;
526
527public:
528 GuestMemory() = delete;
529 explicit GuestMemory(M& memory, u64 addr, std::size_t size,
530 Common::ScratchBuffer<T>* backup = nullptr)
531 : m_memory{memory}, m_addr{addr}, m_size{size} {
532 static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
533 if constexpr (FLAGS & GuestMemoryFlags::Read) {
534 Read(addr, size, backup);
535 }
536 }
537
538 ~GuestMemory() = default;
539
540 T* data() noexcept {
541 return m_data_span.data();
542 }
543
544 const T* data() const noexcept {
545 return m_data_span.data();
546 }
547
548 size_t size() const noexcept {
549 return m_size;
550 }
551
552 size_t size_bytes() const noexcept {
553 return this->size() * sizeof(T);
554 }
555
556 [[nodiscard]] T* begin() noexcept {
557 return this->data();
558 }
559
560 [[nodiscard]] const T* begin() const noexcept {
561 return this->data();
562 }
563
564 [[nodiscard]] T* end() noexcept {
565 return this->data() + this->size();
566 }
567
568 [[nodiscard]] const T* end() const noexcept {
569 return this->data() + this->size();
570 }
571
572 T& operator[](size_t index) noexcept {
573 return m_data_span[index];
574 }
575
576 const T& operator[](size_t index) const noexcept {
577 return m_data_span[index];
578 }
579
580 void SetAddressAndSize(u64 addr, std::size_t size) noexcept {
581 m_addr = addr;
582 m_size = size;
583 m_addr_changed = true;
584 }
585
586 std::span<T> Read(u64 addr, std::size_t size,
587 Common::ScratchBuffer<T>* backup = nullptr) noexcept {
588 m_addr = addr;
589 m_size = size;
590 if (m_size == 0) {
591 m_is_data_copy = true;
592 return {};
593 }
594
595 if (this->TrySetSpan()) {
596 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
597 m_memory.FlushRegion(m_addr, this->size_bytes());
598 }
599 } else {
600 if (backup) {
601 backup->resize_destructive(this->size());
602 m_data_span = *backup;
603 } else {
604 m_data_copy.resize(this->size());
605 m_data_span = std::span(m_data_copy);
606 }
607 m_is_data_copy = true;
608 m_span_valid = true;
609 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
610 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes());
611 } else {
612 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes());
613 }
614 }
615 return m_data_span;
616 }
617
618 void Write(std::span<T> write_data) noexcept {
619 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
620 m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes());
621 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
622 m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes());
623 } else {
624 m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes());
625 }
626 }
627
628 bool TrySetSpan() noexcept {
629 if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) {
630 m_data_span = {reinterpret_cast<T*>(ptr), this->size()};
631 m_span_valid = true;
632 return true;
633 }
634 return false;
635 }
636
637protected:
638 bool IsDataCopy() const noexcept {
639 return m_is_data_copy;
640 }
641
642 bool AddressChanged() const noexcept {
643 return m_addr_changed;
644 }
645
646 M& m_memory;
647 u64 m_addr{};
648 size_t m_size{};
649 std::span<T> m_data_span{};
650 std::vector<T> m_data_copy{};
651 bool m_span_valid{false};
652 bool m_is_data_copy{false};
653 bool m_addr_changed{false};
654};
655
656template <typename M, typename T, GuestMemoryFlags FLAGS>
657class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
658public:
659 GuestMemoryScoped() = delete;
660 explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size,
661 Common::ScratchBuffer<T>* backup = nullptr)
662 : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) {
663 if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
664 if (!this->TrySetSpan()) {
665 if (backup) {
666 this->m_data_span = *backup;
667 this->m_span_valid = true;
668 this->m_is_data_copy = true;
669 }
670 }
671 }
672 }
673
674 ~GuestMemoryScoped() {
675 if constexpr (FLAGS & GuestMemoryFlags::Write) {
676 if (this->size() == 0) [[unlikely]] {
677 return;
678 }
679
680 if (this->AddressChanged() || this->IsDataCopy()) {
681 ASSERT(this->m_span_valid);
682 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
683 this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes());
684 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
685 this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes());
686 } else {
687 this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes());
688 }
689 } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) ||
690 (FLAGS & GuestMemoryFlags::Cached)) {
691 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes());
692 }
693 }
694 }
695};
696} // namespace
697
698template <typename T, GuestMemoryFlags FLAGS> 502template <typename T, GuestMemoryFlags FLAGS>
699using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; 503using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>;
700template <typename T, GuestMemoryFlags FLAGS> 504template <typename T, GuestMemoryFlags FLAGS>
701using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; 505using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>;
702template <typename T, GuestMemoryFlags FLAGS> 506
703using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
704template <typename T, GuestMemoryFlags FLAGS>
705using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
706} // namespace Core::Memory 507} // namespace Core::Memory
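The GuestMemory helpers removed from memory.h (which now pulls in core/guest_memory.h instead) revolve around one idea: hand out a zero-copy span when the guest range is contiguous in host memory, and fall back to a scratch-buffer copy otherwise. A condensed sketch of that span-or-copy decision; MemoryLike is an assumed minimal interface with GetSpan/ReadBlock, not the real Memory class:

#include <cstddef>
#include <cstdint>
#include <span>
#include <vector>

// Returns a view over guest memory, preferring a direct host span and copying
// into the caller-provided scratch buffer only when no contiguous span exists.
template <typename MemoryLike, typename T>
std::span<T> SpanOrCopy(MemoryLike& memory, std::uint64_t addr, std::size_t count,
                        std::vector<T>& scratch) {
    if (std::uint8_t* ptr = memory.GetSpan(addr, count * sizeof(T)); ptr != nullptr) {
        // Zero-copy path; assumes the pointer is suitably aligned for T.
        return {reinterpret_cast<T*>(ptr), count};
    }
    scratch.resize(count);
    memory.ReadBlock(addr, scratch.data(), count * sizeof(T));
    return {scratch.data(), count};
}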
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
index 618793668..0e559a590 100644
--- a/src/tests/video_core/memory_tracker.cpp
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -24,9 +24,8 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
24class RasterizerInterface { 24class RasterizerInterface {
25public: 25public:
26 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { 26 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
27 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; 27 const u64 page_start{addr >> Core::DEVICE_PAGEBITS};
28 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> 28 const u64 page_end{(addr + size + Core::DEVICE_PAGESIZE - 1) >> Core::DEVICE_PAGEBITS};
29 Core::Memory::YUZU_PAGEBITS};
30 for (u64 page = page_start; page < page_end; ++page) { 29 for (u64 page = page_start; page < page_end; ++page) {
31 int& value = page_table[page]; 30 int& value = page_table[page];
32 value += delta; 31 value += delta;
@@ -40,7 +39,7 @@ public:
40 } 39 }
41 40
42 [[nodiscard]] int Count(VAddr addr) const noexcept { 41 [[nodiscard]] int Count(VAddr addr) const noexcept {
43 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); 42 const auto it = page_table.find(addr >> Core::DEVICE_PAGEBITS);
44 return it == page_table.end() ? 0 : it->second; 43 return it == page_table.end() ? 0 : it->second;
45 } 44 }
46 45
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c22c7631c..5ed0ad0ed 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -71,6 +71,8 @@ add_library(video_core STATIC
71 host1x/ffmpeg/ffmpeg.h 71 host1x/ffmpeg/ffmpeg.h
72 host1x/control.cpp 72 host1x/control.cpp
73 host1x/control.h 73 host1x/control.h
74 host1x/gpu_device_memory_manager.cpp
75 host1x/gpu_device_memory_manager.h
74 host1x/host1x.cpp 76 host1x/host1x.cpp
75 host1x/host1x.h 77 host1x/host1x.h
76 host1x/nvdec.cpp 78 host1x/nvdec.cpp
@@ -93,6 +95,7 @@ add_library(video_core STATIC
93 gpu.h 95 gpu.h
94 gpu_thread.cpp 96 gpu_thread.cpp
95 gpu_thread.h 97 gpu_thread.h
98 guest_memory.h
96 invalidation_accumulator.h 99 invalidation_accumulator.h
97 memory_manager.cpp 100 memory_manager.cpp
98 memory_manager.h 101 memory_manager.h
@@ -105,8 +108,6 @@ add_library(video_core STATIC
105 query_cache/query_stream.h 108 query_cache/query_stream.h
106 query_cache/types.h 109 query_cache/types.h
107 query_cache.h 110 query_cache.h
108 rasterizer_accelerated.cpp
109 rasterizer_accelerated.h
110 rasterizer_interface.h 111 rasterizer_interface.h
111 renderer_base.cpp 112 renderer_base.cpp
112 renderer_base.h 113 renderer_base.h
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 0bb3bf8ae..40e98e395 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -33,13 +33,12 @@ struct NullBufferParams {};
33 * 33 *
34 * The buffer size and address is forcefully aligned to CPU page boundaries. 34 * The buffer size and address is forcefully aligned to CPU page boundaries.
35 */ 35 */
36template <class RasterizerInterface>
37class BufferBase { 36class BufferBase {
38public: 37public:
39 static constexpr u64 BASE_PAGE_BITS = 16; 38 static constexpr u64 BASE_PAGE_BITS = 16;
40 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
41 40
42 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) 41 explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_)
43 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} 42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
44 43
45 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6d1fc3887..b4bf369d1 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -8,16 +8,16 @@
8#include <numeric> 8#include <numeric>
9 9
10#include "video_core/buffer_cache/buffer_cache_base.h" 10#include "video_core/buffer_cache/buffer_cache_base.h"
11#include "video_core/guest_memory.h"
12#include "video_core/host1x/gpu_device_memory_manager.h"
11 13
12namespace VideoCommon { 14namespace VideoCommon {
13 15
14using Core::Memory::YUZU_PAGESIZE; 16using Core::DEVICE_PAGESIZE;
15 17
16template <class P> 18template <class P>
17BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, 19BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
18 Core::Memory::Memory& cpu_memory_, Runtime& runtime_) 20 : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
19 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
20 rasterizer} {
21 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
22 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
23 common_ranges.clear(); 23 common_ranges.clear();
@@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
29 return; 29 return;
30 } 30 }
31 31
32 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 32 const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
33 const s64 min_spacing_expected = device_memory - 1_GiB; 33 const s64 min_spacing_expected = device_local_memory - 1_GiB;
34 const s64 min_spacing_critical = device_memory - 512_MiB; 34 const s64 min_spacing_critical = device_local_memory - 512_MiB;
35 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 35 const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
38 minimum_memory = static_cast<u64>( 38 minimum_memory = static_cast<u64>(
39 std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), 39 std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
40 DEFAULT_EXPECTED_MEMORY)); 40 DEFAULT_EXPECTED_MEMORY));
41 critical_memory = static_cast<u64>( 41 critical_memory = static_cast<u64>(
42 std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), 42 std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
43 DEFAULT_CRITICAL_MEMORY)); 43 DEFAULT_CRITICAL_MEMORY));
44} 44}
45 45
@@ -105,71 +105,71 @@ void BufferCache<P>::TickFrame() {
105} 105}
106 106
107template <class P> 107template <class P>
108void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 108void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
109 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 109 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
110 const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; 110 const IntervalType subtract_interval{device_addr, device_addr + size};
111 ClearDownload(subtract_interval); 111 ClearDownload(subtract_interval);
112 common_ranges.subtract(subtract_interval); 112 common_ranges.subtract(subtract_interval);
113 } 113 }
114 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); 114 memory_tracker.MarkRegionAsCpuModified(device_addr, size);
115} 115}
116 116
117template <class P> 117template <class P>
118void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 118void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) {
119 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 119 const bool is_dirty = IsRegionRegistered(device_addr, size);
120 if (!is_dirty) { 120 if (!is_dirty) {
121 return; 121 return;
122 } 122 }
123 VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); 123 DAddr aligned_start = Common::AlignDown(device_addr, DEVICE_PAGESIZE);
124 VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); 124 DAddr aligned_end = Common::AlignUp(device_addr + size, DEVICE_PAGESIZE);
125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
126 WriteMemory(cpu_addr, size); 126 WriteMemory(device_addr, size);
127 return; 127 return;
128 } 128 }
129 129
130 tmp_buffer.resize_destructive(size); 130 tmp_buffer.resize_destructive(size);
131 cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); 131 device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size);
132 132
133 InlineMemoryImplementation(cpu_addr, size, tmp_buffer); 133 InlineMemoryImplementation(device_addr, size, tmp_buffer);
134} 134}
135 135
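Illustrative sketch, not part of the change above: CachedWriteMemory widens the written range to DEVICE_PAGESIZE granularity before asking whether any of those pages are GPU-modified. AlignDown/AlignUp below are standalone stand-ins for the Common:: helpers, and the 4 KiB device page is an assumption for the example.

```cpp
#include <cstdint>
#include <cstdio>

using DAddr = uint64_t;

// Stand-ins for Common::AlignDown / Common::AlignUp (power-of-two alignments only).
constexpr DAddr AlignDown(DAddr value, DAddr align) { return value & ~(align - 1); }
constexpr DAddr AlignUp(DAddr value, DAddr align) { return AlignDown(value + align - 1, align); }

constexpr DAddr DEVICE_PAGESIZE = 0x1000; // assumed 4 KiB device page

int main() {
    const DAddr device_addr = 0x12345678;
    const uint64_t size = 300;
    // The GPU-modified query runs over the widened [aligned_start, aligned_end) range.
    const DAddr aligned_start = AlignDown(device_addr, DEVICE_PAGESIZE);
    const DAddr aligned_end = AlignUp(device_addr + size, DEVICE_PAGESIZE);
    std::printf("[%llx, %llx) -> [%llx, %llx)\n",
                static_cast<unsigned long long>(device_addr),
                static_cast<unsigned long long>(device_addr + size),
                static_cast<unsigned long long>(aligned_start),
                static_cast<unsigned long long>(aligned_end));
}
```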
136template <class P> 136template <class P>
137bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { 137bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) {
138 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 138 const bool is_dirty = IsRegionRegistered(device_addr, size);
139 if (!is_dirty) { 139 if (!is_dirty) {
140 return false; 140 return false;
141 } 141 }
142 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 142 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
143 return true; 143 return true;
144 } 144 }
145 WriteMemory(cpu_addr, size); 145 WriteMemory(device_addr, size);
146 return false; 146 return false;
147} 147}
148 148
149template <class P> 149template <class P>
150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, 150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr,
151 u64 size) { 151 u64 size) {
152 std::optional<VideoCore::RasterizerDownloadArea> area{}; 152 std::optional<VideoCore::RasterizerDownloadArea> area{};
153 area.emplace(); 153 area.emplace();
154 VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE); 154 DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::DEVICE_PAGESIZE);
155 VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 155 DAddr device_addr_end_aligned = Common::AlignUp(device_addr + size, Core::DEVICE_PAGESIZE);
156 area->start_address = cpu_addr_start_aligned; 156 area->start_address = device_addr_start_aligned;
157 area->end_address = cpu_addr_end_aligned; 157 area->end_address = device_addr_end_aligned;
158 if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) { 158 if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
159 area->preemtive = true; 159 area->preemtive = true;
160 return area; 160 return area;
161 }; 161 };
162 area->preemtive = 162 area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
163 !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned); 163 device_addr_end_aligned - device_addr_start_aligned);
164 memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned, 164 memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
165 cpu_addr_end_aligned - cpu_addr_start_aligned); 165 device_addr_end_aligned - device_addr_start_aligned);
166 return area; 166 return area;
167} 167}
168 168
169template <class P> 169template <class P>
170void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 170void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
171 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 171 ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) {
172 DownloadBufferMemory(buffer, cpu_addr, size); 172 DownloadBufferMemory(buffer, device_addr, size);
173 }); 173 });
174} 174}
175 175
@@ -184,8 +184,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
184 184
185template <class P> 185template <class P>
186bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { 186bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
187 const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); 187 const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
188 const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); 188 const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
189 if (!cpu_src_address || !cpu_dest_address) { 189 if (!cpu_src_address || !cpu_dest_address) {
190 return false; 190 return false;
191 } 191 }
@@ -216,10 +216,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
216 }}; 216 }};
217 217
218 boost::container::small_vector<IntervalType, 4> tmp_intervals; 218 boost::container::small_vector<IntervalType, 4> tmp_intervals;
219 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 219 auto mirror = [&](DAddr base_address, DAddr base_address_end) {
220 const u64 size = base_address_end - base_address; 220 const u64 size = base_address_end - base_address;
221 const VAddr diff = base_address - *cpu_src_address; 221 const DAddr diff = base_address - *cpu_src_address;
222 const VAddr new_base_address = *cpu_dest_address + diff; 222 const DAddr new_base_address = *cpu_dest_address + diff;
223 const IntervalType add_interval{new_base_address, new_base_address + size}; 223 const IntervalType add_interval{new_base_address, new_base_address + size};
224 tmp_intervals.push_back(add_interval); 224 tmp_intervals.push_back(add_interval);
225 uncommitted_ranges.add(add_interval); 225 uncommitted_ranges.add(add_interval);
@@ -239,15 +239,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
239 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 239 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
240 } 240 }
241 241
242 Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( 242 Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite>
243 cpu_memory, *cpu_src_address, amount, &tmp_buffer); 243 tmp(device_memory, *cpu_src_address, amount, &tmp_buffer);
244 tmp.SetAddressAndSize(*cpu_dest_address, amount); 244 tmp.SetAddressAndSize(*cpu_dest_address, amount);
245 return true; 245 return true;
246} 246}
247 247
248template <class P> 248template <class P>
249bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { 249bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
250 const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); 250 const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
251 if (!cpu_dst_address) { 251 if (!cpu_dst_address) {
252 return false; 252 return false;
253 } 253 }
@@ -273,23 +273,23 @@ template <class P>
273std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, 273std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
274 ObtainBufferSynchronize sync_info, 274 ObtainBufferSynchronize sync_info,
275 ObtainBufferOperation post_op) { 275 ObtainBufferOperation post_op) {
276 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 276 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
277 if (!cpu_addr) { 277 if (!device_addr) {
278 return {&slot_buffers[NULL_BUFFER_ID], 0}; 278 return {&slot_buffers[NULL_BUFFER_ID], 0};
279 } 279 }
280 return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); 280 return ObtainCPUBuffer(*device_addr, size, sync_info, post_op);
281} 281}
282 282
283template <class P> 283template <class P>
284std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( 284std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
285 VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { 285 DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
286 const BufferId buffer_id = FindBuffer(cpu_addr, size); 286 const BufferId buffer_id = FindBuffer(device_addr, size);
287 Buffer& buffer = slot_buffers[buffer_id]; 287 Buffer& buffer = slot_buffers[buffer_id];
288 288
289 // synchronize op 289 // synchronize op
290 switch (sync_info) { 290 switch (sync_info) {
291 case ObtainBufferSynchronize::FullSynchronize: 291 case ObtainBufferSynchronize::FullSynchronize:
292 SynchronizeBuffer(buffer, cpu_addr, size); 292 SynchronizeBuffer(buffer, device_addr, size);
293 break; 293 break;
294 default: 294 default:
295 break; 295 break;
@@ -297,12 +297,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
297 297
298 switch (post_op) { 298 switch (post_op) {
299 case ObtainBufferOperation::MarkAsWritten: 299 case ObtainBufferOperation::MarkAsWritten:
300 MarkWrittenBuffer(buffer_id, cpu_addr, size); 300 MarkWrittenBuffer(buffer_id, device_addr, size);
301 break; 301 break;
302 case ObtainBufferOperation::DiscardWrite: { 302 case ObtainBufferOperation::DiscardWrite: {
303 VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); 303 DAddr device_addr_start = Common::AlignDown(device_addr, 64);
304 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); 304 DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
305 IntervalType interval{cpu_addr_start, cpu_addr_end}; 305 IntervalType interval{device_addr_start, device_addr_end};
306 ClearDownload(interval); 306 ClearDownload(interval);
307 common_ranges.subtract(interval); 307 common_ranges.subtract(interval);
308 break; 308 break;
@@ -311,15 +311,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
311 break; 311 break;
312 } 312 }
313 313
314 return {&buffer, buffer.Offset(cpu_addr)}; 314 return {&buffer, buffer.Offset(device_addr)};
315} 315}
316 316
317template <class P> 317template <class P>
318void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 318void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
319 u32 size) { 319 u32 size) {
320 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 320 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
321 const Binding binding{ 321 const Binding binding{
322 .cpu_addr = *cpu_addr, 322 .device_addr = *device_addr,
323 .size = size, 323 .size = size,
324 .buffer_id = BufferId{}, 324 .buffer_id = BufferId{},
325 }; 325 };
@@ -555,16 +555,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
555 for (const IntervalSet& intervals : committed_ranges) { 555 for (const IntervalSet& intervals : committed_ranges) {
556 for (auto& interval : intervals) { 556 for (auto& interval : intervals) {
557 const std::size_t size = interval.upper() - interval.lower(); 557 const std::size_t size = interval.upper() - interval.lower();
558 const VAddr cpu_addr = interval.lower(); 558 const DAddr device_addr = interval.lower();
559 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 559 ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
560 const VAddr buffer_start = buffer.CpuAddr(); 560 const DAddr buffer_start = buffer.CpuAddr();
561 const VAddr buffer_end = buffer_start + buffer.SizeBytes(); 561 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
562 const VAddr new_start = std::max(buffer_start, cpu_addr); 562 const DAddr new_start = std::max(buffer_start, device_addr);
563 const VAddr new_end = std::min(buffer_end, cpu_addr + size); 563 const DAddr new_end = std::min(buffer_end, device_addr + size);
564 memory_tracker.ForEachDownloadRange( 564 memory_tracker.ForEachDownloadRange(
565 new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { 565 new_start, new_end - new_start, false,
566 const VAddr buffer_addr = buffer.CpuAddr(); 566 [&](u64 device_addr_out, u64 range_size) {
567 const auto add_download = [&](VAddr start, VAddr end) { 567 const DAddr buffer_addr = buffer.CpuAddr();
568 const auto add_download = [&](DAddr start, DAddr end) {
568 const u64 new_offset = start - buffer_addr; 569 const u64 new_offset = start - buffer_addr;
569 const u64 new_size = end - start; 570 const u64 new_size = end - start;
570 downloads.push_back({ 571 downloads.push_back({
@@ -582,7 +583,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
582 largest_copy = std::max(largest_copy, new_size); 583 largest_copy = std::max(largest_copy, new_size);
583 }; 584 };
584 585
585 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); 586 ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
586 }); 587 });
587 }); 588 });
588 } 589 }
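Illustrative sketch, not part of the change above: the new_start/new_end computation in this hunk is a max/min clamp that intersects a committed download interval with a buffer's address range before walking the download ranges. Restated with hypothetical types:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

using DAddr = uint64_t;

struct Range {
    DAddr begin;
    DAddr end; // one past the last byte
};

// Intersect a committed interval with [buffer_start, buffer_start + buffer_size),
// mirroring the new_start/new_end clamp above.
Range ClampToBuffer(DAddr interval_begin, uint64_t interval_size, DAddr buffer_start,
                    uint64_t buffer_size) {
    const DAddr buffer_end = buffer_start + buffer_size;
    const DAddr new_start = std::max(buffer_start, interval_begin);
    const DAddr new_end = std::min(buffer_end, interval_begin + interval_size);
    return Range{new_start, new_end};
}

int main() {
    const Range r = ClampToBuffer(0x10000, 0x5000, 0x12000, 0x2000);
    std::printf("clamped range: [%llx, %llx)\n", static_cast<unsigned long long>(r.begin),
                static_cast<unsigned long long>(r.end));
}
```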
@@ -605,8 +606,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
605 BufferCopy second_copy{copy}; 606 BufferCopy second_copy{copy};
606 Buffer& buffer = slot_buffers[buffer_id]; 607 Buffer& buffer = slot_buffers[buffer_id];
607 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; 608 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
608 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); 609 DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
609 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; 610 const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
610 async_downloads += std::make_pair(base_interval, 1); 611 async_downloads += std::make_pair(base_interval, 1);
611 buffer.MarkUsage(copy.src_offset, copy.size); 612 buffer.MarkUsage(copy.src_offset, copy.size);
612 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); 613 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
@@ -635,11 +636,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
635 runtime.Finish(); 636 runtime.Finish();
636 for (const auto& [copy, buffer_id] : downloads) { 637 for (const auto& [copy, buffer_id] : downloads) {
637 const Buffer& buffer = slot_buffers[buffer_id]; 638 const Buffer& buffer = slot_buffers[buffer_id];
638 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 639 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
639 // Undo the modified offset 640 // Undo the modified offset
640 const u64 dst_offset = copy.dst_offset - download_staging.offset; 641 const u64 dst_offset = copy.dst_offset - download_staging.offset;
641 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 642 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
642 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 643 device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
643 } 644 }
644 } else { 645 } else {
645 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 646 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
@@ -647,8 +648,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
647 Buffer& buffer = slot_buffers[buffer_id]; 648 Buffer& buffer = slot_buffers[buffer_id];
648 buffer.ImmediateDownload(copy.src_offset, 649 buffer.ImmediateDownload(copy.src_offset,
649 immediate_buffer.subspan(0, copy.size)); 650 immediate_buffer.subspan(0, copy.size));
650 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 651 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
651 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 652 device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
652 } 653 }
653 } 654 }
654 } 655 }
@@ -681,19 +682,19 @@ void BufferCache<P>::PopAsyncBuffers() {
681 u8* base = async_buffer->mapped_span.data(); 682 u8* base = async_buffer->mapped_span.data();
682 const size_t base_offset = async_buffer->offset; 683 const size_t base_offset = async_buffer->offset;
683 for (const auto& copy : downloads) { 684 for (const auto& copy : downloads) {
684 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); 685 const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
685 const u64 dst_offset = copy.dst_offset - base_offset; 686 const u64 dst_offset = copy.dst_offset - base_offset;
686 const u8* read_mapped_memory = base + dst_offset; 687 const u8* read_mapped_memory = base + dst_offset;
687 ForEachInOverlapCounter( 688 ForEachInOverlapCounter(
688 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { 689 async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
689 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], 690 device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
690 end - start); 691 end - start);
691 if (count == 1) { 692 if (count == 1) {
692 const IntervalType base_interval{start, end}; 693 const IntervalType base_interval{start, end};
693 common_ranges.subtract(base_interval); 694 common_ranges.subtract(base_interval);
694 } 695 }
695 }); 696 });
696 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; 697 const IntervalType subtract_interval{device_addr, device_addr + copy.size};
697 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); 698 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
698 } 699 }
699 async_buffers_death_ring.emplace_back(*async_buffer); 700 async_buffers_death_ring.emplace_back(*async_buffer);
@@ -703,15 +704,15 @@ void BufferCache<P>::PopAsyncBuffers() {
703} 704}
704 705
705template <class P> 706template <class P>
706bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 707bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
707 bool is_dirty = false; 708 bool is_dirty = false;
708 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); 709 ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
709 return is_dirty; 710 return is_dirty;
710} 711}
711 712
712template <class P> 713template <class P>
713bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 714bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) {
714 const VAddr end_addr = addr + size; 715 const DAddr end_addr = addr + size;
715 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); 716 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
716 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { 717 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
717 const BufferId buffer_id = page_table[page]; 718 const BufferId buffer_id = page_table[page];
@@ -720,8 +721,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
720 continue; 721 continue;
721 } 722 }
722 Buffer& buffer = slot_buffers[buffer_id]; 723 Buffer& buffer = slot_buffers[buffer_id];
723 const VAddr buf_start_addr = buffer.CpuAddr(); 724 const DAddr buf_start_addr = buffer.CpuAddr();
724 const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); 725 const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
725 if (buf_start_addr < end_addr && addr < buf_end_addr) { 726 if (buf_start_addr < end_addr && addr < buf_end_addr) {
726 return true; 727 return true;
727 } 728 }
@@ -731,7 +732,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
731} 732}
732 733
733template <class P> 734template <class P>
734bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 735bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) {
735 return memory_tracker.IsRegionCpuModified(addr, size); 736 return memory_tracker.IsRegionCpuModified(addr, size);
736} 737}
737 738
@@ -739,7 +740,7 @@ template <class P>
739void BufferCache<P>::BindHostIndexBuffer() { 740void BufferCache<P>::BindHostIndexBuffer() {
740 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; 741 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
741 TouchBuffer(buffer, channel_state->index_buffer.buffer_id); 742 TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
742 const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); 743 const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
743 const u32 size = channel_state->index_buffer.size; 744 const u32 size = channel_state->index_buffer.size;
744 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 745 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
745 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { 746 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
@@ -754,7 +755,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
754 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); 755 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
755 } 756 }
756 } else { 757 } else {
757 SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); 758 SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size);
758 } 759 }
759 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { 760 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
760 const u32 new_offset = 761 const u32 new_offset =
@@ -777,7 +778,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
777 const Binding& binding = channel_state->vertex_buffers[index]; 778 const Binding& binding = channel_state->vertex_buffers[index];
778 Buffer& buffer = slot_buffers[binding.buffer_id]; 779 Buffer& buffer = slot_buffers[binding.buffer_id];
779 TouchBuffer(buffer, binding.buffer_id); 780 TouchBuffer(buffer, binding.buffer_id);
780 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 781 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
781 if (!flags[Dirty::VertexBuffer0 + index]) { 782 if (!flags[Dirty::VertexBuffer0 + index]) {
782 continue; 783 continue;
783 } 784 }
@@ -797,7 +798,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
797 Buffer& buffer = slot_buffers[binding.buffer_id]; 798 Buffer& buffer = slot_buffers[binding.buffer_id];
798 799
799 const u32 stride = maxwell3d->regs.vertex_streams[index].stride; 800 const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
800 const u32 offset = buffer.Offset(binding.cpu_addr); 801 const u32 offset = buffer.Offset(binding.device_addr);
801 buffer.MarkUsage(offset, binding.size); 802 buffer.MarkUsage(offset, binding.size);
802 803
803 host_bindings.buffers.push_back(&buffer); 804 host_bindings.buffers.push_back(&buffer);
@@ -814,7 +815,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
814 const auto bind_buffer = [this](const Binding& binding) { 815 const auto bind_buffer = [this](const Binding& binding) {
815 Buffer& buffer = slot_buffers[binding.buffer_id]; 816 Buffer& buffer = slot_buffers[binding.buffer_id];
816 TouchBuffer(buffer, binding.buffer_id); 817 TouchBuffer(buffer, binding.buffer_id);
817 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 818 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
818 }; 819 };
819 if (current_draw_indirect->include_count) { 820 if (current_draw_indirect->include_count) {
820 bind_buffer(channel_state->count_buffer_binding); 821 bind_buffer(channel_state->count_buffer_binding);
@@ -842,13 +843,13 @@ template <class P>
842void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, 843void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
843 bool needs_bind) { 844 bool needs_bind) {
844 const Binding& binding = channel_state->uniform_buffers[stage][index]; 845 const Binding& binding = channel_state->uniform_buffers[stage][index];
845 const VAddr cpu_addr = binding.cpu_addr; 846 const DAddr device_addr = binding.device_addr;
846 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); 847 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
847 Buffer& buffer = slot_buffers[binding.buffer_id]; 848 Buffer& buffer = slot_buffers[binding.buffer_id];
848 TouchBuffer(buffer, binding.buffer_id); 849 TouchBuffer(buffer, binding.buffer_id);
849 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 850 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
850 size <= channel_state->uniform_buffer_skip_cache_size && 851 size <= channel_state->uniform_buffer_skip_cache_size &&
851 !memory_tracker.IsRegionGpuModified(cpu_addr, size); 852 !memory_tracker.IsRegionGpuModified(device_addr, size);
852 if (use_fast_buffer) { 853 if (use_fast_buffer) {
853 if constexpr (IS_OPENGL) { 854 if constexpr (IS_OPENGL) {
854 if (runtime.HasFastBufferSubData()) { 855 if (runtime.HasFastBufferSubData()) {
@@ -862,7 +863,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
862 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; 863 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
863 runtime.BindFastUniformBuffer(stage, binding_index, size); 864 runtime.BindFastUniformBuffer(stage, binding_index, size);
864 } 865 }
865 const auto span = ImmediateBufferWithData(cpu_addr, size); 866 const auto span = ImmediateBufferWithData(device_addr, size);
866 runtime.PushFastUniformBuffer(stage, binding_index, span); 867 runtime.PushFastUniformBuffer(stage, binding_index, span);
867 return; 868 return;
868 } 869 }
@@ -873,11 +874,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
873 } 874 }
874 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 875 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
875 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 876 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
876 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 877 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
877 return; 878 return;
878 } 879 }
879 // Classic cached path 880 // Classic cached path
880 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); 881 const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size);
881 if (sync_cached) { 882 if (sync_cached) {
882 ++channel_state->uniform_cache_hits[0]; 883 ++channel_state->uniform_cache_hits[0];
883 } 884 }
@@ -892,7 +893,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
892 if (!needs_bind) { 893 if (!needs_bind) {
893 return; 894 return;
894 } 895 }
895 const u32 offset = buffer.Offset(cpu_addr); 896 const u32 offset = buffer.Offset(device_addr);
896 if constexpr (IS_OPENGL) { 897 if constexpr (IS_OPENGL) {
897 // Fast buffer will be unbound 898 // Fast buffer will be unbound
898 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); 899 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
@@ -920,14 +921,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
920 Buffer& buffer = slot_buffers[binding.buffer_id]; 921 Buffer& buffer = slot_buffers[binding.buffer_id];
921 TouchBuffer(buffer, binding.buffer_id); 922 TouchBuffer(buffer, binding.buffer_id);
922 const u32 size = binding.size; 923 const u32 size = binding.size;
923 SynchronizeBuffer(buffer, binding.cpu_addr, size); 924 SynchronizeBuffer(buffer, binding.device_addr, size);
924 925
925 const u32 offset = buffer.Offset(binding.cpu_addr); 926 const u32 offset = buffer.Offset(binding.device_addr);
926 buffer.MarkUsage(offset, size); 927 buffer.MarkUsage(offset, size);
927 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; 928 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
928 929
929 if (is_written) { 930 if (is_written) {
930 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 931 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
931 } 932 }
932 933
933 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 934 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -945,14 +946,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
945 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; 946 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
946 Buffer& buffer = slot_buffers[binding.buffer_id]; 947 Buffer& buffer = slot_buffers[binding.buffer_id];
947 const u32 size = binding.size; 948 const u32 size = binding.size;
948 SynchronizeBuffer(buffer, binding.cpu_addr, size); 949 SynchronizeBuffer(buffer, binding.device_addr, size);
949 950
950 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; 951 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
951 if (is_written) { 952 if (is_written) {
952 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 953 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
953 } 954 }
954 955
955 const u32 offset = buffer.Offset(binding.cpu_addr); 956 const u32 offset = buffer.Offset(binding.device_addr);
956 const PixelFormat format = binding.format; 957 const PixelFormat format = binding.format;
957 buffer.MarkUsage(offset, size); 958 buffer.MarkUsage(offset, size);
958 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 959 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -982,11 +983,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
982 Buffer& buffer = slot_buffers[binding.buffer_id]; 983 Buffer& buffer = slot_buffers[binding.buffer_id];
983 TouchBuffer(buffer, binding.buffer_id); 984 TouchBuffer(buffer, binding.buffer_id);
984 const u32 size = binding.size; 985 const u32 size = binding.size;
985 SynchronizeBuffer(buffer, binding.cpu_addr, size); 986 SynchronizeBuffer(buffer, binding.device_addr, size);
986 987
987 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 988 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
988 989
989 const u32 offset = buffer.Offset(binding.cpu_addr); 990 const u32 offset = buffer.Offset(binding.device_addr);
990 buffer.MarkUsage(offset, size); 991 buffer.MarkUsage(offset, size);
991 host_bindings.buffers.push_back(&buffer); 992 host_bindings.buffers.push_back(&buffer);
992 host_bindings.offsets.push_back(offset); 993 host_bindings.offsets.push_back(offset);
@@ -1011,9 +1012,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1011 TouchBuffer(buffer, binding.buffer_id); 1012 TouchBuffer(buffer, binding.buffer_id);
1012 const u32 size = 1013 const u32 size =
1013 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); 1014 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
1014 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1015 SynchronizeBuffer(buffer, binding.device_addr, size);
1015 1016
1016 const u32 offset = buffer.Offset(binding.cpu_addr); 1017 const u32 offset = buffer.Offset(binding.device_addr);
1017 buffer.MarkUsage(offset, size); 1018 buffer.MarkUsage(offset, size);
1018 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1019 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
1019 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); 1020 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
@@ -1032,15 +1033,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1032 Buffer& buffer = slot_buffers[binding.buffer_id]; 1033 Buffer& buffer = slot_buffers[binding.buffer_id];
1033 TouchBuffer(buffer, binding.buffer_id); 1034 TouchBuffer(buffer, binding.buffer_id);
1034 const u32 size = binding.size; 1035 const u32 size = binding.size;
1035 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1036 SynchronizeBuffer(buffer, binding.device_addr, size);
1036 1037
1037 const u32 offset = buffer.Offset(binding.cpu_addr); 1038 const u32 offset = buffer.Offset(binding.device_addr);
1038 buffer.MarkUsage(offset, size); 1039 buffer.MarkUsage(offset, size);
1039 const bool is_written = 1040 const bool is_written =
1040 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; 1041 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
1041 1042
1042 if (is_written) { 1043 if (is_written) {
1043 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1044 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1044 } 1045 }
1045 1046
1046 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 1047 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -1058,15 +1059,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
1058 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; 1059 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
1059 Buffer& buffer = slot_buffers[binding.buffer_id]; 1060 Buffer& buffer = slot_buffers[binding.buffer_id];
1060 const u32 size = binding.size; 1061 const u32 size = binding.size;
1061 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1062 SynchronizeBuffer(buffer, binding.device_addr, size);
1062 1063
1063 const bool is_written = 1064 const bool is_written =
1064 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; 1065 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
1065 if (is_written) { 1066 if (is_written) {
1066 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1067 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1067 } 1068 }
1068 1069
1069 const u32 offset = buffer.Offset(binding.cpu_addr); 1070 const u32 offset = buffer.Offset(binding.device_addr);
1070 const PixelFormat format = binding.format; 1071 const PixelFormat format = binding.format;
1071 buffer.MarkUsage(offset, size); 1072 buffer.MarkUsage(offset, size);
1072 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 1073 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1131,7 +1132,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
1131 inline_buffer_id = CreateBuffer(0, buffer_size); 1132 inline_buffer_id = CreateBuffer(0, buffer_size);
1132 } 1133 }
1133 channel_state->index_buffer = Binding{ 1134 channel_state->index_buffer = Binding{
1134 .cpu_addr = 0, 1135 .device_addr = 0,
1135 .size = inline_index_size, 1136 .size = inline_index_size,
1136 .buffer_id = inline_buffer_id, 1137 .buffer_id = inline_buffer_id,
1137 }; 1138 };
@@ -1140,19 +1141,19 @@ void BufferCache<P>::UpdateIndexBuffer() {
1140 1141
1141 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); 1142 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1142 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); 1143 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1143 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1144 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1144 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1145 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1145 const u32 draw_size = 1146 const u32 draw_size =
1146 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); 1147 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1147 const u32 size = std::min(address_size, draw_size); 1148 const u32 size = std::min(address_size, draw_size);
1148 if (size == 0 || !cpu_addr) { 1149 if (size == 0 || !device_addr) {
1149 channel_state->index_buffer = NULL_BINDING; 1150 channel_state->index_buffer = NULL_BINDING;
1150 return; 1151 return;
1151 } 1152 }
1152 channel_state->index_buffer = Binding{ 1153 channel_state->index_buffer = Binding{
1153 .cpu_addr = *cpu_addr, 1154 .device_addr = *device_addr,
1154 .size = size, 1155 .size = size,
1155 .buffer_id = FindBuffer(*cpu_addr, size), 1156 .buffer_id = FindBuffer(*device_addr, size),
1156 }; 1157 };
1157} 1158}
1158 1159
@@ -1178,19 +1179,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1178 const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; 1179 const auto& limit = maxwell3d->regs.vertex_stream_limits[index];
1179 const GPUVAddr gpu_addr_begin = array.Address(); 1180 const GPUVAddr gpu_addr_begin = array.Address();
1180 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1181 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1181 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1182 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1182 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1183 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1183 u32 size = address_size; // TODO: Analyze stride and number of vertices 1184 u32 size = address_size; // TODO: Analyze stride and number of vertices
1184 if (array.enable == 0 || size == 0 || !cpu_addr) { 1185 if (array.enable == 0 || size == 0 || !device_addr) {
1185 channel_state->vertex_buffers[index] = NULL_BINDING; 1186 channel_state->vertex_buffers[index] = NULL_BINDING;
1186 return; 1187 return;
1187 } 1188 }
1188 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1189 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1189 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); 1190 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1190 } 1191 }
1191 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1192 const BufferId buffer_id = FindBuffer(*device_addr, size);
1192 channel_state->vertex_buffers[index] = Binding{ 1193 channel_state->vertex_buffers[index] = Binding{
1193 .cpu_addr = *cpu_addr, 1194 .device_addr = *device_addr,
1194 .size = size, 1195 .size = size,
1195 .buffer_id = buffer_id, 1196 .buffer_id = buffer_id,
1196 }; 1197 };
@@ -1199,15 +1200,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1199template <class P> 1200template <class P>
1200void BufferCache<P>::UpdateDrawIndirect() { 1201void BufferCache<P>::UpdateDrawIndirect() {
1201 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { 1202 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
1202 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1203 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1203 if (!cpu_addr) { 1204 if (!device_addr) {
1204 binding = NULL_BINDING; 1205 binding = NULL_BINDING;
1205 return; 1206 return;
1206 } 1207 }
1207 binding = Binding{ 1208 binding = Binding{
1208 .cpu_addr = *cpu_addr, 1209 .device_addr = *device_addr,
1209 .size = static_cast<u32>(size), 1210 .size = static_cast<u32>(size),
1210 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), 1211 .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)),
1211 }; 1212 };
1212 }; 1213 };
1213 if (current_draw_indirect->include_count) { 1214 if (current_draw_indirect->include_count) {
@@ -1231,7 +1232,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1231 channel_state->dirty_uniform_buffers[stage] |= 1U << index; 1232 channel_state->dirty_uniform_buffers[stage] |= 1U << index;
1232 } 1233 }
1233 // Resolve buffer 1234 // Resolve buffer
1234 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1235 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1235 }); 1236 });
1236} 1237}
1237 1238
@@ -1240,7 +1241,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1240 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { 1241 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
1241 // Resolve buffer 1242 // Resolve buffer
1242 Binding& binding = channel_state->storage_buffers[stage][index]; 1243 Binding& binding = channel_state->storage_buffers[stage][index];
1243 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1244 const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size);
1244 binding.buffer_id = buffer_id; 1245 binding.buffer_id = buffer_id;
1245 }); 1246 });
1246} 1247}
@@ -1249,7 +1250,7 @@ template <class P>
1249void BufferCache<P>::UpdateTextureBuffers(size_t stage) { 1250void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1250 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { 1251 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
1251 Binding& binding = channel_state->texture_buffers[stage][index]; 1252 Binding& binding = channel_state->texture_buffers[stage][index];
1252 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1253 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1253 }); 1254 });
1254} 1255}
1255 1256
@@ -1268,14 +1269,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1268 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; 1269 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index];
1269 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; 1270 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset;
1270 const u32 size = binding.size; 1271 const u32 size = binding.size;
1271 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1272 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1272 if (binding.enable == 0 || size == 0 || !cpu_addr) { 1273 if (binding.enable == 0 || size == 0 || !device_addr) {
1273 channel_state->transform_feedback_buffers[index] = NULL_BINDING; 1274 channel_state->transform_feedback_buffers[index] = NULL_BINDING;
1274 return; 1275 return;
1275 } 1276 }
1276 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1277 const BufferId buffer_id = FindBuffer(*device_addr, size);
1277 channel_state->transform_feedback_buffers[index] = Binding{ 1278 channel_state->transform_feedback_buffers[index] = Binding{
1278 .cpu_addr = *cpu_addr, 1279 .device_addr = *device_addr,
1279 .size = size, 1280 .size = size,
1280 .buffer_id = buffer_id, 1281 .buffer_id = buffer_id,
1281 }; 1282 };
@@ -1289,13 +1290,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
1289 const auto& launch_desc = kepler_compute->launch_description; 1290 const auto& launch_desc = kepler_compute->launch_description;
1290 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { 1291 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
1291 const auto& cbuf = launch_desc.const_buffer_config[index]; 1292 const auto& cbuf = launch_desc.const_buffer_config[index];
1292 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); 1293 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
1293 if (cpu_addr) { 1294 if (device_addr) {
1294 binding.cpu_addr = *cpu_addr; 1295 binding.device_addr = *device_addr;
1295 binding.size = cbuf.size; 1296 binding.size = cbuf.size;
1296 } 1297 }
1297 } 1298 }
1298 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1299 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1299 }); 1300 });
1300} 1301}
1301 1302
@@ -1304,7 +1305,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1304 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { 1305 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
1305 // Resolve buffer 1306 // Resolve buffer
1306 Binding& binding = channel_state->compute_storage_buffers[index]; 1307 Binding& binding = channel_state->compute_storage_buffers[index];
1307 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1308 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1308 }); 1309 });
1309} 1310}
1310 1311
@@ -1312,45 +1313,63 @@ template <class P>
1312void BufferCache<P>::UpdateComputeTextureBuffers() { 1313void BufferCache<P>::UpdateComputeTextureBuffers() {
1313 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { 1314 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
1314 Binding& binding = channel_state->compute_texture_buffers[index]; 1315 Binding& binding = channel_state->compute_texture_buffers[index];
1315 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1316 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1316 }); 1317 });
1317} 1318}
1318 1319
1319template <class P> 1320template <class P>
1320void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1321void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
1321 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); 1322 memory_tracker.MarkRegionAsGpuModified(device_addr, size);
1322 1323
1323 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1324 const IntervalType base_interval{device_addr, device_addr + size};
1324 common_ranges.add(base_interval); 1325 common_ranges.add(base_interval);
1325 uncommitted_ranges.add(base_interval); 1326 uncommitted_ranges.add(base_interval);
1326} 1327}
1327 1328
1328template <class P> 1329template <class P>
1329BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { 1330BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) {
1330 if (cpu_addr == 0) { 1331 if (device_addr == 0) {
1331 return NULL_BUFFER_ID; 1332 return NULL_BUFFER_ID;
1332 } 1333 }
1333 const u64 page = cpu_addr >> CACHING_PAGEBITS; 1334 const u64 page = device_addr >> CACHING_PAGEBITS;
1334 const BufferId buffer_id = page_table[page]; 1335 const BufferId buffer_id = page_table[page];
1335 if (!buffer_id) { 1336 if (!buffer_id) {
1336 return CreateBuffer(cpu_addr, size); 1337 return CreateBuffer(device_addr, size);
1337 } 1338 }
1338 const Buffer& buffer = slot_buffers[buffer_id]; 1339 const Buffer& buffer = slot_buffers[buffer_id];
1339 if (buffer.IsInBounds(cpu_addr, size)) { 1340 if (buffer.IsInBounds(device_addr, size)) {
1340 return buffer_id; 1341 return buffer_id;
1341 } 1342 }
1342 return CreateBuffer(cpu_addr, size); 1343 return CreateBuffer(device_addr, size);
1343} 1344}
1344 1345
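Illustrative sketch, not part of the change above: FindBuffer is a single page-table lookup keyed by the caching page index, falling back to CreateBuffer on a miss or an out-of-bounds hit. The map-based page table, the FakeBuffer type, and the 64 KiB caching page below are assumptions made only to keep the example self-contained.

```cpp
#include <cstdint>
#include <cstdio>
#include <unordered_map>

using DAddr = uint64_t;
using BufferId = uint32_t; // 0 plays the role of an empty/invalid slot

constexpr uint64_t CACHING_PAGEBITS = 16; // assumed 64 KiB caching pages

struct FakeBuffer {
    DAddr base;
    uint64_t size;
    bool IsInBounds(DAddr addr, uint64_t len) const {
        return addr >= base && addr + len <= base + size;
    }
};

int main() {
    std::unordered_map<uint64_t, BufferId> page_table;
    std::unordered_map<BufferId, FakeBuffer> buffers;
    buffers[1] = FakeBuffer{0x20000, 0x30000};
    page_table[0x20000 >> CACHING_PAGEBITS] = 1;

    const DAddr device_addr = 0x21000;
    const uint64_t size = 0x100;
    const uint64_t page = device_addr >> CACHING_PAGEBITS;
    const auto it = page_table.find(page);
    if (it == page_table.end() || !buffers[it->second].IsInBounds(device_addr, size)) {
        std::puts("miss -> CreateBuffer(device_addr, size)");
    } else {
        std::printf("hit -> buffer %u\n", it->second);
    }
}
```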
1345template <class P> 1346template <class P>
1346typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1347typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr,
1347 u32 wanted_size) { 1348 u32 wanted_size) {
1348 static constexpr int STREAM_LEAP_THRESHOLD = 16; 1349 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1349 boost::container::small_vector<BufferId, 16> overlap_ids; 1350 boost::container::small_vector<BufferId, 16> overlap_ids;
1350 VAddr begin = cpu_addr; 1351 DAddr begin = device_addr;
1351 VAddr end = cpu_addr + wanted_size; 1352 DAddr end = device_addr + wanted_size;
1352 int stream_score = 0; 1353 int stream_score = 0;
1353 bool has_stream_leap = false; 1354 bool has_stream_leap = false;
1355 auto expand_begin = [&](DAddr add_value) {
1356 static constexpr DAddr min_page = CACHING_PAGESIZE + Core::DEVICE_PAGESIZE;
1357 if (add_value > begin - min_page) {
1358 begin = min_page;
1359 device_addr = Core::DEVICE_PAGESIZE;
1360 return;
1361 }
1362 begin -= add_value;
1363 device_addr = begin - CACHING_PAGESIZE;
1364 };
1365 auto expand_end = [&](DAddr add_value) {
1366 static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS;
1367 if (add_value > max_page - end) {
1368 end = max_page;
1369 return;
1370 }
1371 end += add_value;
1372 };
1354 if (begin == 0) { 1373 if (begin == 0) {
1355 return OverlapResult{ 1374 return OverlapResult{
1356 .ids = std::move(overlap_ids), 1375 .ids = std::move(overlap_ids),
@@ -1359,9 +1378,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1359 .has_stream_leap = has_stream_leap, 1378 .has_stream_leap = has_stream_leap,
1360 }; 1379 };
1361 } 1380 }
1362 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); 1381 for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1363 cpu_addr += CACHING_PAGESIZE) { 1382 device_addr += CACHING_PAGESIZE) {
1364 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; 1383 const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS];
1365 if (!overlap_id) { 1384 if (!overlap_id) {
1366 continue; 1385 continue;
1367 } 1386 }
@@ -1371,12 +1390,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1371 } 1390 }
1372 overlap_ids.push_back(overlap_id); 1391 overlap_ids.push_back(overlap_id);
1373 overlap.Pick(); 1392 overlap.Pick();
1374 const VAddr overlap_cpu_addr = overlap.CpuAddr(); 1393 const DAddr overlap_device_addr = overlap.CpuAddr();
1375 const bool expands_left = overlap_cpu_addr < begin; 1394 const bool expands_left = overlap_device_addr < begin;
1376 if (expands_left) { 1395 if (expands_left) {
1377 begin = overlap_cpu_addr; 1396 begin = overlap_device_addr;
1378 } 1397 }
1379 const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); 1398 const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes();
1380 const bool expands_right = overlap_end > end; 1399 const bool expands_right = overlap_end > end;
1381 if (overlap_end > end) { 1400 if (overlap_end > end) {
1382 end = overlap_end; 1401 end = overlap_end;
@@ -1387,11 +1406,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1387 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1406 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1388 has_stream_leap = true; 1407 has_stream_leap = true;
1389 if (expands_right) { 1408 if (expands_right) {
1390 begin -= CACHING_PAGESIZE * 256; 1409 expand_begin(CACHING_PAGESIZE * 128);
1391 cpu_addr = begin - CACHING_PAGESIZE;
1392 } 1410 }
1393 if (expands_left) { 1411 if (expands_left) {
1394 end += CACHING_PAGESIZE * 256; 1412 expand_end(CACHING_PAGESIZE * 128);
1395 } 1413 }
1396 } 1414 }
1397 } 1415 }
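Illustrative sketch, not part of the change above: the new expand_begin/expand_end lambdas saturate the stream-leap growth so that begin cannot underflow below the first usable page and end cannot overflow past the device address space. Restated as free functions; the page sizes and address-space size are assumed values, not the cache's real constants.

```cpp
#include <cstdint>
#include <cstdio>

using DAddr = uint64_t;

constexpr DAddr CACHING_PAGESIZE = 1ULL << 16;  // assumed 64 KiB caching page
constexpr DAddr DEVICE_PAGESIZE = 1ULL << 12;   // assumed 4 KiB device page
constexpr DAddr ADDRESS_SPACE_END = 1ULL << 34; // assumed device address-space size

// Grow the range start downwards without dropping below the first usable page.
void ExpandBegin(DAddr& begin, DAddr& cursor, DAddr add_value) {
    constexpr DAddr min_page = CACHING_PAGESIZE + DEVICE_PAGESIZE;
    if (add_value > begin - min_page) {
        begin = min_page;
        cursor = DEVICE_PAGESIZE;
        return;
    }
    begin -= add_value;
    cursor = begin - CACHING_PAGESIZE;
}

// Grow the range end upwards without running past the end of the address space.
void ExpandEnd(DAddr& end, DAddr add_value) {
    if (add_value > ADDRESS_SPACE_END - end) {
        end = ADDRESS_SPACE_END;
        return;
    }
    end += add_value;
}

int main() {
    DAddr begin = 0x40000;
    DAddr cursor = begin;
    DAddr end = 0x80000;
    ExpandBegin(begin, cursor, CACHING_PAGESIZE * 128); // would underflow, so it clamps
    ExpandEnd(end, CACHING_PAGESIZE * 128);             // fits, so it simply grows
    std::printf("begin=%llx cursor=%llx end=%llx\n", static_cast<unsigned long long>(begin),
                static_cast<unsigned long long>(cursor), static_cast<unsigned long long>(end));
}
```

The same clamp is why the stream-leap growth in this hunk was also reduced from 256 to 128 caching pages per expansion.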
@@ -1424,13 +1442,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1424} 1442}
1425 1443
1426template <class P> 1444template <class P>
1427BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1445BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) {
1428 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); 1446 DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE);
1429 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); 1447 device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE);
1430 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); 1448 wanted_size = static_cast<u32>(device_addr_end - device_addr);
1431 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1449 const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
1432 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1450 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1433 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1451 const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size);
1434 auto& new_buffer = slot_buffers[new_buffer_id]; 1452 auto& new_buffer = slot_buffers[new_buffer_id];
1435 const size_t size_bytes = new_buffer.SizeBytes(); 1453 const size_t size_bytes = new_buffer.SizeBytes();
1436 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); 1454 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
@@ -1465,10 +1483,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1465 total_used_memory -= Common::AlignUp(size, 1024); 1483 total_used_memory -= Common::AlignUp(size, 1024);
1466 lru_cache.Free(buffer.getLRUID()); 1484 lru_cache.Free(buffer.getLRUID());
1467 } 1485 }
1468 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1486 const DAddr device_addr_begin = buffer.CpuAddr();
1469 const VAddr cpu_addr_end = cpu_addr_begin + size; 1487 const DAddr device_addr_end = device_addr_begin + size;
1470 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; 1488 const u64 page_begin = device_addr_begin / CACHING_PAGESIZE;
1471 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); 1489 const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE);
1472 for (u64 page = page_begin; page != page_end; ++page) { 1490 for (u64 page = page_begin; page != page_end; ++page) {
1473 if constexpr (insert) { 1491 if constexpr (insert) {
1474 page_table[page] = buffer_id; 1492 page_table[page] = buffer_id;
@@ -1486,15 +1504,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1486} 1504}
1487 1505
1488template <class P> 1506template <class P>
1489bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1507bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) {
1490 boost::container::small_vector<BufferCopy, 4> copies; 1508 boost::container::small_vector<BufferCopy, 4> copies;
1491 u64 total_size_bytes = 0; 1509 u64 total_size_bytes = 0;
1492 u64 largest_copy = 0; 1510 u64 largest_copy = 0;
1493 VAddr buffer_start = buffer.CpuAddr(); 1511 DAddr buffer_start = buffer.CpuAddr();
1494 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1512 memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1495 copies.push_back(BufferCopy{ 1513 copies.push_back(BufferCopy{
1496 .src_offset = total_size_bytes, 1514 .src_offset = total_size_bytes,
1497 .dst_offset = cpu_addr_out - buffer_start, 1515 .dst_offset = device_addr_out - buffer_start,
1498 .size = range_size, 1516 .size = range_size,
1499 }); 1517 });
1500 total_size_bytes += range_size; 1518 total_size_bytes += range_size;
@@ -1526,14 +1544,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1526 std::span<u8> immediate_buffer; 1544 std::span<u8> immediate_buffer;
1527 for (const BufferCopy& copy : copies) { 1545 for (const BufferCopy& copy : copies) {
1528 std::span<const u8> upload_span; 1546 std::span<const u8> upload_span;
1529 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1547 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1530 if (IsRangeGranular(cpu_addr, copy.size)) { 1548 if (IsRangeGranular(device_addr, copy.size)) {
1531 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); 1549 upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
1532 } else { 1550 } else {
1533 if (immediate_buffer.empty()) { 1551 if (immediate_buffer.empty()) {
1534 immediate_buffer = ImmediateBuffer(largest_copy); 1552 immediate_buffer = ImmediateBuffer(largest_copy);
1535 } 1553 }
1536 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 1554 device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
1537 upload_span = immediate_buffer.subspan(0, copy.size); 1555 upload_span = immediate_buffer.subspan(0, copy.size);
1538 } 1556 }
1539 buffer.ImmediateUpload(copy.dst_offset, upload_span); 1557 buffer.ImmediateUpload(copy.dst_offset, upload_span);
@@ -1550,8 +1568,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1550 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1568 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1551 for (BufferCopy& copy : copies) { 1569 for (BufferCopy& copy : copies) {
1552 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1570 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1553 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1571 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1554 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1572 device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
1555 1573
1556 // Apply the staging offset 1574 // Apply the staging offset
1557 copy.src_offset += upload_staging.offset; 1575 copy.src_offset += upload_staging.offset;
@@ -1562,14 +1580,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1562} 1580}
1563 1581
1564template <class P> 1582template <class P>
1565bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, 1583bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
1566 std::span<const u8> inlined_buffer) { 1584 std::span<const u8> inlined_buffer) {
1567 const bool is_dirty = IsRegionRegistered(dest_address, copy_size); 1585 const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
1568 if (!is_dirty) { 1586 if (!is_dirty) {
1569 return false; 1587 return false;
1570 } 1588 }
1571 VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); 1589 DAddr aligned_start = Common::AlignDown(dest_address, DEVICE_PAGESIZE);
1572 VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); 1590 DAddr aligned_end = Common::AlignUp(dest_address + copy_size, DEVICE_PAGESIZE);
1573 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 1591 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
1574 return false; 1592 return false;
1575 } 1593 }
@@ -1580,7 +1598,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1580} 1598}
1581 1599
1582template <class P> 1600template <class P>
1583void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 1601void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
1584 std::span<const u8> inlined_buffer) { 1602 std::span<const u8> inlined_buffer) {
1585 const IntervalType subtract_interval{dest_address, dest_address + copy_size}; 1603 const IntervalType subtract_interval{dest_address, dest_address + copy_size};
1586 ClearDownload(subtract_interval); 1604 ClearDownload(subtract_interval);
@@ -1612,14 +1630,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
1612} 1630}
1613 1631
1614template <class P> 1632template <class P>
1615void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { 1633void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) {
1616 boost::container::small_vector<BufferCopy, 1> copies; 1634 boost::container::small_vector<BufferCopy, 1> copies;
1617 u64 total_size_bytes = 0; 1635 u64 total_size_bytes = 0;
1618 u64 largest_copy = 0; 1636 u64 largest_copy = 0;
1619 memory_tracker.ForEachDownloadRangeAndClear( 1637 memory_tracker.ForEachDownloadRangeAndClear(
1620 cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1638 device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1621 const VAddr buffer_addr = buffer.CpuAddr(); 1639 const DAddr buffer_addr = buffer.CpuAddr();
1622 const auto add_download = [&](VAddr start, VAddr end) { 1640 const auto add_download = [&](DAddr start, DAddr end) {
1623 const u64 new_offset = start - buffer_addr; 1641 const u64 new_offset = start - buffer_addr;
1624 const u64 new_size = end - start; 1642 const u64 new_size = end - start;
1625 copies.push_back(BufferCopy{ 1643 copies.push_back(BufferCopy{
@@ -1634,8 +1652,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1634 largest_copy = std::max(largest_copy, new_size); 1652 largest_copy = std::max(largest_copy, new_size);
1635 }; 1653 };
1636 1654
1637 const VAddr start_address = cpu_addr_out; 1655 const DAddr start_address = device_addr_out;
1638 const VAddr end_address = start_address + range_size; 1656 const DAddr end_address = start_address + range_size;
1639 ForEachInRangeSet(common_ranges, start_address, range_size, add_download); 1657 ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
1640 const IntervalType subtract_interval{start_address, end_address}; 1658 const IntervalType subtract_interval{start_address, end_address};
1641 ClearDownload(subtract_interval); 1659 ClearDownload(subtract_interval);
@@ -1658,18 +1676,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1658 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); 1676 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
1659 runtime.Finish(); 1677 runtime.Finish();
1660 for (const BufferCopy& copy : copies) { 1678 for (const BufferCopy& copy : copies) {
1661 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1679 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1662 // Undo the modified offset 1680 // Undo the modified offset
1663 const u64 dst_offset = copy.dst_offset - download_staging.offset; 1681 const u64 dst_offset = copy.dst_offset - download_staging.offset;
1664 const u8* copy_mapped_memory = mapped_memory + dst_offset; 1682 const u8* copy_mapped_memory = mapped_memory + dst_offset;
1665 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); 1683 device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
1666 } 1684 }
1667 } else { 1685 } else {
1668 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 1686 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
1669 for (const BufferCopy& copy : copies) { 1687 for (const BufferCopy& copy : copies) {
1670 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 1688 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
1671 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1689 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1672 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); 1690 device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size);
1673 } 1691 }
1674 } 1692 }
1675} 1693}
@@ -1758,20 +1776,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1758 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); 1776 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1759 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; 1777 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
1760 1778
1761 const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); 1779 const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1762 if (!aligned_cpu_addr || size == 0) { 1780 if (!aligned_device_addr || size == 0) {
1763 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1781 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
1764 return NULL_BINDING; 1782 return NULL_BINDING;
1765 } 1783 }
1766 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1784 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1767 ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", 1785 ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}",
1768 cbuf_index); 1786 cbuf_index);
1769 // The end address used for size calculation does not need to be aligned 1787 // The end address used for size calculation does not need to be aligned
1770 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1788 const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::DEVICE_PAGESIZE);
1771 1789
1772 const Binding binding{ 1790 const Binding binding{
1773 .cpu_addr = *aligned_cpu_addr, 1791 .device_addr = *aligned_device_addr,
1774 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), 1792 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr),
1775 .buffer_id = BufferId{}, 1793 .buffer_id = BufferId{},
1776 }; 1794 };
1777 return binding; 1795 return binding;
@@ -1780,15 +1798,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1780template <class P> 1798template <class P>
1781TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 1799TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
1782 PixelFormat format) { 1800 PixelFormat format) {
1783 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1801 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1784 TextureBufferBinding binding; 1802 TextureBufferBinding binding;
1785 if (!cpu_addr || size == 0) { 1803 if (!device_addr || size == 0) {
1786 binding.cpu_addr = 0; 1804 binding.device_addr = 0;
1787 binding.size = 0; 1805 binding.size = 0;
1788 binding.buffer_id = NULL_BUFFER_ID; 1806 binding.buffer_id = NULL_BUFFER_ID;
1789 binding.format = PixelFormat::Invalid; 1807 binding.format = PixelFormat::Invalid;
1790 } else { 1808 } else {
1791 binding.cpu_addr = *cpu_addr; 1809 binding.device_addr = *device_addr;
1792 binding.size = size; 1810 binding.size = size;
1793 binding.buffer_id = BufferId{}; 1811 binding.buffer_id = BufferId{};
1794 binding.format = format; 1812 binding.format = format;
@@ -1797,14 +1815,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr,
1797} 1815}
1798 1816
1799template <class P> 1817template <class P>
1800std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { 1818std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) {
1801 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); 1819 u8* const base_pointer = device_memory.GetPointer<u8>(device_addr);
1802 if (IsRangeGranular(cpu_addr, size) || 1820 if (IsRangeGranular(device_addr, size) ||
1803 base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { 1821 base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) {
1804 return std::span(base_pointer, size); 1822 return std::span(base_pointer, size);
1805 } else { 1823 } else {
1806 const std::span<u8> span = ImmediateBuffer(size); 1824 const std::span<u8> span = ImmediateBuffer(size);
1807 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 1825 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
1808 return span; 1826 return span;
1809 } 1827 }
1810} 1828}
@@ -1828,13 +1846,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
1828template <class P> 1846template <class P>
1829std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { 1847std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
1830 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; 1848 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
1831 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); 1849 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr));
1832} 1850}
1833 1851
1834template <class P> 1852template <class P>
1835std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { 1853std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
1836 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; 1854 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
1837 return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); 1855 return std::make_pair(&buffer,
1856 buffer.Offset(channel_state->indirect_buffer_binding.device_addr));
1838} 1857}
1839 1858
1840} // namespace VideoCommon 1859} // namespace VideoCommon
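
For context on the upload path in the hunks above: SynchronizeBuffer walks the dirty device-address ranges reported by the memory tracker and packs them into BufferCopy entries, where src_offset advances contiguously through the staging allocation and dst_offset is the range's distance from the buffer's base device address. A rough standalone sketch of that packing, with made-up addresses that are not part of the commit:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct BufferCopy {
    std::uint64_t src_offset; // offset into the staging upload buffer
    std::uint64_t dst_offset; // offset from the buffer's base device address
    std::uint64_t size;
};

int main() {
    const std::uint64_t buffer_start = 0x10000; // hypothetical DAddr of the cached buffer
    const std::pair<std::uint64_t, std::uint64_t> dirty_ranges[] = {
        {0x10100, 0x80}, {0x10400, 0x200}}; // (device address, size), illustrative only

    std::vector<BufferCopy> copies;
    std::uint64_t total_size_bytes = 0;
    for (const auto& [device_addr, range_size] : dirty_ranges) {
        copies.push_back({total_size_bytes, device_addr - buffer_start, range_size});
        total_size_bytes += range_size;
    }
    for (const BufferCopy& copy : copies) {
        std::printf("staging +0x%llx -> buffer +0x%llx, 0x%llx bytes\n",
                    static_cast<unsigned long long>(copy.src_offset),
                    static_cast<unsigned long long>(copy.dst_offset),
                    static_cast<unsigned long long>(copy.size));
    }
}
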
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index d6d696d8c..80dbb81e7 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -32,7 +32,6 @@
32#include "common/microprofile.h" 32#include "common/microprofile.h"
33#include "common/scope_exit.h" 33#include "common/scope_exit.h"
34#include "common/settings.h" 34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h" 35#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h" 36#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h" 37#include "video_core/delayed_destruction_ring.h"
@@ -41,7 +40,6 @@
41#include "video_core/engines/kepler_compute.h" 40#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h" 41#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h" 42#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h" 43#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h" 44#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h" 45#include "video_core/texture_cache/types.h"
@@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0};
94static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); 92static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
95 93
96struct Binding { 94struct Binding {
97 VAddr cpu_addr{}; 95 DAddr device_addr{};
98 u32 size{}; 96 u32 size{};
99 BufferId buffer_id; 97 BufferId buffer_id;
100}; 98};
@@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding {
104}; 102};
105 103
106static constexpr Binding NULL_BINDING{ 104static constexpr Binding NULL_BINDING{
107 .cpu_addr = 0, 105 .device_addr = 0,
108 .size = 0, 106 .size = 0,
109 .buffer_id = NULL_BUFFER_ID, 107 .buffer_id = NULL_BUFFER_ID,
110}; 108};
@@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
204 using Async_Buffer = typename P::Async_Buffer; 202 using Async_Buffer = typename P::Async_Buffer;
205 using MemoryTracker = typename P::MemoryTracker; 203 using MemoryTracker = typename P::MemoryTracker;
206 204
207 using IntervalCompare = std::less<VAddr>; 205 using IntervalCompare = std::less<DAddr>;
208 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; 206 using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
209 using IntervalAllocator = boost::fast_pool_allocator<VAddr>; 207 using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
210 using IntervalSet = boost::icl::interval_set<VAddr>; 208 using IntervalSet = boost::icl::interval_set<DAddr>;
211 using IntervalType = typename IntervalSet::interval_type; 209 using IntervalType = typename IntervalSet::interval_type;
212 210
213 template <typename Type> 211 template <typename Type>
@@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
230 228
231 using OverlapCombine = counter_add_functor<int>; 229 using OverlapCombine = counter_add_functor<int>;
232 using OverlapSection = boost::icl::inter_section<int>; 230 using OverlapSection = boost::icl::inter_section<int>;
233 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 231 using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
234 232
235 struct OverlapResult { 233 struct OverlapResult {
236 boost::container::small_vector<BufferId, 16> ids; 234 boost::container::small_vector<BufferId, 16> ids;
237 VAddr begin; 235 DAddr begin;
238 VAddr end; 236 DAddr end;
239 bool has_stream_leap = false; 237 bool has_stream_leap = false;
240 }; 238 };
241 239
242public: 240public:
243 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 241 explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
244 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
245 242
246 void TickFrame(); 243 void TickFrame();
247 244
248 void WriteMemory(VAddr cpu_addr, u64 size); 245 void WriteMemory(DAddr device_addr, u64 size);
249 246
250 void CachedWriteMemory(VAddr cpu_addr, u64 size); 247 void CachedWriteMemory(DAddr device_addr, u64 size);
251 248
252 bool OnCPUWrite(VAddr cpu_addr, u64 size); 249 bool OnCPUWrite(DAddr device_addr, u64 size);
253 250
254 void DownloadMemory(VAddr cpu_addr, u64 size); 251 void DownloadMemory(DAddr device_addr, u64 size);
255 252
256 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); 253 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size);
257 254
258 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); 255 bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
259 256
260 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); 257 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
261 258
@@ -300,7 +297,7 @@ public:
300 ObtainBufferSynchronize sync_info, 297 ObtainBufferSynchronize sync_info,
301 ObtainBufferOperation post_op); 298 ObtainBufferOperation post_op);
302 299
303 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, 300 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size,
304 ObtainBufferSynchronize sync_info, 301 ObtainBufferSynchronize sync_info,
305 ObtainBufferOperation post_op); 302 ObtainBufferOperation post_op);
306 void FlushCachedWrites(); 303 void FlushCachedWrites();
@@ -326,13 +323,13 @@ public:
326 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); 323 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
327 324
328 /// Return true when a CPU region is modified from the GPU 325 /// Return true when a CPU region is modified from the GPU
329 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 326 [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size);
330 327
331 /// Return true when a region is registered on the cache 328 /// Return true when a region is registered on the cache
332 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); 329 [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size);
333 330
334 /// Return true when a CPU region is modified from the CPU 331 /// Return true when a CPU region is modified from the CPU
335 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 332 [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size);
336 333
337 void SetDrawIndirect( 334 void SetDrawIndirect(
338 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { 335 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
@@ -366,9 +363,9 @@ private:
366 } 363 }
367 364
368 template <typename Func> 365 template <typename Func>
369 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { 366 void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) {
370 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); 367 const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
371 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { 368 for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
372 const BufferId buffer_id = page_table[page]; 369 const BufferId buffer_id = page_table[page];
373 if (!buffer_id) { 370 if (!buffer_id) {
374 ++page; 371 ++page;
@@ -377,15 +374,15 @@ private:
377 Buffer& buffer = slot_buffers[buffer_id]; 374 Buffer& buffer = slot_buffers[buffer_id];
378 func(buffer_id, buffer); 375 func(buffer_id, buffer);
379 376
380 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 377 const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
381 page = Common::DivCeil(end_addr, CACHING_PAGESIZE); 378 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
382 } 379 }
383 } 380 }
384 381
385 template <typename Func> 382 template <typename Func>
386 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { 383 void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
387 const VAddr start_address = cpu_addr; 384 const DAddr start_address = device_addr;
388 const VAddr end_address = start_address + size; 385 const DAddr end_address = start_address + size;
389 const IntervalType search_interval{start_address, end_address}; 386 const IntervalType search_interval{start_address, end_address};
390 auto it = current_range.lower_bound(search_interval); 387 auto it = current_range.lower_bound(search_interval);
391 if (it == current_range.end()) { 388 if (it == current_range.end()) {
@@ -393,8 +390,8 @@ private:
393 } 390 }
394 auto end_it = current_range.upper_bound(search_interval); 391 auto end_it = current_range.upper_bound(search_interval);
395 for (; it != end_it; it++) { 392 for (; it != end_it; it++) {
396 VAddr inter_addr_end = it->upper(); 393 DAddr inter_addr_end = it->upper();
397 VAddr inter_addr = it->lower(); 394 DAddr inter_addr = it->lower();
398 if (inter_addr_end > end_address) { 395 if (inter_addr_end > end_address) {
399 inter_addr_end = end_address; 396 inter_addr_end = end_address;
400 } 397 }
@@ -406,10 +403,10 @@ private:
406 } 403 }
407 404
408 template <typename Func> 405 template <typename Func>
409 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, 406 void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
410 Func&& func) { 407 Func&& func) {
411 const VAddr start_address = cpu_addr; 408 const DAddr start_address = device_addr;
412 const VAddr end_address = start_address + size; 409 const DAddr end_address = start_address + size;
413 const IntervalType search_interval{start_address, end_address}; 410 const IntervalType search_interval{start_address, end_address};
414 auto it = current_range.lower_bound(search_interval); 411 auto it = current_range.lower_bound(search_interval);
415 if (it == current_range.end()) { 412 if (it == current_range.end()) {
@@ -418,8 +415,8 @@ private:
418 auto end_it = current_range.upper_bound(search_interval); 415 auto end_it = current_range.upper_bound(search_interval);
419 for (; it != end_it; it++) { 416 for (; it != end_it; it++) {
420 auto& inter = it->first; 417 auto& inter = it->first;
421 VAddr inter_addr_end = inter.upper(); 418 DAddr inter_addr_end = inter.upper();
422 VAddr inter_addr = inter.lower(); 419 DAddr inter_addr = inter.lower();
423 if (inter_addr_end > end_address) { 420 if (inter_addr_end > end_address) {
424 inter_addr_end = end_address; 421 inter_addr_end = end_address;
425 } 422 }
@@ -451,9 +448,9 @@ private:
451 } while (any_removals); 448 } while (any_removals);
452 } 449 }
453 450
454 static bool IsRangeGranular(VAddr cpu_addr, size_t size) { 451 static bool IsRangeGranular(DAddr device_addr, size_t size) {
455 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == 452 return (device_addr & ~Core::DEVICE_PAGEMASK) ==
456 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); 453 ((device_addr + size) & ~Core::DEVICE_PAGEMASK);
457 } 454 }
458 455
459 void RunGarbageCollector(); 456 void RunGarbageCollector();
@@ -508,15 +505,15 @@ private:
508 505
509 void UpdateComputeTextureBuffers(); 506 void UpdateComputeTextureBuffers();
510 507
511 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); 508 void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size);
512 509
513 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); 510 [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size);
514 511
515 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); 512 [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size);
516 513
517 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); 514 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
518 515
519 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); 516 [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size);
520 517
521 void Register(BufferId buffer_id); 518 void Register(BufferId buffer_id);
522 519
@@ -527,7 +524,7 @@ private:
527 524
528 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; 525 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
529 526
530 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 527 bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size);
531 528
532 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 529 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
533 std::span<BufferCopy> copies); 530 std::span<BufferCopy> copies);
@@ -539,7 +536,7 @@ private:
539 536
540 void DownloadBufferMemory(Buffer& buffer_id); 537 void DownloadBufferMemory(Buffer& buffer_id);
541 538
542 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); 539 void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size);
543 540
544 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); 541 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
545 542
@@ -549,7 +546,7 @@ private:
549 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 546 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
550 PixelFormat format); 547 PixelFormat format);
551 548
552 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); 549 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size);
553 550
554 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); 551 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
555 552
@@ -557,11 +554,10 @@ private:
557 554
558 void ClearDownload(IntervalType subtract_interval); 555 void ClearDownload(IntervalType subtract_interval);
559 556
560 void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 557 void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
561 std::span<const u8> inlined_buffer); 558 std::span<const u8> inlined_buffer);
562 559
563 VideoCore::RasterizerInterface& rasterizer; 560 Tegra::MaxwellDeviceMemoryManager& device_memory;
564 Core::Memory::Memory& cpu_memory;
565 561
566 SlotVector<Buffer> slot_buffers; 562 SlotVector<Buffer> slot_buffers;
567 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 563 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -598,7 +594,7 @@ private:
598 u64 critical_memory = 0; 594 u64 critical_memory = 0;
599 BufferId inline_buffer_id; 595 BufferId inline_buffer_id;
600 596
601 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; 597 std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
602 Common::ScratchBuffer<u8> tmp_buffer; 598 Common::ScratchBuffer<u8> tmp_buffer;
603}; 599};
604 600
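
The page_table resize at the end of this header follows from the address-space switch: the cache previously covered the 39-bit CPU virtual address space and now covers the 34-bit device address space, so the flat lookup table shrinks by a factor of 32. A quick check of the arithmetic, assuming 64 KiB caching pages (CACHING_PAGEBITS == 16 is an assumption; the value is not shown in this hunk):

#include <cstdint>
#include <cstdio>

int main() {
    constexpr std::uint64_t caching_page_bits = 16; // assumed; corresponds to 64 KiB caching pages
    constexpr std::uint64_t old_entries = (1ULL << 39) >> caching_page_bits; // 8,388,608
    constexpr std::uint64_t new_entries = (1ULL << 34) >> caching_page_bits; //   262,144
    std::printf("page_table entries: %llu -> %llu (%llux smaller)\n",
                static_cast<unsigned long long>(old_entries),
                static_cast<unsigned long long>(new_entries),
                static_cast<unsigned long long>(old_entries / new_entries));
}
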
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
index 6c1c8287b..c95eed1f6 100644
--- a/src/video_core/buffer_cache/memory_tracker_base.h
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -17,19 +17,19 @@
17 17
18namespace VideoCommon { 18namespace VideoCommon {
19 19
20template <class RasterizerInterface> 20template <typename DeviceTracker>
21class MemoryTrackerBase { 21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39; 22 static constexpr size_t MAX_CPU_PAGE_BITS = 34;
23 static constexpr size_t HIGHER_PAGE_BITS = 22; 23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; 24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; 25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); 26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32; 27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; 28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; 29 using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>;
30 30
31public: 31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} 32 MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {}
33 ~MemoryTrackerBase() = default; 33 ~MemoryTrackerBase() = default;
34 34
35 /// Returns the inclusive CPU modified range in a begin end pair 35 /// Returns the inclusive CPU modified range in a begin end pair
@@ -74,7 +74,7 @@ public:
74 }); 74 });
75 } 75 }
76 76
77 /// Mark region as CPU modified, notifying the rasterizer about this change 77 /// Mark region as CPU modified, notifying the device_tracker about this change
78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
79 IteratePages<true>(dirty_cpu_addr, query_size, 79 IteratePages<true>(dirty_cpu_addr, query_size,
80 [](Manager* manager, u64 offset, size_t size) { 80 [](Manager* manager, u64 offset, size_t size) {
@@ -83,7 +83,7 @@ public:
83 }); 83 });
84 } 84 }
85 85
86 /// Unmark region as CPU modified, notifying the rasterizer about this change 86 /// Unmark region as CPU modified, notifying the device_tracker about this change
87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
88 IteratePages<true>(dirty_cpu_addr, query_size, 88 IteratePages<true>(dirty_cpu_addr, query_size,
89 [](Manager* manager, u64 offset, size_t size) { 89 [](Manager* manager, u64 offset, size_t size) {
@@ -139,7 +139,7 @@ public:
139 }); 139 });
140 } 140 }
141 141
142 /// Flushes cached CPU writes, and notify the rasterizer about the deltas 142 /// Flushes cached CPU writes, and notify the device_tracker about the deltas
143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { 143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
144 IteratePages<false>(query_cpu_addr, query_size, 144 IteratePages<false>(query_cpu_addr, query_size,
145 [](Manager* manager, [[maybe_unused]] u64 offset, 145 [](Manager* manager, [[maybe_unused]] u64 offset,
@@ -280,7 +280,7 @@ private:
280 manager_pool.emplace_back(); 280 manager_pool.emplace_back();
281 auto& last_pool = manager_pool.back(); 281 auto& last_pool = manager_pool.back();
282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { 282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
283 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); 283 new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE);
284 free_managers.push_back(&last_pool[i]); 284 free_managers.push_back(&last_pool[i]);
285 } 285 }
286 return on_return(); 286 return on_return();
@@ -293,7 +293,7 @@ private:
293 293
294 std::unordered_set<u32> cached_pages; 294 std::unordered_set<u32> cached_pages;
295 295
296 RasterizerInterface* rasterizer = nullptr; 296 DeviceTracker* device_tracker = nullptr;
297}; 297};
298 298
299} // namespace VideoCommon 299} // namespace VideoCommon
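
The constants above fix the tracker's geometry: the 34-bit device address space is split into HIGHER_PAGE_SIZE (4 MiB) regions, each backed by one WordManager drawn from pools of MANAGER_POOL_SIZE entries. A small sketch of the derived sizes, assuming a 4 KiB device page (DEVICE_PAGESIZE is not shown in this hunk):

#include <cstddef>
#include <cstdio>

int main() {
    constexpr std::size_t max_device_page_bits = 34; // MAX_CPU_PAGE_BITS after this change
    constexpr std::size_t higher_page_bits = 22;     // HIGHER_PAGE_BITS
    constexpr std::size_t device_page_size = 0x1000; // assumed 4 KiB device page
    constexpr std::size_t pages_per_word = 64;       // one bit per page in a u64 word

    constexpr std::size_t num_high_pages = std::size_t{1} << (max_device_page_bits - higher_page_bits);
    constexpr std::size_t bytes_per_word = pages_per_word * device_page_size;
    constexpr std::size_t words_stack_needed = (std::size_t{1} << higher_page_bits) / bytes_per_word;

    std::printf("4 MiB regions: %zu, words per WordManager: %zu\n",
                num_high_pages, words_stack_needed);
}
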
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index a336bde41..3db9d8b42 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -13,12 +13,12 @@
13#include "common/common_funcs.h" 13#include "common/common_funcs.h"
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "common/div_ceil.h" 15#include "common/div_ceil.h"
16#include "core/memory.h" 16#include "video_core/host1x/gpu_device_memory_manager.h"
17 17
18namespace VideoCommon { 18namespace VideoCommon {
19 19
20constexpr u64 PAGES_PER_WORD = 64; 20constexpr u64 PAGES_PER_WORD = 64;
21constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; 21constexpr u64 BYTES_PER_PAGE = Core::DEVICE_PAGESIZE;
22constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; 22constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
23 23
24enum class Type { 24enum class Type {
@@ -163,11 +163,11 @@ struct Words {
163 WordsArray<stack_words> preflushable; 163 WordsArray<stack_words> preflushable;
164}; 164};
165 165
166template <class RasterizerInterface, size_t stack_words = 1> 166template <class DeviceTracker, size_t stack_words = 1>
167class WordManager { 167class WordManager {
168public: 168public:
169 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) 169 explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes)
170 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} 170 : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {}
171 171
172 explicit WordManager() = default; 172 explicit WordManager() = default;
173 173
@@ -279,7 +279,7 @@ public:
279 } 279 }
280 280
281 /** 281 /**
282 * Loop over each page in the given range, turn off those bits and notify the rasterizer if 282 * Loop over each page in the given range, turn off those bits and notify the tracker if
283 * needed. Call the given function on each turned off range. 283 * needed. Call the given function on each turned off range.
284 * 284 *
285 * @param query_cpu_range Base CPU address to loop over 285 * @param query_cpu_range Base CPU address to loop over
@@ -459,26 +459,26 @@ private:
459 } 459 }
460 460
461 /** 461 /**
462 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer 462 * Notify tracker about changes in the CPU tracking state of a word in the buffer
463 * 463 *
464 * @param word_index Index to the word to notify to the rasterizer 464 * @param word_index Index to the word to notify to the tracker
465 * @param current_bits Current state of the word 465 * @param current_bits Current state of the word
466 * @param new_bits New state of the word 466 * @param new_bits New state of the word
467 * 467 *
468 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 468 * @tparam add_to_tracker True when the tracker should start tracking the new pages
469 */ 469 */
470 template <bool add_to_rasterizer> 470 template <bool add_to_tracker>
471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { 471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
472 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 472 u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
474 IteratePages(changed_bits, [&](size_t offset, size_t size) { 474 IteratePages(changed_bits, [&](size_t offset, size_t size) {
475 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, 475 tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, size * BYTES_PER_PAGE,
476 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); 476 add_to_tracker ? 1 : -1);
477 }); 477 });
478 } 478 }
479 479
480 VAddr cpu_addr = 0; 480 VAddr cpu_addr = 0;
481 RasterizerInterface* rasterizer = nullptr; 481 DeviceTracker* tracker = nullptr;
482 Words<stack_words> words; 482 Words<stack_words> words;
483}; 483};
484 484
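
NotifyRasterizer above reports tracking-state changes one 64-bit word at a time: each bit covers one device page, only bits that actually flip are kept, and contiguous runs are forwarded to the device tracker as +1/-1 updates through UpdatePagesCachedCount. A simplified, self-contained sketch of that bit walk (inputs are illustrative; it only mirrors the changed_bits formula from the hunk above):

#include <bit>
#include <cstdint>
#include <cstdio>

// Each bit of a word covers one device page; only bits that change state are
// reported, as +1 (start tracking) or -1 (stop tracking) per contiguous run.
template <bool add_to_tracker>
void NotifyChangedPages(std::uint64_t current_bits, std::uint64_t new_bits,
                        std::uint64_t word_base_addr, std::uint64_t page_size) {
    std::uint64_t changed = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
    while (changed != 0) {
        const int first = std::countr_zero(changed);               // first changed page in the word
        const int length = std::countr_zero(~(changed >> first));  // length of the contiguous run
        std::printf("update pages at 0x%llx, count %d, delta %+d\n",
                    static_cast<unsigned long long>(word_base_addr + first * page_size),
                    length, add_to_tracker ? 1 : -1);
        if (first + length >= 64) {
            break; // the run reaches the top bit of the word
        }
        changed &= ~(((std::uint64_t{1} << length) - 1) << first);
    }
}

int main() {
    // Example bit patterns only; a real word is filled in by the page iteration above.
    NotifyChangedPages<true>(0b1111'0000, 0b0011'1100, /*word_base_addr=*/0x10000,
                             /*page_size=*/0x1000);
}
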
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 58ce0d8c2..fb2060ca4 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -5,10 +5,10 @@
5#include "common/microprofile.h" 5#include "common/microprofile.h"
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/memory.h"
9#include "video_core/dma_pusher.h" 8#include "video_core/dma_pusher.h"
10#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
11#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/guest_memory.h"
12#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
13 13
14namespace Tegra { 14namespace Tegra {
@@ -85,15 +85,15 @@ bool DmaPusher::Step() {
85 } 85 }
86 } 86 }
87 const auto safe_process = [&] { 87 const auto safe_process = [&] {
88 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 88 Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
89 Core::Memory::GuestMemoryFlags::SafeRead> 89 Tegra::Memory::GuestMemoryFlags::SafeRead>
90 headers(memory_manager, dma_state.dma_get, command_list_header.size, 90 headers(memory_manager, dma_state.dma_get, command_list_header.size,
91 &command_headers); 91 &command_headers);
92 ProcessCommands(headers); 92 ProcessCommands(headers);
93 }; 93 };
94 const auto unsafe_process = [&] { 94 const auto unsafe_process = [&] {
95 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 95 Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader,
96 Core::Memory::GuestMemoryFlags::UnsafeRead> 96 Tegra::Memory::GuestMemoryFlags::UnsafeRead>
97 headers(memory_manager, dma_state.dma_get, command_list_header.size, 97 headers(memory_manager, dma_state.dma_get, command_list_header.size,
98 &command_headers); 98 &command_headers);
99 ProcessCommands(headers); 99 ProcessCommands(headers);
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index bc64d4486..e5cc04ec4 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -5,8 +5,8 @@
5 5
6#include "common/algorithm.h" 6#include "common/algorithm.h"
7#include "common/assert.h" 7#include "common/assert.h"
8#include "core/memory.h"
9#include "video_core/engines/engine_upload.h" 8#include "video_core/engines/engine_upload.h"
9#include "video_core/guest_memory.h"
10#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/textures/decoders.h" 12#include "video_core/textures/decoders.h"
@@ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) {
68 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, 68 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
69 regs.dest.BlockHeight(), regs.dest.BlockDepth()); 69 regs.dest.BlockHeight(), regs.dest.BlockDepth());
70 70
71 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 71 Tegra::Memory::GpuGuestMemoryScoped<u8,
72 Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
72 tmp(memory_manager, address, dst_size, &tmp_buffer); 73 tmp(memory_manager, address, dst_size, &tmp_buffer);
73 74
74 Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, 75 Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height,
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 95ba4f76c..a94e1f043 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,7 +9,6 @@
9#include "common/settings.h" 9#include "common/settings.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_timing.h" 11#include "core/core_timing.h"
12#include "core/memory.h"
13#include "video_core/dirty_flags.h" 12#include "video_core/dirty_flags.h"
14#include "video_core/engines/draw_manager.h" 13#include "video_core/engines/draw_manager.h"
15#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 56fbff306..2ebd21fc5 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -8,9 +8,9 @@
8#include "common/polyfill_ranges.h" 8#include "common/polyfill_ranges.h"
9#include "common/settings.h" 9#include "common/settings.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/memory.h"
12#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/maxwell_dma.h" 12#include "video_core/engines/maxwell_dma.h"
13#include "video_core/guest_memory.h"
14#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
15#include "video_core/renderer_base.h" 15#include "video_core/renderer_base.h"
16#include "video_core/textures/decoders.h" 16#include "video_core/textures/decoders.h"
@@ -133,8 +133,8 @@ void MaxwellDMA::Launch() {
133 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 133 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
134 read_buffer.resize_destructive(16); 134 read_buffer.resize_destructive(16);
135 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 135 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
136 Core::Memory::GpuGuestMemoryScoped< 136 Tegra::Memory::GpuGuestMemoryScoped<
137 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 137 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
138 tmp_write_buffer(memory_manager, 138 tmp_write_buffer(memory_manager,
139 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 139 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
140 16, &read_buffer); 140 16, &read_buffer);
@@ -146,16 +146,16 @@ void MaxwellDMA::Launch() {
146 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 146 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
147 read_buffer.resize_destructive(16); 147 read_buffer.resize_destructive(16);
148 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 148 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
149 Core::Memory::GpuGuestMemoryScoped< 149 Tegra::Memory::GpuGuestMemoryScoped<
150 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 150 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
151 tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); 151 tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer);
152 tmp_write_buffer.SetAddressAndSize( 152 tmp_write_buffer.SetAddressAndSize(
153 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); 153 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16);
154 } 154 }
155 } else { 155 } else {
156 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 156 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
157 Core::Memory::GpuGuestMemoryScoped< 157 Tegra::Memory::GpuGuestMemoryScoped<
158 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 158 u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
159 tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, 159 tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in,
160 &read_buffer); 160 &read_buffer);
161 tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); 161 tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in);
@@ -226,9 +226,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
226 226
227 const size_t dst_size = dst_operand.pitch * regs.line_count; 227 const size_t dst_size = dst_operand.pitch * regs.line_count;
228 228
229 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 229 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
230 memory_manager, src_operand.address, src_size, &read_buffer); 230 memory_manager, src_operand.address, src_size, &read_buffer);
231 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> 231 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite>
232 tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); 232 tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer);
233 233
234 UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, 234 UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
@@ -290,9 +290,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
290 290
291 GPUVAddr src_addr = regs.offset_in; 291 GPUVAddr src_addr = regs.offset_in;
292 GPUVAddr dst_addr = regs.offset_out; 292 GPUVAddr dst_addr = regs.offset_out;
293 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 293 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
294 memory_manager, src_addr, src_size, &read_buffer); 294 memory_manager, src_addr, src_size, &read_buffer);
295 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> 295 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite>
296 tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); 296 tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer);
297 297
298 // If the input is linear and the output is tiled, swizzle the input and copy it over. 298 // If the input is linear and the output is tiled, swizzle the input and copy it over.
@@ -344,9 +344,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
344 344
345 intermediate_buffer.resize_destructive(mid_buffer_size); 345 intermediate_buffer.resize_destructive(mid_buffer_size);
346 346
347 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( 347 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
348 memory_manager, regs.offset_in, src_size, &read_buffer); 348 memory_manager, regs.offset_in, src_size, &read_buffer);
349 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> 349 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite>
350 tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); 350 tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer);
351 351
352 UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, 352 UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height,
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
index 67ce9134b..4bc079024 100644
--- a/src/video_core/engines/sw_blitter/blitter.cpp
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -8,6 +8,7 @@
8#include "common/scratch_buffer.h" 8#include "common/scratch_buffer.h"
9#include "video_core/engines/sw_blitter/blitter.h" 9#include "video_core/engines/sw_blitter/blitter.h"
10#include "video_core/engines/sw_blitter/converter.h" 10#include "video_core/engines/sw_blitter/converter.h"
11#include "video_core/guest_memory.h"
11#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
12#include "video_core/surface.h" 13#include "video_core/surface.h"
13#include "video_core/textures/decoders.h" 14#include "video_core/textures/decoders.h"
@@ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
160 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); 161 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
161 const size_t src_size = get_surface_size(src, src_bytes_per_pixel); 162 const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
162 163
163 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( 164 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer(
164 memory_manager, src.Address(), src_size, &impl->tmp_buffer); 165 memory_manager, src.Address(), src_size, &impl->tmp_buffer);
165 166
166 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; 167 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
@@ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
220 } 221 }
221 222
222 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); 223 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
223 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> 224 Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite>
224 tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); 225 tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer);
225 226
226 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { 227 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index 5f3bffcab..856f4bd52 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -14,7 +14,7 @@ namespace Tegra {
14 * Struct describing framebuffer configuration 14 * Struct describing framebuffer configuration
15 */ 15 */
16struct FramebufferConfig { 16struct FramebufferConfig {
17 VAddr address{}; 17 DAddr address{};
18 u32 offset{}; 18 u32 offset{};
19 u32 width{}; 19 u32 width{};
20 u32 height{}; 20 u32 height{};
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 11549d448..609704b33 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -85,7 +85,8 @@ struct GPU::Impl {
85 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { 85 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
86 renderer = std::move(renderer_); 86 renderer = std::move(renderer_);
87 rasterizer = renderer->ReadRasterizer(); 87 rasterizer = renderer->ReadRasterizer();
88 host1x.MemoryManager().BindRasterizer(rasterizer); 88 host1x.MemoryManager().BindInterface(rasterizer);
89 host1x.GMMU().BindRasterizer(rasterizer);
89 } 90 }
90 91
91 /// Flush all current written commands into the host GPU for execution. 92 /// Flush all current written commands into the host GPU for execution.
@@ -95,8 +96,8 @@ struct GPU::Impl {
95 96
96 /// Synchronizes CPU writes with Host GPU memory. 97 /// Synchronizes CPU writes with Host GPU memory.
97 void InvalidateGPUCache() { 98 void InvalidateGPUCache() {
98 std::function<void(VAddr, size_t)> callback_writes( 99 std::function<void(PAddr, size_t)> callback_writes(
99 [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); 100 [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
100 system.GatherGPUDirtyMemory(callback_writes); 101 system.GatherGPUDirtyMemory(callback_writes);
101 } 102 }
102 103
@@ -279,11 +280,11 @@ struct GPU::Impl {
279 } 280 }
280 281
281 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 282 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
282 void FlushRegion(VAddr addr, u64 size) { 283 void FlushRegion(DAddr addr, u64 size) {
283 gpu_thread.FlushRegion(addr, size); 284 gpu_thread.FlushRegion(addr, size);
284 } 285 }
285 286
286 VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { 287 VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
287 auto raster_area = rasterizer->GetFlushArea(addr, size); 288 auto raster_area = rasterizer->GetFlushArea(addr, size);
288 if (raster_area.preemtive) { 289 if (raster_area.preemtive) {
289 return raster_area; 290 return raster_area;
@@ -299,16 +300,16 @@ struct GPU::Impl {
299 } 300 }
300 301
301 /// Notify rasterizer that any caches of the specified region should be invalidated 302 /// Notify rasterizer that any caches of the specified region should be invalidated
302 void InvalidateRegion(VAddr addr, u64 size) { 303 void InvalidateRegion(DAddr addr, u64 size) {
303 gpu_thread.InvalidateRegion(addr, size); 304 gpu_thread.InvalidateRegion(addr, size);
304 } 305 }
305 306
306 bool OnCPUWrite(VAddr addr, u64 size) { 307 bool OnCPUWrite(DAddr addr, u64 size) {
307 return rasterizer->OnCPUWrite(addr, size); 308 return rasterizer->OnCPUWrite(addr, size);
308 } 309 }
309 310
310 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 311 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
311 void FlushAndInvalidateRegion(VAddr addr, u64 size) { 312 void FlushAndInvalidateRegion(DAddr addr, u64 size) {
312 gpu_thread.FlushAndInvalidateRegion(addr, size); 313 gpu_thread.FlushAndInvalidateRegion(addr, size);
313 } 314 }
314 315
@@ -437,7 +438,7 @@ void GPU::OnCommandListEnd() {
437 impl->OnCommandListEnd(); 438 impl->OnCommandListEnd();
438} 439}
439 440
440u64 GPU::RequestFlush(VAddr addr, std::size_t size) { 441u64 GPU::RequestFlush(DAddr addr, std::size_t size) {
441 return impl->RequestSyncOperation( 442 return impl->RequestSyncOperation(
442 [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); 443 [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
443} 444}
@@ -557,23 +558,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
557 impl->SwapBuffers(framebuffer); 558 impl->SwapBuffers(framebuffer);
558} 559}
559 560
560VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { 561VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) {
561 return impl->OnCPURead(addr, size); 562 return impl->OnCPURead(addr, size);
562} 563}
563 564
564void GPU::FlushRegion(VAddr addr, u64 size) { 565void GPU::FlushRegion(DAddr addr, u64 size) {
565 impl->FlushRegion(addr, size); 566 impl->FlushRegion(addr, size);
566} 567}
567 568
568void GPU::InvalidateRegion(VAddr addr, u64 size) { 569void GPU::InvalidateRegion(DAddr addr, u64 size) {
569 impl->InvalidateRegion(addr, size); 570 impl->InvalidateRegion(addr, size);
570} 571}
571 572
572bool GPU::OnCPUWrite(VAddr addr, u64 size) { 573bool GPU::OnCPUWrite(DAddr addr, u64 size) {
573 return impl->OnCPUWrite(addr, size); 574 return impl->OnCPUWrite(addr, size);
574} 575}
575 576
576void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 577void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) {
577 impl->FlushAndInvalidateRegion(addr, size); 578 impl->FlushAndInvalidateRegion(addr, size);
578} 579}
579 580
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba2838b89..b3c1d15bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -158,7 +158,7 @@ public:
158 void InitAddressSpace(Tegra::MemoryManager& memory_manager); 158 void InitAddressSpace(Tegra::MemoryManager& memory_manager);
159 159
160 /// Request a host GPU memory flush from the CPU. 160 /// Request a host GPU memory flush from the CPU.
161 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 161 [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size);
162 162
163 /// Obtains current flush request fence id. 163 /// Obtains current flush request fence id.
164 [[nodiscard]] u64 CurrentSyncRequestFence() const; 164 [[nodiscard]] u64 CurrentSyncRequestFence() const;
@@ -242,20 +242,20 @@ public:
242 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 242 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
243 243
244 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 244 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
245 [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); 245 [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size);
246 246
247 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 247 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
248 void FlushRegion(VAddr addr, u64 size); 248 void FlushRegion(DAddr addr, u64 size);
249 249
250 /// Notify rasterizer that any caches of the specified region should be invalidated 250 /// Notify rasterizer that any caches of the specified region should be invalidated
251 void InvalidateRegion(VAddr addr, u64 size); 251 void InvalidateRegion(DAddr addr, u64 size);
252 252
253 /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is 253 /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
254 /// sensible, false otherwise 254 /// sensible, false otherwise
255 bool OnCPUWrite(VAddr addr, u64 size); 255 bool OnCPUWrite(DAddr addr, u64 size);
256 256
257 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 257 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
258 void FlushAndInvalidateRegion(VAddr addr, u64 size); 258 void FlushAndInvalidateRegion(DAddr addr, u64 size);
259 259
260private: 260private:
261 struct Impl; 261 struct Impl;
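The hunks above move GPU's public cache-maintenance interface from guest CPU addresses (VAddr) to device addresses (DAddr). A minimal sketch of a caller, assuming it already holds a device-address range (illustration only, not code from this change):

#include "video_core/gpu.h"

// Illustration only: flush cached GPU data for a device-address range, let the CPU
// touch it, then invalidate so later GPU reads observe the CPU-side writes.
void ReadThenOverwrite(Tegra::GPU& gpu, DAddr addr, u64 size) {
    gpu.FlushRegion(addr, size);      // write cached GPU data back for [addr, addr + size)
    // ... CPU-side reads and writes against the flushed range would happen here ...
    gpu.InvalidateRegion(addr, size); // drop now-stale GPU caches over the same range
}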
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 2f0f9f593..788d4f61e 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
82 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 82 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
83} 83}
84 84
85void ThreadManager::FlushRegion(VAddr addr, u64 size) { 85void ThreadManager::FlushRegion(DAddr addr, u64 size) {
86 if (!is_async) { 86 if (!is_async) {
87 // Always flush with synchronous GPU mode 87 // Always flush with synchronous GPU mode
88 PushCommand(FlushRegionCommand(addr, size)); 88 PushCommand(FlushRegionCommand(addr, size));
@@ -101,11 +101,11 @@ void ThreadManager::TickGPU() {
101 PushCommand(GPUTickCommand()); 101 PushCommand(GPUTickCommand());
102} 102}
103 103
104void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 104void ThreadManager::InvalidateRegion(DAddr addr, u64 size) {
105 rasterizer->OnCacheInvalidation(addr, size); 105 rasterizer->OnCacheInvalidation(addr, size);
106} 106}
107 107
108void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 108void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) {
109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
110 rasterizer->OnCacheInvalidation(addr, size); 110 rasterizer->OnCacheInvalidation(addr, size);
111} 111}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 43940bd6d..2de25e9ef 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -54,26 +54,26 @@ struct SwapBuffersCommand final {
54 54
55/// Command to signal to the GPU thread to flush a region 55/// Command to signal to the GPU thread to flush a region
56struct FlushRegionCommand final { 56struct FlushRegionCommand final {
57 explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} 57 explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
58 58
59 VAddr addr; 59 DAddr addr;
60 u64 size; 60 u64 size;
61}; 61};
62 62
63/// Command to signal to the GPU thread to invalidate a region 63/// Command to signal to the GPU thread to invalidate a region
64struct InvalidateRegionCommand final { 64struct InvalidateRegionCommand final {
65 explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} 65 explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {}
66 66
67 VAddr addr; 67 DAddr addr;
68 u64 size; 68 u64 size;
69}; 69};
70 70
71/// Command to signal to the GPU thread to flush and invalidate a region 71/// Command to signal to the GPU thread to flush and invalidate a region
72struct FlushAndInvalidateRegionCommand final { 72struct FlushAndInvalidateRegionCommand final {
73 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) 73 explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_)
74 : addr{addr_}, size{size_} {} 74 : addr{addr_}, size{size_} {}
75 75
76 VAddr addr; 76 DAddr addr;
77 u64 size; 77 u64 size;
78}; 78};
79 79
@@ -122,13 +122,13 @@ public:
122 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 122 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
123 123
124 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 124 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
125 void FlushRegion(VAddr addr, u64 size); 125 void FlushRegion(DAddr addr, u64 size);
126 126
127 /// Notify rasterizer that any caches of the specified region should be invalidated 127 /// Notify rasterizer that any caches of the specified region should be invalidated
128 void InvalidateRegion(VAddr addr, u64 size); 128 void InvalidateRegion(DAddr addr, u64 size);
129 129
130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 130 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
131 void FlushAndInvalidateRegion(VAddr addr, u64 size); 131 void FlushAndInvalidateRegion(DAddr addr, u64 size);
132 132
133 void TickGPU(); 133 void TickGPU();
134 134
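For illustration only: the command payloads above now carry DAddr, so whatever enqueues GPU-thread work hands over device addresses rather than CPU virtual addresses. A sketch, with the stated assumption that the command structs are visible in the enclosing scope (their namespace is not shown in this hunk):

// Sketch: construct the reworked commands with a device address and size.
void EnqueueSketch(DAddr dev_addr, u64 size) {
    const FlushRegionCommand flush_cmd(dev_addr, size);           // carries a device address now
    const InvalidateRegionCommand invalidate_cmd(dev_addr, size); // likewise
    static_cast<void>(flush_cmd);
    static_cast<void>(invalidate_cmd);
    // ThreadManager::FlushRegion pushes the flush command only in synchronous GPU mode;
    // invalidation bypasses the queue and goes straight to the rasterizer (gpu_thread.cpp above).
}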
diff --git a/src/video_core/guest_memory.h b/src/video_core/guest_memory.h
new file mode 100644
index 000000000..8b6213172
--- /dev/null
+++ b/src/video_core/guest_memory.h
@@ -0,0 +1,30 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <iterator>
7#include <memory>
8#include <optional>
9#include <span>
10#include <vector>
11
12#include "common/scratch_buffer.h"
13#include "core/guest_memory.h"
14#include "video_core/memory_manager.h"
15
16namespace Tegra::Memory {
17
18using GuestMemoryFlags = Core::Memory::GuestMemoryFlags;
19
20template <typename T, GuestMemoryFlags FLAGS>
21using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
22template <typename T, GuestMemoryFlags FLAGS>
23using DeviceGuestMemoryScoped =
24 Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>;
25template <typename T, GuestMemoryFlags FLAGS>
26using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>;
27template <typename T, GuestMemoryFlags FLAGS>
28using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
29
30} // namespace Tegra::Memory
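How these aliases are meant to be used is easiest to see from MemoryManager::CopyBlock further down in this diff; the following is a sketch of that same pattern (illustration only, with the write-back-on-destruction behaviour inferred from CopyBlock rather than stated in this hunk):

#include "video_core/guest_memory.h"
#include "video_core/memory_manager.h"

// Sketch modelled on MemoryManager::CopyBlock: stage bytes from one GPU virtual range
// and let the scoped wrapper write them back to another range when it goes out of scope.
void CopyThroughStaging(Tegra::MemoryManager& gmmu, GPUVAddr dst, GPUVAddr src, size_t size) {
    Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite> data(
        gmmu, src, size);
    data.SetAddressAndSize(dst, size);
}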
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 309a7f1d5..994591c8d 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -32,13 +32,12 @@ H264::~H264() = default;
32std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, 32std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
33 size_t* out_configuration_size, bool is_first_frame) { 33 size_t* out_configuration_size, bool is_first_frame) {
34 H264DecoderContext context; 34 H264DecoderContext context;
35 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, 35 host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
36 sizeof(H264DecoderContext));
37 36
38 const s64 frame_number = context.h264_parameter_set.frame_number.Value(); 37 const s64 frame_number = context.h264_parameter_set.frame_number.Value();
39 if (!is_first_frame && frame_number != 0) { 38 if (!is_first_frame && frame_number != 0) {
40 frame.resize_destructive(context.stream_len); 39 frame.resize_destructive(context.stream_len);
41 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); 40 host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
42 *out_configuration_size = 0; 41 *out_configuration_size = 0;
43 return frame; 42 return frame;
44 } 43 }
@@ -159,8 +158,8 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
159 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); 158 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
160 159
161 *out_configuration_size = encoded_header.size(); 160 *out_configuration_size = encoded_header.size();
162 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, 161 host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(),
163 frame.data() + encoded_header.size(), context.stream_len); 162 context.stream_len);
164 163
165 return frame; 164 return frame;
166} 165}
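The codec changes in this and the following files are purely mechanical: the register-supplied GPU virtual offsets are still read block-wise, only now through the renamed GMMU() accessor. A sketch of the pattern with a hypothetical struct (illustration only, not code from this change):

#include "video_core/host1x/host1x.h"

// Sketch of the pattern above, with a hypothetical POD type; only the accessor name changed.
struct HypotheticalInfo {
    u32 width;
    u32 height;
};

HypotheticalInfo ReadInfo(Tegra::Host1x::Host1x& host1x, u64 picture_info_offset) {
    HypotheticalInfo info{};
    host1x.GMMU().ReadBlock(picture_info_offset, &info, sizeof(HypotheticalInfo));
    return info;
}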
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
index ee6392ff9..be97e3b00 100644
--- a/src/video_core/host1x/codecs/vp8.cpp
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -14,7 +14,7 @@ VP8::~VP8() = default;
14 14
15std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { 15std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
16 VP8PictureInfo info; 16 VP8PictureInfo info;
17 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); 17 host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
18 18
19 const bool is_key_frame = info.key_frame == 1u; 19 const bool is_key_frame = info.key_frame == 1u;
20 const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); 20 const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
@@ -45,7 +45,7 @@ std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters&
45 frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); 45 frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
46 } 46 }
47 const u64 bitstream_offset = state.frame_bitstream_offset; 47 const u64 bitstream_offset = state.frame_bitstream_offset;
48 host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); 48 host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
49 49
50 return frame; 50 return frame;
51} 51}
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
index 306c3d0e8..65d6fb2d5 100644
--- a/src/video_core/host1x/codecs/vp9.cpp
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -358,7 +358,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
358 358
359Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { 359Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
360 PictureInfo picture_info; 360 PictureInfo picture_info;
361 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); 361 host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
362 Vp9PictureInfo vp9_info = picture_info.Convert(); 362 Vp9PictureInfo vp9_info = picture_info.Convert();
363 363
364 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); 364 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
@@ -373,7 +373,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters&
373 373
374void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { 374void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
375 EntropyProbs entropy; 375 EntropyProbs entropy;
376 host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); 376 host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
377 entropy.Convert(dst); 377 entropy.Convert(dst);
378} 378}
379 379
@@ -383,9 +383,8 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters
383 // gpu.SyncGuestHost(); epic, why? 383 // gpu.SyncGuestHost(); epic, why?
384 current_frame.info = GetVp9PictureInfo(state); 384 current_frame.info = GetVp9PictureInfo(state);
385 current_frame.bit_stream.resize(current_frame.info.bitstream_size); 385 current_frame.bit_stream.resize(current_frame.info.bitstream_size);
386 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, 386 host1x.GMMU().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
387 current_frame.bit_stream.data(), 387 current_frame.info.bitstream_size);
388 current_frame.info.bitstream_size);
389 } 388 }
390 if (!next_frame.bit_stream.empty()) { 389 if (!next_frame.bit_stream.empty()) {
391 Vp9FrameContainer temp{ 390 Vp9FrameContainer temp{
diff --git a/src/video_core/host1x/gpu_device_memory_manager.cpp b/src/video_core/host1x/gpu_device_memory_manager.cpp
new file mode 100644
index 000000000..668c2f08b
--- /dev/null
+++ b/src/video_core/host1x/gpu_device_memory_manager.cpp
@@ -0,0 +1,32 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "core/device_memory_manager.inc"
5#include "video_core/host1x/gpu_device_memory_manager.h"
6#include "video_core/rasterizer_interface.h"
7
8namespace Tegra {
9
10struct MaxwellDeviceMethods {
11 static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address,
12 size_t size, bool caching) {
13 interface->RasterizerMarkRegionCached(address, size, caching);
14 }
15};
16
17} // namespace Tegra
18
19template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>;
20template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>;
21
22template const u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr) const;
23template u8* Tegra::MaxwellDeviceMemoryManager::GetPointer<u8>(DAddr addr);
24
25template u8 Tegra::MaxwellDeviceMemoryManager::Read<u8>(DAddr addr) const;
26template u16 Tegra::MaxwellDeviceMemoryManager::Read<u16>(DAddr addr) const;
27template u32 Tegra::MaxwellDeviceMemoryManager::Read<u32>(DAddr addr) const;
28template u64 Tegra::MaxwellDeviceMemoryManager::Read<u64>(DAddr addr) const;
29template void Tegra::MaxwellDeviceMemoryManager::Write<u8>(DAddr addr, u8 data);
30template void Tegra::MaxwellDeviceMemoryManager::Write<u16>(DAddr addr, u16 data);
31template void Tegra::MaxwellDeviceMemoryManager::Write<u32>(DAddr addr, u32 data);
32template void Tegra::MaxwellDeviceMemoryManager::Write<u64>(DAddr addr, u64 data); \ No newline at end of file
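An observation on the file above (not part of the diff): keeping the explicit instantiations in this .cpp means device_memory_manager.inc is compiled in this one translation unit, while other code includes only the lightweight header. Any additional accessor width used elsewhere would be instantiated here the same way; a sketch, assuming a u16 pointer accessor were ever needed:

// Sketch only; these instantiations are not in the change set.
template const u16* Tegra::MaxwellDeviceMemoryManager::GetPointer<u16>(DAddr addr) const;
template u16* Tegra::MaxwellDeviceMemoryManager::GetPointer<u16>(DAddr addr);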
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h
new file mode 100644
index 000000000..a9f249991
--- /dev/null
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@@ -0,0 +1,24 @@
1// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "core/device_memory_manager.h"
7
8namespace VideoCore {
9class RasterizerInterface;
10}
11
12namespace Tegra {
13
14struct MaxwellDeviceMethods;
15
16struct MaxwellDeviceTraits {
17 static constexpr size_t device_virtual_bits = 34;
18 using DeviceInterface = typename VideoCore::RasterizerInterface;
19 using DeviceMethods = MaxwellDeviceMethods;
20};
21
22using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
23
24} // namespace Tegra \ No newline at end of file
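Core::DeviceMemoryManager is parameterized by a traits struct, so the header above is all another device would need to mirror. A hedged sketch of the pattern (everything named Hypothetical* is invented for illustration and does not exist in this change set):

#include "video_core/host1x/gpu_device_memory_manager.h"

// Hypothetical traits, shown only to illustrate how the traits struct feeds the template.
struct HypotheticalDeviceTraits {
    static constexpr size_t device_virtual_bits = 32;                 // narrower address space
    using DeviceInterface = typename VideoCore::RasterizerInterface;  // reuse the same interface
    using DeviceMethods = Tegra::MaxwellDeviceMethods;                // and the same callbacks
};
using HypotheticalDeviceMemoryManager = Core::DeviceMemoryManager<HypotheticalDeviceTraits>;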
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index 7c317a85d..c4c7a5883 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -9,9 +9,12 @@ namespace Tegra {
9namespace Host1x { 9namespace Host1x {
10 10
11Host1x::Host1x(Core::System& system_) 11Host1x::Host1x(Core::System& system_)
12 : system{system_}, syncpoint_manager{}, memory_manager{system, 32, 12}, 12 : system{system_}, syncpoint_manager{},
13 memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
13 allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} 14 allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
14 15
16Host1x::~Host1x() = default;
17
15} // namespace Host1x 18} // namespace Host1x
16 19
17} // namespace Tegra 20} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index 57082ae54..d72d97b7b 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -6,6 +6,7 @@
6#include "common/common_types.h" 6#include "common/common_types.h"
7 7
8#include "common/address_space.h" 8#include "common/address_space.h"
9#include "video_core/host1x/gpu_device_memory_manager.h"
9#include "video_core/host1x/syncpoint_manager.h" 10#include "video_core/host1x/syncpoint_manager.h"
10#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
11 12
@@ -20,6 +21,7 @@ namespace Host1x {
20class Host1x { 21class Host1x {
21public: 22public:
22 explicit Host1x(Core::System& system); 23 explicit Host1x(Core::System& system);
24 ~Host1x();
23 25
24 SyncpointManager& GetSyncpointManager() { 26 SyncpointManager& GetSyncpointManager() {
25 return syncpoint_manager; 27 return syncpoint_manager;
@@ -29,14 +31,22 @@ public:
29 return syncpoint_manager; 31 return syncpoint_manager;
30 } 32 }
31 33
32 Tegra::MemoryManager& MemoryManager() { 34 Tegra::MaxwellDeviceMemoryManager& MemoryManager() {
33 return memory_manager; 35 return memory_manager;
34 } 36 }
35 37
36 const Tegra::MemoryManager& MemoryManager() const { 38 const Tegra::MaxwellDeviceMemoryManager& MemoryManager() const {
37 return memory_manager; 39 return memory_manager;
38 } 40 }
39 41
42 Tegra::MemoryManager& GMMU() {
43 return gmmu_manager;
44 }
45
46 const Tegra::MemoryManager& GMMU() const {
47 return gmmu_manager;
48 }
49
40 Common::FlatAllocator<u32, 0, 32>& Allocator() { 50 Common::FlatAllocator<u32, 0, 32>& Allocator() {
41 return *allocator; 51 return *allocator;
42 } 52 }
@@ -48,7 +58,8 @@ public:
48private: 58private:
49 Core::System& system; 59 Core::System& system;
50 SyncpointManager syncpoint_manager; 60 SyncpointManager syncpoint_manager;
51 Tegra::MemoryManager memory_manager; 61 Tegra::MaxwellDeviceMemoryManager memory_manager;
62 Tegra::MemoryManager gmmu_manager;
52 std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator; 63 std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
53}; 64};
54 65
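To make the split concrete, an illustrative sketch (not code from this change): MemoryManager() now returns the DAddr-keyed MaxwellDeviceMemoryManager, while GMMU() is the GPU-virtual-address view that the codec and vic call sites in this diff use.

#include "video_core/host1x/host1x.h"

// Sketch: the same Host1x instance exposes both address spaces.
void Demo(Tegra::Host1x::Host1x& host1x, GPUVAddr gpu_addr, DAddr dev_addr) {
    // GPU virtual addresses (what the codec register state stores) go through the GMMU.
    const u64 word = host1x.GMMU().Read<u64>(gpu_addr);
    // Device addresses index the device memory manager directly.
    host1x.MemoryManager().Write<u64>(dev_addr, word);
}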
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
index 2a5eba415..d154746af 100644
--- a/src/video_core/host1x/vic.cpp
+++ b/src/video_core/host1x/vic.cpp
@@ -81,7 +81,7 @@ void Vic::Execute() {
81 LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); 81 LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
82 return; 82 return;
83 } 83 }
84 const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; 84 const VicConfig config{host1x.GMMU().Read<u64>(config_struct_address + 0x20)};
85 auto frame = nvdec_processor->GetFrame(); 85 auto frame = nvdec_processor->GetFrame();
86 if (!frame) { 86 if (!frame) {
87 return; 87 return;
@@ -162,12 +162,12 @@ void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
162 Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, 162 Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
163 block_height, 0, width * 4); 163 block_height, 0, width * 4);
164 164
165 host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); 165 host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
166 } else { 166 } else {
167 // send pitch linear frame 167 // send pitch linear frame
168 const size_t linear_size = width * height * 4; 168 const size_t linear_size = width * height * 4;
169 host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, 169 host1x.GMMU().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
170 linear_size); 170 linear_size);
171 } 171 }
172} 172}
173 173
@@ -193,8 +193,7 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
193 const std::size_t dst = y * aligned_width; 193 const std::size_t dst = y * aligned_width;
194 std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); 194 std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
195 } 195 }
196 host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), 196 host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), luma_buffer.size());
197 luma_buffer.size());
198 197
199 // Chroma 198 // Chroma
200 const std::size_t half_height = frame_height / 2; 199 const std::size_t half_height = frame_height / 2;
@@ -233,8 +232,8 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
233 ASSERT(false); 232 ASSERT(false);
234 break; 233 break;
235 } 234 }
236 host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), 235 host1x.GMMU().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
237 chroma_buffer.size()); 236 chroma_buffer.size());
238} 237}
239 238
240} // namespace Host1x 239} // namespace Host1x
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index d16040613..a52f8e486 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -7,25 +7,26 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/device_memory.h"
11#include "core/hle/kernel/k_page_table.h" 10#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 11#include "core/hle/kernel/k_process.h"
12#include "video_core/guest_memory.h"
13#include "video_core/host1x/host1x.h"
13#include "video_core/invalidation_accumulator.h" 14#include "video_core/invalidation_accumulator.h"
14#include "video_core/memory_manager.h" 15#include "video_core/memory_manager.h"
15#include "video_core/rasterizer_interface.h" 16#include "video_core/rasterizer_interface.h"
16#include "video_core/renderer_base.h" 17#include "video_core/renderer_base.h"
17 18
18namespace Tegra { 19namespace Tegra {
19using Core::Memory::GuestMemoryFlags; 20using Tegra::Memory::GuestMemoryFlags;
20 21
21std::atomic<size_t> MemoryManager::unique_identifier_generator{}; 22std::atomic<size_t> MemoryManager::unique_identifier_generator{};
22 23
23MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, 24MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
24 u64 page_bits_) 25 u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
25 : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, 26 : system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
26 address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, 27 page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
27 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, 28 page_table{address_space_bits, address_space_bits + page_bits - 38,
28 page_bits != big_page_bits ? page_bits : 0}, 29 page_bits != big_page_bits ? page_bits : 0},
29 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( 30 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
30 1, std::memory_order_acq_rel)}, 31 1, std::memory_order_acq_rel)},
31 accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { 32 accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
@@ -42,11 +43,16 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
42 big_page_table_mask = big_page_table_size - 1; 43 big_page_table_mask = big_page_table_size - 1;
43 44
44 big_entries.resize(big_page_table_size / 32, 0); 45 big_entries.resize(big_page_table_size / 32, 0);
45 big_page_table_cpu.resize(big_page_table_size); 46 big_page_table_dev.resize(big_page_table_size);
46 big_page_continuous.resize(big_page_table_size / continuous_bits, 0); 47 big_page_continuous.resize(big_page_table_size / continuous_bits, 0);
47 entries.resize(page_table_size / 32, 0); 48 entries.resize(page_table_size / 32, 0);
48} 49}
49 50
51MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
52 u64 page_bits_)
53 : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
54 page_bits_) {}
55
50MemoryManager::~MemoryManager() = default; 56MemoryManager::~MemoryManager() = default;
51 57
52template <bool is_big_page> 58template <bool is_big_page>
@@ -100,7 +106,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu
100} 106}
101 107
102template <MemoryManager::EntryType entry_type> 108template <MemoryManager::EntryType entry_type>
103GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 109GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
104 PTEKind kind) { 110 PTEKind kind) {
105 [[maybe_unused]] u64 remaining_size{size}; 111 [[maybe_unused]] u64 remaining_size{size};
106 if constexpr (entry_type == EntryType::Mapped) { 112 if constexpr (entry_type == EntryType::Mapped) {
@@ -114,9 +120,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
114 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); 120 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
115 } 121 }
116 if constexpr (entry_type == EntryType::Mapped) { 122 if constexpr (entry_type == EntryType::Mapped) {
117 const VAddr current_cpu_addr = cpu_addr + offset; 123 const DAddr current_dev_addr = dev_addr + offset;
118 const auto index = PageEntryIndex<false>(current_gpu_addr); 124 const auto index = PageEntryIndex<false>(current_gpu_addr);
119 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); 125 const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits);
120 page_table[index] = sub_value; 126 page_table[index] = sub_value;
121 } 127 }
122 remaining_size -= page_size; 128 remaining_size -= page_size;
@@ -126,7 +132,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
126} 132}
127 133
128template <MemoryManager::EntryType entry_type> 134template <MemoryManager::EntryType entry_type>
129GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, 135GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr,
130 size_t size, PTEKind kind) { 136 size_t size, PTEKind kind) {
131 [[maybe_unused]] u64 remaining_size{size}; 137 [[maybe_unused]] u64 remaining_size{size};
132 for (u64 offset{}; offset < size; offset += big_page_size) { 138 for (u64 offset{}; offset < size; offset += big_page_size) {
@@ -137,20 +143,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
137 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); 143 rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
138 } 144 }
139 if constexpr (entry_type == EntryType::Mapped) { 145 if constexpr (entry_type == EntryType::Mapped) {
140 const VAddr current_cpu_addr = cpu_addr + offset; 146 const DAddr current_dev_addr = dev_addr + offset;
141 const auto index = PageEntryIndex<true>(current_gpu_addr); 147 const auto index = PageEntryIndex<true>(current_gpu_addr);
142 const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); 148 const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits);
143 big_page_table_cpu[index] = sub_value; 149 big_page_table_dev[index] = sub_value;
144 const bool is_continuous = ([&] { 150 const bool is_continuous = ([&] {
145 uintptr_t base_ptr{ 151 uintptr_t base_ptr{
146 reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; 152 reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))};
147 if (base_ptr == 0) { 153 if (base_ptr == 0) {
148 return false; 154 return false;
149 } 155 }
150 for (VAddr start_cpu = current_cpu_addr + page_size; 156 for (DAddr start_cpu = current_dev_addr + page_size;
151 start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { 157 start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) {
152 base_ptr += page_size; 158 base_ptr += page_size;
153 auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); 159 auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu));
154 if (next_ptr == 0 || base_ptr != next_ptr) { 160 if (next_ptr == 0 || base_ptr != next_ptr) {
155 return false; 161 return false;
156 } 162 }
@@ -172,12 +178,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
172 rasterizer = rasterizer_; 178 rasterizer = rasterizer_;
173} 179}
174 180
175GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, 181GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind,
176 bool is_big_pages) { 182 bool is_big_pages) {
177 if (is_big_pages) [[likely]] { 183 if (is_big_pages) [[likely]] {
178 return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); 184 return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind);
179 } 185 }
180 return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); 186 return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind);
181} 187}
182 188
183GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { 189GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
@@ -202,7 +208,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
202 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 208 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
203} 209}
204 210
205std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { 211std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
206 if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { 212 if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] {
207 return std::nullopt; 213 return std::nullopt;
208 } 214 }
@@ -211,17 +217,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
211 return std::nullopt; 217 return std::nullopt;
212 } 218 }
213 219
214 const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) 220 const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
215 << cpu_page_bits; 221 << cpu_page_bits;
216 return cpu_addr_base + (gpu_addr & page_mask); 222 return dev_addr_base + (gpu_addr & page_mask);
217 } 223 }
218 224
219 const VAddr cpu_addr_base = 225 const DAddr dev_addr_base =
220 static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; 226 static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
221 return cpu_addr_base + (gpu_addr & big_page_mask); 227 return dev_addr_base + (gpu_addr & big_page_mask);
222} 228}
223 229
224std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { 230std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
225 size_t page_index{addr >> page_bits}; 231 size_t page_index{addr >> page_bits};
226 const size_t page_last{(addr + size + page_size - 1) >> page_bits}; 232 const size_t page_last{(addr + size + page_size - 1) >> page_bits};
227 while (page_index < page_last) { 233 while (page_index < page_last) {
@@ -274,7 +280,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
274 return {}; 280 return {};
275 } 281 }
276 282
277 return memory.GetPointer(*address); 283 return memory.GetPointer<u8>(*address);
278} 284}
279 285
280const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { 286const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -283,7 +289,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
283 return {}; 289 return {};
284 } 290 }
285 291
286 return memory.GetPointer(*address); 292 return memory.GetPointer<u8>(*address);
287} 293}
288 294
289#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. 295#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
@@ -367,25 +373,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
367 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 373 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
368 }; 374 };
369 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 375 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
370 const VAddr cpu_addr_base = 376 const DAddr dev_addr_base =
371 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 377 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
372 if constexpr (is_safe) { 378 if constexpr (is_safe) {
373 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 379 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
374 } 380 }
375 u8* physical = memory.GetPointer(cpu_addr_base); 381 u8* physical = memory.GetPointer<u8>(dev_addr_base);
376 std::memcpy(dest_buffer, physical, copy_amount); 382 std::memcpy(dest_buffer, physical, copy_amount);
377 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 383 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
378 }; 384 };
379 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 385 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
380 const VAddr cpu_addr_base = 386 const DAddr dev_addr_base =
381 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 387 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
382 if constexpr (is_safe) { 388 if constexpr (is_safe) {
383 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 389 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
384 } 390 }
385 if (!IsBigPageContinuous(page_index)) [[unlikely]] { 391 if (!IsBigPageContinuous(page_index)) [[unlikely]] {
386 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); 392 memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount);
387 } else { 393 } else {
388 u8* physical = memory.GetPointer(cpu_addr_base); 394 u8* physical = memory.GetPointer<u8>(dev_addr_base);
389 std::memcpy(dest_buffer, physical, copy_amount); 395 std::memcpy(dest_buffer, physical, copy_amount);
390 } 396 }
391 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 397 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
@@ -416,25 +422,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe
416 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 422 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
417 }; 423 };
418 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 424 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
419 const VAddr cpu_addr_base = 425 const DAddr dev_addr_base =
420 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 426 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
421 if constexpr (is_safe) { 427 if constexpr (is_safe) {
422 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 428 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
423 } 429 }
424 u8* physical = memory.GetPointer(cpu_addr_base); 430 u8* physical = memory.GetPointer<u8>(dev_addr_base);
425 std::memcpy(physical, src_buffer, copy_amount); 431 std::memcpy(physical, src_buffer, copy_amount);
426 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 432 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
427 }; 433 };
428 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 434 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
429 const VAddr cpu_addr_base = 435 const DAddr dev_addr_base =
430 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 436 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
431 if constexpr (is_safe) { 437 if constexpr (is_safe) {
432 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 438 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
433 } 439 }
434 if (!IsBigPageContinuous(page_index)) [[unlikely]] { 440 if (!IsBigPageContinuous(page_index)) [[unlikely]] {
435 memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); 441 memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount);
436 } else { 442 } else {
437 u8* physical = memory.GetPointer(cpu_addr_base); 443 u8* physical = memory.GetPointer<u8>(dev_addr_base);
438 std::memcpy(physical, src_buffer, copy_amount); 444 std::memcpy(physical, src_buffer, copy_amount);
439 } 445 }
440 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; 446 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
@@ -470,14 +476,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
470 [[maybe_unused]] std::size_t copy_amount) {}; 476 [[maybe_unused]] std::size_t copy_amount) {};
471 477
472 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 478 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
473 const VAddr cpu_addr_base = 479 const DAddr dev_addr_base =
474 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 480 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
475 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 481 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
476 }; 482 };
477 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 483 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
478 const VAddr cpu_addr_base = 484 const DAddr dev_addr_base =
479 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 485 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
480 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 486 rasterizer->FlushRegion(dev_addr_base, copy_amount, which);
481 }; 487 };
482 auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, 488 auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
483 std::size_t copy_amount) { 489 std::size_t copy_amount) {
@@ -495,15 +501,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
495 [[maybe_unused]] std::size_t copy_amount) { return false; }; 501 [[maybe_unused]] std::size_t copy_amount) { return false; };
496 502
497 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 503 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
498 const VAddr cpu_addr_base = 504 const DAddr dev_addr_base =
499 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 505 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
500 result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); 506 result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which);
501 return result; 507 return result;
502 }; 508 };
503 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 509 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
504 const VAddr cpu_addr_base = 510 const DAddr dev_addr_base =
505 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 511 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
506 result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); 512 result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which);
507 return result; 513 return result;
508 }; 514 };
509 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 515 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -517,7 +523,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size,
517} 523}
518 524
519size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { 525size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
520 std::optional<VAddr> old_page_addr{}; 526 std::optional<DAddr> old_page_addr{};
521 size_t range_so_far = 0; 527 size_t range_so_far = 0;
522 bool result{false}; 528 bool result{false};
523 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, 529 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
@@ -526,24 +532,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
526 return true; 532 return true;
527 }; 533 };
528 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 534 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
529 const VAddr cpu_addr_base = 535 const DAddr dev_addr_base =
530 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 536 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
531 if (old_page_addr && *old_page_addr != cpu_addr_base) { 537 if (old_page_addr && *old_page_addr != dev_addr_base) {
532 result = true; 538 result = true;
533 return true; 539 return true;
534 } 540 }
535 range_so_far += copy_amount; 541 range_so_far += copy_amount;
536 old_page_addr = {cpu_addr_base + copy_amount}; 542 old_page_addr = {dev_addr_base + copy_amount};
537 return false; 543 return false;
538 }; 544 };
539 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 545 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
540 const VAddr cpu_addr_base = 546 const DAddr dev_addr_base =
541 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 547 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
542 if (old_page_addr && *old_page_addr != cpu_addr_base) { 548 if (old_page_addr && *old_page_addr != dev_addr_base) {
543 return true; 549 return true;
544 } 550 }
545 range_so_far += copy_amount; 551 range_so_far += copy_amount;
546 old_page_addr = {cpu_addr_base + copy_amount}; 552 old_page_addr = {dev_addr_base + copy_amount};
547 return false; 553 return false;
548 }; 554 };
549 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 555 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -568,14 +574,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
568 [[maybe_unused]] std::size_t copy_amount) {}; 574 [[maybe_unused]] std::size_t copy_amount) {};
569 575
570 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 576 auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
571 const VAddr cpu_addr_base = 577 const DAddr dev_addr_base =
572 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 578 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
573 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 579 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
574 }; 580 };
575 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 581 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
576 const VAddr cpu_addr_base = 582 const DAddr dev_addr_base =
577 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 583 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
578 rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); 584 rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which);
579 }; 585 };
580 auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, 586 auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset,
581 std::size_t copy_amount) { 587 std::size_t copy_amount) {
@@ -587,7 +593,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
587 593
588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, 594void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
589 VideoCommon::CacheType which) { 595 VideoCommon::CacheType which) {
590 Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( 596 Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data(
591 *this, gpu_src_addr, size); 597 *this, gpu_src_addr, size);
592 data.SetAddressAndSize(gpu_dest_addr, size); 598 data.SetAddressAndSize(gpu_dest_addr, size);
593 FlushRegion(gpu_dest_addr, size, which); 599 FlushRegion(gpu_dest_addr, size, which);
@@ -600,18 +606,18 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
600 const std::size_t page{(page_index & big_page_mask) + size}; 606 const std::size_t page{(page_index & big_page_mask) + size};
601 return page <= big_page_size; 607 return page <= big_page_size;
602 } 608 }
603 const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size}; 609 const std::size_t page{(gpu_addr & Core::DEVICE_PAGEMASK) + size};
604 return page <= Core::Memory::YUZU_PAGESIZE; 610 return page <= Core::DEVICE_PAGESIZE;
605 } 611 }
606 if (GetEntry<false>(gpu_addr) != EntryType::Mapped) { 612 if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
607 return false; 613 return false;
608 } 614 }
609 const std::size_t page{(gpu_addr & Core::Memory::YUZU_PAGEMASK) + size}; 615 const std::size_t page{(gpu_addr & Core::DEVICE_PAGEMASK) + size};
610 return page <= Core::Memory::YUZU_PAGESIZE; 616 return page <= Core::DEVICE_PAGESIZE;
611} 617}
612 618
613bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { 619bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const {
614 std::optional<VAddr> old_page_addr{}; 620 std::optional<DAddr> old_page_addr{};
615 bool result{true}; 621 bool result{true};
616 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, 622 auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
617 std::size_t copy_amount) { 623 std::size_t copy_amount) {
@@ -619,23 +625,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const
619 return true; 625 return true;
620 }; 626 };
621 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 627 auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
622 const VAddr cpu_addr_base = 628 const DAddr dev_addr_base =
623 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 629 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
624 if (old_page_addr && *old_page_addr != cpu_addr_base) { 630 if (old_page_addr && *old_page_addr != dev_addr_base) {
625 result = false; 631 result = false;
626 return true; 632 return true;
627 } 633 }
628 old_page_addr = {cpu_addr_base + copy_amount}; 634 old_page_addr = {dev_addr_base + copy_amount};
629 return false; 635 return false;
630 }; 636 };
631 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 637 auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
632 const VAddr cpu_addr_base = 638 const DAddr dev_addr_base =
633 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 639 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
634 if (old_page_addr && *old_page_addr != cpu_addr_base) { 640 if (old_page_addr && *old_page_addr != dev_addr_base) {
635 result = false; 641 result = false;
636 return true; 642 return true;
637 } 643 }
638 old_page_addr = {cpu_addr_base + copy_amount}; 644 old_page_addr = {dev_addr_base + copy_amount};
639 return false; 645 return false;
640 }; 646 };
641 auto check_short_pages = [&](std::size_t page_index, std::size_t offset, 647 auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
@@ -678,11 +684,11 @@ template <bool is_gpu_address>
678void MemoryManager::GetSubmappedRangeImpl( 684void MemoryManager::GetSubmappedRangeImpl(
679 GPUVAddr gpu_addr, std::size_t size, 685 GPUVAddr gpu_addr, std::size_t size,
680 boost::container::small_vector< 686 boost::container::small_vector<
681 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) 687 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result)
682 const { 688 const {
683 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> 689 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>>
684 last_segment{}; 690 last_segment{};
685 std::optional<VAddr> old_page_addr{}; 691 std::optional<DAddr> old_page_addr{};
686 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, 692 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
687 [[maybe_unused]] std::size_t offset, 693 [[maybe_unused]] std::size_t offset,
688 [[maybe_unused]] std::size_t copy_amount) { 694 [[maybe_unused]] std::size_t copy_amount) {
@@ -694,20 +700,20 @@ void MemoryManager::GetSubmappedRangeImpl(
694 const auto extend_size_big = [this, &split, &old_page_addr, 700 const auto extend_size_big = [this, &split, &old_page_addr,
695 &last_segment](std::size_t page_index, std::size_t offset, 701 &last_segment](std::size_t page_index, std::size_t offset,
696 std::size_t copy_amount) { 702 std::size_t copy_amount) {
697 const VAddr cpu_addr_base = 703 const DAddr dev_addr_base =
698 (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; 704 (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset;
699 if (old_page_addr) { 705 if (old_page_addr) {
700 if (*old_page_addr != cpu_addr_base) { 706 if (*old_page_addr != dev_addr_base) {
701 split(0, 0, 0); 707 split(0, 0, 0);
702 } 708 }
703 } 709 }
704 old_page_addr = {cpu_addr_base + copy_amount}; 710 old_page_addr = {dev_addr_base + copy_amount};
705 if (!last_segment) { 711 if (!last_segment) {
706 if constexpr (is_gpu_address) { 712 if constexpr (is_gpu_address) {
707 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; 713 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
708 last_segment = {new_base_addr, copy_amount}; 714 last_segment = {new_base_addr, copy_amount};
709 } else { 715 } else {
710 last_segment = {cpu_addr_base, copy_amount}; 716 last_segment = {dev_addr_base, copy_amount};
711 } 717 }
712 } else { 718 } else {
713 last_segment->second += copy_amount; 719 last_segment->second += copy_amount;
@@ -716,20 +722,20 @@ void MemoryManager::GetSubmappedRangeImpl(
716 const auto extend_size_short = [this, &split, &old_page_addr, 722 const auto extend_size_short = [this, &split, &old_page_addr,
717 &last_segment](std::size_t page_index, std::size_t offset, 723 &last_segment](std::size_t page_index, std::size_t offset,
718 std::size_t copy_amount) { 724 std::size_t copy_amount) {
719 const VAddr cpu_addr_base = 725 const DAddr dev_addr_base =
720 (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; 726 (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset;
721 if (old_page_addr) { 727 if (old_page_addr) {
722 if (*old_page_addr != cpu_addr_base) { 728 if (*old_page_addr != dev_addr_base) {
723 split(0, 0, 0); 729 split(0, 0, 0);
724 } 730 }
725 } 731 }
726 old_page_addr = {cpu_addr_base + copy_amount}; 732 old_page_addr = {dev_addr_base + copy_amount};
727 if (!last_segment) { 733 if (!last_segment) {
728 if constexpr (is_gpu_address) { 734 if constexpr (is_gpu_address) {
729 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; 735 const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
730 last_segment = {new_base_addr, copy_amount}; 736 last_segment = {new_base_addr, copy_amount};
731 } else { 737 } else {
732 last_segment = {cpu_addr_base, copy_amount}; 738 last_segment = {dev_addr_base, copy_amount};
733 } 739 }
734 } else { 740 } else {
735 last_segment->second += copy_amount; 741 last_segment->second += copy_amount;
@@ -756,9 +762,12 @@ void MemoryManager::FlushCaching() {
756} 762}
757 763
758const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { 764const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const {
759 auto cpu_addr = GpuToCpuAddress(src_addr); 765 if (!IsContinuousRange(src_addr, size)) {
760 if (cpu_addr) { 766 return nullptr;
761 return memory.GetSpan(*cpu_addr, size); 767 }
768 auto dev_addr = GpuToCpuAddress(src_addr);
769 if (dev_addr) {
770 return memory.GetSpan(*dev_addr, size);
762 } 771 }
763 return nullptr; 772 return nullptr;
764} 773}
@@ -767,9 +776,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) {
767 if (!IsContinuousRange(src_addr, size)) { 776 if (!IsContinuousRange(src_addr, size)) {
768 return nullptr; 777 return nullptr;
769 } 778 }
770 auto cpu_addr = GpuToCpuAddress(src_addr); 779 auto dev_addr = GpuToCpuAddress(src_addr);
771 if (cpu_addr) { 780 if (dev_addr) {
772 return memory.GetSpan(*cpu_addr, size); 781 return memory.GetSpan(*dev_addr, size);
773 } 782 }
774 return nullptr; 783 return nullptr;
775} 784}
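Despite the size of this file's diff, the behaviour change is uniform: every page-table entry now encodes a device address, so GpuToCpuAddress (which keeps its old name) yields a DAddr that feeds the MaxwellDeviceMemoryManager. A minimal sketch of the resulting flow (illustration only, hypothetical call site):

#include <optional>

#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/memory_manager.h"

// Sketch: map a device-address range into the GPU address space, then translate back;
// the translated address now indexes device memory rather than guest CPU memory.
void MapAndPeek(Tegra::MemoryManager& gmmu, Tegra::MaxwellDeviceMemoryManager& dev_mem,
                GPUVAddr gpu_addr, DAddr dev_addr, size_t size) {
    gmmu.Map(gpu_addr, dev_addr, size);
    if (const std::optional<DAddr> translated = gmmu.GpuToCpuAddress(gpu_addr)) {
        [[maybe_unused]] const u8* host_ptr = dev_mem.GetPointer<u8>(*translated);
    }
}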
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 9b311b9e5..c5255f36c 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -15,8 +15,8 @@
15#include "common/range_map.h" 15#include "common/range_map.h"
16#include "common/scratch_buffer.h" 16#include "common/scratch_buffer.h"
17#include "common/virtual_buffer.h" 17#include "common/virtual_buffer.h"
18#include "core/memory.h"
19#include "video_core/cache_types.h" 18#include "video_core/cache_types.h"
19#include "video_core/host1x/gpu_device_memory_manager.h"
20#include "video_core/pte_kind.h" 20#include "video_core/pte_kind.h"
21 21
22namespace VideoCore { 22namespace VideoCore {
@@ -28,10 +28,6 @@ class InvalidationAccumulator;
28} 28}
29 29
30namespace Core { 30namespace Core {
31class DeviceMemory;
32namespace Memory {
33class Memory;
34} // namespace Memory
35class System; 31class System;
36} // namespace Core 32} // namespace Core
37 33
@@ -41,6 +37,9 @@ class MemoryManager final {
41public: 37public:
42 explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, 38 explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
43 u64 big_page_bits_ = 16, u64 page_bits_ = 12); 39 u64 big_page_bits_ = 16, u64 page_bits_ = 12);
40 explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
41 u64 address_space_bits_ = 40, u64 big_page_bits_ = 16,
42 u64 page_bits_ = 12);
44 ~MemoryManager(); 43 ~MemoryManager();
45 44
46 size_t GetID() const { 45 size_t GetID() const {
@@ -50,9 +49,9 @@ public:
50 /// Binds a renderer to the memory manager. 49 /// Binds a renderer to the memory manager.
51 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 50 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
52 51
53 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; 52 [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const;
54 53
55 [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; 54 [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
56 55
57 template <typename T> 56 template <typename T>
58 [[nodiscard]] T Read(GPUVAddr addr) const; 57 [[nodiscard]] T Read(GPUVAddr addr) const;
@@ -69,7 +68,7 @@ public:
69 if (!address) { 68 if (!address) {
70 return {}; 69 return {};
71 } 70 }
72 return memory.GetPointer(*address); 71 return memory.GetPointer<T>(*address);
73 } 72 }
74 73
75 template <typename T> 74 template <typename T>
@@ -110,7 +109,7 @@ public:
110 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; 109 [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
111 110
112 /** 111 /**
113 * Checks if a gpu region is mapped by a single range of cpu addresses. 112 * Checks if a gpu region is mapped by a single range of device addresses.
114 */ 113 */
115 [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; 114 [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const;
116 115
@@ -120,14 +119,14 @@ public:
120 [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; 119 [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
121 120
122 /** 121 /**
123 * Returns a vector with all the subranges of cpu addresses mapped beneath. 122 * Returns a vector with all the subranges of device addresses mapped beneath.
124 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty 123 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty
125 * vector will be returned; 124 * vector will be returned;
126 */ 125 */
127 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( 126 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
128 GPUVAddr gpu_addr, std::size_t size) const; 127 GPUVAddr gpu_addr, std::size_t size) const;
129 128
130 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, 129 GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size,
131 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); 130 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
132 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); 131 GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
133 void Unmap(GPUVAddr gpu_addr, std::size_t size); 132 void Unmap(GPUVAddr gpu_addr, std::size_t size);
@@ -186,12 +185,11 @@ private:
186 void GetSubmappedRangeImpl( 185 void GetSubmappedRangeImpl(
187 GPUVAddr gpu_addr, std::size_t size, 186 GPUVAddr gpu_addr, std::size_t size,
188 boost::container::small_vector< 187 boost::container::small_vector<
189 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& 188 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>&
190 result) const; 189 result) const;
191 190
192 Core::System& system; 191 Core::System& system;
193 Core::Memory::Memory& memory; 192 MaxwellDeviceMemoryManager& memory;
194 Core::DeviceMemory& device_memory;
195 193
196 const u64 address_space_bits; 194 const u64 address_space_bits;
197 const u64 page_bits; 195 const u64 page_bits;
@@ -218,11 +216,11 @@ private:
218 std::vector<u64> big_entries; 216 std::vector<u64> big_entries;
219 217
220 template <EntryType entry_type> 218 template <EntryType entry_type>
221 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 219 GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
222 PTEKind kind); 220 PTEKind kind);
223 221
224 template <EntryType entry_type> 222 template <EntryType entry_type>
225 GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, 223 GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size,
226 PTEKind kind); 224 PTEKind kind);
227 225
228 template <bool is_big_page> 226 template <bool is_big_page>
@@ -233,11 +231,11 @@ private:
233 231
234 Common::MultiLevelPageTable<u32> page_table; 232 Common::MultiLevelPageTable<u32> page_table;
235 Common::RangeMap<GPUVAddr, PTEKind> kind_map; 233 Common::RangeMap<GPUVAddr, PTEKind> kind_map;
236 Common::VirtualBuffer<u32> big_page_table_cpu; 234 Common::VirtualBuffer<u32> big_page_table_dev;
237 235
238 std::vector<u64> big_page_continuous; 236 std::vector<u64> big_page_continuous;
239 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; 237 boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{};
240 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; 238 boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{};
241 239
242 mutable std::mutex guard; 240 mutable std::mutex guard;
243 241
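With the signature change above, mappings are now expressed against device addresses rather than CPU virtual addresses. A hypothetical call site, with placeholder addresses and the defaulted arguments spelled out:

    // Map `size` bytes of device memory at dev_addr into the GPU address space
    // at gpu_base (illustrative values; PTEKind/is_big_pages defaults shown).
    const GPUVAddr mapped = memory_manager.Map(gpu_base, dev_addr, size,
                                               Tegra::PTEKind::INVALID,
                                               /*is_big_pages=*/true);
    // ... use the mapping, then release it ...
    memory_manager.Unmap(mapped, size);
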
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index a64404ce4..4861b123a 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -18,9 +18,9 @@
18 18
19#include "common/assert.h" 19#include "common/assert.h"
20#include "common/settings.h" 20#include "common/settings.h"
21#include "core/memory.h"
22#include "video_core/control/channel_state_cache.h" 21#include "video_core/control/channel_state_cache.h"
23#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
23#include "video_core/host1x/gpu_device_memory_manager.h"
24#include "video_core/memory_manager.h" 24#include "video_core/memory_manager.h"
25#include "video_core/rasterizer_interface.h" 25#include "video_core/rasterizer_interface.h"
26#include "video_core/texture_cache/slot_vector.h" 26#include "video_core/texture_cache/slot_vector.h"
@@ -102,18 +102,19 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo
102class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 102class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
103public: 103public:
104 explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, 104 explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_,
105 Core::Memory::Memory& cpu_memory_) 105 Tegra::MaxwellDeviceMemoryManager& device_memory_)
106 : rasterizer{rasterizer_}, 106 : rasterizer{rasterizer_},
107 // Use reinterpret_cast instead of static_cast as workaround for 107 // Use reinterpret_cast instead of static_cast as workaround for
108 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) 108 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
109 cpu_memory{cpu_memory_}, streams{{ 109 device_memory{device_memory_},
110 {CounterStream{reinterpret_cast<QueryCache&>(*this), 110 streams{{
111 VideoCore::QueryType::SamplesPassed}}, 111 {CounterStream{reinterpret_cast<QueryCache&>(*this),
112 {CounterStream{reinterpret_cast<QueryCache&>(*this), 112 VideoCore::QueryType::SamplesPassed}},
113 VideoCore::QueryType::PrimitivesGenerated}}, 113 {CounterStream{reinterpret_cast<QueryCache&>(*this),
114 {CounterStream{reinterpret_cast<QueryCache&>(*this), 114 VideoCore::QueryType::PrimitivesGenerated}},
115 VideoCore::QueryType::TfbPrimitivesWritten}}, 115 {CounterStream{reinterpret_cast<QueryCache&>(*this),
116 }} { 116 VideoCore::QueryType::TfbPrimitivesWritten}},
117 }} {
117 (void)slot_async_jobs.insert(); // Null value 118 (void)slot_async_jobs.insert(); // Null value
118 } 119 }
119 120
@@ -322,13 +323,14 @@ private:
322 local_lock.unlock(); 323 local_lock.unlock();
323 if (timestamp) { 324 if (timestamp) {
324 u64 timestamp_value = *timestamp; 325 u64 timestamp_value = *timestamp;
325 cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); 326 device_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value,
326 cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); 327 sizeof(u64));
328 device_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
327 rasterizer.InvalidateRegion(address, sizeof(u64) * 2, 329 rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
328 VideoCommon::CacheType::NoQueryCache); 330 VideoCommon::CacheType::NoQueryCache);
329 } else { 331 } else {
330 u32 small_value = static_cast<u32>(value); 332 u32 small_value = static_cast<u32>(value);
331 cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); 333 device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
332 rasterizer.InvalidateRegion(address, sizeof(u32), 334 rasterizer.InvalidateRegion(address, sizeof(u32),
333 VideoCommon::CacheType::NoQueryCache); 335 VideoCommon::CacheType::NoQueryCache);
334 } 336 }
@@ -342,7 +344,7 @@ private:
342 SlotVector<AsyncJob> slot_async_jobs; 344 SlotVector<AsyncJob> slot_async_jobs;
343 345
344 VideoCore::RasterizerInterface& rasterizer; 346 VideoCore::RasterizerInterface& rasterizer;
345 Core::Memory::Memory& cpu_memory; 347 Tegra::MaxwellDeviceMemoryManager& device_memory;
346 348
347 mutable std::recursive_mutex mutex; 349 mutable std::recursive_mutex mutex;
348 350
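The two WriteBlockUnsafe calls above imply a 16-byte guest report (value at offset 0, timestamp at offset 8) when a timestamp is present, and a bare 32-bit value otherwise. A sketch of that layout, stated as an assumption derived from those offsets:

    #include "common/common_types.h"

    // Assumed layout of a timestamped query report in device memory.
    struct TimestampedQueryReport {
        u64 value;     // written at address + 0
        u64 timestamp; // written at address + 8
    };
    static_assert(sizeof(TimestampedQueryReport) == 2 * sizeof(u64));
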
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 1d786b3a7..aca6a6447 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23 23
24class QueryBase { 24class QueryBase {
25public: 25public:
26 VAddr guest_address{}; 26 DAddr guest_address{};
27 QueryFlagBits flags{}; 27 QueryFlagBits flags{};
28 u64 value{}; 28 u64 value{};
29 29
@@ -32,7 +32,7 @@ protected:
32 QueryBase() = default; 32 QueryBase() = default;
33 33
34 // Parameterized constructor 34 // Parameterized constructor
35 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) 35 QueryBase(DAddr address, QueryFlagBits flags_, u64 value_)
36 : guest_address(address), flags(flags_), value{value_} {} 36 : guest_address(address), flags(flags_), value{value_} {}
37}; 37};
38 38
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 94f0c4466..08b779055 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -15,9 +15,9 @@
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/scope_exit.h" 16#include "common/scope_exit.h"
17#include "common/settings.h" 17#include "common/settings.h"
18#include "core/memory.h"
19#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h" 19#include "video_core/gpu.h"
20#include "video_core/host1x/gpu_device_memory_manager.h"
21#include "video_core/memory_manager.h" 21#include "video_core/memory_manager.h"
22#include "video_core/query_cache/bank_base.h" 22#include "video_core/query_cache/bank_base.h"
23#include "video_core/query_cache/query_base.h" 23#include "video_core/query_cache/query_base.h"
@@ -113,9 +113,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
113 using RuntimeType = typename Traits::RuntimeType; 113 using RuntimeType = typename Traits::RuntimeType;
114 114
115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, 115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
116 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) 116 Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
117 Tegra::GPU& gpu_)
117 : owner{owner_}, rasterizer{rasterizer_}, 118 : owner{owner_}, rasterizer{rasterizer_},
118 cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { 119 device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
119 streamer_mask = 0; 120 streamer_mask = 0;
120 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { 121 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
121 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); 122 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -158,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
158 159
159 QueryCacheBase<Traits>* owner; 160 QueryCacheBase<Traits>* owner;
160 VideoCore::RasterizerInterface& rasterizer; 161 VideoCore::RasterizerInterface& rasterizer;
161 Core::Memory::Memory& cpu_memory; 162 Tegra::MaxwellDeviceMemoryManager& device_memory;
162 RuntimeType& runtime; 163 RuntimeType& runtime;
163 Tegra::GPU& gpu; 164 Tegra::GPU& gpu;
164 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; 165 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
@@ -171,10 +172,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
171template <typename Traits> 172template <typename Traits>
172QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, 173QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
173 VideoCore::RasterizerInterface& rasterizer_, 174 VideoCore::RasterizerInterface& rasterizer_,
174 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) 175 Tegra::MaxwellDeviceMemoryManager& device_memory_,
176 RuntimeType& runtime_)
175 : cached_queries{} { 177 : cached_queries{} {
176 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( 178 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
177 this, rasterizer_, cpu_memory_, runtime_, gpu_); 179 this, rasterizer_, device_memory_, runtime_, gpu_);
178} 180}
179 181
180template <typename Traits> 182template <typename Traits>
@@ -240,7 +242,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
240 if (!cpu_addr_opt) [[unlikely]] { 242 if (!cpu_addr_opt) [[unlikely]] {
241 return; 243 return;
242 } 244 }
243 VAddr cpu_addr = *cpu_addr_opt; 245 DAddr cpu_addr = *cpu_addr_opt;
244 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); 246 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
245 auto* query = streamer->GetQuery(new_query_id); 247 auto* query = streamer->GetQuery(new_query_id);
246 if (is_fence) { 248 if (is_fence) {
@@ -250,13 +252,12 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
250 query_location.stream_id.Assign(static_cast<u32>(streamer_id)); 252 query_location.stream_id.Assign(static_cast<u32>(streamer_id));
251 query_location.query_id.Assign(static_cast<u32>(new_query_id)); 253 query_location.query_id.Assign(static_cast<u32>(new_query_id));
252 const auto gen_caching_indexing = [](VAddr cur_addr) { 254 const auto gen_caching_indexing = [](VAddr cur_addr) {
253 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 255 return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS,
254 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 256 static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK));
255 }; 257 };
256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); 258 u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); 259 u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence; 260 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
259
260 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, 261 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
261 pointer, pointer_timestamp] { 262 pointer, pointer_timestamp] {
262 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { 263 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
@@ -323,8 +324,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
323template <typename Traits> 324template <typename Traits>
324void QueryCacheBase<Traits>::UnregisterPending() { 325void QueryCacheBase<Traits>::UnregisterPending() {
325 const auto gen_caching_indexing = [](VAddr cur_addr) { 326 const auto gen_caching_indexing = [](VAddr cur_addr) {
326 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 327 return std::make_pair<u64, u32>(cur_addr >> Core::DEVICE_PAGEBITS,
327 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 328 static_cast<u32>(cur_addr & Core::DEVICE_PAGEMASK));
328 }; 329 };
329 std::scoped_lock lock(cache_mutex); 330 std::scoped_lock lock(cache_mutex);
330 for (QueryLocation loc : impl->pending_unregister) { 331 for (QueryLocation loc : impl->pending_unregister) {
@@ -388,7 +389,7 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
388 } 389 }
389 VAddr cpu_addr = *cpu_addr_opt; 390 VAddr cpu_addr = *cpu_addr_opt;
390 std::scoped_lock lock(cache_mutex); 391 std::scoped_lock lock(cache_mutex);
391 auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); 392 auto it1 = cached_queries.find(cpu_addr >> Core::DEVICE_PAGEBITS);
392 if (it1 == cached_queries.end()) { 393 if (it1 == cached_queries.end()) {
393 return VideoCommon::LookupData{ 394 return VideoCommon::LookupData{
394 .address = cpu_addr, 395 .address = cpu_addr,
@@ -396,10 +397,10 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
396 }; 397 };
397 } 398 }
398 auto& sub_container = it1->second; 399 auto& sub_container = it1->second;
399 auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); 400 auto it_current = sub_container.find(cpu_addr & Core::DEVICE_PAGEMASK);
400 401
401 if (it_current == sub_container.end()) { 402 if (it_current == sub_container.end()) {
402 auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); 403 auto it_current_2 = sub_container.find((cpu_addr & Core::DEVICE_PAGEMASK) + 4);
403 if (it_current_2 == sub_container.end()) { 404 if (it_current_2 == sub_container.end()) {
404 return VideoCommon::LookupData{ 405 return VideoCommon::LookupData{
405 .address = cpu_addr, 406 .address = cpu_addr,
@@ -559,7 +560,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
559 } 560 }
560 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && 561 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
561 False(query_base->flags & QueryFlagBits::IsGuestSynced)) { 562 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
562 auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); 563 auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
563 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { 564 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
564 std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); 565 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
565 return false; 566 return false;
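Cache entries above are keyed by a device page number plus an offset inside that page. A standalone sketch of the split, assuming Core::DEVICE_PAGEBITS/DEVICE_PAGEMASK are the usual power-of-two page constants and DAddr is the device-address alias used throughout this change:

    #include <utility>
    #include "common/common_types.h"

    // Split a device address into (page number, offset inside the page).
    constexpr std::pair<u64, u32> CachingIndex(DAddr addr) {
        return {addr >> Core::DEVICE_PAGEBITS,
                static_cast<u32>(addr & Core::DEVICE_PAGEMASK)};
    }
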
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
index 07be421c6..c12fb75ef 100644
--- a/src/video_core/query_cache/query_cache_base.h
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -13,15 +13,11 @@
13#include "common/assert.h" 13#include "common/assert.h"
14#include "common/bit_field.h" 14#include "common/bit_field.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "core/memory.h"
17#include "video_core/control/channel_state_cache.h" 16#include "video_core/control/channel_state_cache.h"
17#include "video_core/host1x/gpu_device_memory_manager.h"
18#include "video_core/query_cache/query_base.h" 18#include "video_core/query_cache/query_base.h"
19#include "video_core/query_cache/types.h" 19#include "video_core/query_cache/types.h"
20 20
21namespace Core::Memory {
22class Memory;
23}
24
25namespace VideoCore { 21namespace VideoCore {
26class RasterizerInterface; 22class RasterizerInterface;
27} 23}
@@ -53,7 +49,8 @@ public:
53 }; 49 };
54 50
55 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, 51 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); 52 Tegra::MaxwellDeviceMemoryManager& device_memory_,
53 RuntimeType& runtime_);
57 54
58 ~QueryCacheBase(); 55 ~QueryCacheBase();
59 56
@@ -125,10 +122,10 @@ protected:
125 const u64 addr_begin = addr; 122 const u64 addr_begin = addr;
126 const u64 addr_end = addr_begin + size; 123 const u64 addr_end = addr_begin + size;
127 124
128 const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; 125 const u64 page_end = addr_end >> Core::DEVICE_PAGEBITS;
129 std::scoped_lock lock(cache_mutex); 126 std::scoped_lock lock(cache_mutex);
130 for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { 127 for (u64 page = addr_begin >> Core::DEVICE_PAGEBITS; page <= page_end; ++page) {
131 const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; 128 const u64 page_start = page << Core::DEVICE_PAGEBITS;
132 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { 129 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
133 const u64 cache_begin = page_start + query_location; 130 const u64 cache_begin = page_start + query_location;
134 const u64 cache_end = cache_begin + sizeof(u32); 131 const u64 cache_end = cache_begin + sizeof(u32);
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
deleted file mode 100644
index f200a650f..000000000
--- a/src/video_core/rasterizer_accelerated.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <atomic>
5
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/div_ceil.h"
9#include "core/memory.h"
10#include "video_core/rasterizer_accelerated.h"
11
12namespace VideoCore {
13
14using namespace Core::Memory;
15
16RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_)
17 : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {}
18
19RasterizerAccelerated::~RasterizerAccelerated() = default;
20
21void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
22 u64 uncache_begin = 0;
23 u64 cache_begin = 0;
24 u64 uncache_bytes = 0;
25 u64 cache_bytes = 0;
26
27 std::atomic_thread_fence(std::memory_order_acquire);
28 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
29 for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
30 std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
31
32 if (delta > 0) {
33 ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
34 } else if (delta < 0) {
35 ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
36 } else {
37 ASSERT_MSG(false, "Delta must be non-zero!");
38 }
39
40 // Adds or subtracts 1, as count is a unsigned 8-bit value
41 count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
42
43 // Assume delta is either -1 or 1
44 if (count.load(std::memory_order::relaxed) == 0) {
45 if (uncache_bytes == 0) {
46 uncache_begin = page;
47 }
48 uncache_bytes += YUZU_PAGESIZE;
49 } else if (uncache_bytes > 0) {
50 cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes,
51 false);
52 uncache_bytes = 0;
53 }
54 if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
55 if (cache_bytes == 0) {
56 cache_begin = page;
57 }
58 cache_bytes += YUZU_PAGESIZE;
59 } else if (cache_bytes > 0) {
60 cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
61 cache_bytes = 0;
62 }
63 }
64 if (uncache_bytes > 0) {
65 cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false);
66 }
67 if (cache_bytes > 0) {
68 cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
69 }
70}
71
72} // namespace VideoCore
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
deleted file mode 100644
index e6c0ea87a..000000000
--- a/src/video_core/rasterizer_accelerated.h
+++ /dev/null
@@ -1,49 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <atomic>
8
9#include "common/common_types.h"
10#include "video_core/rasterizer_interface.h"
11
12namespace Core::Memory {
13class Memory;
14}
15
16namespace VideoCore {
17
18/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
19class RasterizerAccelerated : public RasterizerInterface {
20public:
21 explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_);
22 ~RasterizerAccelerated() override;
23
24 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
25
26private:
27 class CacheEntry final {
28 public:
29 CacheEntry() = default;
30
31 std::atomic_uint16_t& Count(std::size_t page) {
32 return values[page & 3];
33 }
34
35 const std::atomic_uint16_t& Count(std::size_t page) const {
36 return values[page & 3];
37 }
38
39 private:
40 std::array<std::atomic_uint16_t, 4> values{};
41 };
42 static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
43
44 using CachedPages = std::array<CacheEntry, 0x2000000>;
45 std::unique_ptr<CachedPages> cached_pages;
46 Core::Memory::Memory& cpu_memory;
47};
48
49} // namespace VideoCore
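The deleted helper kept a 16-bit reference count per guest page (four counts packed into each 8-byte CacheEntry) and toggled RasterizerMarkRegionCached whenever a page's count crossed between zero and one. A condensed sketch of that counting rule only; this illustrates the removed behaviour, not the replacement code:

    #include <atomic>
    #include <cstdint>

    // delta is +1 when an object starts touching the page, -1 when it stops.
    template <typename MarkCached>
    void OnPageRefCountChanged(std::atomic_uint16_t& count, int delta,
                               MarkCached&& mark_region_cached) {
        const std::uint16_t previous = count.fetch_add(
            static_cast<std::uint16_t>(delta), std::memory_order_release);
        if (delta > 0 && previous == 0) {
            mark_region_cached(true);  // first reference: mark the page cached
        } else if (delta < 0 && previous == 1) {
            mark_region_cached(false); // last reference gone: unmark it
        }
    }
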
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 49224ca85..8fa4e4d9a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -86,35 +86,35 @@ public:
86 virtual void FlushAll() = 0; 86 virtual void FlushAll() = 0;
87 87
88 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 88 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
89 virtual void FlushRegion(VAddr addr, u64 size, 89 virtual void FlushRegion(DAddr addr, u64 size,
90 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 90 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
91 91
92 /// Check if the the specified memory area requires flushing to CPU Memory. 92 /// Check if the the specified memory area requires flushing to CPU Memory.
93 virtual bool MustFlushRegion(VAddr addr, u64 size, 93 virtual bool MustFlushRegion(DAddr addr, u64 size,
94 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 94 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
95 95
96 virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; 96 virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0;
97 97
98 /// Notify rasterizer that any caches of the specified region should be invalidated 98 /// Notify rasterizer that any caches of the specified region should be invalidated
99 virtual void InvalidateRegion(VAddr addr, u64 size, 99 virtual void InvalidateRegion(DAddr addr, u64 size,
100 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 100 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
101 101
102 virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { 102 virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
103 for (const auto& [cpu_addr, size] : sequences) { 103 for (const auto& [cpu_addr, size] : sequences) {
104 InvalidateRegion(cpu_addr, size); 104 InvalidateRegion(cpu_addr, size);
105 } 105 }
106 } 106 }
107 107
108 /// Notify rasterizer that any caches of the specified region are desync with guest 108 /// Notify rasterizer that any caches of the specified region are desync with guest
109 virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; 109 virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0;
110 110
111 virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; 111 virtual bool OnCPUWrite(PAddr addr, u64 size) = 0;
112 112
113 /// Sync memory between guest and host. 113 /// Sync memory between guest and host.
114 virtual void InvalidateGPUCache() = 0; 114 virtual void InvalidateGPUCache() = 0;
115 115
116 /// Unmap memory range 116 /// Unmap memory range
117 virtual void UnmapMemory(VAddr addr, u64 size) = 0; 117 virtual void UnmapMemory(DAddr addr, u64 size) = 0;
118 118
119 /// Remap GPU memory range. This means underneath backing memory changed 119 /// Remap GPU memory range. This means underneath backing memory changed
120 virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; 120 virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
@@ -122,7 +122,7 @@ public:
122 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 122 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
123 /// and invalidated 123 /// and invalidated
124 virtual void FlushAndInvalidateRegion( 124 virtual void FlushAndInvalidateRegion(
125 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 125 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
126 126
127 /// Notify the host renderer to wait for previous primitive and compute operations. 127 /// Notify the host renderer to wait for previous primitive and compute operations.
128 virtual void WaitForIdle() = 0; 128 virtual void WaitForIdle() = 0;
@@ -157,13 +157,10 @@ public:
157 157
158 /// Attempt to use a faster method to display the framebuffer to screen 158 /// Attempt to use a faster method to display the framebuffer to screen
159 [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, 159 [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config,
160 VAddr framebuffer_addr, u32 pixel_stride) { 160 DAddr framebuffer_addr, u32 pixel_stride) {
161 return false; 161 return false;
162 } 162 }
163 163
164 /// Increase/decrease the number of object in pages touching the specified region
165 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
166
167 /// Initialize disk cached resources for the game being emulated 164 /// Initialize disk cached resources for the game being emulated
168 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 165 virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
169 const DiskResourceLoadCallback& callback) {} 166 const DiskResourceLoadCallback& callback) {}
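InnerInvalidation above lets a backend batch several invalidations at once; the default implementation simply forwards each pair to InvalidateRegion. A caller-side sketch with hypothetical device ranges:

    #include <array>
    #include <utility>

    // Two desynced device ranges invalidated through the batched entry point.
    const std::array sequences{
        std::pair<DAddr, std::size_t>{0x1000, 0x200},
        std::pair<DAddr, std::size_t>{0x8000, 0x100},
    };
    rasterizer.InnerInvalidation(sequences);
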
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 4f1d5b548..abfabb65b 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -2,7 +2,6 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/alignment.h" 4#include "common/alignment.h"
5#include "core/memory.h"
6#include "video_core/control/channel_state.h" 5#include "video_core/control/channel_state.h"
7#include "video_core/host1x/host1x.h" 6#include "video_core/host1x/host1x.h"
8#include "video_core/memory_manager.h" 7#include "video_core/memory_manager.h"
@@ -19,8 +18,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
19 return true; 18 return true;
20} 19}
21 20
22RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) 21RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {}
23 : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {}
24RasterizerNull::~RasterizerNull() = default; 22RasterizerNull::~RasterizerNull() = default;
25 23
26void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} 24void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
@@ -45,25 +43,25 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
45 u32 size) {} 43 u32 size) {}
46void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} 44void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {}
47void RasterizerNull::FlushAll() {} 45void RasterizerNull::FlushAll() {}
48void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 46void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
49bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { 47bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {
50 return false; 48 return false;
51} 49}
52void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 50void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
53bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { 51bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) {
54 return false; 52 return false;
55} 53}
56void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} 54void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {}
57VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { 55VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) {
58 VideoCore::RasterizerDownloadArea new_area{ 56 VideoCore::RasterizerDownloadArea new_area{
59 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), 57 .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE),
60 .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), 58 .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE),
61 .preemtive = true, 59 .preemtive = true,
62 }; 60 };
63 return new_area; 61 return new_area;
64} 62}
65void RasterizerNull::InvalidateGPUCache() {} 63void RasterizerNull::InvalidateGPUCache() {}
66void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} 64void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {}
67void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} 65void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {}
68void RasterizerNull::SignalFence(std::function<void()>&& func) { 66void RasterizerNull::SignalFence(std::function<void()>&& func) {
69 func(); 67 func();
@@ -78,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) {
78} 76}
79void RasterizerNull::SignalReference() {} 77void RasterizerNull::SignalReference() {}
80void RasterizerNull::ReleaseFences(bool) {} 78void RasterizerNull::ReleaseFences(bool) {}
81void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 79void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {}
82void RasterizerNull::WaitForIdle() {} 80void RasterizerNull::WaitForIdle() {}
83void RasterizerNull::FragmentBarrier() {} 81void RasterizerNull::FragmentBarrier() {}
84void RasterizerNull::TiledCacheBarrier() {} 82void RasterizerNull::TiledCacheBarrier() {}
@@ -95,7 +93,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac
95void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 93void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
96 std::span<const u8> memory) {} 94 std::span<const u8> memory) {}
97bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, 95bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config,
98 VAddr framebuffer_addr, u32 pixel_stride) { 96 DAddr framebuffer_addr, u32 pixel_stride) {
99 return true; 97 return true;
100} 98}
101void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, 99void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 23001eeb8..a5789604f 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -6,7 +6,6 @@
6#include "common/common_types.h" 6#include "common/common_types.h"
7#include "video_core/control/channel_state_cache.h" 7#include "video_core/control/channel_state_cache.h"
8#include "video_core/engines/maxwell_dma.h" 8#include "video_core/engines/maxwell_dma.h"
9#include "video_core/rasterizer_accelerated.h"
10#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
11 10
12namespace Core { 11namespace Core {
@@ -32,10 +31,10 @@ public:
32 } 31 }
33}; 32};
34 33
35class RasterizerNull final : public VideoCore::RasterizerAccelerated, 34class RasterizerNull final : public VideoCore::RasterizerInterface,
36 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 35 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
37public: 36public:
38 explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); 37 explicit RasterizerNull(Tegra::GPU& gpu);
39 ~RasterizerNull() override; 38 ~RasterizerNull() override;
40 39
41 void Draw(bool is_indexed, u32 instance_count) override; 40 void Draw(bool is_indexed, u32 instance_count) override;
@@ -48,17 +47,17 @@ public:
48 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 47 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
49 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 48 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
50 void FlushAll() override; 49 void FlushAll() override;
51 void FlushRegion(VAddr addr, u64 size, 50 void FlushRegion(DAddr addr, u64 size,
52 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 51 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
53 bool MustFlushRegion(VAddr addr, u64 size, 52 bool MustFlushRegion(DAddr addr, u64 size,
54 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 53 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
55 void InvalidateRegion(VAddr addr, u64 size, 54 void InvalidateRegion(DAddr addr, u64 size,
56 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 55 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
57 void OnCacheInvalidation(VAddr addr, u64 size) override; 56 void OnCacheInvalidation(DAddr addr, u64 size) override;
58 bool OnCPUWrite(VAddr addr, u64 size) override; 57 bool OnCPUWrite(DAddr addr, u64 size) override;
59 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 58 VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override;
60 void InvalidateGPUCache() override; 59 void InvalidateGPUCache() override;
61 void UnmapMemory(VAddr addr, u64 size) override; 60 void UnmapMemory(DAddr addr, u64 size) override;
62 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 61 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
63 void SignalFence(std::function<void()>&& func) override; 62 void SignalFence(std::function<void()>&& func) override;
64 void SyncOperation(std::function<void()>&& func) override; 63 void SyncOperation(std::function<void()>&& func) override;
@@ -66,7 +65,7 @@ public:
66 void SignalReference() override; 65 void SignalReference() override;
67 void ReleaseFences(bool force) override; 66 void ReleaseFences(bool force) override;
68 void FlushAndInvalidateRegion( 67 void FlushAndInvalidateRegion(
69 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 68 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
70 void WaitForIdle() override; 69 void WaitForIdle() override;
71 void FragmentBarrier() override; 70 void FragmentBarrier() override;
72 void TiledCacheBarrier() override; 71 void TiledCacheBarrier() override;
@@ -78,7 +77,7 @@ public:
78 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 77 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
79 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 78 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
80 std::span<const u8> memory) override; 79 std::span<const u8> memory) override;
81 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 80 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
82 u32 pixel_stride) override; 81 u32 pixel_stride) override;
83 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 82 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
84 const VideoCore::DiskResourceLoadCallback& callback) override; 83 const VideoCore::DiskResourceLoadCallback& callback) override;
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp
index be92cc2f4..078feb925 100644
--- a/src/video_core/renderer_null/renderer_null.cpp
+++ b/src/video_core/renderer_null/renderer_null.cpp
@@ -7,10 +7,9 @@
7 7
8namespace Null { 8namespace Null {
9 9
10RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, 10RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
11 Tegra::GPU& gpu,
12 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 11 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
13 : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} 12 : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {}
14 13
15RendererNull::~RendererNull() = default; 14RendererNull::~RendererNull() = default;
16 15
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h
index 967ff5645..9531b43f6 100644
--- a/src/video_core/renderer_null/renderer_null.h
+++ b/src/video_core/renderer_null/renderer_null.h
@@ -13,8 +13,7 @@ namespace Null {
13 13
14class RendererNull final : public VideoCore::RendererBase { 14class RendererNull final : public VideoCore::RendererBase {
15public: 15public:
16 explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, 16 explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
17 Tegra::GPU& gpu,
18 std::unique_ptr<Core::Frontend::GraphicsContext> context); 17 std::unique_ptr<Core::Frontend::GraphicsContext> context);
19 ~RendererNull() override; 18 ~RendererNull() override;
20 19
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 517ac14dd..ade72e1f9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{
47} // Anonymous namespace 47} // Anonymous namespace
48 48
49Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) 49Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
50 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} 50 : VideoCommon::BufferBase(null_params) {}
51 51
52Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 52Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_)
53 VAddr cpu_addr_, u64 size_bytes_) 53 : VideoCommon::BufferBase(cpu_addr_, size_bytes_) {
54 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
55 buffer.Create(); 54 buffer.Create();
56 if (runtime.device.HasDebuggingToolAttached()) { 55 if (runtime.device.HasDebuggingToolAttached()) {
57 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); 56 const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr());
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 2c18de166..af34c272b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_cache_base.h" 11#include "video_core/buffer_cache/buffer_cache_base.h"
12#include "video_core/buffer_cache/memory_tracker_base.h" 12#include "video_core/buffer_cache/memory_tracker_base.h"
13#include "video_core/rasterizer_interface.h"
14#include "video_core/renderer_opengl/gl_device.h" 13#include "video_core/renderer_opengl/gl_device.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" 15#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
@@ -19,10 +18,9 @@ namespace OpenGL {
19 18
20class BufferCacheRuntime; 19class BufferCacheRuntime;
21 20
22class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { 21class Buffer : public VideoCommon::BufferBase {
23public: 22public:
24 explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, 23 explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr, u64 size_bytes);
25 u64 size_bytes);
26 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); 24 explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams);
27 25
28 void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; 26 void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept;
@@ -244,7 +242,7 @@ struct BufferCacheParams {
244 using Runtime = OpenGL::BufferCacheRuntime; 242 using Runtime = OpenGL::BufferCacheRuntime;
245 using Buffer = OpenGL::Buffer; 243 using Buffer = OpenGL::Buffer;
246 using Async_Buffer = OpenGL::StagingBufferMap; 244 using Async_Buffer = OpenGL::StagingBufferMap;
247 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; 245 using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>;
248 246
249 static constexpr bool IS_OPENGL = true; 247 static constexpr bool IS_OPENGL = true;
250 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; 248 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index fef7360ed..2147d587f 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -35,8 +35,9 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
35 35
36} // Anonymous namespace 36} // Anonymous namespace
37 37
38QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) 38QueryCache::QueryCache(RasterizerOpenGL& rasterizer_,
39 : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { 39 Tegra::MaxwellDeviceMemoryManager& device_memory_)
40 : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} {
40 EnableCounters(); 41 EnableCounters();
41} 42}
42 43
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 0721e0b3d..38118f355 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/host1x/gpu_device_memory_manager.h"
11#include "video_core/query_cache.h" 12#include "video_core/query_cache.h"
12#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -28,7 +29,8 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
28class QueryCache final 29class QueryCache final
29 : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { 30 : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> {
30public: 31public:
31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); 32 explicit QueryCache(RasterizerOpenGL& rasterizer_,
33 Tegra::MaxwellDeviceMemoryManager& device_memory_);
32 ~QueryCache(); 34 ~QueryCache();
33 35
34 OGLQuery AllocateQuery(VideoCore::QueryType type); 36 OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7a5fad735..d5354ef2d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy
70} // Anonymous namespace 70} // Anonymous namespace
71 71
72RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 72RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
73 Core::Memory::Memory& cpu_memory_, const Device& device_, 73 Tegra::MaxwellDeviceMemoryManager& device_memory_,
74 ScreenInfo& screen_info_, ProgramManager& program_manager_, 74 const Device& device_, ScreenInfo& screen_info_,
75 StateTracker& state_tracker_) 75 ProgramManager& program_manager_, StateTracker& state_tracker_)
76 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), 76 : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_),
77 program_manager(program_manager_), state_tracker(state_tracker_), 77 program_manager(program_manager_), state_tracker(state_tracker_),
78 texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), 78 texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
79 texture_cache(texture_cache_runtime, *this), 79 texture_cache(texture_cache_runtime, device_memory_),
80 buffer_cache_runtime(device, staging_buffer_pool), 80 buffer_cache_runtime(device, staging_buffer_pool),
81 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 81 buffer_cache(device_memory_, buffer_cache_runtime),
82 shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, 82 shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache,
83 state_tracker, gpu.ShaderNotify()), 83 program_manager, state_tracker, gpu.ShaderNotify()),
84 query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), 84 query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache),
85 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 85 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
86 blit_image(program_manager_) {} 86 blit_image(program_manager_) {}
87 87
@@ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
475 475
476void RasterizerOpenGL::FlushAll() {} 476void RasterizerOpenGL::FlushAll() {}
477 477
478void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 478void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
479 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 479 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
480 if (addr == 0 || size == 0) { 480 if (addr == 0 || size == 0) {
481 return; 481 return;
@@ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
493 } 493 }
494} 494}
495 495
496bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 496bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
497 if ((True(which & VideoCommon::CacheType::BufferCache))) { 497 if ((True(which & VideoCommon::CacheType::BufferCache))) {
498 std::scoped_lock lock{buffer_cache.mutex}; 498 std::scoped_lock lock{buffer_cache.mutex};
499 if (buffer_cache.IsRegionGpuModified(addr, size)) { 499 if (buffer_cache.IsRegionGpuModified(addr, size)) {
@@ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
510 return false; 510 return false;
511} 511}
512 512
513VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { 513VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) {
514 { 514 {
515 std::scoped_lock lock{texture_cache.mutex}; 515 std::scoped_lock lock{texture_cache.mutex};
516 auto area = texture_cache.GetFlushArea(addr, size); 516 auto area = texture_cache.GetFlushArea(addr, size);
@@ -526,14 +526,14 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64
526 } 526 }
527 } 527 }
528 VideoCore::RasterizerDownloadArea new_area{ 528 VideoCore::RasterizerDownloadArea new_area{
529 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), 529 .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE),
530 .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), 530 .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE),
531 .preemtive = true, 531 .preemtive = true,
532 }; 532 };
533 return new_area; 533 return new_area;
534} 534}
535 535
536void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 536void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
537 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 537 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
538 if (addr == 0 || size == 0) { 538 if (addr == 0 || size == 0) {
539 return; 539 return;
@@ -554,7 +554,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
554 } 554 }
555} 555}
556 556
557bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { 557bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) {
558 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 558 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
559 if (addr == 0 || size == 0) { 559 if (addr == 0 || size == 0) {
560 return false; 560 return false;
@@ -576,8 +576,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
576 return false; 576 return false;
577} 577}
578 578
579void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { 579void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) {
580 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 580 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
581
581 if (addr == 0 || size == 0) { 582 if (addr == 0 || size == 0) {
582 return; 583 return;
583 } 584 }
@@ -596,7 +597,7 @@ void RasterizerOpenGL::InvalidateGPUCache() {
596 gpu.InvalidateGPUCache(); 597 gpu.InvalidateGPUCache();
597} 598}
598 599
599void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { 600void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) {
600 { 601 {
601 std::scoped_lock lock{texture_cache.mutex}; 602 std::scoped_lock lock{texture_cache.mutex};
602 texture_cache.UnmapMemory(addr, size); 603 texture_cache.UnmapMemory(addr, size);
@@ -635,7 +636,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) {
635 fence_manager.WaitPendingFences(force); 636 fence_manager.WaitPendingFences(force);
636} 637}
637 638
638void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, 639void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size,
639 VideoCommon::CacheType which) { 640 VideoCommon::CacheType which) {
640 if (Settings::IsGPULevelExtreme()) { 641 if (Settings::IsGPULevelExtreme()) {
641 FlushRegion(addr, size, which); 642 FlushRegion(addr, size, which);
@@ -739,7 +740,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
739} 740}
740 741
741bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, 742bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
742 VAddr framebuffer_addr, u32 pixel_stride) { 743 DAddr framebuffer_addr, u32 pixel_stride) {
743 if (framebuffer_addr == 0) { 744 if (framebuffer_addr == 0) {
744 return false; 745 return false;
745 } 746 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ce3460938..34aa73526 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -14,7 +14,6 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "video_core/control/channel_state_cache.h" 15#include "video_core/control/channel_state_cache.h"
16#include "video_core/engines/maxwell_dma.h" 16#include "video_core/engines/maxwell_dma.h"
17#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_opengl/blit_image.h" 18#include "video_core/renderer_opengl/blit_image.h"
20#include "video_core/renderer_opengl/gl_buffer_cache.h" 19#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -72,13 +71,13 @@ private:
     TextureCache& texture_cache;
 };

-class RasterizerOpenGL : public VideoCore::RasterizerAccelerated,
+class RasterizerOpenGL : public VideoCore::RasterizerInterface,
                          protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                              Core::Memory::Memory& cpu_memory_, const Device& device_,
-                              ScreenInfo& screen_info_, ProgramManager& program_manager_,
-                              StateTracker& state_tracker_);
+                              Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                              const Device& device_, ScreenInfo& screen_info_,
+                              ProgramManager& program_manager_, StateTracker& state_tracker_);
     ~RasterizerOpenGL() override;

     void Draw(bool is_indexed, u32 instance_count) override;
@@ -92,17 +91,17 @@ public:
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size,
+    void FlushRegion(DAddr addr, u64 size,
                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    bool MustFlushRegion(VAddr addr, u64 size,
+    bool MustFlushRegion(DAddr addr, u64 size,
                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size,
+    VideoCore::RasterizerDownloadArea GetFlushArea(PAddr addr, u64 size) override;
+    void InvalidateRegion(DAddr addr, u64 size,
                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    void OnCacheInvalidation(VAddr addr, u64 size) override;
-    bool OnCPUWrite(VAddr addr, u64 size) override;
+    void OnCacheInvalidation(PAddr addr, u64 size) override;
+    bool OnCPUWrite(PAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
-    void UnmapMemory(VAddr addr, u64 size) override;
+    void UnmapMemory(DAddr addr, u64 size) override;
     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalFence(std::function<void()>&& func) override;
     void SyncOperation(std::function<void()>&& func) override;
@@ -110,7 +109,7 @@ public:
     void SignalReference() override;
     void ReleaseFences(bool force = true) override;
     void FlushAndInvalidateRegion(
-        VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
+        DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void WaitForIdle() override;
     void FragmentBarrier() override;
     void TiledCacheBarrier() override;
@@ -123,7 +122,7 @@ public:
     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
                                   std::span<const u8> memory) override;
-    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
+    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
                            u32 pixel_stride) override;
     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -235,6 +234,7 @@ private:
                                   VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);

     Tegra::GPU& gpu;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;

     const Device& device;
     ScreenInfo& screen_info;
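With RasterizerOpenGL no longer deriving from RasterizerAccelerated, the renderer constructs it directly against the device memory manager. A sketch of the resulting wiring, mirroring the renderer_opengl.cpp hunk further down; the surrounding objects are assumed to exist in the caller:

// Sketch, not part of the patch: the OpenGL renderer now forwards the device memory
// manager where it previously forwarded Core::Memory::Memory.
RasterizerOpenGL rasterizer(emu_window, gpu, device_memory, device, screen_info,
                            program_manager, state_tracker);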
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 30df41b7d..50462cdde 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
 }
 } // Anonymous namespace

-ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         const Device& device_, TextureCache& texture_cache_,
-                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
-                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
+ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                         Core::Frontend::EmuWindow& emu_window_, const Device& device_,
+                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
+                         ProgramManager& program_manager_, StateTracker& state_tracker_,
+                         VideoCore::ShaderNotify& shader_notify_)
+    : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_},
       texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
       state_tracker{state_tracker_}, shader_notify{shader_notify_},
       use_asynchronous_shaders{device.UseAsynchronousShaders()},
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6b9732fca..5ac413529 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -17,7 +17,7 @@

 namespace Tegra {
 class MemoryManager;
-}
+} // namespace Tegra

 namespace OpenGL {

@@ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;

 class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         const Device& device_, TextureCache& texture_cache_,
-                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
-                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
+    explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                         Core::Frontend::EmuWindow& emu_window_, const Device& device_,
+                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
+                         ProgramManager& program_manager_, StateTracker& state_tracker_,
+                         VideoCore::ShaderNotify& shader_notify_);
     ~ShaderCache();

     void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
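The same substitution happens in both shader caches: the common VideoCommon::ShaderCache base is now seeded with the device memory manager rather than a rasterizer back-reference, presumably so its cached-region tracking is driven through device addresses. A sketch of constructing the OpenGL cache under that assumption; every argument is assumed to already exist in the caller:

// Sketch, not part of the patch: the first constructor argument is now the device
// memory manager that the VideoCommon::ShaderCache base stores.
ShaderCache shader_cache(device_memory, emu_window, device, texture_cache, buffer_cache,
                         program_manager, state_tracker, shader_notify);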
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2933718b6..b75376fdb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -15,7 +15,6 @@
 #include "common/telemetry.h"
 #include "core/core_timing.h"
 #include "core/frontend/emu_window.h"
-#include "core/memory.h"
 #include "core/telemetry_session.h"
 #include "video_core/host_shaders/ffx_a_h.h"
 #include "video_core/host_shaders/ffx_fsr1_h.h"
@@ -144,12 +143,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit

 RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
                                Core::Frontend::EmuWindow& emu_window_,
-                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                               Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
                                std::unique_ptr<Core::Frontend::GraphicsContext> context_)
     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
-      emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_},
+      emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_},
       state_tracker{}, program_manager{device},
-      rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
+      rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager,
+                 state_tracker) {
     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
         glEnable(GL_DEBUG_OUTPUT);
         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
@@ -242,7 +242,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
     const u64 size_in_bytes{Tegra::Texture::CalculateSize(
         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
-    const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
+    const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
     const std::span<const u8> input_data(host_ptr, size_in_bytes);
     Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
                                      framebuffer.width, framebuffer.height, 1, block_height_log2,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b70607635..18699610a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase {
 public:
     explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_,
                             Core::Frontend::EmuWindow& emu_window_,
-                            Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                            Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
                             std::unique_ptr<Core::Frontend::GraphicsContext> context_);
     ~RendererOpenGL() override;

@@ -101,7 +101,7 @@ private:

     Core::TelemetrySession& telemetry_session;
     Core::Frontend::EmuWindow& emu_window;
-    Core::Memory::Memory& cpu_memory;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
     Tegra::GPU& gpu;

     Device device;
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index 71c783709..850c34a3a 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -12,7 +12,6 @@
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/texture_cache/texture_cache.h"
 #include "video_core/texture_cache/types.h"
 #include "video_core/vulkan_common/vulkan_device.h"

diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 100b70918..1631276c6 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl

 RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                                Core::Frontend::EmuWindow& emu_window,
-                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                               Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
     : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
-      cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
+      device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
       instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
                               Settings::values.renderer_debug.GetValue())),
       debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance)
@@ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                                render_window.GetFramebufferLayout().height),
       present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
                       surface),
-      blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
-                  scheduler, screen_info),
-      rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
+      blit_screen(device_memory, render_window, device, memory_allocator, swapchain,
+                  present_manager, scheduler, screen_info),
+      rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator,
                  state_tracker, scheduler) {
     if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
         turbo_mode.emplace(instance, dld);
@@ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     screen_info.width = framebuffer->width;
     screen_info.height = framebuffer->height;

-    const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+    const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
     const bool use_accelerated =
         rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
     RenderScreenshot(*framebuffer, use_accelerated);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 14e257cf7..11c52287a 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -7,12 +7,12 @@
 #include <string>
 #include <variant>

-#include "video_core/renderer_vulkan/vk_rasterizer.h"
-
 #include "common/dynamic_library.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
 #include "video_core/renderer_vulkan/vk_present_manager.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -42,7 +42,7 @@ class RendererVulkan final : public VideoCore::RendererBase {
 public:
     explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
                             Core::Frontend::EmuWindow& emu_window,
-                            Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+                            Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_,
                             std::unique_ptr<Core::Frontend::GraphicsContext> context_);
     ~RendererVulkan() override;

@@ -62,7 +62,7 @@ private:
     void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);

     Core::TelemetrySession& telemetry_session;
-    Core::Memory::Memory& cpu_memory;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
     Tegra::GPU& gpu;

     std::shared_ptr<Common::DynamicLibrary> library;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 60432f5ad..610f27c84 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -14,8 +14,8 @@
 #include "common/settings.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
-#include "core/memory.h"
 #include "video_core/gpu.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/host_shaders/fxaa_frag_spv.h"
 #include "video_core/host_shaders/fxaa_vert_spv.h"
 #include "video_core/host_shaders/present_bicubic_frag_spv.h"
@@ -121,11 +121,12 @@ struct BlitScreen::BufferData {
     // Unaligned image data goes here
 };

-BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
-                       const Device& device_, MemoryAllocator& memory_allocator_,
-                       Swapchain& swapchain_, PresentManager& present_manager_,
-                       Scheduler& scheduler_, const ScreenInfo& screen_info_)
-    : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
+BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                       Core::Frontend::EmuWindow& render_window_, const Device& device_,
+                       MemoryAllocator& memory_allocator_, Swapchain& swapchain_,
+                       PresentManager& present_manager_, Scheduler& scheduler_,
+                       const ScreenInfo& screen_info_)
+    : device_memory{device_memory_}, render_window{render_window_}, device{device_},
       memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
     resource_ticks.resize(image_count);
@@ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
     if (!use_accelerated) {
         const u64 image_offset = GetRawImageOffset(framebuffer);

-        const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
-        const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
+        const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
+        const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);

         // TODO(Rodrigo): Read this from HLE
         constexpr u32 block_height_log2 = 4;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 78b32416d..3eff76009 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -6,6 +6,7 @@
 #include <memory>

 #include "core/frontend/framebuffer_layout.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

@@ -13,10 +14,6 @@ namespace Core {
 class System;
 }

-namespace Core::Memory {
-class Memory;
-}
-
 namespace Core::Frontend {
 class EmuWindow;
 }
@@ -56,8 +53,9 @@ struct ScreenInfo {

 class BlitScreen {
 public:
-    explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
-                        const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
+    explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory,
+                        Core::Frontend::EmuWindow& render_window, const Device& device,
+                        MemoryAllocator& memory_manager, Swapchain& swapchain,
                         PresentManager& present_manager, Scheduler& scheduler,
                         const ScreenInfo& screen_info);
     ~BlitScreen();
@@ -109,7 +107,7 @@ private:
     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;

-    Core::Memory::Memory& cpu_memory;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
     Core::Frontend::EmuWindow& render_window;
     const Device& device;
     MemoryAllocator& memory_allocator;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 3c61799fa..31001d142 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
 } // Anonymous namespace

 Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {
+    : VideoCommon::BufferBase(null_params), tracker{4096} {
     if (runtime.device.HasNullDescriptor()) {
         return;
     }
@@ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p
     is_null = true;
 }

-Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
-               VAddr cpu_addr_, u64 size_bytes_)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
-      device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())},
-      tracker{SizeBytes()} {
+Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_)
+    : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device},
+      buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} {
     if (runtime.device.HasDebuggingToolAttached()) {
         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
     }
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index dc300d7cb..e273f4988 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -23,11 +23,10 @@ struct HostVertexBinding;

 class BufferCacheRuntime;

-class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> {
+class Buffer : public VideoCommon::BufferBase {
 public:
     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params);
-    explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
-                    VAddr cpu_addr_, u64 size_bytes_);
+    explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_);

     [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);

@@ -173,7 +172,7 @@ struct BufferCacheParams {
     using Runtime = Vulkan::BufferCacheRuntime;
     using Buffer = Vulkan::Buffer;
     using Async_Buffer = Vulkan::StagingBufferRef;
-    using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
+    using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>;

     static constexpr bool IS_OPENGL = false;
     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
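Buffers and dirty-page tracking drop their rasterizer parameter here: a Vulkan buffer is now built from an address and size alone, and MemoryTrackerBase is parameterized on the device memory manager instead of the rasterizer interface. A sketch of the resulting call shape; runtime, cpu_addr and size_bytes are assumed to be in scope:

// Sketch, not part of the patch: construct a Vulkan buffer with the new two-argument
// form; the MemoryTracker alias above now reports writes through the device memory manager.
Buffer buffer(runtime, cpu_addr, size_bytes);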
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index f2fd2670f..ec6b3a4b0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -19,6 +19,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_notify.h"
+#include "video_core/texture_cache/texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"

 #if defined(_MSC_VER) && defined(NDEBUG)
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d1841198d..1e1821b10 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -30,7 +30,6 @@
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
-#include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
@@ -299,12 +298,13 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
     return std::memcmp(&rhs, this, Size()) == 0;
 }

-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
-                             Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
+PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                             const Device& device_, Scheduler& scheduler_,
+                             DescriptorPool& descriptor_pool_,
                              GuestDescriptorQueue& guest_descriptor_queue_,
                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                              TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
+    : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_},
       descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_},
       render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
       texture_cache{texture_cache_}, shader_notify{shader_notify_},
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e323ea0fd..797700128 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -20,6 +20,7 @@
 #include "shader_recompiler/object_pool.h"
 #include "shader_recompiler/profile.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
@@ -79,7 +80,6 @@ class ComputePipeline;
 class DescriptorPool;
 class Device;
 class PipelineStatistics;
-class RasterizerVulkan;
 class RenderPassCache;
 class Scheduler;

@@ -99,8 +99,8 @@ struct ShaderPools {

 class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
-                           DescriptorPool& descriptor_pool,
+    explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device,
+                           Scheduler& scheduler, DescriptorPool& descriptor_pool,
                            GuestDescriptorQueue& guest_descriptor_queue,
                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                            TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ad4caf688..7cbc9c73c 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -13,9 +13,10 @@

 #include "common/bit_util.h"
 #include "common/common_types.h"
-#include "core/memory.h"
 #include "video_core/engines/draw_manager.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/query_cache/query_cache.h"
+#include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -102,7 +103,7 @@ private:
 using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>;

 struct HostSyncValues {
-    VAddr address;
+    DAddr address;
     size_t size;
     size_t offset;

@@ -317,7 +318,7 @@ public:
         pending_sync.clear();
     }

-    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+    size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
                         [[maybe_unused]] std::optional<u32> subreport) override {
         PauseCounter();
         auto index = BuildQuery();
@@ -738,7 +739,7 @@ public:
         pending_sync.clear();
     }

-    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+    size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
                         std::optional<u32> subreport_) override {
         auto index = BuildQuery();
         auto* new_query = GetQuery(index);
@@ -769,9 +770,9 @@ public:
         return index;
     }

-    std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) {
+    std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) {
         if (last_queries[stream] != 0) {
-            std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
+            std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
             return result;
         }
         return std::nullopt;
@@ -974,7 +975,7 @@ private:
     size_t buffers_count{};
     std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
     std::array<VkDeviceSize, NUM_STREAMS> offsets{};
-    std::array<VAddr, NUM_STREAMS> last_queries;
+    std::array<DAddr, NUM_STREAMS> last_queries;
     std::array<size_t, NUM_STREAMS> last_queries_stride;
     Maxwell3D::Regs::PrimitiveTopology out_topology;
     u64 streams_mask;
@@ -987,7 +988,7 @@ public:
         : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}

     // Parameterized constructor
-    PrimitivesQueryBase(bool has_timestamp, VAddr address)
+    PrimitivesQueryBase(bool has_timestamp, DAddr address)
         : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
         if (has_timestamp) {
             flags |= VideoCommon::QueryFlagBits::HasTimestamp;
@@ -995,7 +996,7 @@ public:
     }

     u64 stride{};
-    VAddr dependant_address{};
+    DAddr dependant_address{};
     Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
     size_t dependant_index{};
     bool dependant_manage{};
@@ -1005,15 +1006,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive
 public:
     explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
                                          TFBCounterStreamer& tfb_streamer_,
-                                         Core::Memory::Memory& cpu_memory_)
+                                         Tegra::MaxwellDeviceMemoryManager& device_memory_)
         : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
-          tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {
+          tfb_streamer{tfb_streamer_}, device_memory{device_memory_} {
         MakeDependent(&tfb_streamer);
     }

     ~PrimitivesSucceededStreamer() = default;

-    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+    size_t WriteCounter(DAddr address, bool has_timestamp, u32 value,
                         std::optional<u32> subreport_) override {
         auto index = BuildQuery();
         auto* new_query = GetQuery(index);
@@ -1063,6 +1064,8 @@ public:
                 }
             });
         }
+        auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address);
+        ASSERT(ptr != nullptr);

         new_query->dependant_manage = must_manage_dependance;
         pending_flush_queries.push_back(index);
@@ -1100,7 +1103,7 @@ public:
             num_vertices = dependant_query->value / query->stride;
             tfb_streamer.Free(query->dependant_index);
         } else {
-            u8* pointer = cpu_memory.GetPointer(query->dependant_address);
+            u8* pointer = device_memory.GetPointer<u8>(query->dependant_address);
             u32 result;
             std::memcpy(&result, pointer, sizeof(u32));
             num_vertices = static_cast<u64>(result) / query->stride;
@@ -1137,7 +1140,7 @@ public:
 private:
     QueryCacheRuntime& runtime;
     TFBCounterStreamer& tfb_streamer;
-    Core::Memory::Memory& cpu_memory;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;

     // syncing queue
     std::vector<size_t> pending_sync;
@@ -1152,12 +1155,13 @@ private:

 struct QueryCacheRuntimeImpl {
     QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_,
-                          Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_,
-                          const Device& device_, const MemoryAllocator& memory_allocator_,
-                          Scheduler& scheduler_, StagingBufferPool& staging_pool_,
+                          Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                          Vulkan::BufferCache& buffer_cache_, const Device& device_,
+                          const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
+                          StagingBufferPool& staging_pool_,
                           ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                           DescriptorPool& descriptor_pool)
-        : rasterizer{rasterizer_}, cpu_memory{cpu_memory_},
+        : rasterizer{rasterizer_}, device_memory{device_memory_},
           buffer_cache{buffer_cache_}, device{device_},
           memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
           guest_streamer(0, runtime),
@@ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl {
                         scheduler, memory_allocator, staging_pool),
           primitives_succeeded_streamer(
               static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
-              cpu_memory_),
+              device_memory_),
           primitives_needed_minus_succeeded_streamer(
               static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
           hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} {
@@ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl {
     }

     VideoCore::RasterizerInterface* rasterizer;
-    Core::Memory::Memory& cpu_memory;
+    Tegra::MaxwellDeviceMemoryManager& device_memory;
     Vulkan::BufferCache& buffer_cache;

     const Device& device;
@@ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl {
     PrimitivesSucceededStreamer primitives_succeeded_streamer;
     VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer;

-    std::vector<std::pair<VAddr, VAddr>> little_cache;
+    std::vector<std::pair<DAddr, DAddr>> little_cache;
     std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
     std::vector<size_t> redirect_cache;
     std::vector<std::vector<VkBufferCopy>> copies_setup;
@@ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl {
 };

 QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
-                                     Core::Memory::Memory& cpu_memory_,
+                                     Tegra::MaxwellDeviceMemoryManager& device_memory_,
                                      Vulkan::BufferCache& buffer_cache_, const Device& device_,
                                      const MemoryAllocator& memory_allocator_,
                                      Scheduler& scheduler_, StagingBufferPool& staging_pool_,
                                      ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                                      DescriptorPool& descriptor_pool) {
     impl = std::make_unique<QueryCacheRuntimeImpl>(
-        *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
+        *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
         staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
 }

@@ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
     ResumeHostConditionalRendering();
 }

-void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) {
+void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
     VkBuffer to_resolve;
     u32 to_resolve_offset;
     {
@@ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
         return false;
     }

-    const auto check_in_bc = [&](VAddr address) {
+    const auto check_in_bc = [&](DAddr address) {
         return impl->buffer_cache.IsRegionGpuModified(address, 8);
     };
-    const auto check_value = [&](VAddr address) {
-        u8* ptr = impl->cpu_memory.GetPointer(address);
+    const auto check_value = [&](DAddr address) {
+        u8* ptr = impl->device_memory.GetPointer<u8>(address);
         u64 value{};
         std::memcpy(&value, ptr, sizeof(value));
         return value == 0;
@@ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
     for (auto& sync_val : values) {
         total_size += sync_val.size;
         bool found = false;
-        VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE);
-        VAddr base_end = base + Core::Memory::YUZU_PAGESIZE;
+        DAddr base = Common::AlignDown(sync_val.address, Core::DEVICE_PAGESIZE);
+        DAddr base_end = base + Core::DEVICE_PAGESIZE;
         for (size_t i = 0; i < impl->little_cache.size(); i++) {
             const auto set_found = [&] {
                 impl->redirect_cache.push_back(i);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index e9a1ea169..f6151123e 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl;
 class QueryCacheRuntime {
 public:
     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
-                               Core::Memory::Memory& cpu_memory_,
+                               Tegra::MaxwellDeviceMemoryManager& device_memory_,
                                Vulkan::BufferCache& buffer_cache_, const Device& device_,
                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
                                StagingBufferPool& staging_pool_,
@@ -61,7 +61,7 @@ public:

 private:
     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal);
-    void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal);
+    void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal);
     friend struct QueryCacheRuntimeImpl;
     std::unique_ptr<QueryCacheRuntimeImpl> impl;
 };
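The primitives-succeeded streamer in the vk_query_cache.cpp hunks above reads its dependant counter back through the device memory manager. A condensed sketch of that readback, with standalone device_memory, dependant_address and stride variables assumed purely for illustration:

// Sketch, not part of the patch: resolve the dependant counter's DAddr to a host
// pointer and derive the vertex count from the value stored there.
u32 result = 0;
if (u8* const ptr = device_memory.GetPointer<u8>(dependant_address)) {
    std::memcpy(&result, ptr, sizeof(u32));
}
const u64 num_vertices = static_cast<u64>(result) / stride;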
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 241fc34be..5bf41b81f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -18,6 +18,7 @@
 #include "video_core/engines/draw_manager.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -163,10 +164,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
 } // Anonymous namespace

 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                   Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
-                                   const Device& device_, MemoryAllocator& memory_allocator_,
-                                   StateTracker& state_tracker_, Scheduler& scheduler_)
-    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+                                   Tegra::MaxwellDeviceMemoryManager& device_memory_,
+                                   ScreenInfo& screen_info_, const Device& device_,
+                                   MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
+                                   Scheduler& scheduler_)
+    : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_},
       memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
@@ -174,14 +176,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       texture_cache_runtime{
           device, scheduler, memory_allocator, staging_pool,
           blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue},
-      texture_cache(texture_cache_runtime, *this),
+      texture_cache(texture_cache_runtime, device_memory),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
-      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
-      query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
+      buffer_cache(device_memory, buffer_cache_runtime),
+      query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler,
                           staging_pool, compute_pass_descriptor_queue, descriptor_pool),
-      query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
-      pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
+      query_cache(gpu, *this, device_memory, query_cache_runtime),
+      pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue,
                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
       accelerate_dma(buffer_cache, texture_cache, scheduler),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
@@ -508,7 +510,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in

 void RasterizerVulkan::FlushAll() {}

-void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
     if (addr == 0 || size == 0) {
         return;
     }
@@ -525,7 +527,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType
     }
 }

-bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
+bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
     if ((True(which & VideoCommon::CacheType::BufferCache))) {
         std::scoped_lock lock{buffer_cache.mutex};
         if (buffer_cache.IsRegionGpuModified(addr, size)) {
@@ -542,7 +544,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT
     return false;
 }

-VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) {
+VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
         auto area = texture_cache.GetFlushArea(addr, size);
@@ -551,14 +553,14 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64
551 } 553 }
552 } 554 }
553 VideoCore::RasterizerDownloadArea new_area{ 555 VideoCore::RasterizerDownloadArea new_area{
554 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), 556 .start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE),
555 .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), 557 .end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE),
556 .preemtive = true, 558 .preemtive = true,
557 }; 559 };
558 return new_area; 560 return new_area;
559} 561}
560 562
561void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { 563void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
562 if (addr == 0 || size == 0) { 564 if (addr == 0 || size == 0) {
563 return; 565 return;
564 } 566 }
@@ -578,7 +580,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
578 } 580 }
579} 581}
580 582
581void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { 583void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
582 { 584 {
583 std::scoped_lock lock{texture_cache.mutex}; 585 std::scoped_lock lock{texture_cache.mutex};
584 for (const auto& [addr, size] : sequences) { 586 for (const auto& [addr, size] : sequences) {
@@ -599,7 +601,7 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
599 } 601 }
600} 602}
601 603
602bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { 604bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) {
603 if (addr == 0 || size == 0) { 605 if (addr == 0 || size == 0) {
604 return false; 606 return false;
605 } 607 }
@@ -620,7 +622,7 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
620 return false; 622 return false;
621} 623}
622 624
623void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { 625void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) {
624 if (addr == 0 || size == 0) { 626 if (addr == 0 || size == 0) {
625 return; 627 return;
626 } 628 }
@@ -640,7 +642,7 @@ void RasterizerVulkan::InvalidateGPUCache() {
640 gpu.InvalidateGPUCache(); 642 gpu.InvalidateGPUCache();
641} 643}
642 644
643void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 645void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) {
644 { 646 {
645 std::scoped_lock lock{texture_cache.mutex}; 647 std::scoped_lock lock{texture_cache.mutex};
646 texture_cache.UnmapMemory(addr, size); 648 texture_cache.UnmapMemory(addr, size);
@@ -679,7 +681,7 @@ void RasterizerVulkan::ReleaseFences(bool force) {
679 fence_manager.WaitPendingFences(force); 681 fence_manager.WaitPendingFences(force);
680} 682}
681 683
682void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, 684void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size,
683 VideoCommon::CacheType which) { 685 VideoCommon::CacheType which) {
684 if (Settings::IsGPULevelExtreme()) { 686 if (Settings::IsGPULevelExtreme()) {
685 FlushRegion(addr, size, which); 687 FlushRegion(addr, size, which);
@@ -782,7 +784,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si
782} 784}
783 785
784bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, 786bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
785 VAddr framebuffer_addr, u32 pixel_stride) { 787 DAddr framebuffer_addr, u32 pixel_stride) {
786 if (!framebuffer_addr) { 788 if (!framebuffer_addr) {
787 return false; 789 return false;
788 } 790 }
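
The FlushRegion/GetFlushArea hunks above move the rasterizer's cache-maintenance entry points from guest virtual addresses (VAddr) to device addresses (DAddr) and align the fallback download area to Core::DEVICE_PAGESIZE. A minimal, self-contained sketch of that alignment step, assuming a 4 KiB device page (the real Core::DEVICE_PAGESIZE constant is defined elsewhere in the tree and is only referenced, not shown, in this hunk):

#include <cstdint>

using DAddr = uint64_t;

// Assumed page size for illustration; the actual value comes from Core::DEVICE_PAGESIZE.
constexpr DAddr kDevicePageSize = 0x1000;

constexpr DAddr AlignDown(DAddr value, DAddr align) {
    return value & ~(align - 1);
}

constexpr DAddr AlignUp(DAddr value, DAddr align) {
    return AlignDown(value + align - 1, align);
}

struct DownloadArea {
    DAddr start_address;
    DAddr end_address;
};

// Mirrors the fallback path of GetFlushArea: expand [addr, addr + size) to whole device pages.
constexpr DownloadArea MakeFallbackArea(DAddr addr, uint64_t size) {
    return DownloadArea{
        .start_address = AlignDown(addr, kDevicePageSize),
        .end_address = AlignUp(addr + size, kDevicePageSize),
    };
}

static_assert(MakeFallbackArea(0x1234, 0x10).start_address == 0x1000);
static_assert(MakeFallbackArea(0x1234, 0x10).end_address == 0x2000);
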
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ad069556c..881ee0993 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -7,14 +7,13 @@
7 7
8#include <boost/container/static_vector.hpp> 8#include <boost/container/static_vector.hpp>
9 9
10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13#include "video_core/control/channel_state_cache.h" 11#include "video_core/control/channel_state_cache.h"
14#include "video_core/engines/maxwell_dma.h" 12#include "video_core/engines/maxwell_dma.h"
15#include "video_core/rasterizer_accelerated.h" 13#include "video_core/host1x/gpu_device_memory_manager.h"
16#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
17#include "video_core/renderer_vulkan/blit_image.h" 15#include "video_core/renderer_vulkan/blit_image.h"
16#include "video_core/renderer_vulkan/vk_buffer_cache.h"
18#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 17#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
19#include "video_core/renderer_vulkan/vk_fence_manager.h" 18#include "video_core/renderer_vulkan/vk_fence_manager.h"
20#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 19#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -34,10 +33,14 @@ namespace Core::Frontend {
34class EmuWindow; 33class EmuWindow;
35} 34}
36 35
37namespace Tegra::Engines { 36namespace Tegra {
37
38namespace Engines {
38class Maxwell3D; 39class Maxwell3D;
39} 40}
40 41
42} // namespace Tegra
43
41namespace Vulkan { 44namespace Vulkan {
42 45
43struct ScreenInfo; 46struct ScreenInfo;
@@ -70,13 +73,14 @@ private:
70 Scheduler& scheduler; 73 Scheduler& scheduler;
71}; 74};
72 75
73class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, 76class RasterizerVulkan final : public VideoCore::RasterizerInterface,
74 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 77 protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
75public: 78public:
76 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 79 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
77 Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, 80 Tegra::MaxwellDeviceMemoryManager& device_memory_,
78 const Device& device_, MemoryAllocator& memory_allocator_, 81 ScreenInfo& screen_info_, const Device& device_,
79 StateTracker& state_tracker_, Scheduler& scheduler_); 82 MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
83 Scheduler& scheduler_);
80 ~RasterizerVulkan() override; 84 ~RasterizerVulkan() override;
81 85
82 void Draw(bool is_indexed, u32 instance_count) override; 86 void Draw(bool is_indexed, u32 instance_count) override;
@@ -90,18 +94,18 @@ public:
90 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 94 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
91 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 95 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
92 void FlushAll() override; 96 void FlushAll() override;
93 void FlushRegion(VAddr addr, u64 size, 97 void FlushRegion(DAddr addr, u64 size,
94 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 98 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
95 bool MustFlushRegion(VAddr addr, u64 size, 99 bool MustFlushRegion(DAddr addr, u64 size,
96 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 100 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
97 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 101 VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override;
98 void InvalidateRegion(VAddr addr, u64 size, 102 void InvalidateRegion(DAddr addr, u64 size,
99 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 103 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
100 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; 104 void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override;
101 void OnCacheInvalidation(VAddr addr, u64 size) override; 105 void OnCacheInvalidation(DAddr addr, u64 size) override;
102 bool OnCPUWrite(VAddr addr, u64 size) override; 106 bool OnCPUWrite(DAddr addr, u64 size) override;
103 void InvalidateGPUCache() override; 107 void InvalidateGPUCache() override;
104 void UnmapMemory(VAddr addr, u64 size) override; 108 void UnmapMemory(DAddr addr, u64 size) override;
105 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 109 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
106 void SignalFence(std::function<void()>&& func) override; 110 void SignalFence(std::function<void()>&& func) override;
107 void SyncOperation(std::function<void()>&& func) override; 111 void SyncOperation(std::function<void()>&& func) override;
@@ -109,7 +113,7 @@ public:
109 void SignalReference() override; 113 void SignalReference() override;
110 void ReleaseFences(bool force = true) override; 114 void ReleaseFences(bool force = true) override;
111 void FlushAndInvalidateRegion( 115 void FlushAndInvalidateRegion(
112 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 116 DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
113 void WaitForIdle() override; 117 void WaitForIdle() override;
114 void FragmentBarrier() override; 118 void FragmentBarrier() override;
115 void TiledCacheBarrier() override; 119 void TiledCacheBarrier() override;
@@ -122,7 +126,7 @@ public:
122 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 126 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
123 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, 127 void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
124 std::span<const u8> memory) override; 128 std::span<const u8> memory) override;
125 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 129 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr,
126 u32 pixel_stride) override; 130 u32 pixel_stride) override;
127 void LoadDiskResources(u64 title_id, std::stop_token stop_loading, 131 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
128 const VideoCore::DiskResourceLoadCallback& callback) override; 132 const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -176,6 +180,7 @@ private:
176 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); 180 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
177 181
178 Tegra::GPU& gpu; 182 Tegra::GPU& gpu;
183 Tegra::MaxwellDeviceMemoryManager& device_memory;
179 184
180 ScreenInfo& screen_info; 185 ScreenInfo& screen_info;
181 const Device& device; 186 const Device& device;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index e81cd031b..2af32c8f2 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -12,6 +12,7 @@
12#include "video_core/dirty_flags.h" 12#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h" 13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/host1x/gpu_device_memory_manager.h"
15#include "video_core/memory_manager.h" 16#include "video_core/memory_manager.h"
16#include "video_core/shader_cache.h" 17#include "video_core/shader_cache.h"
17#include "video_core/shader_environment.h" 18#include "video_core/shader_environment.h"
@@ -34,7 +35,8 @@ void ShaderCache::SyncGuestHost() {
34 RemovePendingShaders(); 35 RemovePendingShaders();
35} 36}
36 37
37ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} 38ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_)
39 : device_memory{device_memory_} {}
38 40
39bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { 41bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
40 auto& dirty{maxwell3d->dirty.flags}; 42 auto& dirty{maxwell3d->dirty.flags};
@@ -132,7 +134,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t
132 134
133 storage.push_back(std::move(data)); 135 storage.push_back(std::move(data));
134 136
135 rasterizer.UpdatePagesCachedCount(addr, size, 1); 137 device_memory.UpdatePagesCachedCount(addr, size, 1);
136} 138}
137 139
138void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { 140void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
@@ -209,7 +211,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {
209 211
210 const VAddr addr = entry->addr_start; 212 const VAddr addr = entry->addr_start;
211 const size_t size = entry->addr_end - addr; 213 const size_t size = entry->addr_end - addr;
212 rasterizer.UpdatePagesCachedCount(addr, size, -1); 214 device_memory.UpdatePagesCachedCount(addr, size, -1);
213} 215}
214 216
215void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { 217void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
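
In the shader_cache.cpp hunks above, Register and UnmarkMemory now report cached ranges directly to the device memory manager via UpdatePagesCachedCount(addr, size, ±1) instead of routing through the rasterizer. A rough sketch of the reference-counting idea behind that call, written against a toy stand-in class rather than the real Tegra::MaxwellDeviceMemoryManager (whose implementation lives in device_memory_manager.inc and is considerably more involved):

#include <cstddef>
#include <cstdint>
#include <unordered_map>

using VAddr = uint64_t;

// Toy stand-in: counts how many cache entries overlap each page, so host memory
// protection only needs to change when a page transitions 0 -> 1 or 1 -> 0.
class PageCacheCounter {
public:
    static constexpr VAddr kPageBits = 12;

    void UpdatePagesCachedCount(VAddr addr, size_t size, int delta) {
        const VAddr page_begin = addr >> kPageBits;
        const VAddr page_end = (addr + size - 1) >> kPageBits;
        for (VAddr page = page_begin; page <= page_end; ++page) {
            const int old_count = counts_[page];
            const int new_count = old_count + delta;
            counts_[page] = new_count;
            if (old_count == 0 && new_count > 0) {
                // First cached user of this page: e.g. write-protect it here.
            } else if (old_count > 0 && new_count == 0) {
                // Last cached user gone: restore normal permissions here.
            }
        }
    }

private:
    std::unordered_map<VAddr, int> counts_;
};

In the diff, Register() corresponds to a +1 update and UnmarkMemory() to a -1 update, which is why the two call sites pass 1 and -1 respectively.
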
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index a76896620..fd9bf2562 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -14,6 +14,7 @@
14#include "common/common_types.h" 14#include "common/common_types.h"
15#include "common/polyfill_ranges.h" 15#include "common/polyfill_ranges.h"
16#include "video_core/control/channel_state_cache.h" 16#include "video_core/control/channel_state_cache.h"
17#include "video_core/host1x/gpu_device_memory_manager.h"
17#include "video_core/rasterizer_interface.h" 18#include "video_core/rasterizer_interface.h"
18#include "video_core/shader_environment.h" 19#include "video_core/shader_environment.h"
19 20
@@ -77,7 +78,7 @@ protected:
77 } 78 }
78 }; 79 };
79 80
80 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); 81 explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory);
81 82
82 /// @brief Update the hashes and information of shader stages 83 /// @brief Update the hashes and information of shader stages
83 /// @param unique_hashes Shader hashes to store into when a stage is enabled 84 /// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -145,7 +146,7 @@ private:
145 /// @brief Create a new shader entry and register it 146 /// @brief Create a new shader entry and register it
146 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); 147 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
147 148
148 VideoCore::RasterizerInterface& rasterizer; 149 Tegra::MaxwellDeviceMemoryManager& device_memory;
149 150
150 mutable std::mutex lookup_mutex; 151 mutable std::mutex lookup_mutex;
151 std::mutex invalidation_mutex; 152 std::mutex invalidation_mutex;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0d5a1709f..7398ed2ec 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -8,10 +8,11 @@
8 8
9#include "common/alignment.h" 9#include "common/alignment.h"
10#include "common/settings.h" 10#include "common/settings.h"
11#include "core/memory.h"
12#include "video_core/control/channel_state.h" 11#include "video_core/control/channel_state.h"
13#include "video_core/dirty_flags.h" 12#include "video_core/dirty_flags.h"
14#include "video_core/engines/kepler_compute.h" 13#include "video_core/engines/kepler_compute.h"
14#include "video_core/guest_memory.h"
15#include "video_core/host1x/gpu_device_memory_manager.h"
15#include "video_core/texture_cache/image_view_base.h" 16#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/samples_helper.h" 17#include "video_core/texture_cache/samples_helper.h"
17#include "video_core/texture_cache/texture_cache_base.h" 18#include "video_core/texture_cache/texture_cache_base.h"
@@ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType;
27using namespace Common::Literals; 28using namespace Common::Literals;
28 29
29template <class P> 30template <class P>
30TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) 31TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
31 : runtime{runtime_}, rasterizer{rasterizer_} { 32 : runtime{runtime_}, device_memory{device_memory_} {
32 // Configure null sampler 33 // Configure null sampler
33 TSCEntry sampler_descriptor{}; 34 TSCEntry sampler_descriptor{};
34 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); 35 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
49 void(slot_samplers.insert(runtime, sampler_descriptor)); 50 void(slot_samplers.insert(runtime, sampler_descriptor));
50 51
51 if constexpr (HAS_DEVICE_MEMORY_INFO) { 52 if constexpr (HAS_DEVICE_MEMORY_INFO) {
52 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 53 const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
53 const s64 min_spacing_expected = device_memory - 1_GiB; 54 const s64 min_spacing_expected = device_local_memory - 1_GiB;
54 const s64 min_spacing_critical = device_memory - 512_MiB; 55 const s64 min_spacing_critical = device_local_memory - 512_MiB;
55 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 56 const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
56 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 57 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
57 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 58 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
58 expected_memory = static_cast<u64>( 59 expected_memory = static_cast<u64>(
59 std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), 60 std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
60 DEFAULT_EXPECTED_MEMORY)); 61 DEFAULT_EXPECTED_MEMORY));
61 critical_memory = static_cast<u64>( 62 critical_memory = static_cast<u64>(
62 std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), 63 std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
63 DEFAULT_CRITICAL_MEMORY)); 64 DEFAULT_CRITICAL_MEMORY));
64 minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); 65 minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
65 } else { 66 } else {
66 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; 67 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
67 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; 68 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
@@ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
513} 514}
514 515
515template <class P> 516template <class P>
516void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { 517void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) {
517 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { 518 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
518 if (True(image.flags & ImageFlagBits::CpuModified)) { 519 if (True(image.flags & ImageFlagBits::CpuModified)) {
519 return; 520 return;
@@ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
526} 527}
527 528
528template <class P> 529template <class P>
529void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 530void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
530 boost::container::small_vector<ImageId, 16> images; 531 boost::container::small_vector<ImageId, 16> images;
531 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { 532 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
532 if (!image.IsSafeDownload()) { 533 if (!image.IsSafeDownload()) {
@@ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
553} 554}
554 555
555template <class P> 556template <class P>
556std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, 557std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr,
557 u64 size) { 558 u64 size) {
558 std::optional<VideoCore::RasterizerDownloadArea> area{}; 559 std::optional<VideoCore::RasterizerDownloadArea> area{};
559 ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { 560 ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) {
@@ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
579} 580}
580 581
581template <class P> 582template <class P>
582void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { 583void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) {
583 boost::container::small_vector<ImageId, 16> deleted_images; 584 boost::container::small_vector<ImageId, 16> deleted_images;
584 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); 585 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
585 for (const ImageId id : deleted_images) { 586 for (const ImageId id : deleted_images) {
@@ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
713 714
714template <class P> 715template <class P>
715typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( 716typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(
716 const Tegra::FramebufferConfig& config, VAddr cpu_addr) { 717 const Tegra::FramebufferConfig& config, DAddr cpu_addr) {
717 // TODO: Properly implement this 718 // TODO: Properly implement this
718 const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); 719 const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS);
719 if (it == page_table.end()) { 720 if (it == page_table.end()) {
@@ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep
940} 941}
941 942
942template <class P> 943template <class P>
943bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 944bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
944 bool is_modified = false; 945 bool is_modified = false;
945 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { 946 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
946 if (False(image.flags & ImageFlagBits::GpuModified)) { 947 if (False(image.flags & ImageFlagBits::GpuModified)) {
@@ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
1059 return; 1060 return;
1060 } 1061 }
1061 1062
1062 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( 1063 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1063 *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); 1064 *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1064 1065
1065 if (True(image.flags & ImageFlagBits::Converted)) { 1066 if (True(image.flags & ImageFlagBits::Converted)) {
@@ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
1124template <class P> 1125template <class P>
1125ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, 1126ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1126 RelaxedOptions options) { 1127 RelaxedOptions options) {
1127 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1128 std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1128 if (!cpu_addr) { 1129 if (!cpu_addr) {
1129 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); 1130 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
1130 if (!cpu_addr) { 1131 if (!cpu_addr) {
@@ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
1265 1266
1266 static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; 1267 static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
1267 local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); 1268 local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
1268 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( 1269 Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1269 *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); 1270 *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1270 1271
1271 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, 1272 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
@@ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) {
1339template <class P> 1340template <class P>
1340ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 1341ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1341 RelaxedOptions options) { 1342 RelaxedOptions options) {
1342 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1343 std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1343 if (!cpu_addr) { 1344 if (!cpu_addr) {
1344 const auto size = CalculateGuestSizeInBytes(info); 1345 const auto size = CalculateGuestSizeInBytes(info);
1345 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); 1346 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
1346 if (!cpu_addr) { 1347 if (!cpu_addr) {
1347 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; 1348 const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
1348 virtual_invalid_space += Common::AlignUp(size, 32); 1349 virtual_invalid_space += Common::AlignUp(size, 32);
1349 cpu_addr = std::optional<VAddr>(fake_addr); 1350 cpu_addr = std::optional<DAddr>(fake_addr);
1350 } 1351 }
1351 } 1352 }
1352 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); 1353 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
@@ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1362} 1363}
1363 1364
1364template <class P> 1365template <class P>
1365ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { 1366ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
1366 ImageInfo new_info = info; 1367 ImageInfo new_info = info;
1367 const size_t size_bytes = CalculateGuestSizeInBytes(new_info); 1368 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
1368 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1369 const bool broken_views = runtime.HasBrokenTextureViewFormats();
@@ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag
1650 1651
1651template <class P> 1652template <class P>
1652ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { 1653ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) {
1653 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1654 std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1654 if (!cpu_addr) { 1655 if (!cpu_addr) {
1655 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); 1656 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
1656 if (!cpu_addr) { 1657 if (!cpu_addr) {
@@ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd
1780 1781
1781template <class P> 1782template <class P>
1782template <typename Func> 1783template <typename Func>
1783void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { 1784void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) {
1784 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; 1785 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1785 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; 1786 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1786 boost::container::small_vector<ImageId, 32> images; 1787 boost::container::small_vector<ImageId, 32> images;
@@ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size,
1924template <class P> 1925template <class P>
1925template <typename Func> 1926template <typename Func>
1926void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { 1927void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1927 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; 1928 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type;
1928 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; 1929 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1929 const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); 1930 const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1930 for (const auto& [gpu_addr, size] : segments) { 1931 for (const auto& [gpu_addr, size] : segments) {
1931 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1932 std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1932 ASSERT(cpu_addr); 1933 ASSERT(cpu_addr);
1933 if constexpr (RETURNS_BOOL) { 1934 if constexpr (RETURNS_BOOL) {
1934 if (func(gpu_addr, *cpu_addr, size)) { 1935 if (func(gpu_addr, *cpu_addr, size)) {
@@ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1980 } 1981 }
1981 boost::container::small_vector<ImageViewId, 16> sparse_maps; 1982 boost::container::small_vector<ImageViewId, 16> sparse_maps;
1982 ForEachSparseSegment( 1983 ForEachSparseSegment(
1983 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { 1984 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) {
1984 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); 1985 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1985 ForEachCPUPage(cpu_addr, size, 1986 ForEachCPUPage(cpu_addr, size,
1986 [this, map_id](u64 page) { page_table[page].push_back(map_id); }); 1987 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
@@ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
2048 auto& sparse_maps = it->second; 2049 auto& sparse_maps = it->second;
2049 for (auto& map_view_id : sparse_maps) { 2050 for (auto& map_view_id : sparse_maps) {
2050 const auto& map_range = slot_map_views[map_view_id]; 2051 const auto& map_range = slot_map_views[map_view_id];
2051 const VAddr cpu_addr = map_range.cpu_addr; 2052 const DAddr cpu_addr = map_range.cpu_addr;
2052 const std::size_t size = map_range.size; 2053 const std::size_t size = map_range.size;
2053 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { 2054 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
2054 const auto page_it = page_table.find(page); 2055 const auto page_it = page_table.find(page);
@@ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
2080 ASSERT(False(image.flags & ImageFlagBits::Tracked)); 2081 ASSERT(False(image.flags & ImageFlagBits::Tracked));
2081 image.flags |= ImageFlagBits::Tracked; 2082 image.flags |= ImageFlagBits::Tracked;
2082 if (False(image.flags & ImageFlagBits::Sparse)) { 2083 if (False(image.flags & ImageFlagBits::Sparse)) {
2083 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); 2084 device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
2084 return; 2085 return;
2085 } 2086 }
2086 if (True(image.flags & ImageFlagBits::Registered)) { 2087 if (True(image.flags & ImageFlagBits::Registered)) {
@@ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
2089 auto& sparse_maps = it->second; 2090 auto& sparse_maps = it->second;
2090 for (auto& map_view_id : sparse_maps) { 2091 for (auto& map_view_id : sparse_maps) {
2091 const auto& map = slot_map_views[map_view_id]; 2092 const auto& map = slot_map_views[map_view_id];
2092 const VAddr cpu_addr = map.cpu_addr; 2093 const DAddr cpu_addr = map.cpu_addr;
2093 const std::size_t size = map.size; 2094 const std::size_t size = map.size;
2094 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); 2095 device_memory.UpdatePagesCachedCount(cpu_addr, size, 1);
2095 } 2096 }
2096 return; 2097 return;
2097 } 2098 }
2098 ForEachSparseSegment(image, 2099 ForEachSparseSegment(image,
2099 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { 2100 [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) {
2100 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); 2101 device_memory.UpdatePagesCachedCount(cpu_addr, size, 1);
2101 }); 2102 });
2102} 2103}
2103 2104
@@ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
2106 ASSERT(True(image.flags & ImageFlagBits::Tracked)); 2107 ASSERT(True(image.flags & ImageFlagBits::Tracked));
2107 image.flags &= ~ImageFlagBits::Tracked; 2108 image.flags &= ~ImageFlagBits::Tracked;
2108 if (False(image.flags & ImageFlagBits::Sparse)) { 2109 if (False(image.flags & ImageFlagBits::Sparse)) {
2109 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); 2110 device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
2110 return; 2111 return;
2111 } 2112 }
2112 ASSERT(True(image.flags & ImageFlagBits::Registered)); 2113 ASSERT(True(image.flags & ImageFlagBits::Registered));
@@ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
2115 auto& sparse_maps = it->second; 2116 auto& sparse_maps = it->second;
2116 for (auto& map_view_id : sparse_maps) { 2117 for (auto& map_view_id : sparse_maps) {
2117 const auto& map = slot_map_views[map_view_id]; 2118 const auto& map = slot_map_views[map_view_id];
2118 const VAddr cpu_addr = map.cpu_addr; 2119 const DAddr cpu_addr = map.cpu_addr;
2119 const std::size_t size = map.size; 2120 const std::size_t size = map.size;
2120 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 2121 device_memory.UpdatePagesCachedCount(cpu_addr, size, -1);
2121 } 2122 }
2122} 2123}
2123 2124
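
The TextureCache constructor hunk above renames its local device_memory variable to device_local_memory so it no longer shadows the new device_memory member. A compact sketch of the memory-budget arithmetic that block performs, with placeholder values standing in for DEFAULT_EXPECTED_MEMORY, DEFAULT_CRITICAL_MEMORY and TARGET_THRESHOLD (the real constants are defined elsewhere and are not part of this diff):

#include <algorithm>
#include <cstdint>

struct MemoryBudget {
    uint64_t expected;
    uint64_t critical;
    uint64_t minimum;
};

// Placeholder constants for illustration only; the real values live in texture_cache_base.h.
constexpr int64_t kGiB = int64_t{1} << 30;
constexpr int64_t kMiB = int64_t{1} << 20;
constexpr int64_t kDefaultExpected = 4 * kGiB;
constexpr int64_t kDefaultCritical = 6 * kGiB;
constexpr int64_t kTargetThreshold = 4 * kGiB;

// Mirrors the HAS_DEVICE_MEMORY_INFO branch: keep a vacancy margin that scales
// with how much device-local memory the runtime reports.
inline MemoryBudget ComputeBudget(int64_t device_local_memory) {
    const int64_t min_spacing_expected = device_local_memory - 1 * kGiB;
    const int64_t min_spacing_critical = device_local_memory - 512 * kMiB;
    const int64_t mem_threshold = std::min(device_local_memory, kTargetThreshold);
    const int64_t min_vacancy_expected = (6 * mem_threshold) / 10;
    const int64_t min_vacancy_critical = (3 * mem_threshold) / 10;
    return MemoryBudget{
        .expected = static_cast<uint64_t>(std::max(
            std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
            kDefaultExpected)),
        .critical = static_cast<uint64_t>(std::max(
            std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
            kDefaultCritical)),
        .minimum = static_cast<uint64_t>((device_local_memory - mem_threshold) / 2),
    };
}
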
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 6caf75b46..8699d40d4 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -36,9 +36,11 @@
36#include "video_core/texture_cache/types.h" 36#include "video_core/texture_cache/types.h"
37#include "video_core/textures/texture.h" 37#include "video_core/textures/texture.h"
38 38
39namespace Tegra::Control { 39namespace Tegra {
40namespace Control {
40struct ChannelState; 41struct ChannelState;
41} 42}
43} // namespace Tegra
42 44
43namespace VideoCommon { 45namespace VideoCommon {
44 46
@@ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
126 }; 128 };
127 129
128public: 130public:
129 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); 131 explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&);
130 132
131 /// Notify the cache that a new frame has been queued 133 /// Notify the cache that a new frame has been queued
132 void TickFrame(); 134 void TickFrame();
@@ -190,15 +192,15 @@ public:
190 Framebuffer* GetFramebuffer(); 192 Framebuffer* GetFramebuffer();
191 193
192 /// Mark images in a range as modified from the CPU 194 /// Mark images in a range as modified from the CPU
193 void WriteMemory(VAddr cpu_addr, size_t size); 195 void WriteMemory(DAddr cpu_addr, size_t size);
194 196
195 /// Download contents of host images to guest memory in a region 197 /// Download contents of host images to guest memory in a region
196 void DownloadMemory(VAddr cpu_addr, size_t size); 198 void DownloadMemory(DAddr cpu_addr, size_t size);
197 199
198 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); 200 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size);
199 201
200 /// Remove images in a region 202 /// Remove images in a region
201 void UnmapMemory(VAddr cpu_addr, size_t size); 203 void UnmapMemory(DAddr cpu_addr, size_t size);
202 204
203 /// Remove images in a region 205 /// Remove images in a region
204 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); 206 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
@@ -210,7 +212,7 @@ public:
210 212
211 /// Try to find a cached image view in the given CPU address 213 /// Try to find a cached image view in the given CPU address
212 [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, 214 [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config,
213 VAddr cpu_addr); 215 DAddr cpu_addr);
214 216
215 /// Return true when there are uncommitted images to be downloaded 217 /// Return true when there are uncommitted images to be downloaded
216 [[nodiscard]] bool HasUncommittedFlushes() const noexcept; 218 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
@@ -235,7 +237,7 @@ public:
235 GPUVAddr address = 0, size_t size = 0); 237 GPUVAddr address = 0, size_t size = 0);
236 238
237 /// Return true when a CPU region is modified from the GPU 239 /// Return true when a CPU region is modified from the GPU
238 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 240 [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size);
239 241
240 [[nodiscard]] bool IsRescaling() const noexcept; 242 [[nodiscard]] bool IsRescaling() const noexcept;
241 243
@@ -252,7 +254,7 @@ public:
252private: 254private:
253 /// Iterate over all page indices in a range 255 /// Iterate over all page indices in a range
254 template <typename Func> 256 template <typename Func>
255 static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { 257 static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) {
256 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; 258 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
257 const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; 259 const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
258 for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { 260 for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
@@ -326,7 +328,7 @@ private:
326 328
327 /// Create a new image and join perfectly matching existing images 329 /// Create a new image and join perfectly matching existing images
328 /// Remove joined images from the cache 330 /// Remove joined images from the cache
329 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); 331 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr);
330 332
331 [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); 333 [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr);
332 334
@@ -349,7 +351,7 @@ private:
349 351
350 /// Iterates over all the images in a region calling func 352 /// Iterates over all the images in a region calling func
351 template <typename Func> 353 template <typename Func>
352 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); 354 void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func);
353 355
354 template <typename Func> 356 template <typename Func>
355 void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); 357 void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
@@ -421,7 +423,7 @@ private:
421 423
422 Runtime& runtime; 424 Runtime& runtime;
423 425
424 VideoCore::RasterizerInterface& rasterizer; 426 Tegra::MaxwellDeviceMemoryManager& device_memory;
425 std::deque<TextureCacheGPUMap> gpu_page_table_storage; 427 std::deque<TextureCacheGPUMap> gpu_page_table_storage;
426 428
427 RenderTargets render_targets; 429 RenderTargets render_targets;
@@ -432,7 +434,7 @@ private:
432 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; 434 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
433 std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; 435 std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
434 436
435 VAddr virtual_invalid_space{}; 437 DAddr virtual_invalid_space{};
436 438
437 bool has_deleted_images = false; 439 bool has_deleted_images = false;
438 bool is_rescaling = false; 440 bool is_rescaling = false;
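
texture_cache_base.h above now stores virtual_invalid_space as a DAddr; together with the earlier InsertImage hunk, images whose GPU address cannot be resolved receive a synthetic device address from a simple bump allocator. A small illustrative sketch of that allocator, reusing the base expression from the hunk and treating everything else as assumed:

#include <cstddef>
#include <cstdint>

using DAddr = uint64_t;

constexpr DAddr AlignUp(DAddr value, DAddr align) {
    return (value + align - 1) & ~(align - 1);
}

// Hands out monotonically increasing fake device addresses for images that have
// no resolvable guest mapping, so they can still be inserted into the page table.
class InvalidSpaceAllocator {
public:
    DAddr Allocate(size_t size) {
        const DAddr fake_addr = kBase + next_offset_;
        next_offset_ += AlignUp(size, 32);
        return fake_addr;
    }

private:
    // Same base expression as the hunk above: ~(1ULL << 40ULL).
    static constexpr DAddr kBase = ~(DAddr{1} << 40);
    DAddr next_offset_ = 0;
};
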
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index fcf70068e..1a6f0d1ad 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -20,9 +20,9 @@
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h" 21#include "common/scratch_buffer.h"
22#include "common/settings.h" 22#include "common/settings.h"
23#include "core/memory.h"
24#include "video_core/compatible_formats.h" 23#include "video_core/compatible_formats.h"
25#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/guest_memory.h"
26#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
27#include "video_core/surface.h" 27#include "video_core/surface.h"
28#include "video_core/texture_cache/decode_bc.h" 28#include "video_core/texture_cache/decode_bc.h"
@@ -552,7 +552,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
552 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 552 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
553 const std::span<const u8> src = input.subspan(host_offset); 553 const std::span<const u8> src = input.subspan(host_offset);
554 { 554 {
555 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> 555 Tegra::Memory::GpuGuestMemoryScoped<u8,
556 Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite>
556 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); 557 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
557 558
558 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 559 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index b42d48416..0efb7b49d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -6,6 +6,8 @@
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/settings.h" 7#include "common/settings.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "video_core/host1x/gpu_device_memory_manager.h"
10#include "video_core/host1x/host1x.h"
9#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
10#include "video_core/renderer_null/renderer_null.h" 12#include "video_core/renderer_null/renderer_null.h"
11#include "video_core/renderer_opengl/renderer_opengl.h" 13#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
18 Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, 20 Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
19 std::unique_ptr<Core::Frontend::GraphicsContext> context) { 21 std::unique_ptr<Core::Frontend::GraphicsContext> context) {
20 auto& telemetry_session = system.TelemetrySession(); 22 auto& telemetry_session = system.TelemetrySession();
21 auto& cpu_memory = system.ApplicationMemory(); 23 auto& device_memory = system.Host1x().MemoryManager();
22 24
23 switch (Settings::values.renderer_backend.GetValue()) { 25 switch (Settings::values.renderer_backend.GetValue()) {
24 case Settings::RendererBackend::OpenGL: 26 case Settings::RendererBackend::OpenGL:
25 return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, 27 return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window,
26 gpu, std::move(context)); 28 device_memory, gpu, std::move(context));
27 case Settings::RendererBackend::Vulkan: 29 case Settings::RendererBackend::Vulkan:
28 return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, 30 return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window,
29 gpu, std::move(context)); 31 device_memory, gpu, std::move(context));
30 case Settings::RendererBackend::Null: 32 case Settings::RendererBackend::Null:
31 return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, 33 return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context));
32 std::move(context));
33 default: 34 default:
34 return nullptr; 35 return nullptr;
35 } 36 }
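
The video_core.cpp hunk above rewires CreateRenderer to take its memory dependency from system.Host1x().MemoryManager() and drops it entirely for the Null backend. A stripped-down sketch of that factory shape, with every renderer, window, and memory type replaced by hypothetical stand-ins:

#include <memory>

// Hypothetical stand-ins; the real types are yuzu's renderer backends and
// Tegra::MaxwellDeviceMemoryManager.
struct DeviceMemoryManager {};
struct RendererBase { virtual ~RendererBase() = default; };
struct RendererOpenGL : RendererBase { explicit RendererOpenGL(DeviceMemoryManager&) {} };
struct RendererVulkan : RendererBase { explicit RendererVulkan(DeviceMemoryManager&) {} };
struct RendererNull : RendererBase {};  // No memory dependency at all.

enum class Backend { OpenGL, Vulkan, Null };

// Selects a backend and forwards the device memory manager only where it is needed.
std::unique_ptr<RendererBase> CreateRenderer(Backend backend, DeviceMemoryManager& device_memory) {
    switch (backend) {
    case Backend::OpenGL:
        return std::make_unique<RendererOpenGL>(device_memory);
    case Backend::Vulkan:
        return std::make_unique<RendererVulkan>(device_memory);
    case Backend::Null:
        return std::make_unique<RendererNull>();
    default:
        return nullptr;
    }
}
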