author     Fernando Sahmkow   2023-12-25 07:32:16 +0100
committer  Liam               2024-01-18 21:12:30 -0500
commit     0a2536a0df1f4aea406f2132d3edda0430acc9d1
tree       c0ad53890581c9c7e180c5ccb3b66e3c63e3ba64 /src/video_core/buffer_cache
parent     SMMU: Implement backing CPU page protect/unprotect
SMMU: Initial adaptation to video_core.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h            3
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h          450
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h      98
-rw-r--r--  src/video_core/buffer_cache/memory_tracker_base.h    18
-rw-r--r--  src/video_core/buffer_cache/word_manager.h           24
5 files changed, 304 insertions, 289 deletions
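In short, the patch moves the buffer cache off the CPU-side view of guest memory (VAddr addresses, Core::Memory::Memory, the RasterizerInterface) and onto the SMMU-backed device view (DAddr addresses, Tegra::MaxwellDeviceMemoryManager). A minimal sketch of the interface shift, using stand-in aliases rather than the real yuzu declarations (the exact signatures appear in the hunks below):

    #include <cstdint>

    using u64 = std::uint64_t;
    using VAddr = u64; // CPU virtual address, the old keying scheme
    using DAddr = u64; // device address translated through the SMMU, the new keying scheme

    // Before: BufferCache(VideoCore::RasterizerInterface& rasterizer,
    //                     Core::Memory::Memory& cpu_memory, Runtime& runtime);
    //         void WriteMemory(VAddr cpu_addr, u64 size);
    // After:  BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory, Runtime& runtime);
    //         void WriteMemory(DAddr device_addr, u64 size);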
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 0bb3bf8ae..40e98e395 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -33,13 +33,12 @@ struct NullBufferParams {};
33 * 33 *
34 * The buffer size and address is forcefully aligned to CPU page boundaries. 34 * The buffer size and address is forcefully aligned to CPU page boundaries.
35 */ 35 */
36template <class RasterizerInterface>
37class BufferBase { 36class BufferBase {
38public: 37public:
39 static constexpr u64 BASE_PAGE_BITS = 16; 38 static constexpr u64 BASE_PAGE_BITS = 16;
40 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
41 40
42 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) 41 explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_)
43 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} 42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
44 43
45 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6d1fc3887..6fe2e8b93 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -8,16 +8,16 @@
8#include <numeric> 8#include <numeric>
9 9
10#include "video_core/buffer_cache/buffer_cache_base.h" 10#include "video_core/buffer_cache/buffer_cache_base.h"
11#include "video_core/guest_memory.h"
12#include "video_core/host1x/gpu_device_memory_manager.h"
11 13
12namespace VideoCommon { 14namespace VideoCommon {
13 15
14using Core::Memory::YUZU_PAGESIZE; 16using Core::Memory::YUZU_PAGESIZE;
15 17
16template <class P> 18template <class P>
-BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
-                                                                               rasterizer} {
+BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
+    : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
21 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
22 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
23 common_ranges.clear(); 23 common_ranges.clear();
@@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
29 return; 29 return;
30 } 30 }
31 31
32 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 32 const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
33 const s64 min_spacing_expected = device_memory - 1_GiB; 33 const s64 min_spacing_expected = device_local_memory - 1_GiB;
34 const s64 min_spacing_critical = device_memory - 512_MiB; 34 const s64 min_spacing_critical = device_local_memory - 512_MiB;
35 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 35 const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
38 minimum_memory = static_cast<u64>( 38 minimum_memory = static_cast<u64>(
39 std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), 39 std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
40 DEFAULT_EXPECTED_MEMORY)); 40 DEFAULT_EXPECTED_MEMORY));
41 critical_memory = static_cast<u64>( 41 critical_memory = static_cast<u64>(
42 std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), 42 std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
43 DEFAULT_CRITICAL_MEMORY)); 43 DEFAULT_CRITICAL_MEMORY));
44} 44}
45 45
@@ -105,71 +105,72 @@ void BufferCache<P>::TickFrame() {
105} 105}
106 106
107template <class P> 107template <class P>
108void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 108void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
109 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 109 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
110 const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; 110 const IntervalType subtract_interval{device_addr, device_addr + size};
111 ClearDownload(subtract_interval); 111 ClearDownload(subtract_interval);
112 common_ranges.subtract(subtract_interval); 112 common_ranges.subtract(subtract_interval);
113 } 113 }
114 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); 114 memory_tracker.MarkRegionAsCpuModified(device_addr, size);
115} 115}
116 116
117template <class P> 117template <class P>
118void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 118void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) {
119 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 119 const bool is_dirty = IsRegionRegistered(device_addr, size);
120 if (!is_dirty) { 120 if (!is_dirty) {
121 return; 121 return;
122 } 122 }
123 VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); 123 DAddr aligned_start = Common::AlignDown(device_addr, YUZU_PAGESIZE);
124 VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); 124 DAddr aligned_end = Common::AlignUp(device_addr + size, YUZU_PAGESIZE);
125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 125 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
126 WriteMemory(cpu_addr, size); 126 WriteMemory(device_addr, size);
127 return; 127 return;
128 } 128 }
129 129
130 tmp_buffer.resize_destructive(size); 130 tmp_buffer.resize_destructive(size);
131 cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); 131 device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size);
132 132
133 InlineMemoryImplementation(cpu_addr, size, tmp_buffer); 133 InlineMemoryImplementation(device_addr, size, tmp_buffer);
134} 134}
135 135
136template <class P> 136template <class P>
137bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { 137bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) {
138 const bool is_dirty = IsRegionRegistered(cpu_addr, size); 138 const bool is_dirty = IsRegionRegistered(device_addr, size);
139 if (!is_dirty) { 139 if (!is_dirty) {
140 return false; 140 return false;
141 } 141 }
142 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { 142 if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
143 return true; 143 return true;
144 } 144 }
145 WriteMemory(cpu_addr, size); 145 WriteMemory(device_addr, size);
146 return false; 146 return false;
147} 147}
148 148
149template <class P> 149template <class P>
150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, 150std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr,
151 u64 size) { 151 u64 size) {
152 std::optional<VideoCore::RasterizerDownloadArea> area{}; 152 std::optional<VideoCore::RasterizerDownloadArea> area{};
153 area.emplace(); 153 area.emplace();
-    VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE);
-    VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
-    area->start_address = cpu_addr_start_aligned;
-    area->end_address = cpu_addr_end_aligned;
-    if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) {
+    DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::Memory::YUZU_PAGESIZE);
+    DAddr device_addr_end_aligned =
+        Common::AlignUp(device_addr + size, Core::Memory::YUZU_PAGESIZE);
+    area->start_address = device_addr_start_aligned;
+    area->end_address = device_addr_end_aligned;
+    if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
         area->preemtive = true;
         return area;
     };
-    area->preemtive =
-        !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned);
-    memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned,
-                                            cpu_addr_end_aligned - cpu_addr_start_aligned);
+    area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
+                                           device_addr_end_aligned - device_addr_start_aligned);
+    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
+                                            device_addr_end_aligned - device_addr_start_aligned);
     return area;
167} 168}
168 169
169template <class P> 170template <class P>
170void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 171void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
171 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 172 ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) {
172 DownloadBufferMemory(buffer, cpu_addr, size); 173 DownloadBufferMemory(buffer, device_addr, size);
173 }); 174 });
174} 175}
175 176
@@ -184,8 +185,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
184 185
185template <class P> 186template <class P>
186bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { 187bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
187 const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address); 188 const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
188 const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address); 189 const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
189 if (!cpu_src_address || !cpu_dest_address) { 190 if (!cpu_src_address || !cpu_dest_address) {
190 return false; 191 return false;
191 } 192 }
@@ -216,10 +217,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
216 }}; 217 }};
217 218
218 boost::container::small_vector<IntervalType, 4> tmp_intervals; 219 boost::container::small_vector<IntervalType, 4> tmp_intervals;
219 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 220 auto mirror = [&](DAddr base_address, DAddr base_address_end) {
220 const u64 size = base_address_end - base_address; 221 const u64 size = base_address_end - base_address;
221 const VAddr diff = base_address - *cpu_src_address; 222 const DAddr diff = base_address - *cpu_src_address;
222 const VAddr new_base_address = *cpu_dest_address + diff; 223 const DAddr new_base_address = *cpu_dest_address + diff;
223 const IntervalType add_interval{new_base_address, new_base_address + size}; 224 const IntervalType add_interval{new_base_address, new_base_address + size};
224 tmp_intervals.push_back(add_interval); 225 tmp_intervals.push_back(add_interval);
225 uncommitted_ranges.add(add_interval); 226 uncommitted_ranges.add(add_interval);
@@ -239,15 +240,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
239 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 240 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
240 } 241 }
241 242
242 Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( 243 Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
243 cpu_memory, *cpu_src_address, amount, &tmp_buffer); 244 device_memory, *cpu_src_address, amount, &tmp_buffer);
244 tmp.SetAddressAndSize(*cpu_dest_address, amount); 245 tmp.SetAddressAndSize(*cpu_dest_address, amount);
245 return true; 246 return true;
246} 247}
247 248
248template <class P> 249template <class P>
249bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { 250bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
250 const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address); 251 const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
251 if (!cpu_dst_address) { 252 if (!cpu_dst_address) {
252 return false; 253 return false;
253 } 254 }
@@ -273,23 +274,23 @@ template <class P>
273std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, 274std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
274 ObtainBufferSynchronize sync_info, 275 ObtainBufferSynchronize sync_info,
275 ObtainBufferOperation post_op) { 276 ObtainBufferOperation post_op) {
276 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 277 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
277 if (!cpu_addr) { 278 if (!device_addr) {
278 return {&slot_buffers[NULL_BUFFER_ID], 0}; 279 return {&slot_buffers[NULL_BUFFER_ID], 0};
279 } 280 }
280 return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); 281 return ObtainCPUBuffer(*device_addr, size, sync_info, post_op);
281} 282}
282 283
283template <class P> 284template <class P>
284std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( 285std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
285 VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { 286 DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
286 const BufferId buffer_id = FindBuffer(cpu_addr, size); 287 const BufferId buffer_id = FindBuffer(device_addr, size);
287 Buffer& buffer = slot_buffers[buffer_id]; 288 Buffer& buffer = slot_buffers[buffer_id];
288 289
289 // synchronize op 290 // synchronize op
290 switch (sync_info) { 291 switch (sync_info) {
291 case ObtainBufferSynchronize::FullSynchronize: 292 case ObtainBufferSynchronize::FullSynchronize:
292 SynchronizeBuffer(buffer, cpu_addr, size); 293 SynchronizeBuffer(buffer, device_addr, size);
293 break; 294 break;
294 default: 295 default:
295 break; 296 break;
@@ -297,12 +298,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
297 298
298 switch (post_op) { 299 switch (post_op) {
299 case ObtainBufferOperation::MarkAsWritten: 300 case ObtainBufferOperation::MarkAsWritten:
300 MarkWrittenBuffer(buffer_id, cpu_addr, size); 301 MarkWrittenBuffer(buffer_id, device_addr, size);
301 break; 302 break;
302 case ObtainBufferOperation::DiscardWrite: { 303 case ObtainBufferOperation::DiscardWrite: {
303 VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); 304 DAddr device_addr_start = Common::AlignDown(device_addr, 64);
304 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); 305 DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
305 IntervalType interval{cpu_addr_start, cpu_addr_end}; 306 IntervalType interval{device_addr_start, device_addr_end};
306 ClearDownload(interval); 307 ClearDownload(interval);
307 common_ranges.subtract(interval); 308 common_ranges.subtract(interval);
308 break; 309 break;
@@ -311,15 +312,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
311 break; 312 break;
312 } 313 }
313 314
314 return {&buffer, buffer.Offset(cpu_addr)}; 315 return {&buffer, buffer.Offset(device_addr)};
315} 316}
316 317
317template <class P> 318template <class P>
318void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 319void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
319 u32 size) { 320 u32 size) {
320 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 321 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
321 const Binding binding{ 322 const Binding binding{
322 .cpu_addr = *cpu_addr, 323 .device_addr = *device_addr,
323 .size = size, 324 .size = size,
324 .buffer_id = BufferId{}, 325 .buffer_id = BufferId{},
325 }; 326 };
@@ -555,16 +556,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
555 for (const IntervalSet& intervals : committed_ranges) { 556 for (const IntervalSet& intervals : committed_ranges) {
556 for (auto& interval : intervals) { 557 for (auto& interval : intervals) {
557 const std::size_t size = interval.upper() - interval.lower(); 558 const std::size_t size = interval.upper() - interval.lower();
558 const VAddr cpu_addr = interval.lower(); 559 const DAddr device_addr = interval.lower();
559 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 560 ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
560 const VAddr buffer_start = buffer.CpuAddr(); 561 const DAddr buffer_start = buffer.CpuAddr();
561 const VAddr buffer_end = buffer_start + buffer.SizeBytes(); 562 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
562 const VAddr new_start = std::max(buffer_start, cpu_addr); 563 const DAddr new_start = std::max(buffer_start, device_addr);
563 const VAddr new_end = std::min(buffer_end, cpu_addr + size); 564 const DAddr new_end = std::min(buffer_end, device_addr + size);
                 memory_tracker.ForEachDownloadRange(
-                    new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
-                        const VAddr buffer_addr = buffer.CpuAddr();
-                        const auto add_download = [&](VAddr start, VAddr end) {
+                    new_start, new_end - new_start, false,
+                    [&](u64 device_addr_out, u64 range_size) {
+                        const DAddr buffer_addr = buffer.CpuAddr();
+                        const auto add_download = [&](DAddr start, DAddr end) {
568 const u64 new_offset = start - buffer_addr; 570 const u64 new_offset = start - buffer_addr;
569 const u64 new_size = end - start; 571 const u64 new_size = end - start;
570 downloads.push_back({ 572 downloads.push_back({
@@ -582,7 +584,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
582 largest_copy = std::max(largest_copy, new_size); 584 largest_copy = std::max(largest_copy, new_size);
583 }; 585 };
584 586
585 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); 587 ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
586 }); 588 });
587 }); 589 });
588 } 590 }
@@ -605,8 +607,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
605 BufferCopy second_copy{copy}; 607 BufferCopy second_copy{copy};
606 Buffer& buffer = slot_buffers[buffer_id]; 608 Buffer& buffer = slot_buffers[buffer_id];
607 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; 609 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
608 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); 610 DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
609 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; 611 const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
610 async_downloads += std::make_pair(base_interval, 1); 612 async_downloads += std::make_pair(base_interval, 1);
611 buffer.MarkUsage(copy.src_offset, copy.size); 613 buffer.MarkUsage(copy.src_offset, copy.size);
612 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); 614 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
@@ -635,11 +637,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
635 runtime.Finish(); 637 runtime.Finish();
636 for (const auto& [copy, buffer_id] : downloads) { 638 for (const auto& [copy, buffer_id] : downloads) {
637 const Buffer& buffer = slot_buffers[buffer_id]; 639 const Buffer& buffer = slot_buffers[buffer_id];
638 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 640 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
639 // Undo the modified offset 641 // Undo the modified offset
640 const u64 dst_offset = copy.dst_offset - download_staging.offset; 642 const u64 dst_offset = copy.dst_offset - download_staging.offset;
641 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 643 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
642 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 644 device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
643 } 645 }
644 } else { 646 } else {
645 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 647 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
@@ -647,8 +649,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
647 Buffer& buffer = slot_buffers[buffer_id]; 649 Buffer& buffer = slot_buffers[buffer_id];
648 buffer.ImmediateDownload(copy.src_offset, 650 buffer.ImmediateDownload(copy.src_offset,
649 immediate_buffer.subspan(0, copy.size)); 651 immediate_buffer.subspan(0, copy.size));
650 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 652 const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
651 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 653 device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
652 } 654 }
653 } 655 }
654 } 656 }
@@ -681,19 +683,19 @@ void BufferCache<P>::PopAsyncBuffers() {
681 u8* base = async_buffer->mapped_span.data(); 683 u8* base = async_buffer->mapped_span.data();
682 const size_t base_offset = async_buffer->offset; 684 const size_t base_offset = async_buffer->offset;
683 for (const auto& copy : downloads) { 685 for (const auto& copy : downloads) {
684 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); 686 const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
685 const u64 dst_offset = copy.dst_offset - base_offset; 687 const u64 dst_offset = copy.dst_offset - base_offset;
686 const u8* read_mapped_memory = base + dst_offset; 688 const u8* read_mapped_memory = base + dst_offset;
687 ForEachInOverlapCounter( 689 ForEachInOverlapCounter(
688 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { 690 async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
689 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], 691 device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
690 end - start); 692 end - start);
691 if (count == 1) { 693 if (count == 1) {
692 const IntervalType base_interval{start, end}; 694 const IntervalType base_interval{start, end};
693 common_ranges.subtract(base_interval); 695 common_ranges.subtract(base_interval);
694 } 696 }
695 }); 697 });
696 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; 698 const IntervalType subtract_interval{device_addr, device_addr + copy.size};
697 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); 699 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
698 } 700 }
699 async_buffers_death_ring.emplace_back(*async_buffer); 701 async_buffers_death_ring.emplace_back(*async_buffer);
@@ -703,15 +705,15 @@ void BufferCache<P>::PopAsyncBuffers() {
703} 705}
704 706
705template <class P> 707template <class P>
706bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 708bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
707 bool is_dirty = false; 709 bool is_dirty = false;
708 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); 710 ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
709 return is_dirty; 711 return is_dirty;
710} 712}
711 713
712template <class P> 714template <class P>
713bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 715bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) {
714 const VAddr end_addr = addr + size; 716 const DAddr end_addr = addr + size;
715 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); 717 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
716 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { 718 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
717 const BufferId buffer_id = page_table[page]; 719 const BufferId buffer_id = page_table[page];
@@ -720,8 +722,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
720 continue; 722 continue;
721 } 723 }
722 Buffer& buffer = slot_buffers[buffer_id]; 724 Buffer& buffer = slot_buffers[buffer_id];
723 const VAddr buf_start_addr = buffer.CpuAddr(); 725 const DAddr buf_start_addr = buffer.CpuAddr();
724 const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); 726 const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
725 if (buf_start_addr < end_addr && addr < buf_end_addr) { 727 if (buf_start_addr < end_addr && addr < buf_end_addr) {
726 return true; 728 return true;
727 } 729 }
@@ -731,7 +733,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
731} 733}
732 734
733template <class P> 735template <class P>
734bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 736bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) {
735 return memory_tracker.IsRegionCpuModified(addr, size); 737 return memory_tracker.IsRegionCpuModified(addr, size);
736} 738}
737 739
@@ -739,7 +741,7 @@ template <class P>
739void BufferCache<P>::BindHostIndexBuffer() { 741void BufferCache<P>::BindHostIndexBuffer() {
740 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; 742 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
741 TouchBuffer(buffer, channel_state->index_buffer.buffer_id); 743 TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
742 const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); 744 const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
743 const u32 size = channel_state->index_buffer.size; 745 const u32 size = channel_state->index_buffer.size;
744 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 746 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
745 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { 747 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
@@ -754,7 +756,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
754 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); 756 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
755 } 757 }
756 } else { 758 } else {
757 SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); 759 SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size);
758 } 760 }
759 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { 761 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
760 const u32 new_offset = 762 const u32 new_offset =
@@ -777,7 +779,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
777 const Binding& binding = channel_state->vertex_buffers[index]; 779 const Binding& binding = channel_state->vertex_buffers[index];
778 Buffer& buffer = slot_buffers[binding.buffer_id]; 780 Buffer& buffer = slot_buffers[binding.buffer_id];
779 TouchBuffer(buffer, binding.buffer_id); 781 TouchBuffer(buffer, binding.buffer_id);
780 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 782 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
781 if (!flags[Dirty::VertexBuffer0 + index]) { 783 if (!flags[Dirty::VertexBuffer0 + index]) {
782 continue; 784 continue;
783 } 785 }
@@ -797,7 +799,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
797 Buffer& buffer = slot_buffers[binding.buffer_id]; 799 Buffer& buffer = slot_buffers[binding.buffer_id];
798 800
799 const u32 stride = maxwell3d->regs.vertex_streams[index].stride; 801 const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
800 const u32 offset = buffer.Offset(binding.cpu_addr); 802 const u32 offset = buffer.Offset(binding.device_addr);
801 buffer.MarkUsage(offset, binding.size); 803 buffer.MarkUsage(offset, binding.size);
802 804
803 host_bindings.buffers.push_back(&buffer); 805 host_bindings.buffers.push_back(&buffer);
@@ -814,7 +816,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
814 const auto bind_buffer = [this](const Binding& binding) { 816 const auto bind_buffer = [this](const Binding& binding) {
815 Buffer& buffer = slot_buffers[binding.buffer_id]; 817 Buffer& buffer = slot_buffers[binding.buffer_id];
816 TouchBuffer(buffer, binding.buffer_id); 818 TouchBuffer(buffer, binding.buffer_id);
817 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 819 SynchronizeBuffer(buffer, binding.device_addr, binding.size);
818 }; 820 };
819 if (current_draw_indirect->include_count) { 821 if (current_draw_indirect->include_count) {
820 bind_buffer(channel_state->count_buffer_binding); 822 bind_buffer(channel_state->count_buffer_binding);
@@ -842,13 +844,13 @@ template <class P>
842void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, 844void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
843 bool needs_bind) { 845 bool needs_bind) {
844 const Binding& binding = channel_state->uniform_buffers[stage][index]; 846 const Binding& binding = channel_state->uniform_buffers[stage][index];
845 const VAddr cpu_addr = binding.cpu_addr; 847 const DAddr device_addr = binding.device_addr;
846 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); 848 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
847 Buffer& buffer = slot_buffers[binding.buffer_id]; 849 Buffer& buffer = slot_buffers[binding.buffer_id];
848 TouchBuffer(buffer, binding.buffer_id); 850 TouchBuffer(buffer, binding.buffer_id);
849 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 851 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
850 size <= channel_state->uniform_buffer_skip_cache_size && 852 size <= channel_state->uniform_buffer_skip_cache_size &&
851 !memory_tracker.IsRegionGpuModified(cpu_addr, size); 853 !memory_tracker.IsRegionGpuModified(device_addr, size);
852 if (use_fast_buffer) { 854 if (use_fast_buffer) {
853 if constexpr (IS_OPENGL) { 855 if constexpr (IS_OPENGL) {
854 if (runtime.HasFastBufferSubData()) { 856 if (runtime.HasFastBufferSubData()) {
@@ -862,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
862 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; 864 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
863 runtime.BindFastUniformBuffer(stage, binding_index, size); 865 runtime.BindFastUniformBuffer(stage, binding_index, size);
864 } 866 }
865 const auto span = ImmediateBufferWithData(cpu_addr, size); 867 const auto span = ImmediateBufferWithData(device_addr, size);
866 runtime.PushFastUniformBuffer(stage, binding_index, span); 868 runtime.PushFastUniformBuffer(stage, binding_index, span);
867 return; 869 return;
868 } 870 }
@@ -873,11 +875,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
873 } 875 }
874 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 876 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
875 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 877 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
876 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 878 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
877 return; 879 return;
878 } 880 }
879 // Classic cached path 881 // Classic cached path
880 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); 882 const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size);
881 if (sync_cached) { 883 if (sync_cached) {
882 ++channel_state->uniform_cache_hits[0]; 884 ++channel_state->uniform_cache_hits[0];
883 } 885 }
@@ -892,7 +894,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
892 if (!needs_bind) { 894 if (!needs_bind) {
893 return; 895 return;
894 } 896 }
895 const u32 offset = buffer.Offset(cpu_addr); 897 const u32 offset = buffer.Offset(device_addr);
896 if constexpr (IS_OPENGL) { 898 if constexpr (IS_OPENGL) {
897 // Fast buffer will be unbound 899 // Fast buffer will be unbound
898 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); 900 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
@@ -920,14 +922,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
920 Buffer& buffer = slot_buffers[binding.buffer_id]; 922 Buffer& buffer = slot_buffers[binding.buffer_id];
921 TouchBuffer(buffer, binding.buffer_id); 923 TouchBuffer(buffer, binding.buffer_id);
922 const u32 size = binding.size; 924 const u32 size = binding.size;
923 SynchronizeBuffer(buffer, binding.cpu_addr, size); 925 SynchronizeBuffer(buffer, binding.device_addr, size);
924 926
925 const u32 offset = buffer.Offset(binding.cpu_addr); 927 const u32 offset = buffer.Offset(binding.device_addr);
926 buffer.MarkUsage(offset, size); 928 buffer.MarkUsage(offset, size);
927 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; 929 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
928 930
929 if (is_written) { 931 if (is_written) {
930 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 932 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
931 } 933 }
932 934
933 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 935 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -945,14 +947,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
945 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; 947 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
946 Buffer& buffer = slot_buffers[binding.buffer_id]; 948 Buffer& buffer = slot_buffers[binding.buffer_id];
947 const u32 size = binding.size; 949 const u32 size = binding.size;
948 SynchronizeBuffer(buffer, binding.cpu_addr, size); 950 SynchronizeBuffer(buffer, binding.device_addr, size);
949 951
950 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0; 952 const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
951 if (is_written) { 953 if (is_written) {
952 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 954 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
953 } 955 }
954 956
955 const u32 offset = buffer.Offset(binding.cpu_addr); 957 const u32 offset = buffer.Offset(binding.device_addr);
956 const PixelFormat format = binding.format; 958 const PixelFormat format = binding.format;
957 buffer.MarkUsage(offset, size); 959 buffer.MarkUsage(offset, size);
958 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 960 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -982,11 +984,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
982 Buffer& buffer = slot_buffers[binding.buffer_id]; 984 Buffer& buffer = slot_buffers[binding.buffer_id];
983 TouchBuffer(buffer, binding.buffer_id); 985 TouchBuffer(buffer, binding.buffer_id);
984 const u32 size = binding.size; 986 const u32 size = binding.size;
985 SynchronizeBuffer(buffer, binding.cpu_addr, size); 987 SynchronizeBuffer(buffer, binding.device_addr, size);
986 988
987 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 989 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
988 990
989 const u32 offset = buffer.Offset(binding.cpu_addr); 991 const u32 offset = buffer.Offset(binding.device_addr);
990 buffer.MarkUsage(offset, size); 992 buffer.MarkUsage(offset, size);
991 host_bindings.buffers.push_back(&buffer); 993 host_bindings.buffers.push_back(&buffer);
992 host_bindings.offsets.push_back(offset); 994 host_bindings.offsets.push_back(offset);
@@ -1011,9 +1013,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1011 TouchBuffer(buffer, binding.buffer_id); 1013 TouchBuffer(buffer, binding.buffer_id);
1012 const u32 size = 1014 const u32 size =
1013 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); 1015 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
1014 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1016 SynchronizeBuffer(buffer, binding.device_addr, size);
1015 1017
1016 const u32 offset = buffer.Offset(binding.cpu_addr); 1018 const u32 offset = buffer.Offset(binding.device_addr);
1017 buffer.MarkUsage(offset, size); 1019 buffer.MarkUsage(offset, size);
1018 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1020 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
1019 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); 1021 runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
@@ -1032,15 +1034,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1032 Buffer& buffer = slot_buffers[binding.buffer_id]; 1034 Buffer& buffer = slot_buffers[binding.buffer_id];
1033 TouchBuffer(buffer, binding.buffer_id); 1035 TouchBuffer(buffer, binding.buffer_id);
1034 const u32 size = binding.size; 1036 const u32 size = binding.size;
1035 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1037 SynchronizeBuffer(buffer, binding.device_addr, size);
1036 1038
1037 const u32 offset = buffer.Offset(binding.cpu_addr); 1039 const u32 offset = buffer.Offset(binding.device_addr);
1038 buffer.MarkUsage(offset, size); 1040 buffer.MarkUsage(offset, size);
1039 const bool is_written = 1041 const bool is_written =
1040 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; 1042 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
1041 1043
1042 if (is_written) { 1044 if (is_written) {
1043 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1045 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1044 } 1046 }
1045 1047
1046 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 1048 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -1058,15 +1060,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
1058 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; 1060 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
1059 Buffer& buffer = slot_buffers[binding.buffer_id]; 1061 Buffer& buffer = slot_buffers[binding.buffer_id];
1060 const u32 size = binding.size; 1062 const u32 size = binding.size;
1061 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1063 SynchronizeBuffer(buffer, binding.device_addr, size);
1062 1064
1063 const bool is_written = 1065 const bool is_written =
1064 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0; 1066 ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
1065 if (is_written) { 1067 if (is_written) {
1066 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size); 1068 MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
1067 } 1069 }
1068 1070
1069 const u32 offset = buffer.Offset(binding.cpu_addr); 1071 const u32 offset = buffer.Offset(binding.device_addr);
1070 const PixelFormat format = binding.format; 1072 const PixelFormat format = binding.format;
1071 buffer.MarkUsage(offset, size); 1073 buffer.MarkUsage(offset, size);
1072 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 1074 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1131,7 +1133,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
1131 inline_buffer_id = CreateBuffer(0, buffer_size); 1133 inline_buffer_id = CreateBuffer(0, buffer_size);
1132 } 1134 }
1133 channel_state->index_buffer = Binding{ 1135 channel_state->index_buffer = Binding{
1134 .cpu_addr = 0, 1136 .device_addr = 0,
1135 .size = inline_index_size, 1137 .size = inline_index_size,
1136 .buffer_id = inline_buffer_id, 1138 .buffer_id = inline_buffer_id,
1137 }; 1139 };
@@ -1140,19 +1142,19 @@ void BufferCache<P>::UpdateIndexBuffer() {
1140 1142
1141 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); 1143 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1142 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); 1144 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1143 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1145 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1144 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1146 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1145 const u32 draw_size = 1147 const u32 draw_size =
1146 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); 1148 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1147 const u32 size = std::min(address_size, draw_size); 1149 const u32 size = std::min(address_size, draw_size);
1148 if (size == 0 || !cpu_addr) { 1150 if (size == 0 || !device_addr) {
1149 channel_state->index_buffer = NULL_BINDING; 1151 channel_state->index_buffer = NULL_BINDING;
1150 return; 1152 return;
1151 } 1153 }
1152 channel_state->index_buffer = Binding{ 1154 channel_state->index_buffer = Binding{
1153 .cpu_addr = *cpu_addr, 1155 .device_addr = *device_addr,
1154 .size = size, 1156 .size = size,
1155 .buffer_id = FindBuffer(*cpu_addr, size), 1157 .buffer_id = FindBuffer(*device_addr, size),
1156 }; 1158 };
1157} 1159}
1158 1160
@@ -1178,19 +1180,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1178 const auto& limit = maxwell3d->regs.vertex_stream_limits[index]; 1180 const auto& limit = maxwell3d->regs.vertex_stream_limits[index];
1179 const GPUVAddr gpu_addr_begin = array.Address(); 1181 const GPUVAddr gpu_addr_begin = array.Address();
1180 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1182 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1181 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1183 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1182 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1184 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1183 u32 size = address_size; // TODO: Analyze stride and number of vertices 1185 u32 size = address_size; // TODO: Analyze stride and number of vertices
1184 if (array.enable == 0 || size == 0 || !cpu_addr) { 1186 if (array.enable == 0 || size == 0 || !device_addr) {
1185 channel_state->vertex_buffers[index] = NULL_BINDING; 1187 channel_state->vertex_buffers[index] = NULL_BINDING;
1186 return; 1188 return;
1187 } 1189 }
1188 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1190 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1189 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); 1191 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1190 } 1192 }
1191 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1193 const BufferId buffer_id = FindBuffer(*device_addr, size);
1192 channel_state->vertex_buffers[index] = Binding{ 1194 channel_state->vertex_buffers[index] = Binding{
1193 .cpu_addr = *cpu_addr, 1195 .device_addr = *device_addr,
1194 .size = size, 1196 .size = size,
1195 .buffer_id = buffer_id, 1197 .buffer_id = buffer_id,
1196 }; 1198 };
@@ -1199,15 +1201,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1199template <class P> 1201template <class P>
1200void BufferCache<P>::UpdateDrawIndirect() { 1202void BufferCache<P>::UpdateDrawIndirect() {
1201 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { 1203 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
1202 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1204 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1203 if (!cpu_addr) { 1205 if (!device_addr) {
1204 binding = NULL_BINDING; 1206 binding = NULL_BINDING;
1205 return; 1207 return;
1206 } 1208 }
1207 binding = Binding{ 1209 binding = Binding{
1208 .cpu_addr = *cpu_addr, 1210 .device_addr = *device_addr,
1209 .size = static_cast<u32>(size), 1211 .size = static_cast<u32>(size),
1210 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), 1212 .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)),
1211 }; 1213 };
1212 }; 1214 };
1213 if (current_draw_indirect->include_count) { 1215 if (current_draw_indirect->include_count) {
@@ -1231,7 +1233,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1231 channel_state->dirty_uniform_buffers[stage] |= 1U << index; 1233 channel_state->dirty_uniform_buffers[stage] |= 1U << index;
1232 } 1234 }
1233 // Resolve buffer 1235 // Resolve buffer
1234 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1236 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1235 }); 1237 });
1236} 1238}
1237 1239
@@ -1240,7 +1242,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1240 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { 1242 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
1241 // Resolve buffer 1243 // Resolve buffer
1242 Binding& binding = channel_state->storage_buffers[stage][index]; 1244 Binding& binding = channel_state->storage_buffers[stage][index];
1243 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1245 const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size);
1244 binding.buffer_id = buffer_id; 1246 binding.buffer_id = buffer_id;
1245 }); 1247 });
1246} 1248}
@@ -1249,7 +1251,7 @@ template <class P>
1249void BufferCache<P>::UpdateTextureBuffers(size_t stage) { 1251void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1250 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { 1252 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
1251 Binding& binding = channel_state->texture_buffers[stage][index]; 1253 Binding& binding = channel_state->texture_buffers[stage][index];
1252 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1254 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1253 }); 1255 });
1254} 1256}
1255 1257
@@ -1268,14 +1270,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1268 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index]; 1270 const auto& binding = maxwell3d->regs.transform_feedback.buffers[index];
1269 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset; 1271 const GPUVAddr gpu_addr = binding.Address() + binding.start_offset;
1270 const u32 size = binding.size; 1272 const u32 size = binding.size;
1271 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1273 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1272 if (binding.enable == 0 || size == 0 || !cpu_addr) { 1274 if (binding.enable == 0 || size == 0 || !device_addr) {
1273 channel_state->transform_feedback_buffers[index] = NULL_BINDING; 1275 channel_state->transform_feedback_buffers[index] = NULL_BINDING;
1274 return; 1276 return;
1275 } 1277 }
1276 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1278 const BufferId buffer_id = FindBuffer(*device_addr, size);
1277 channel_state->transform_feedback_buffers[index] = Binding{ 1279 channel_state->transform_feedback_buffers[index] = Binding{
1278 .cpu_addr = *cpu_addr, 1280 .device_addr = *device_addr,
1279 .size = size, 1281 .size = size,
1280 .buffer_id = buffer_id, 1282 .buffer_id = buffer_id,
1281 }; 1283 };
@@ -1289,13 +1291,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
1289 const auto& launch_desc = kepler_compute->launch_description; 1291 const auto& launch_desc = kepler_compute->launch_description;
1290 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { 1292 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
1291 const auto& cbuf = launch_desc.const_buffer_config[index]; 1293 const auto& cbuf = launch_desc.const_buffer_config[index];
1292 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address()); 1294 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
1293 if (cpu_addr) { 1295 if (device_addr) {
1294 binding.cpu_addr = *cpu_addr; 1296 binding.device_addr = *device_addr;
1295 binding.size = cbuf.size; 1297 binding.size = cbuf.size;
1296 } 1298 }
1297 } 1299 }
1298 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1300 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1299 }); 1301 });
1300} 1302}
1301 1303
@@ -1304,7 +1306,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1304 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { 1306 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
1305 // Resolve buffer 1307 // Resolve buffer
1306 Binding& binding = channel_state->compute_storage_buffers[index]; 1308 Binding& binding = channel_state->compute_storage_buffers[index];
1307 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1309 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1308 }); 1310 });
1309} 1311}
1310 1312
@@ -1312,45 +1314,63 @@ template <class P>
1312void BufferCache<P>::UpdateComputeTextureBuffers() { 1314void BufferCache<P>::UpdateComputeTextureBuffers() {
1313 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { 1315 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
1314 Binding& binding = channel_state->compute_texture_buffers[index]; 1316 Binding& binding = channel_state->compute_texture_buffers[index];
1315 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1317 binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
1316 }); 1318 });
1317} 1319}
1318 1320
1319template <class P> 1321template <class P>
1320void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1322void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
1321 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); 1323 memory_tracker.MarkRegionAsGpuModified(device_addr, size);
1322 1324
1323 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1325 const IntervalType base_interval{device_addr, device_addr + size};
1324 common_ranges.add(base_interval); 1326 common_ranges.add(base_interval);
1325 uncommitted_ranges.add(base_interval); 1327 uncommitted_ranges.add(base_interval);
1326} 1328}
1327 1329
1328template <class P> 1330template <class P>
1329BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { 1331BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) {
1330 if (cpu_addr == 0) { 1332 if (device_addr == 0) {
1331 return NULL_BUFFER_ID; 1333 return NULL_BUFFER_ID;
1332 } 1334 }
1333 const u64 page = cpu_addr >> CACHING_PAGEBITS; 1335 const u64 page = device_addr >> CACHING_PAGEBITS;
1334 const BufferId buffer_id = page_table[page]; 1336 const BufferId buffer_id = page_table[page];
1335 if (!buffer_id) { 1337 if (!buffer_id) {
1336 return CreateBuffer(cpu_addr, size); 1338 return CreateBuffer(device_addr, size);
1337 } 1339 }
1338 const Buffer& buffer = slot_buffers[buffer_id]; 1340 const Buffer& buffer = slot_buffers[buffer_id];
1339 if (buffer.IsInBounds(cpu_addr, size)) { 1341 if (buffer.IsInBounds(device_addr, size)) {
1340 return buffer_id; 1342 return buffer_id;
1341 } 1343 }
1342 return CreateBuffer(cpu_addr, size); 1344 return CreateBuffer(device_addr, size);
1343} 1345}
1344 1346
1345template <class P> 1347template <class P>
1346typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1348typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr,
1347 u32 wanted_size) { 1349 u32 wanted_size) {
1348 static constexpr int STREAM_LEAP_THRESHOLD = 16; 1350 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1349 boost::container::small_vector<BufferId, 16> overlap_ids; 1351 boost::container::small_vector<BufferId, 16> overlap_ids;
1350 VAddr begin = cpu_addr; 1352 DAddr begin = device_addr;
1351 VAddr end = cpu_addr + wanted_size; 1353 DAddr end = device_addr + wanted_size;
1352 int stream_score = 0; 1354 int stream_score = 0;
1353 bool has_stream_leap = false; 1355 bool has_stream_leap = false;
1356 auto expand_begin = [&](DAddr add_value) {
1357 static constexpr DAddr min_page = CACHING_PAGESIZE + Core::Memory::YUZU_PAGESIZE;
1358 if (add_value > begin - min_page ) {
1359 begin = min_page;
1360 device_addr = Core::Memory::YUZU_PAGESIZE;
1361 return;
1362 }
1363 begin -= add_value;
1364 device_addr = begin - CACHING_PAGESIZE;
1365 };
1366 auto expand_end = [&](DAddr add_value) {
1367 static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS;
1368 if (add_value > max_page - end ) {
1369 end = max_page;
1370 return;
1371 }
1372 end += add_value;
1373 };
1354 if (begin == 0) { 1374 if (begin == 0) {
1355 return OverlapResult{ 1375 return OverlapResult{
1356 .ids = std::move(overlap_ids), 1376 .ids = std::move(overlap_ids),
@@ -1359,9 +1379,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1359 .has_stream_leap = has_stream_leap, 1379 .has_stream_leap = has_stream_leap,
1360 }; 1380 };
1361 } 1381 }
1362 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); 1382 for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1363 cpu_addr += CACHING_PAGESIZE) { 1383 device_addr += CACHING_PAGESIZE) {
1364 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; 1384 const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS];
1365 if (!overlap_id) { 1385 if (!overlap_id) {
1366 continue; 1386 continue;
1367 } 1387 }
@@ -1371,12 +1391,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1371 } 1391 }
1372 overlap_ids.push_back(overlap_id); 1392 overlap_ids.push_back(overlap_id);
1373 overlap.Pick(); 1393 overlap.Pick();
1374 const VAddr overlap_cpu_addr = overlap.CpuAddr(); 1394 const DAddr overlap_device_addr = overlap.CpuAddr();
1375 const bool expands_left = overlap_cpu_addr < begin; 1395 const bool expands_left = overlap_device_addr < begin;
1376 if (expands_left) { 1396 if (expands_left) {
1377 begin = overlap_cpu_addr; 1397 begin = overlap_device_addr;
1378 } 1398 }
1379 const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); 1399 const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes();
1380 const bool expands_right = overlap_end > end; 1400 const bool expands_right = overlap_end > end;
1381 if (overlap_end > end) { 1401 if (overlap_end > end) {
1382 end = overlap_end; 1402 end = overlap_end;
@@ -1387,11 +1407,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1387 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1407 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1388 has_stream_leap = true; 1408 has_stream_leap = true;
1389 if (expands_right) { 1409 if (expands_right) {
1390 begin -= CACHING_PAGESIZE * 256; 1410 expand_begin(CACHING_PAGESIZE * 128);
1391 cpu_addr = begin - CACHING_PAGESIZE;
1392 } 1411 }
1393 if (expands_left) { 1412 if (expands_left) {
1394 end += CACHING_PAGESIZE * 256; 1413 expand_end(CACHING_PAGESIZE * 128);
1395 } 1414 }
1396 } 1415 }
1397 } 1416 }
@@ -1424,13 +1443,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1424} 1443}
1425 1444
1426template <class P> 1445template <class P>
1427BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1446BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) {
1428 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); 1447 DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE);
1429 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); 1448 device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE);
1430 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); 1449 wanted_size = static_cast<u32>(device_addr_end - device_addr);
1431 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1450 const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
1432 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1451 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1433 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1452 const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size);
1434 auto& new_buffer = slot_buffers[new_buffer_id]; 1453 auto& new_buffer = slot_buffers[new_buffer_id];
1435 const size_t size_bytes = new_buffer.SizeBytes(); 1454 const size_t size_bytes = new_buffer.SizeBytes();
1436 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); 1455 runtime.ClearBuffer(new_buffer, 0, size_bytes, 0);
@@ -1465,10 +1484,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1465 total_used_memory -= Common::AlignUp(size, 1024); 1484 total_used_memory -= Common::AlignUp(size, 1024);
1466 lru_cache.Free(buffer.getLRUID()); 1485 lru_cache.Free(buffer.getLRUID());
1467 } 1486 }
1468 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1487 const DAddr device_addr_begin = buffer.CpuAddr();
1469 const VAddr cpu_addr_end = cpu_addr_begin + size; 1488 const DAddr device_addr_end = device_addr_begin + size;
1470 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; 1489 const u64 page_begin = device_addr_begin / CACHING_PAGESIZE;
1471 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); 1490 const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE);
1472 for (u64 page = page_begin; page != page_end; ++page) { 1491 for (u64 page = page_begin; page != page_end; ++page) {
1473 if constexpr (insert) { 1492 if constexpr (insert) {
1474 page_table[page] = buffer_id; 1493 page_table[page] = buffer_id;
@@ -1486,15 +1505,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1486} 1505}
1487 1506
1488template <class P> 1507template <class P>
1489bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1508bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) {
1490 boost::container::small_vector<BufferCopy, 4> copies; 1509 boost::container::small_vector<BufferCopy, 4> copies;
1491 u64 total_size_bytes = 0; 1510 u64 total_size_bytes = 0;
1492 u64 largest_copy = 0; 1511 u64 largest_copy = 0;
1493 VAddr buffer_start = buffer.CpuAddr(); 1512 DAddr buffer_start = buffer.CpuAddr();
1494 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1513 memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1495 copies.push_back(BufferCopy{ 1514 copies.push_back(BufferCopy{
1496 .src_offset = total_size_bytes, 1515 .src_offset = total_size_bytes,
1497 .dst_offset = cpu_addr_out - buffer_start, 1516 .dst_offset = device_addr_out - buffer_start,
1498 .size = range_size, 1517 .size = range_size,
1499 }); 1518 });
1500 total_size_bytes += range_size; 1519 total_size_bytes += range_size;
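
The only substantive change in SynchronizeBuffer is that the tracker now reports device addresses, so each destination offset is computed against the buffer's base address while the source offsets simply accumulate. A trimmed sketch of that copy-list assembly, with simplified stand-ins for BufferCopy and the tracker callback:

// Simplified stand-ins; offsets are relative to the destination buffer's base.
#include <cstdint>
#include <vector>

struct Copy {
    std::uint64_t src_offset; // running offset into the staging upload
    std::uint64_t dst_offset; // offset inside the destination buffer
    std::uint64_t size;
};

struct Range {
    std::uint64_t addr; // device address reported by the tracker
    std::uint64_t size;
};

// Pack each dirty range back-to-back in staging memory while recording where
// it lands inside the destination buffer.
std::vector<Copy> BuildCopies(std::uint64_t buffer_start, const std::vector<Range>& dirty) {
    std::vector<Copy> copies;
    std::uint64_t total_size_bytes = 0;
    for (const Range& r : dirty) {
        copies.push_back({total_size_bytes, r.addr - buffer_start, r.size});
        total_size_bytes += r.size;
    }
    return copies;
}
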
@@ -1526,14 +1545,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1526 std::span<u8> immediate_buffer; 1545 std::span<u8> immediate_buffer;
1527 for (const BufferCopy& copy : copies) { 1546 for (const BufferCopy& copy : copies) {
1528 std::span<const u8> upload_span; 1547 std::span<const u8> upload_span;
1529 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1548 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1530 if (IsRangeGranular(cpu_addr, copy.size)) { 1549 if (IsRangeGranular(device_addr, copy.size)) {
1531 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); 1550 upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size);
1532 } else { 1551 } else {
1533 if (immediate_buffer.empty()) { 1552 if (immediate_buffer.empty()) {
1534 immediate_buffer = ImmediateBuffer(largest_copy); 1553 immediate_buffer = ImmediateBuffer(largest_copy);
1535 } 1554 }
1536 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 1555 device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
1537 upload_span = immediate_buffer.subspan(0, copy.size); 1556 upload_span = immediate_buffer.subspan(0, copy.size);
1538 } 1557 }
1539 buffer.ImmediateUpload(copy.dst_offset, upload_span); 1558 buffer.ImmediateUpload(copy.dst_offset, upload_span);
@@ -1550,8 +1569,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1550 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1569 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1551 for (BufferCopy& copy : copies) { 1570 for (BufferCopy& copy : copies) {
1552 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1571 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1553 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1572 const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
1554 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1573 device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size);
1555 1574
1556 // Apply the staging offset 1575 // Apply the staging offset
1557 copy.src_offset += upload_staging.offset; 1576 copy.src_offset += upload_staging.offset;
@@ -1562,14 +1581,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1562} 1581}
1563 1582
1564template <class P> 1583template <class P>
1565bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, 1584bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
1566 std::span<const u8> inlined_buffer) { 1585 std::span<const u8> inlined_buffer) {
1567 const bool is_dirty = IsRegionRegistered(dest_address, copy_size); 1586 const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
1568 if (!is_dirty) { 1587 if (!is_dirty) {
1569 return false; 1588 return false;
1570 } 1589 }
1571 VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); 1590 DAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
1572 VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); 1591 DAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
1573 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { 1592 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
1574 return false; 1593 return false;
1575 } 1594 }
@@ -1580,7 +1599,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1580} 1599}
1581 1600
1582template <class P> 1601template <class P>
1583void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 1602void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
1584 std::span<const u8> inlined_buffer) { 1603 std::span<const u8> inlined_buffer) {
1585 const IntervalType subtract_interval{dest_address, dest_address + copy_size}; 1604 const IntervalType subtract_interval{dest_address, dest_address + copy_size};
1586 ClearDownload(subtract_interval); 1605 ClearDownload(subtract_interval);
@@ -1612,14 +1631,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
1612} 1631}
1613 1632
1614template <class P> 1633template <class P>
1615void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { 1634void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) {
1616 boost::container::small_vector<BufferCopy, 1> copies; 1635 boost::container::small_vector<BufferCopy, 1> copies;
1617 u64 total_size_bytes = 0; 1636 u64 total_size_bytes = 0;
1618 u64 largest_copy = 0; 1637 u64 largest_copy = 0;
1619 memory_tracker.ForEachDownloadRangeAndClear( 1638 memory_tracker.ForEachDownloadRangeAndClear(
1620 cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { 1639 device_addr, size, [&](u64 device_addr_out, u64 range_size) {
1621 const VAddr buffer_addr = buffer.CpuAddr(); 1640 const DAddr buffer_addr = buffer.CpuAddr();
1622 const auto add_download = [&](VAddr start, VAddr end) { 1641 const auto add_download = [&](DAddr start, DAddr end) {
1623 const u64 new_offset = start - buffer_addr; 1642 const u64 new_offset = start - buffer_addr;
1624 const u64 new_size = end - start; 1643 const u64 new_size = end - start;
1625 copies.push_back(BufferCopy{ 1644 copies.push_back(BufferCopy{
@@ -1634,8 +1653,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1634 largest_copy = std::max(largest_copy, new_size); 1653 largest_copy = std::max(largest_copy, new_size);
1635 }; 1654 };
1636 1655
1637 const VAddr start_address = cpu_addr_out; 1656 const DAddr start_address = device_addr_out;
1638 const VAddr end_address = start_address + range_size; 1657 const DAddr end_address = start_address + range_size;
1639 ForEachInRangeSet(common_ranges, start_address, range_size, add_download); 1658 ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
1640 const IntervalType subtract_interval{start_address, end_address}; 1659 const IntervalType subtract_interval{start_address, end_address};
1641 ClearDownload(subtract_interval); 1660 ClearDownload(subtract_interval);
@@ -1658,18 +1677,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1658 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); 1677 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
1659 runtime.Finish(); 1678 runtime.Finish();
1660 for (const BufferCopy& copy : copies) { 1679 for (const BufferCopy& copy : copies) {
1661 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1680 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1662 // Undo the modified offset 1681 // Undo the modified offset
1663 const u64 dst_offset = copy.dst_offset - download_staging.offset; 1682 const u64 dst_offset = copy.dst_offset - download_staging.offset;
1664 const u8* copy_mapped_memory = mapped_memory + dst_offset; 1683 const u8* copy_mapped_memory = mapped_memory + dst_offset;
1665 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); 1684 device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size);
1666 } 1685 }
1667 } else { 1686 } else {
1668 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 1687 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
1669 for (const BufferCopy& copy : copies) { 1688 for (const BufferCopy& copy : copies) {
1670 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 1689 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
1671 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 1690 const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
1672 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); 1691 device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size);
1673 } 1692 }
1674 } 1693 }
1675} 1694}
@@ -1758,20 +1777,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1758 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); 1777 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1759 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; 1778 const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size;
1760 1779
1761 const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); 1780 const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1762 if (!aligned_cpu_addr || size == 0) { 1781 if (!aligned_device_addr || size == 0) {
1763 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1782 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
1764 return NULL_BINDING; 1783 return NULL_BINDING;
1765 } 1784 }
1766 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1785 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1767 ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", 1786 ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}",
1768 cbuf_index); 1787 cbuf_index);
1769 // The end address used for size calculation does not need to be aligned 1788 // The end address used for size calculation does not need to be aligned
1770 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1789 const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::Memory::YUZU_PAGESIZE);
1771 1790
1772 const Binding binding{ 1791 const Binding binding{
1773 .cpu_addr = *aligned_cpu_addr, 1792 .device_addr = *aligned_device_addr,
1774 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), 1793 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr),
1775 .buffer_id = BufferId{}, 1794 .buffer_id = BufferId{},
1776 }; 1795 };
1777 return binding; 1796 return binding;
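
The size computation in StorageBufferBinding is easy to misread: written buffers keep the alignment-padded size, while read-only buffers are padded out to the guest page following the unaligned end. A worked example with made-up addresses and a hypothetical 0x100 SSBO alignment (the helpers mirror Common::AlignDown/Common::AlignUp):

// Worked example only; addresses, alignment and the translated device address are made up.
#include <cstdint>

constexpr std::uint64_t AlignDown(std::uint64_t v, std::uint64_t a) { return v & ~(a - 1); }
constexpr std::uint64_t AlignUp(std::uint64_t v, std::uint64_t a) { return AlignDown(v + a - 1, a); }

constexpr std::uint64_t page = 4096; // assumed guest page size

constexpr std::uint64_t gpu_addr = 0x1234;
constexpr std::uint64_t size = 0x30;
constexpr std::uint64_t aligned_gpu_addr = AlignDown(gpu_addr, 0x100);       // 0x1200
constexpr std::uint64_t aligned_size = (gpu_addr - aligned_gpu_addr) + size; // used when is_written

// Read-only path: pad the unaligned end out to the next page instead.
constexpr std::uint64_t device_addr = 0x401234;         // hypothetical GpuToCpuAddress result
constexpr std::uint64_t aligned_device_addr = 0x401200; // translation of aligned_gpu_addr
constexpr std::uint64_t padded_end = AlignUp(device_addr + size, page);      // 0x402000
static_assert(aligned_size == 0x64);
static_assert(padded_end - aligned_device_addr == 0xE00); // read-only binding size
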
@@ -1780,15 +1799,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1780template <class P> 1799template <class P>
1781TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 1800TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
1782 PixelFormat format) { 1801 PixelFormat format) {
1783 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1802 const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1784 TextureBufferBinding binding; 1803 TextureBufferBinding binding;
1785 if (!cpu_addr || size == 0) { 1804 if (!device_addr || size == 0) {
1786 binding.cpu_addr = 0; 1805 binding.device_addr = 0;
1787 binding.size = 0; 1806 binding.size = 0;
1788 binding.buffer_id = NULL_BUFFER_ID; 1807 binding.buffer_id = NULL_BUFFER_ID;
1789 binding.format = PixelFormat::Invalid; 1808 binding.format = PixelFormat::Invalid;
1790 } else { 1809 } else {
1791 binding.cpu_addr = *cpu_addr; 1810 binding.device_addr = *device_addr;
1792 binding.size = size; 1811 binding.size = size;
1793 binding.buffer_id = BufferId{}; 1812 binding.buffer_id = BufferId{};
1794 binding.format = format; 1813 binding.format = format;
@@ -1797,14 +1816,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr,
1797} 1816}
1798 1817
1799template <class P> 1818template <class P>
1800std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { 1819std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) {
1801 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); 1820 u8* const base_pointer = device_memory.GetPointer<u8>(device_addr);
1802 if (IsRangeGranular(cpu_addr, size) || 1821 if (IsRangeGranular(device_addr, size) ||
1803 base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { 1822 base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) {
1804 return std::span(base_pointer, size); 1823 return std::span(base_pointer, size);
1805 } else { 1824 } else {
1806 const std::span<u8> span = ImmediateBuffer(size); 1825 const std::span<u8> span = ImmediateBuffer(size);
1807 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 1826 device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
1808 return span; 1827 return span;
1809 } 1828 }
1810} 1829}
@@ -1828,13 +1847,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
1828template <class P> 1847template <class P>
1829std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { 1848std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
1830 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; 1849 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
1831 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); 1850 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr));
1832} 1851}
1833 1852
1834template <class P> 1853template <class P>
1835std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { 1854std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
1836 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; 1855 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
1837 return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); 1856 return std::make_pair(&buffer,
1857 buffer.Offset(channel_state->indirect_buffer_binding.device_addr));
1838} 1858}
1839 1859
1840} // namespace VideoCommon 1860} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index d6d696d8c..4074003e4 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -32,7 +32,6 @@
32#include "common/microprofile.h" 32#include "common/microprofile.h"
33#include "common/scope_exit.h" 33#include "common/scope_exit.h"
34#include "common/settings.h" 34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h" 35#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h" 36#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h" 37#include "video_core/delayed_destruction_ring.h"
@@ -41,7 +40,6 @@
41#include "video_core/engines/kepler_compute.h" 40#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h" 41#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h" 42#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h" 43#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h" 44#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h" 45#include "video_core/texture_cache/types.h"
@@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0};
94static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); 92static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
95 93
96struct Binding { 94struct Binding {
97 VAddr cpu_addr{}; 95 DAddr device_addr{};
98 u32 size{}; 96 u32 size{};
99 BufferId buffer_id; 97 BufferId buffer_id;
100}; 98};
@@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding {
104}; 102};
105 103
106static constexpr Binding NULL_BINDING{ 104static constexpr Binding NULL_BINDING{
107 .cpu_addr = 0, 105 .device_addr = 0,
108 .size = 0, 106 .size = 0,
109 .buffer_id = NULL_BUFFER_ID, 107 .buffer_id = NULL_BUFFER_ID,
110}; 108};
@@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
204 using Async_Buffer = typename P::Async_Buffer; 202 using Async_Buffer = typename P::Async_Buffer;
205 using MemoryTracker = typename P::MemoryTracker; 203 using MemoryTracker = typename P::MemoryTracker;
206 204
207 using IntervalCompare = std::less<VAddr>; 205 using IntervalCompare = std::less<DAddr>;
208 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; 206 using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
209 using IntervalAllocator = boost::fast_pool_allocator<VAddr>; 207 using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
210 using IntervalSet = boost::icl::interval_set<VAddr>; 208 using IntervalSet = boost::icl::interval_set<DAddr>;
211 using IntervalType = typename IntervalSet::interval_type; 209 using IntervalType = typename IntervalSet::interval_type;
212 210
213 template <typename Type> 211 template <typename Type>
@@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
230 228
231 using OverlapCombine = counter_add_functor<int>; 229 using OverlapCombine = counter_add_functor<int>;
232 using OverlapSection = boost::icl::inter_section<int>; 230 using OverlapSection = boost::icl::inter_section<int>;
233 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 231 using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
234 232
235 struct OverlapResult { 233 struct OverlapResult {
236 boost::container::small_vector<BufferId, 16> ids; 234 boost::container::small_vector<BufferId, 16> ids;
237 VAddr begin; 235 DAddr begin;
238 VAddr end; 236 DAddr end;
239 bool has_stream_leap = false; 237 bool has_stream_leap = false;
240 }; 238 };
241 239
242public: 240public:
243 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 241 explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
244 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
245 242
246 void TickFrame(); 243 void TickFrame();
247 244
248 void WriteMemory(VAddr cpu_addr, u64 size); 245 void WriteMemory(DAddr device_addr, u64 size);
249 246
250 void CachedWriteMemory(VAddr cpu_addr, u64 size); 247 void CachedWriteMemory(DAddr device_addr, u64 size);
251 248
252 bool OnCPUWrite(VAddr cpu_addr, u64 size); 249 bool OnCPUWrite(DAddr device_addr, u64 size);
253 250
254 void DownloadMemory(VAddr cpu_addr, u64 size); 251 void DownloadMemory(DAddr device_addr, u64 size);
255 252
256 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); 253 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size);
257 254
258 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); 255 bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
259 256
260 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); 257 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
261 258
@@ -300,7 +297,7 @@ public:
300 ObtainBufferSynchronize sync_info, 297 ObtainBufferSynchronize sync_info,
301 ObtainBufferOperation post_op); 298 ObtainBufferOperation post_op);
302 299
303 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, 300 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size,
304 ObtainBufferSynchronize sync_info, 301 ObtainBufferSynchronize sync_info,
305 ObtainBufferOperation post_op); 302 ObtainBufferOperation post_op);
306 void FlushCachedWrites(); 303 void FlushCachedWrites();
@@ -326,13 +323,13 @@ public:
326 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); 323 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
327 324
328 /// Return true when a CPU region is modified from the GPU 325 /// Return true when a CPU region is modified from the GPU
329 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 326 [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size);
330 327
331 /// Return true when a region is registered on the cache 328 /// Return true when a region is registered on the cache
332 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); 329 [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size);
333 330
334 /// Return true when a CPU region is modified from the CPU 331 /// Return true when a CPU region is modified from the CPU
335 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 332 [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size);
336 333
337 void SetDrawIndirect( 334 void SetDrawIndirect(
338 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { 335 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
@@ -366,9 +363,9 @@ private:
366 } 363 }
367 364
368 template <typename Func> 365 template <typename Func>
369 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { 366 void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) {
370 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); 367 const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
371 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { 368 for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) {
372 const BufferId buffer_id = page_table[page]; 369 const BufferId buffer_id = page_table[page];
373 if (!buffer_id) { 370 if (!buffer_id) {
374 ++page; 371 ++page;
@@ -377,15 +374,15 @@ private:
377 Buffer& buffer = slot_buffers[buffer_id]; 374 Buffer& buffer = slot_buffers[buffer_id];
378 func(buffer_id, buffer); 375 func(buffer_id, buffer);
379 376
380 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 377 const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
381 page = Common::DivCeil(end_addr, CACHING_PAGESIZE); 378 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
382 } 379 }
383 } 380 }
384 381
385 template <typename Func> 382 template <typename Func>
386 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { 383 void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
387 const VAddr start_address = cpu_addr; 384 const DAddr start_address = device_addr;
388 const VAddr end_address = start_address + size; 385 const DAddr end_address = start_address + size;
389 const IntervalType search_interval{start_address, end_address}; 386 const IntervalType search_interval{start_address, end_address};
390 auto it = current_range.lower_bound(search_interval); 387 auto it = current_range.lower_bound(search_interval);
391 if (it == current_range.end()) { 388 if (it == current_range.end()) {
@@ -393,8 +390,8 @@ private:
393 } 390 }
394 auto end_it = current_range.upper_bound(search_interval); 391 auto end_it = current_range.upper_bound(search_interval);
395 for (; it != end_it; it++) { 392 for (; it != end_it; it++) {
396 VAddr inter_addr_end = it->upper(); 393 DAddr inter_addr_end = it->upper();
397 VAddr inter_addr = it->lower(); 394 DAddr inter_addr = it->lower();
398 if (inter_addr_end > end_address) { 395 if (inter_addr_end > end_address) {
399 inter_addr_end = end_address; 396 inter_addr_end = end_address;
400 } 397 }
@@ -406,10 +403,10 @@ private:
406 } 403 }
407 404
408 template <typename Func> 405 template <typename Func>
409 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, 406 void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
410 Func&& func) { 407 Func&& func) {
411 const VAddr start_address = cpu_addr; 408 const DAddr start_address = device_addr;
412 const VAddr end_address = start_address + size; 409 const DAddr end_address = start_address + size;
413 const IntervalType search_interval{start_address, end_address}; 410 const IntervalType search_interval{start_address, end_address};
414 auto it = current_range.lower_bound(search_interval); 411 auto it = current_range.lower_bound(search_interval);
415 if (it == current_range.end()) { 412 if (it == current_range.end()) {
@@ -418,8 +415,8 @@ private:
418 auto end_it = current_range.upper_bound(search_interval); 415 auto end_it = current_range.upper_bound(search_interval);
419 for (; it != end_it; it++) { 416 for (; it != end_it; it++) {
420 auto& inter = it->first; 417 auto& inter = it->first;
421 VAddr inter_addr_end = inter.upper(); 418 DAddr inter_addr_end = inter.upper();
422 VAddr inter_addr = inter.lower(); 419 DAddr inter_addr = inter.lower();
423 if (inter_addr_end > end_address) { 420 if (inter_addr_end > end_address) {
424 inter_addr_end = end_address; 421 inter_addr_end = end_address;
425 } 422 }
@@ -451,9 +448,9 @@ private:
451 } while (any_removals); 448 } while (any_removals);
452 } 449 }
453 450
454 static bool IsRangeGranular(VAddr cpu_addr, size_t size) { 451 static bool IsRangeGranular(DAddr device_addr, size_t size) {
455 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == 452 return (device_addr & ~Core::Memory::YUZU_PAGEMASK) ==
456 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); 453 ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
457 } 454 }
458 455
459 void RunGarbageCollector(); 456 void RunGarbageCollector();
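
IsRangeGranular changes only its address type: it still asks whether the whole range stays inside a single guest page, in which case a direct host pointer can be used instead of a block copy. A minimal check, assuming a 4 KiB page in place of Core::Memory::YUZU_PAGEMASK:

// Assumes a 4 KiB guest page; the real code uses YUZU_PAGEMASK for the same test.
#include <cstdint>

constexpr std::uint64_t page_mask = (1ULL << 12) - 1;

constexpr bool IsRangeGranular(std::uint64_t addr, std::uint64_t size) {
    return (addr & ~page_mask) == ((addr + size) & ~page_mask);
}

static_assert(IsRangeGranular(0x1000, 0x0FF)); // stays inside one page
static_assert(!IsRangeGranular(0x1FFF, 0x10)); // crosses a page boundary
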
@@ -508,15 +505,15 @@ private:
508 505
509 void UpdateComputeTextureBuffers(); 506 void UpdateComputeTextureBuffers();
510 507
511 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); 508 void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size);
512 509
513 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); 510 [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size);
514 511
515 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); 512 [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size);
516 513
517 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); 514 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
518 515
519 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); 516 [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size);
520 517
521 void Register(BufferId buffer_id); 518 void Register(BufferId buffer_id);
522 519
@@ -527,7 +524,7 @@ private:
527 524
528 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; 525 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
529 526
530 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 527 bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size);
531 528
532 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 529 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
533 std::span<BufferCopy> copies); 530 std::span<BufferCopy> copies);
@@ -539,7 +536,7 @@ private:
539 536
540 void DownloadBufferMemory(Buffer& buffer_id); 537 void DownloadBufferMemory(Buffer& buffer_id);
541 538
542 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); 539 void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size);
543 540
544 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); 541 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
545 542
@@ -549,7 +546,7 @@ private:
549 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, 546 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
550 PixelFormat format); 547 PixelFormat format);
551 548
552 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); 549 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size);
553 550
554 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); 551 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
555 552
@@ -557,11 +554,10 @@ private:
557 554
558 void ClearDownload(IntervalType subtract_interval); 555 void ClearDownload(IntervalType subtract_interval);
559 556
560 void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, 557 void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
561 std::span<const u8> inlined_buffer); 558 std::span<const u8> inlined_buffer);
562 559
563 VideoCore::RasterizerInterface& rasterizer; 560 Tegra::MaxwellDeviceMemoryManager& device_memory;
564 Core::Memory::Memory& cpu_memory;
565 561
566 SlotVector<Buffer> slot_buffers; 562 SlotVector<Buffer> slot_buffers;
567 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 563 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -598,7 +594,7 @@ private:
598 u64 critical_memory = 0; 594 u64 critical_memory = 0;
599 BufferId inline_buffer_id; 595 BufferId inline_buffer_id;
600 596
601 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; 597 std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table;
602 Common::ScratchBuffer<u8> tmp_buffer; 598 Common::ScratchBuffer<u8> tmp_buffer;
603}; 599};
604 600
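
One consequence of the 39-bit to 34-bit switch is visible in the page_table declaration above: the flat lookup array shrinks by a factor of 32. A quick sanity check of the entry counts, assuming the existing 64 KiB caching pages (CACHING_PAGEBITS == 16):

// Entry counts implied by the declaration above; CACHING_PAGEBITS == 16 is assumed.
#include <cstdint>

constexpr std::uint64_t caching_page_bits = 16;

constexpr std::uint64_t Entries(std::uint64_t address_space_bits) {
    return (1ULL << address_space_bits) >> caching_page_bits;
}

static_assert(Entries(39) == 8'388'608); // previous CPU-address-sized table
static_assert(Entries(34) == 262'144);   // device-address-sized table
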
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
index 6c1c8287b..c95eed1f6 100644
--- a/src/video_core/buffer_cache/memory_tracker_base.h
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -17,19 +17,19 @@
17 17
18namespace VideoCommon { 18namespace VideoCommon {
19 19
20template <class RasterizerInterface> 20template <typename DeviceTracker>
21class MemoryTrackerBase { 21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39; 22 static constexpr size_t MAX_CPU_PAGE_BITS = 34;
23 static constexpr size_t HIGHER_PAGE_BITS = 22; 23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; 24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; 25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); 26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32; 27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; 28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; 29 using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>;
30 30
31public: 31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} 32 MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {}
33 ~MemoryTrackerBase() = default; 33 ~MemoryTrackerBase() = default;
34 34
35 /// Returns the inclusive CPU modified range in a begin end pair 35 /// Returns the inclusive CPU modified range in a begin end pair
@@ -74,7 +74,7 @@ public:
74 }); 74 });
75 } 75 }
76 76
77 /// Mark region as CPU modified, notifying the rasterizer about this change 77 /// Mark region as CPU modified, notifying the device_tracker about this change
78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 78 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
79 IteratePages<true>(dirty_cpu_addr, query_size, 79 IteratePages<true>(dirty_cpu_addr, query_size,
80 [](Manager* manager, u64 offset, size_t size) { 80 [](Manager* manager, u64 offset, size_t size) {
@@ -83,7 +83,7 @@ public:
83 }); 83 });
84 } 84 }
85 85
86 /// Unmark region as CPU modified, notifying the rasterizer about this change 86 /// Unmark region as CPU modified, notifying the device_tracker about this change
87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { 87 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
88 IteratePages<true>(dirty_cpu_addr, query_size, 88 IteratePages<true>(dirty_cpu_addr, query_size,
89 [](Manager* manager, u64 offset, size_t size) { 89 [](Manager* manager, u64 offset, size_t size) {
@@ -139,7 +139,7 @@ public:
139 }); 139 });
140 } 140 }
141 141
142 /// Flushes cached CPU writes, and notify the rasterizer about the deltas 142 /// Flushes cached CPU writes, and notify the device_tracker about the deltas
143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { 143 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
144 IteratePages<false>(query_cpu_addr, query_size, 144 IteratePages<false>(query_cpu_addr, query_size,
145 [](Manager* manager, [[maybe_unused]] u64 offset, 145 [](Manager* manager, [[maybe_unused]] u64 offset,
@@ -280,7 +280,7 @@ private:
280 manager_pool.emplace_back(); 280 manager_pool.emplace_back();
281 auto& last_pool = manager_pool.back(); 281 auto& last_pool = manager_pool.back();
282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { 282 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
283 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); 283 new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE);
284 free_managers.push_back(&last_pool[i]); 284 free_managers.push_back(&last_pool[i]);
285 } 285 }
286 return on_return(); 286 return on_return();
@@ -293,7 +293,7 @@ private:
293 293
294 std::unordered_set<u32> cached_pages; 294 std::unordered_set<u32> cached_pages;
295 295
296 RasterizerInterface* rasterizer = nullptr; 296 DeviceTracker* device_tracker = nullptr;
297}; 297};
298 298
299} // namespace VideoCommon 299} // namespace VideoCommon
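
The tracker keeps its two-level layout; only the top level is re-sized for the smaller device address space. Assuming the constants stay as shown above, each WordManager still covers a 4 MiB window and the number of top-level slots drops accordingly:

// Derived from the constants in the hunk above; purely a sanity check.
#include <cstddef>

constexpr std::size_t max_page_bits = 34;   // was 39
constexpr std::size_t higher_page_bits = 22;

static_assert((std::size_t{1} << higher_page_bits) == 4 * 1024 * 1024);           // 4 MiB per WordManager
static_assert((std::size_t{1} << (max_page_bits - higher_page_bits)) == 4096);    // was 131072 at 39 bits
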
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index a336bde41..56ab4f5f1 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -163,11 +163,11 @@ struct Words {
163 WordsArray<stack_words> preflushable; 163 WordsArray<stack_words> preflushable;
164}; 164};
165 165
166template <class RasterizerInterface, size_t stack_words = 1> 166template <class DeviceTracker, size_t stack_words = 1>
167class WordManager { 167class WordManager {
168public: 168public:
169 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) 169 explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes)
170 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} 170 : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {}
171 171
172 explicit WordManager() = default; 172 explicit WordManager() = default;
173 173
@@ -279,7 +279,7 @@ public:
279 } 279 }
280 280
281 /** 281 /**
282 * Loop over each page in the given range, turn off those bits and notify the rasterizer if 282 * Loop over each page in the given range, turn off those bits and notify the tracker if
283 * needed. Call the given function on each turned off range. 283 * needed. Call the given function on each turned off range.
284 * 284 *
285 * @param query_cpu_range Base CPU address to loop over 285 * @param query_cpu_range Base CPU address to loop over
@@ -459,26 +459,26 @@ private:
459 } 459 }
460 460
461 /** 461 /**
462 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer 462 * Notify tracker about changes in the CPU tracking state of a word in the buffer
463 * 463 *
464 * @param word_index Index to the word to notify to the rasterizer 464 * @param word_index Index to the word to notify to the tracker
465 * @param current_bits Current state of the word 465 * @param current_bits Current state of the word
466 * @param new_bits New state of the word 466 * @param new_bits New state of the word
467 * 467 *
468 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 468 * @tparam add_to_tracker True when the tracker should start tracking the new pages
469 */ 469 */
470 template <bool add_to_rasterizer> 470 template <bool add_to_tracker>
471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { 471 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
472 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 472 u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 473 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
474 IteratePages(changed_bits, [&](size_t offset, size_t size) { 474 IteratePages(changed_bits, [&](size_t offset, size_t size) {
475 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, 475 tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
476 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); 476 size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1);
477 }); 477 });
478 } 478 }
479 479
480 VAddr cpu_addr = 0; 480 VAddr cpu_addr = 0;
481 RasterizerInterface* rasterizer = nullptr; 481 DeviceTracker* tracker = nullptr;
482 Words<stack_words> words; 482 Words<stack_words> words;
483}; 483};
484 484
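
NotifyRasterizer keeps its name but now reports to the generic tracker; the interesting part is unchanged: it walks a 64-bit word of changed page bits and forwards each contiguous run. A standalone sketch of that bit-run walk, where the callback stands in for the tracker->UpdatePagesCachedCount call and the constants are illustrative:

// Standalone sketch; page granularity and the callback are placeholders.
#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstdio>

template <typename Func>
void IteratePages(std::uint64_t mask, Func&& func) {
    std::size_t offset = 0;
    while (mask != 0) {
        const int skip = std::countr_zero(mask); // clean pages before the run
        offset += skip;
        mask >>= skip;
        const int run = std::countr_one(mask);   // length of the dirty run
        func(offset, static_cast<std::size_t>(run));
        offset += run;
        mask = run < 64 ? (mask >> run) : 0;
    }
}

int main() {
    // Pages 1-3 and page 8 changed: expect the runs (1, 3) and (8, 1).
    IteratePages(0b1'0000'1110ULL, [](std::size_t page, std::size_t count) {
        std::printf("page=%zu count=%zu\n", page, count);
    });
}
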