Diffstat
-rw-r--r--  src/tests/CMakeLists.txt                                    2
-rw-r--r--  src/tests/video_core/buffer_base.cpp                      549
-rw-r--r--  src/tests/video_core/memory_tracker.cpp                   549
-rw-r--r--  src/video_core/CMakeLists.txt                               5
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h                 518
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.cpp                4
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h               1002
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h           580
-rw-r--r--  src/video_core/buffer_cache/memory_tracker_base.h         271
-rw-r--r--  src/video_core/buffer_cache/word_manager.h                462
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h            4
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache_base.cpp     9
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp          8
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h           10
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp     9
15 files changed, 2255 insertions, 1727 deletions
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 39b774c98..1e158f375 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_executable(tests
15 core/core_timing.cpp 15 core/core_timing.cpp
16 core/internal_network/network.cpp 16 core/internal_network/network.cpp
17 precompiled_headers.h 17 precompiled_headers.h
18 video_core/buffer_base.cpp 18 video_core/memory_tracker.cpp
19 input_common/calibration_configuration_job.cpp 19 input_common/calibration_configuration_job.cpp
20) 20)
21 21
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
deleted file mode 100644
index 734dbf4b6..000000000
--- a/src/tests/video_core/buffer_base.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stdexcept>
5#include <unordered_map>
6
7#include <catch2/catch_test_macros.hpp>
8
9#include "common/alignment.h"
10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_base.h"
12
13namespace {
14using VideoCommon::BufferBase;
15using Range = std::pair<u64, u64>;
16
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
19
20constexpr VAddr c = 0x1328914000;
21
22class RasterizerInterface {
23public:
24 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
25 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
26 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
27 Core::Memory::YUZU_PAGEBITS};
28 for (u64 page = page_start; page < page_end; ++page) {
29 int& value = page_table[page];
30 value += delta;
31 if (value < 0) {
32 throw std::logic_error{"negative page"};
33 }
34 if (value == 0) {
35 page_table.erase(page);
36 }
37 }
38 }
39
40 [[nodiscard]] int Count(VAddr addr) const noexcept {
41 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
42 return it == page_table.end() ? 0 : it->second;
43 }
44
45 [[nodiscard]] unsigned Count() const noexcept {
46 unsigned count = 0;
47 for (const auto& [index, value] : page_table) {
48 count += value;
49 }
50 return count;
51 }
52
53private:
54 std::unordered_map<u64, int> page_table;
55};
56} // Anonymous namespace
57
58TEST_CASE("BufferBase: Small buffer", "[video_core]") {
59 RasterizerInterface rasterizer;
60 BufferBase buffer(rasterizer, c, WORD);
61 REQUIRE(rasterizer.Count() == 0);
62 buffer.UnmarkRegionAsCpuModified(c, WORD);
63 REQUIRE(rasterizer.Count() == WORD / PAGE);
64 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0});
65
66 buffer.MarkRegionAsCpuModified(c + PAGE, 1);
67 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2});
68}
69
70TEST_CASE("BufferBase: Large buffer", "[video_core]") {
71 RasterizerInterface rasterizer;
72 BufferBase buffer(rasterizer, c, WORD * 32);
73 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
74 buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4);
75 REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2});
76 REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8});
77 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE});
78 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE});
79 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
80 Range{WORD * 3 + PAGE * 63, WORD * 4});
81
82 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
83 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
84 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
85 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9});
86
87 buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
88 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
89 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7});
90
91 buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
92 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32});
93
94 buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
95 buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
96
97 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
98 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
99}
100
101TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") {
102 RasterizerInterface rasterizer;
103 BufferBase buffer(rasterizer, c, PAGE * 2);
104 REQUIRE(rasterizer.Count() == 0);
105 buffer.UnmarkRegionAsCpuModified(c, PAGE);
106 REQUIRE(rasterizer.Count() == 1);
107 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
108 REQUIRE(rasterizer.Count() == 0);
109 buffer.UnmarkRegionAsCpuModified(c, PAGE);
110 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
111 REQUIRE(rasterizer.Count() == 2);
112 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
113 REQUIRE(rasterizer.Count() == 0);
114}
115
116TEST_CASE("BufferBase: Basic range", "[video_core]") {
117 RasterizerInterface rasterizer;
118 BufferBase buffer(rasterizer, c, WORD);
119 buffer.UnmarkRegionAsCpuModified(c, WORD);
120 buffer.MarkRegionAsCpuModified(c, PAGE);
121 int num = 0;
122 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
123 REQUIRE(offset == 0U);
124 REQUIRE(size == PAGE);
125 ++num;
126 });
127 REQUIRE(num == 1U);
128}
129
130TEST_CASE("BufferBase: Border upload", "[video_core]") {
131 RasterizerInterface rasterizer;
132 BufferBase buffer(rasterizer, c, WORD * 2);
133 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
134 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
135 buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
136 REQUIRE(offset == WORD - PAGE);
137 REQUIRE(size == PAGE * 2);
138 });
139}
140
141TEST_CASE("BufferBase: Border upload range", "[video_core]") {
142 RasterizerInterface rasterizer;
143 BufferBase buffer(rasterizer, c, WORD * 2);
144 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
145 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
146 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
147 REQUIRE(offset == WORD - PAGE);
148 REQUIRE(size == PAGE * 2);
149 });
150 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
151 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
152 REQUIRE(offset == WORD - PAGE);
153 REQUIRE(size == PAGE);
154 });
155 buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
156 REQUIRE(offset == WORD);
157 REQUIRE(size == PAGE);
158 });
159}
160
161TEST_CASE("BufferBase: Border upload partial range", "[video_core]") {
162 RasterizerInterface rasterizer;
163 BufferBase buffer(rasterizer, c, WORD * 2);
164 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
165 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
166 buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
167 REQUIRE(offset == WORD - PAGE);
168 REQUIRE(size == PAGE * 2);
169 });
170 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
171 buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
172 REQUIRE(offset == WORD - PAGE);
173 REQUIRE(size == PAGE);
174 });
175 buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
176 REQUIRE(offset == WORD);
177 REQUIRE(size == PAGE);
178 });
179}
180
181TEST_CASE("BufferBase: Partial word uploads", "[video_core]") {
182 RasterizerInterface rasterizer;
183 BufferBase buffer(rasterizer, c, 0x9d000);
184 int num = 0;
185 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
186 REQUIRE(offset == 0U);
187 REQUIRE(size == WORD);
188 ++num;
189 });
190 REQUIRE(num == 1);
191 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
192 REQUIRE(offset == WORD);
193 REQUIRE(size == WORD);
194 ++num;
195 });
196 REQUIRE(num == 2);
197 buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
198 REQUIRE(offset == WORD * 2);
199 REQUIRE(size == PAGE * 0x1d);
200 ++num;
201 });
202 REQUIRE(num == 3);
203}
204
205TEST_CASE("BufferBase: Partial page upload", "[video_core]") {
206 RasterizerInterface rasterizer;
207 BufferBase buffer(rasterizer, c, WORD);
208 buffer.UnmarkRegionAsCpuModified(c, WORD);
209 int num = 0;
210 buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
211 buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
212 buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
213 REQUIRE(offset == PAGE * 2);
214 REQUIRE(size == PAGE);
215 ++num;
216 });
217 REQUIRE(num == 1);
218 buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
219 REQUIRE(offset == PAGE * 9);
220 REQUIRE(size == PAGE);
221 ++num;
222 });
223 REQUIRE(num == 2);
224}
225
226TEST_CASE("BufferBase: Partial page upload with multiple words on the right") {
227 RasterizerInterface rasterizer;
228 BufferBase buffer(rasterizer, c, WORD * 8);
229 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
230 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
231 int num = 0;
232 buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
233 REQUIRE(offset == PAGE * 13);
234 REQUIRE(size == WORD * 7 - PAGE * 3);
235 ++num;
236 });
237 REQUIRE(num == 1);
238 buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
239 REQUIRE(offset == WORD * 7 + PAGE * 10);
240 REQUIRE(size == PAGE * 3);
241 ++num;
242 });
243 REQUIRE(num == 2);
244}
245
246TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") {
247 RasterizerInterface rasterizer;
248 BufferBase buffer(rasterizer, c, WORD * 8);
249 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
250 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
251 int num = 0;
252 buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
253 REQUIRE(offset == PAGE * 16);
254 REQUIRE(size == WORD * 7 - PAGE * 3);
255 ++num;
256 });
257 REQUIRE(num == 1);
258 buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
259 REQUIRE(offset == PAGE * 13);
260 REQUIRE(size == PAGE * 3);
261 ++num;
262 });
263 REQUIRE(num == 2);
264}
265
266TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") {
267 RasterizerInterface rasterizer;
268 BufferBase buffer(rasterizer, c, WORD * 8);
269 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
270 buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
271 int num = 0;
272 buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
273 REQUIRE(offset == PAGE * 16);
274 REQUIRE(size == WORD);
275 ++num;
276 });
277 REQUIRE(num == 1);
278 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
279 REQUIRE(offset == PAGE * 13);
280 REQUIRE(size == PAGE * 3);
281 ++num;
282 });
283 REQUIRE(num == 2);
284 buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
285 REQUIRE(offset == WORD + PAGE * 16);
286 REQUIRE(size == PAGE * 73);
287 ++num;
288 });
289 REQUIRE(num == 3);
290}
291
292TEST_CASE("BufferBase: Empty right bits", "[video_core]") {
293 RasterizerInterface rasterizer;
294 BufferBase buffer(rasterizer, c, WORD * 2048);
295 buffer.UnmarkRegionAsCpuModified(c, WORD * 2048);
296 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
297 buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
298 REQUIRE(offset == WORD - PAGE);
299 REQUIRE(size == PAGE * 2);
300 });
301}
302
303TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") {
304 RasterizerInterface rasterizer;
305 BufferBase buffer(rasterizer, c, WORD);
306 buffer.UnmarkRegionAsCpuModified(c, WORD);
307 buffer.MarkRegionAsCpuModified(c, PAGE);
308 int num = 0;
309 buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
310 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
311 buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
312 REQUIRE(num == 0);
313 buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
314 REQUIRE(num == 1);
315 buffer.MarkRegionAsCpuModified(c, WORD);
316 REQUIRE(rasterizer.Count() == 0);
317}
318
319TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") {
320 RasterizerInterface rasterizer;
321 BufferBase buffer(rasterizer, c, 0x22000);
322 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
323 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
324 REQUIRE(rasterizer.Count() == 0);
325 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
326 REQUIRE(rasterizer.Count() == 1);
327 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2));
328 buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2);
329 buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2);
330 REQUIRE(rasterizer.Count() == 2);
331}
332
333TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") {
334 RasterizerInterface rasterizer;
335 BufferBase buffer(rasterizer, c, 0x310720);
336 buffer.UnmarkRegionAsCpuModified(c, 0x310720);
337 REQUIRE(rasterizer.Count(c) == 1);
338 REQUIRE(rasterizer.Count(c + PAGE) == 1);
339 REQUIRE(rasterizer.Count(c + WORD) == 1);
340 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
341}
342
343TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") {
344 RasterizerInterface rasterizer;
345 BufferBase buffer(rasterizer, c, WORD);
346 buffer.UnmarkRegionAsCpuModified(c, WORD);
347 buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
348 buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
349 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
350 static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3};
351 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
352 REQUIRE(offset == offsets.at(i));
353 REQUIRE(size == sizes.at(i));
354 ++i;
355 });
356}
357
358TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") {
359 RasterizerInterface rasterizer;
360 BufferBase buffer(rasterizer, c, 0x22000);
361 buffer.UnmarkRegionAsCpuModified(c, 0x22000);
362 REQUIRE(rasterizer.Count() == 0x22);
363 buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
364 buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
365 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
366 static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21};
367 static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
368 REQUIRE(offset == offsets.at(i));
369 REQUIRE(size == sizes.at(i));
370 ++i;
371 });
372}
373
374TEST_CASE("BufferBase: Single page modified range", "[video_core]") {
375 RasterizerInterface rasterizer;
376 BufferBase buffer(rasterizer, c, PAGE);
377 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
378 buffer.UnmarkRegionAsCpuModified(c, PAGE);
379 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
380}
381
382TEST_CASE("BufferBase: Two page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 BufferBase buffer(rasterizer, c, PAGE * 2);
385 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
386 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
387 REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2));
388 buffer.UnmarkRegionAsCpuModified(c, PAGE);
389 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
390}
391
392TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") {
393 for (int offset = 0; offset < 4; ++offset) {
394 const VAddr address = c + WORD * offset;
395 RasterizerInterface rasterizer;
396 BufferBase buffer(rasterizer, address, WORD * 4);
397 REQUIRE(buffer.IsRegionCpuModified(address, PAGE));
398 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE));
399 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE));
400
401 buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
402 REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD));
403 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE));
404 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE));
405 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE));
406 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
407 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
408
409 buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
410 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
411 }
412}
413
414TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") {
415 RasterizerInterface rasterizer;
416 BufferBase buffer(rasterizer, c, WORD * 16);
417 buffer.UnmarkRegionAsCpuModified(c, WORD * 16);
418 REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16));
419
420 buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
421 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16));
422 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2));
423 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2));
424 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2));
425 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
426 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
427 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
428 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
429 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
430}
431
432TEST_CASE("BufferBase: Out of bounds region query") {
433 RasterizerInterface rasterizer;
434 BufferBase buffer(rasterizer, c, WORD * 16);
435 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE));
436 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE));
437 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE));
438 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64));
439 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64));
440}
441
442TEST_CASE("BufferBase: Wrap word regions") {
443 RasterizerInterface rasterizer;
444 BufferBase buffer(rasterizer, c, WORD * 2);
445 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
446 buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
447 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2));
448 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE));
449 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE));
450 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE));
451 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
452 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
453 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
454
455 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
457 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
458 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE));
459 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE));
460 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
461 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16));
462}
463
464TEST_CASE("BufferBase: Unaligned page region query") {
465 RasterizerInterface rasterizer;
466 BufferBase buffer(rasterizer, c, WORD);
467 buffer.UnmarkRegionAsCpuModified(c, WORD);
468 buffer.MarkRegionAsCpuModified(c + 4000, 1000);
469 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
470 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
473}
474
475TEST_CASE("BufferBase: Cached write") {
476 RasterizerInterface rasterizer;
477 BufferBase buffer(rasterizer, c, WORD);
478 buffer.UnmarkRegionAsCpuModified(c, WORD);
479 buffer.CachedCpuWrite(c + PAGE, PAGE);
480 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
481 buffer.FlushCachedWrites();
482 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
483 buffer.MarkRegionAsCpuModified(c, WORD);
484 REQUIRE(rasterizer.Count() == 0);
485}
486
487TEST_CASE("BufferBase: Multiple cached write") {
488 RasterizerInterface rasterizer;
489 BufferBase buffer(rasterizer, c, WORD);
490 buffer.UnmarkRegionAsCpuModified(c, WORD);
491 buffer.CachedCpuWrite(c + PAGE, PAGE);
492 buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
493 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
494 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
495 buffer.FlushCachedWrites();
496 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
497 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
498 buffer.MarkRegionAsCpuModified(c, WORD);
499 REQUIRE(rasterizer.Count() == 0);
500}
501
502TEST_CASE("BufferBase: Cached write unmarked") {
503 RasterizerInterface rasterizer;
504 BufferBase buffer(rasterizer, c, WORD);
505 buffer.UnmarkRegionAsCpuModified(c, WORD);
506 buffer.CachedCpuWrite(c + PAGE, PAGE);
507 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
508 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
509 buffer.FlushCachedWrites();
510 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
511 buffer.MarkRegionAsCpuModified(c, WORD);
512 REQUIRE(rasterizer.Count() == 0);
513}
514
515TEST_CASE("BufferBase: Cached write iterated") {
516 RasterizerInterface rasterizer;
517 BufferBase buffer(rasterizer, c, WORD);
518 buffer.UnmarkRegionAsCpuModified(c, WORD);
519 buffer.CachedCpuWrite(c + PAGE, PAGE);
520 int num = 0;
521 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
522 REQUIRE(num == 0);
523 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
524 buffer.FlushCachedWrites();
525 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
526 buffer.MarkRegionAsCpuModified(c, WORD);
527 REQUIRE(rasterizer.Count() == 0);
528}
529
530TEST_CASE("BufferBase: Cached write downloads") {
531 RasterizerInterface rasterizer;
532 BufferBase buffer(rasterizer, c, WORD);
533 buffer.UnmarkRegionAsCpuModified(c, WORD);
534 REQUIRE(rasterizer.Count() == 64);
535 buffer.CachedCpuWrite(c + PAGE, PAGE);
536 REQUIRE(rasterizer.Count() == 63);
537 buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
538 int num = 0;
539 buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites();
545 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
547 buffer.MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
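
Both the removed suite above and its replacement below assert against the same dirty-page bookkeeping: one 64-bit tracking word covers 64 pages of 4 KiB, which is where the test constants PAGE = 4096 and WORD = 4096 * 64 come from. The standalone sketch below (illustrative only; it borrows the test constants but none of yuzu's types) shows how a byte range maps onto a word index and a page bitmask, which is the arithmetic behind the REQUIREs above.

// Standalone illustration, not part of the commit: maps a byte range onto the
// word index and page bitmask that the dirty-tracking tests reason about.
#include <cstdint>
#include <cstdio>

constexpr std::uint64_t PAGE = 4096;                  // bytes per tracked page
constexpr std::uint64_t PAGES_PER_WORD = 64;          // one bit per page in a u64
constexpr std::uint64_t WORD = PAGE * PAGES_PER_WORD; // 256 KiB per tracking word

struct WordBits {
    std::uint64_t word_index;
    std::uint64_t mask;
};

// Word index and bit mask for a range that stays within a single word.
constexpr WordBits DirtyBits(std::uint64_t offset, std::uint64_t size) {
    const std::uint64_t first_page = (offset % WORD) / PAGE;
    const std::uint64_t last_page = ((offset % WORD) + size + PAGE - 1) / PAGE; // exclusive
    const std::uint64_t num_pages = last_page - first_page;
    const std::uint64_t mask = num_pages >= PAGES_PER_WORD
                                   ? ~std::uint64_t{0}
                                   : ((std::uint64_t{1} << num_pages) - 1) << first_page;
    return {offset / WORD, mask};
}

// A one-byte write into page 1 dirties exactly bit 1; compare the
// "Small buffer" test, where MarkRegionAsCpuModified(c + PAGE, 1) yields the
// modified range [PAGE, PAGE * 2).
static_assert(DirtyBits(PAGE, 1).mask == 0b10);

int main() {
    // Pages 63 and 64 land in different words; the "Wrap word regions" tests
    // probe exactly this boundary.
    const WordBits lo = DirtyBits(PAGE * 63, PAGE);
    const WordBits hi = DirtyBits(PAGE * 64, PAGE);
    std::printf("page 63 -> word %llu, mask %016llx\n",
                static_cast<unsigned long long>(lo.word_index),
                static_cast<unsigned long long>(lo.mask));
    std::printf("page 64 -> word %llu, mask %016llx\n",
                static_cast<unsigned long long>(hi.word_index),
                static_cast<unsigned long long>(hi.mask));
    return 0;
}
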
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
new file mode 100644
index 000000000..3981907a2
--- /dev/null
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -0,0 +1,549 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <memory>
5#include <stdexcept>
6#include <unordered_map>
7
8#include <catch2/catch_test_macros.hpp>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/buffer_cache/memory_tracker_base.h"
13
14namespace {
15using Range = std::pair<u64, u64>;
16
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
19constexpr u64 HIGH_PAGE_BITS = 22;
20constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS;
21
22constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
23
24class RasterizerInterface {
25public:
26 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
27 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
28 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
29 Core::Memory::YUZU_PAGEBITS};
30 for (u64 page = page_start; page < page_end; ++page) {
31 int& value = page_table[page];
32 value += delta;
33 if (value < 0) {
34 throw std::logic_error{"negative page"};
35 }
36 if (value == 0) {
37 page_table.erase(page);
38 }
39 }
40 }
41
42 [[nodiscard]] int Count(VAddr addr) const noexcept {
43 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
44 return it == page_table.end() ? 0 : it->second;
45 }
46
47 [[nodiscard]] unsigned Count() const noexcept {
48 unsigned count = 0;
49 for (const auto& [index, value] : page_table) {
50 count += value;
51 }
52 return count;
53 }
54
55private:
56 std::unordered_map<u64, int> page_table;
57};
58} // Anonymous namespace
59
60using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>;
61
62TEST_CASE("MemoryTracker: Small region", "[video_core]") {
63 RasterizerInterface rasterizer;
64 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
65 REQUIRE(rasterizer.Count() == 0);
66 memory_track->UnmarkRegionAsCpuModified(c, WORD);
67 REQUIRE(rasterizer.Count() == WORD / PAGE);
68 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0});
69
70 memory_track->MarkRegionAsCpuModified(c + PAGE, 1);
71 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2});
72}
73
74TEST_CASE("MemoryTracker: Large region", "[video_core]") {
75 RasterizerInterface rasterizer;
76 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
77 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
78 memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4);
79 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) ==
80 Range{c + PAGE, c + WORD + PAGE * 2});
81 REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) ==
82 Range{c + PAGE * 2, c + PAGE * 8});
83 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE});
84 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) ==
85 Range{c + WORD * 4, c + WORD * 4 + PAGE});
86 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
87 Range{c + WORD * 3 + PAGE * 63, c + WORD * 4});
88
89 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
90 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
91 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
92 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9});
93
94 memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
95 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
96 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7});
97
98 memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
99 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32});
100
101 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
102 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
103
104 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
105 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
106}
107
108TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") {
109 RasterizerInterface rasterizer;
110 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
111 REQUIRE(rasterizer.Count() == 0);
112 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
113 REQUIRE(rasterizer.Count() == 1);
114 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
115 REQUIRE(rasterizer.Count() == 0);
116 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
117 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
118 REQUIRE(rasterizer.Count() == 2);
119 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
120 REQUIRE(rasterizer.Count() == 0);
121}
122
123TEST_CASE("MemoryTracker: Basic range", "[video_core]") {
124 RasterizerInterface rasterizer;
125 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
126 memory_track->UnmarkRegionAsCpuModified(c, WORD);
127 memory_track->MarkRegionAsCpuModified(c, PAGE);
128 int num = 0;
129 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
130 REQUIRE(offset == c);
131 REQUIRE(size == PAGE);
132 ++num;
133 });
134 REQUIRE(num == 1U);
135}
136
137TEST_CASE("MemoryTracker: Border upload", "[video_core]") {
138 RasterizerInterface rasterizer;
139 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
140 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
141 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
142 memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
143 REQUIRE(offset == c + WORD - PAGE);
144 REQUIRE(size == PAGE * 2);
145 });
146}
147
148TEST_CASE("MemoryTracker: Border upload range", "[video_core]") {
149 RasterizerInterface rasterizer;
150 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
151 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
152 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
153 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
154 REQUIRE(offset == c + WORD - PAGE);
155 REQUIRE(size == PAGE * 2);
156 });
157 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
158 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
159 REQUIRE(offset == c + WORD - PAGE);
160 REQUIRE(size == PAGE);
161 });
162 memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
163 REQUIRE(offset == c + WORD);
164 REQUIRE(size == PAGE);
165 });
166}
167
168TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") {
169 RasterizerInterface rasterizer;
170 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
171 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
172 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
173 memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
174 REQUIRE(offset == c + WORD - PAGE);
175 REQUIRE(size == PAGE * 2);
176 });
177 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
178 memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
179 REQUIRE(offset == c + WORD - PAGE);
180 REQUIRE(size == PAGE);
181 });
182 memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
183 REQUIRE(offset == c + WORD);
184 REQUIRE(size == PAGE);
185 });
186}
187
188TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") {
189 RasterizerInterface rasterizer;
190 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
191 int num = 0;
192 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
193 REQUIRE(offset == c);
194 REQUIRE(size == WORD);
195 ++num;
196 });
197 REQUIRE(num == 1);
198 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
199 REQUIRE(offset == c + WORD);
200 REQUIRE(size == WORD);
201 ++num;
202 });
203 REQUIRE(num == 2);
204 memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
205 REQUIRE(offset == c + WORD * 2);
206 REQUIRE(size == PAGE * 0x1d);
207 ++num;
208 });
209 REQUIRE(num == 3);
210}
211
212TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") {
213 RasterizerInterface rasterizer;
214 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
215 memory_track->UnmarkRegionAsCpuModified(c, WORD);
216 int num = 0;
217 memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
218 memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
219 memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
220 REQUIRE(offset == c + PAGE * 2);
221 REQUIRE(size == PAGE);
222 ++num;
223 });
224 REQUIRE(num == 1);
225 memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
226 REQUIRE(offset == c + PAGE * 9);
227 REQUIRE(size == PAGE);
228 ++num;
229 });
230 REQUIRE(num == 2);
231}
232
233TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right") {
234 RasterizerInterface rasterizer;
235 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
236 memory_track->UnmarkRegionAsCpuModified(c, WORD * 9);
237 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
238 int num = 0;
239 memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
240 REQUIRE(offset == c + PAGE * 13);
241 REQUIRE(size == WORD * 7 - PAGE * 3);
242 ++num;
243 });
244 REQUIRE(num == 1);
245 memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
246 REQUIRE(offset == c + WORD * 7 + PAGE * 10);
247 REQUIRE(size == PAGE * 3);
248 ++num;
249 });
250 REQUIRE(num == 2);
251}
252
253TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") {
254 RasterizerInterface rasterizer;
255 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
256 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
257 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
258 int num = 0;
259 memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
260 REQUIRE(offset == c + PAGE * 16);
261 REQUIRE(size == WORD * 7 - PAGE * 3);
262 ++num;
263 });
264 REQUIRE(num == 1);
265 memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
266 REQUIRE(offset == c + PAGE * 13);
267 REQUIRE(size == PAGE * 3);
268 ++num;
269 });
270 REQUIRE(num == 2);
271}
272
273TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") {
274 RasterizerInterface rasterizer;
275 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
276 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
277 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
278 int num = 0;
279 memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
280 REQUIRE(offset == c + PAGE * 16);
281 REQUIRE(size == WORD);
282 ++num;
283 });
284 REQUIRE(num == 1);
285 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
286 REQUIRE(offset == c + PAGE * 13);
287 REQUIRE(size == PAGE * 3);
288 ++num;
289 });
290 REQUIRE(num == 2);
291 memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
292 REQUIRE(offset == c + WORD + PAGE * 16);
293 REQUIRE(size == PAGE * 73);
294 ++num;
295 });
296 REQUIRE(num == 3);
297}
298
299TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") {
300 RasterizerInterface rasterizer;
301 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
302 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048);
303 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
304 memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
305 REQUIRE(offset == c + WORD - PAGE);
306 REQUIRE(size == PAGE * 2);
307 });
308}
309
310TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") {
311 RasterizerInterface rasterizer;
312 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
313 memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD);
314 memory_track->MarkRegionAsCpuModified(c, PAGE);
315 REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE);
316 int num = 0;
317 memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
318 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
319 memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
320 REQUIRE(num == 0);
321 memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
322 REQUIRE(num == 1);
323 memory_track->MarkRegionAsCpuModified(c, WORD);
324 REQUIRE(rasterizer.Count() == 2 * WORD / PAGE);
325}
326
327TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") {
328 RasterizerInterface rasterizer;
329 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
330 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
331 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
332 REQUIRE(rasterizer.Count() == 2);
333 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
334 REQUIRE(rasterizer.Count() == 3);
335 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2));
336 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2);
337 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2);
338 REQUIRE(rasterizer.Count() == 7);
339}
340
341TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") {
342 RasterizerInterface rasterizer;
343 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
344 memory_track->UnmarkRegionAsCpuModified(c, 0x310720);
345 REQUIRE(rasterizer.Count(c) == 1);
346 REQUIRE(rasterizer.Count(c + PAGE) == 1);
347 REQUIRE(rasterizer.Count(c + WORD) == 1);
348 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
349}
350
351TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") {
352 RasterizerInterface rasterizer;
353 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
354 memory_track->UnmarkRegionAsCpuModified(c, WORD);
355 memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
356 memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
357 memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
358 static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3};
359 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
360 REQUIRE(offset == offsets.at(i));
361 REQUIRE(size == sizes.at(i));
362 ++i;
363 });
364}
365
366TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") {
367 RasterizerInterface rasterizer;
368 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
369 memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23);
370 REQUIRE(rasterizer.Count() == 0x23);
371 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
372 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
373 memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable {
374        static constexpr std::array<u64, 2> offsets{c + PAGE * 0x1B, c + PAGE * 0x21};
375        static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
376 REQUIRE(offset == offsets.at(i));
377 REQUIRE(size == sizes.at(i));
378 ++i;
379 });
380}
381
382TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
385 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
386 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
387 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
388}
389
390TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") {
391 RasterizerInterface rasterizer;
392 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
393 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
394 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
395 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2));
396 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
397 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
398}
399
400TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") {
401 for (int offset = 0; offset < 4; ++offset) {
402 const VAddr address = c + WORD * offset;
403 RasterizerInterface rasterizer;
404 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
405 REQUIRE(memory_track->IsRegionCpuModified(address, PAGE));
406 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE));
407 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE));
408
409 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
410 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD));
411 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE));
412 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE));
413 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE));
414 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
415 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
416
417 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
418 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
419 }
420}
421
422TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") {
423 RasterizerInterface rasterizer;
424 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
425 memory_track->UnmarkRegionAsCpuModified(c, WORD * 16);
426 REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16));
427
428 memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
429 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16));
430 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2));
431 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2));
432 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2));
433 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
434 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
435 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
436 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
437 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
438}
439
440TEST_CASE("MemoryTracker: Wrap word regions") {
441 RasterizerInterface rasterizer;
442 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
443 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
444 memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
445 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2));
446 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE));
447 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE));
448 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE));
449 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
450 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
451 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
452
453 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
454 memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
455 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE));
457 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE));
458 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
459 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16));
460}
461
462TEST_CASE("MemoryTracker: Unaligned page region query") {
463 RasterizerInterface rasterizer;
464 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
465 memory_track->UnmarkRegionAsCpuModified(c, WORD);
466 memory_track->MarkRegionAsCpuModified(c + 4000, 1000);
467 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
468 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
469 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000));
470 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1));
471}
472
473TEST_CASE("MemoryTracker: Cached write") {
474 RasterizerInterface rasterizer;
475 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
476 memory_track->UnmarkRegionAsCpuModified(c, WORD);
477    memory_track->CachedCpuWrite(c + PAGE, PAGE);
478 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
479 memory_track->FlushCachedWrites();
480 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
481 memory_track->MarkRegionAsCpuModified(c, WORD);
482 REQUIRE(rasterizer.Count() == 0);
483}
484
485TEST_CASE("MemoryTracker: Multiple cached write") {
486 RasterizerInterface rasterizer;
487 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
488 memory_track->UnmarkRegionAsCpuModified(c, WORD);
489 memory_track->CachedCpuWrite(c + PAGE, PAGE);
490 memory_track->CachedCpuWrite(c + PAGE * 3, PAGE);
491 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
492 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
493 memory_track->FlushCachedWrites();
494 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
495 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
496 memory_track->MarkRegionAsCpuModified(c, WORD);
497 REQUIRE(rasterizer.Count() == 0);
498}
499
500TEST_CASE("MemoryTracker: Cached write unmarked") {
501 RasterizerInterface rasterizer;
502 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
503 memory_track->UnmarkRegionAsCpuModified(c, WORD);
504 memory_track->CachedCpuWrite(c + PAGE, PAGE);
505 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
506 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
507 memory_track->FlushCachedWrites();
508 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
509 memory_track->MarkRegionAsCpuModified(c, WORD);
510 REQUIRE(rasterizer.Count() == 0);
511}
512
513TEST_CASE("MemoryTracker: Cached write iterated") {
514 RasterizerInterface rasterizer;
515 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
516 memory_track->UnmarkRegionAsCpuModified(c, WORD);
517 memory_track->CachedCpuWrite(c + PAGE, PAGE);
518 int num = 0;
519 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
520 REQUIRE(num == 0);
521 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
522 memory_track->FlushCachedWrites();
523 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
524 memory_track->MarkRegionAsCpuModified(c, WORD);
525 REQUIRE(rasterizer.Count() == 0);
526}
527
528TEST_CASE("MemoryTracker: Cached write downloads") {
529 RasterizerInterface rasterizer;
530 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
531 memory_track->UnmarkRegionAsCpuModified(c, WORD);
532 REQUIRE(rasterizer.Count() == 64);
533 memory_track->CachedCpuWrite(c + PAGE, PAGE);
534 REQUIRE(rasterizer.Count() == 63);
535 memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
536 int num = 0;
537 memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
538 REQUIRE(num == 1);
539 num = 0;
540 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
544 memory_track->FlushCachedWrites();
545 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
547 memory_track->MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
\ No newline at end of file
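
One behavioural change is visible in the ported tests: BufferBase reported buffer-relative offsets (offset == 0U, Range{PAGE, PAGE * 2}), while MemoryTracker reports absolute virtual addresses (offset == c, Range{c + PAGE, c + PAGE * 2}). The sketch below shows how a consumer that still wants relative offsets could adapt; FakeTracker and ForEachUploadRangeRelative are illustrative names invented here, not yuzu APIs.

// Standalone sketch of the callback-contract change, not part of the commit.
#include <cstdint>
#include <cstdio>

using u64 = std::uint64_t;
using VAddr = std::uint64_t;

struct FakeTracker {
    // Stand-in for MemoryTracker: reports one dirty page one page past the
    // queried base, using absolute addresses like the new tests assert.
    template <typename Func>
    void ForEachUploadRange(VAddr addr, u64 /*size*/, Func&& func) {
        func(addr + 4096, 4096);
    }
};

// Subtracting the query base recovers the BufferBase-style relative offsets.
template <typename Tracker, typename Func>
void ForEachUploadRangeRelative(Tracker& tracker, VAddr cpu_addr, u64 size, Func&& func) {
    tracker.ForEachUploadRange(cpu_addr, size, [&](u64 addr, u64 range_size) {
        func(addr - cpu_addr, range_size);
    });
}

int main() {
    FakeTracker tracker;
    ForEachUploadRangeRelative(tracker, VAddr{0x1328914000}, u64{1} << 18,
                               [](u64 offset, u64 size) {
                                   std::printf("offset=0x%llx size=0x%llx\n",
                                               static_cast<unsigned long long>(offset),
                                               static_cast<unsigned long long>(size));
                               });
    return 0;
}
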
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e904573d7..92cab93f3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -11,8 +11,11 @@ endif()
11 11
12add_library(video_core STATIC 12add_library(video_core STATIC
13 buffer_cache/buffer_base.h 13 buffer_cache/buffer_base.h
14 buffer_cache/buffer_cache_base.h
14 buffer_cache/buffer_cache.cpp 15 buffer_cache/buffer_cache.cpp
15 buffer_cache/buffer_cache.h 16 buffer_cache/buffer_cache.h
17 buffer_cache/memory_tracker_base.h
18 buffer_cache/word_manager.h
16 cache_types.h 19 cache_types.h
17 cdma_pusher.cpp 20 cdma_pusher.cpp
18 cdma_pusher.h 21 cdma_pusher.h
@@ -104,6 +107,7 @@ add_library(video_core STATIC
104 renderer_null/renderer_null.h 107 renderer_null/renderer_null.h
105 renderer_opengl/blit_image.cpp 108 renderer_opengl/blit_image.cpp
106 renderer_opengl/blit_image.h 109 renderer_opengl/blit_image.h
110 renderer_opengl/gl_buffer_cache_base.cpp
107 renderer_opengl/gl_buffer_cache.cpp 111 renderer_opengl/gl_buffer_cache.cpp
108 renderer_opengl/gl_buffer_cache.h 112 renderer_opengl/gl_buffer_cache.h
109 renderer_opengl/gl_compute_pipeline.cpp 113 renderer_opengl/gl_compute_pipeline.cpp
@@ -154,6 +158,7 @@ add_library(video_core STATIC
154 renderer_vulkan/renderer_vulkan.cpp 158 renderer_vulkan/renderer_vulkan.cpp
155 renderer_vulkan/vk_blit_screen.cpp 159 renderer_vulkan/vk_blit_screen.cpp
156 renderer_vulkan/vk_blit_screen.h 160 renderer_vulkan/vk_blit_screen.h
161 renderer_vulkan/vk_buffer_cache_base.cpp
157 renderer_vulkan/vk_buffer_cache.cpp 162 renderer_vulkan/vk_buffer_cache.cpp
158 renderer_vulkan/vk_buffer_cache.h 163 renderer_vulkan/vk_buffer_cache.h
159 renderer_vulkan/vk_command_pool.cpp 164 renderer_vulkan/vk_command_pool.cpp
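
The diffstat lists the new gl_buffer_cache_base.cpp and vk_buffer_cache_base.cpp at nine lines each, but their bodies are not part of this page. A plausible reconstruction, assuming they follow the common pattern of explicitly instantiating the BufferCache template once per backend so the heavy definitions in buffer_cache.h compile in a single translation unit:

// Hypothetical contents of renderer_opengl/gl_buffer_cache_base.cpp; the real
// file is not shown in this diff. Explicit instantiation pins the template's
// code generation to this one translation unit.
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"

namespace VideoCommon {
template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>;
}
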
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 1b4d63616..9cbd95c4b 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
@@ -11,9 +11,7 @@
11#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/common_funcs.h" 12#include "common/common_funcs.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/div_ceil.h" 14#include "video_core/buffer_cache/word_manager.h"
15#include "common/settings.h"
16#include "core/memory.h"
17 15
18namespace VideoCommon { 16namespace VideoCommon {
19 17
@@ -36,116 +34,12 @@ struct NullBufferParams {};
36 */ 34 */
37template <class RasterizerInterface> 35template <class RasterizerInterface>
38class BufferBase { 36class BufferBase {
39 static constexpr u64 PAGES_PER_WORD = 64;
40 static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
42
43 /// Vector tracking modified pages tightly packed with small vector optimization
44 union WordsArray {
45 /// Returns the pointer to the words state
46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
47 return is_short ? &stack : heap;
48 }
49
50 /// Returns the pointer to the words state
51 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
52 return is_short ? &stack : heap;
53 }
54
55 u64 stack = 0; ///< Storage for small buffers
56 u64* heap; ///< Pointer to heap storage for larger buffers
57 };
58
59 struct Words {
60 explicit Words() = default;
61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
62 if (IsShort()) {
63 cpu.stack = ~u64{0};
64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
67 } else {
68 // Share allocation between CPU and GPU pages and set their default values
69 const size_t num_words = NumWords();
70 u64* const alloc = new u64[num_words * 4];
71 cpu.heap = alloc;
72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
75 std::fill_n(cpu.heap, num_words, ~u64{0});
76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
79 }
80 // Clean up trailing bits
81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
84 const u64 last_word = (~u64{0} << shift) >> shift;
85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
87 }
88
89 ~Words() {
90 Release();
91 }
92
93 Words& operator=(Words&& rhs) noexcept {
94 Release();
95 size_bytes = rhs.size_bytes;
96 cpu = rhs.cpu;
97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
100 rhs.cpu.heap = nullptr;
101 return *this;
102 }
103
104 Words(Words&& rhs) noexcept
105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
107 rhs.cpu.heap = nullptr;
108 }
109
110 Words& operator=(const Words&) = delete;
111 Words(const Words&) = delete;
112
113 /// Returns true when the buffer fits in the small vector optimization
114 [[nodiscard]] bool IsShort() const noexcept {
115 return size_bytes <= BYTES_PER_WORD;
116 }
117
118 /// Returns the number of words of the buffer
119 [[nodiscard]] size_t NumWords() const noexcept {
120 return Common::DivCeil(size_bytes, BYTES_PER_WORD);
121 }
122
123 /// Release buffer resources
124 void Release() {
125 if (!IsShort()) {
126 // CPU written words is the base for the heap allocation
127 delete[] cpu.heap;
128 }
129 }
130
131 u64 size_bytes = 0;
132 WordsArray cpu;
133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
143 };
144
145public: 37public:
146 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) 38 static constexpr u64 BASE_PAGE_BITS = 16;
147 : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
148 words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} 40
41 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_)
42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
149 43
150 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
151 45
@@ -155,100 +49,6 @@ public:
155 BufferBase& operator=(BufferBase&&) = default; 49 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default; 50 BufferBase(BufferBase&&) = default;
157 51
158 /// Returns the inclusive CPU modified range in a begin end pair
159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
160 u64 query_size) const noexcept {
161 const u64 offset = query_cpu_addr - cpu_addr;
162 return ModifiedRegion<Type::CPU>(offset, query_size);
163 }
164
165 /// Returns the inclusive GPU modified range in a begin end pair
166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
167 u64 query_size) const noexcept {
168 const u64 offset = query_cpu_addr - cpu_addr;
169 return ModifiedRegion<Type::GPU>(offset, query_size);
170 }
171
172 /// Returns true if a region has been modified from the CPU
173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
174 const u64 offset = query_cpu_addr - cpu_addr;
175 return IsRegionModified<Type::CPU>(offset, query_size);
176 }
177
178 /// Returns true if a region has been modified from the GPU
179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
180 const u64 offset = query_cpu_addr - cpu_addr;
181 return IsRegionModified<Type::GPU>(offset, query_size);
182 }
183
184 /// Mark region as CPU modified, notifying the rasterizer about this change
185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
187 }
188
189 /// Unmark region as CPU modified, notifying the rasterizer about this change
190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
192 }
193
194 /// Mark region as modified from the host GPU
195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
197 }
198
199 /// Unmark region as modified from the host GPU
200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU,
205 /// but defer the CPU-modified mark until FlushCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes and notifies the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 if (!Settings::values.use_pessimistic_flushes) {
224 cached_words[word_index] = 0;
225 }
226 }
227 }
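
A minimal standalone sketch of the per-word merge performed above, with the rasterizer notification and the pessimistic-flush setting omitted; names are hypothetical:

#include <cstdint>

// Flush one word of pending cached writes: the pages become visible in
// the CPU-modified and untracked bitmaps, and the pending word clears.
void FlushWord(uint64_t& cpu, uint64_t& untracked, uint64_t& cached) {
    untracked |= cached;
    cpu |= cached;
    cached = 0;
}
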
228
229 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
230 template <typename Func>
231 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
232 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
233 }
234
235 /// Call 'func' for each GPU modified range, optionally unmarking those pages as GPU modified
236 template <typename Func>
237 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
238 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
239 }
240
241 template <typename Func>
242 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) {
243 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func);
244 }
245
246 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
247 template <typename Func>
248 void ForEachDownloadRange(Func&& func) {
249 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
250 }
251
252 /// Mark buffer as picked 52 /// Mark buffer as picked
253 void Pick() noexcept { 53 void Pick() noexcept {
254 flags |= BufferFlagBits::Picked; 54 flags |= BufferFlagBits::Picked;
@@ -295,11 +95,6 @@ public:
295 return static_cast<u32>(other_cpu_addr - cpu_addr); 95 return static_cast<u32>(other_cpu_addr - cpu_addr);
296 } 96 }
297 97
298 /// Returns the size in bytes of the buffer
299 [[nodiscard]] u64 SizeBytes() const noexcept {
300 return words.size_bytes;
301 }
302
303 size_t getLRUID() const noexcept { 98 size_t getLRUID() const noexcept {
304 return lru_id; 99 return lru_id;
305 } 100 }
@@ -308,305 +103,16 @@ public:
308 lru_id = lru_id_; 103 lru_id = lru_id_;
309 } 104 }
310 105
311private: 106 size_t SizeBytes() const {
312 template <Type type> 107 return size_bytes;
313 u64* Array() noexcept {
314 if constexpr (type == Type::CPU) {
315 return words.cpu.Pointer(IsShort());
316 } else if constexpr (type == Type::GPU) {
317 return words.gpu.Pointer(IsShort());
318 } else if constexpr (type == Type::CachedCPU) {
319 return words.cached_cpu.Pointer(IsShort());
320 } else if constexpr (type == Type::Untracked) {
321 return words.untracked.Pointer(IsShort());
322 }
323 }
324
325 template <Type type>
326 const u64* Array() const noexcept {
327 if constexpr (type == Type::CPU) {
328 return words.cpu.Pointer(IsShort());
329 } else if constexpr (type == Type::GPU) {
330 return words.gpu.Pointer(IsShort());
331 } else if constexpr (type == Type::CachedCPU) {
332 return words.cached_cpu.Pointer(IsShort());
333 } else if constexpr (type == Type::Untracked) {
334 return words.untracked.Pointer(IsShort());
335 }
336 }
337
338 /**
339 * Change the state of a range of pages
340 *
341 * @param dirty_addr Base address to mark or unmark as modified
342 * @param size Size in bytes to mark or unmark as modified
343 */
344 template <Type type, bool enable>
345 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
346 const s64 difference = dirty_addr - cpu_addr;
347 const u64 offset = std::max<s64>(difference, 0);
348 size += std::min<s64>(difference, 0);
349 if (offset >= SizeBytes() || size < 0) {
350 return;
351 }
352 u64* const untracked_words = Array<Type::Untracked>();
353 u64* const state_words = Array<type>();
354 const u64 offset_end = std::min(offset + size, SizeBytes());
355 const u64 begin_page_index = offset / BYTES_PER_PAGE;
356 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
357 const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE);
358 const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD);
359 u64 page_index = begin_page_index % PAGES_PER_WORD;
360 u64 word_index = begin_word_index;
361 while (word_index < end_word_index) {
362 const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD;
363 const u64 left_offset =
364 std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD;
365 const u64 right_offset = page_index;
366 u64 bits = ~u64{0};
367 bits = (bits >> right_offset) << right_offset;
368 bits = (bits << left_offset) >> left_offset;
369 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
370 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
371 }
372 if constexpr (enable) {
373 state_words[word_index] |= bits;
374 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
375 untracked_words[word_index] |= bits;
376 }
377 } else {
378 state_words[word_index] &= ~bits;
379 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
380 untracked_words[word_index] &= ~bits;
381 }
382 }
383 page_index = 0;
384 ++word_index;
385 }
386 }
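
The two shift pairs above build a mask selecting only the pages of the current word that fall inside the range. A sketch of the same arithmetic, assuming PAGES_PER_WORD = 64:

#include <cstdint>

// right_offset clears pages below the range; left_offset clears pages
// at or past its end within this word.
uint64_t PageMask(uint64_t right_offset, uint64_t left_offset) {
    uint64_t bits = ~uint64_t{0};
    bits = (bits >> right_offset) << right_offset;
    bits = (bits << left_offset) >> left_offset;
    return bits;
}
// Example: PageMask(2, 60) == 0b1100, i.e. pages 2 and 3 of the word.
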
387
388 /**
389 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
390 *
391 * @param word_index Index to the word to notify to the rasterizer
392 * @param current_bits Current state of the word
393 * @param new_bits New state of the word
394 *
395 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
396 */
397 template <bool add_to_rasterizer>
398 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
399 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
400 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
401 while (changed_bits != 0) {
402 const int empty_bits = std::countr_zero(changed_bits);
403 addr += empty_bits * BYTES_PER_PAGE;
404 changed_bits >>= empty_bits;
405
406 const u32 continuous_bits = std::countr_one(changed_bits);
407 const u64 size = continuous_bits * BYTES_PER_PAGE;
408 const VAddr begin_addr = addr;
409 addr += size;
410 changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0;
411 rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1);
412 }
413 }
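
The loop above walks runs of set bits so that each contiguous span of pages produces a single rasterizer update. A condensed model of that walk (callback signature hypothetical):

#include <bit>
#include <cstdint>

// Invokes func(first_page, page_count) for each run of ones in 'word'.
template <typename Func>
void ForEachRun(uint64_t word, Func&& func) {
    uint64_t page = 0;
    while (word != 0) {
        const int zeros = std::countr_zero(word);
        page += zeros;
        word >>= zeros;
        const int ones = std::countr_one(word);
        func(page, ones);
        page += ones;
        word = ones < 64 ? (word >> ones) : 0; // avoid shifting by 64
    }
}
// ForEachRun(0b110100, f) calls f(2, 1) and then f(4, 2).
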
414
415 /**
416 * Loop over each page in the given range, optionally turn off those bits, and notify the
417 * rasterizer if needed. Call the given function on each turned-off range.
418 *
419 * @param query_cpu_range Base CPU address to loop over
420 * @param size Size in bytes of the CPU range to loop over
421 * @param func Function to call for each turned-off region; pages are unmarked only when 'clear' is true
422 */
423 template <Type type, typename Func>
424 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
425 static_assert(type != Type::Untracked);
426
427 const s64 difference = query_cpu_range - cpu_addr;
428 const u64 query_begin = std::max<s64>(difference, 0);
429 size += std::min<s64>(difference, 0);
430 if (query_begin >= SizeBytes() || size < 0) {
431 return;
432 }
433 u64* const untracked_words = Array<Type::Untracked>();
434 u64* const state_words = Array<type>();
435 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
436 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
437 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
438
439 const auto modified = [](u64 word) { return word != 0; };
440 const auto first_modified_word = std::find_if(words_begin, words_end, modified);
441 if (first_modified_word == words_end) {
442 // Exit early when the buffer is not modified
443 return;
444 }
445 const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified);
446
447 const u64 word_index_begin = std::distance(state_words, first_modified_word);
448 const u64 word_index_end = std::distance(state_words, last_modified_word);
449
450 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
451 const unsigned local_page_end =
452 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
453 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
454 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
455 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
456 const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE);
457 const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin);
458 const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end);
459 const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD;
460 const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1;
461
462 u64 page_begin = first_word_page_begin;
463 u64 current_base = 0;
464 u64 current_size = 0;
465 bool on_going = false;
466 for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) {
467 const bool is_last_word = word_index + 1 == word_index_end;
468 const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD;
469 const u64 right_offset = page_begin;
470 const u64 left_offset = PAGES_PER_WORD - page_end;
471 u64 bits = ~u64{0};
472 bits = (bits >> right_offset) << right_offset;
473 bits = (bits << left_offset) >> left_offset;
474
475 const u64 current_word = state_words[word_index] & bits;
476 if (clear) {
477 state_words[word_index] &= ~bits;
478 }
479
480 if constexpr (type == Type::CPU) {
481 const u64 current_bits = untracked_words[word_index] & bits;
482 untracked_words[word_index] &= ~bits;
483 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
484 }
485 // Exclude CPU modified pages when visiting GPU pages
486 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
487 u64 page = page_begin;
488 page_begin = 0;
489
490 while (page < page_end) {
491 const int empty_bits = std::countr_zero(word >> page);
492 if (on_going && empty_bits != 0) {
493 InvokeModifiedRange(func, current_size, current_base);
494 current_size = 0;
495 on_going = false;
496 }
497 if (empty_bits == PAGES_PER_WORD) {
498 break;
499 }
500 page += empty_bits;
501
502 const int continuous_bits = std::countr_one(word >> page);
503 if (!on_going && continuous_bits != 0) {
504 current_base = word_index * PAGES_PER_WORD + page;
505 on_going = true;
506 }
507 current_size += continuous_bits;
508 page += continuous_bits;
509 }
510 }
511 if (on_going && current_size > 0) {
512 InvokeModifiedRange(func, current_size, current_base);
513 }
514 }
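
Before any per-bit work, the scan above narrows the word range with a linear search for non-zero words, so untouched buffers exit early. Sketched in isolation (helper hypothetical):

#include <algorithm>
#include <cstdint>
#include <utility>

// Bounds the first contiguous run of non-zero words, mirroring the
// find_if/find_if_not pair above; returns {end, end} when all are clear.
std::pair<const uint64_t*, const uint64_t*> NonZeroSpan(const uint64_t* begin,
                                                        const uint64_t* end) {
    const auto modified = [](uint64_t word) { return word != 0; };
    const uint64_t* first = std::find_if(begin, end, modified);
    if (first == end) {
        return {end, end};
    }
    return {first, std::find_if_not(first, end, modified)};
}
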
515
516 template <typename Func>
517 void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) {
518 const u64 current_size_bytes = current_size * BYTES_PER_PAGE;
519 const u64 offset_begin = current_base * BYTES_PER_PAGE;
520 const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes());
521 func(offset_begin, offset_end - offset_begin);
522 } 108 }
523 109
524 /** 110private:
525 * Returns true when a region has been modified
526 *
527 * @param offset Offset in bytes from the start of the buffer
528 * @param size Size in bytes of the region to query for modifications
529 */
530 template <Type type>
531 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
532 static_assert(type != Type::Untracked);
533
534 const u64* const untracked_words = Array<Type::Untracked>();
535 const u64* const state_words = Array<type>();
536 const u64 num_query_words = size / BYTES_PER_WORD + 1;
537 const u64 word_begin = offset / BYTES_PER_WORD;
538 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
539 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
540 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
541 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
542 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
543 const u64 word = state_words[word_index] & ~off_word;
544 if (word == 0) {
545 continue;
546 }
547 const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit);
548 const u64 local_page_end = page_end % PAGES_PER_WORD;
549 const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD;
550 if (((word >> page_index) << page_index) << page_end_shift != 0) {
551 return true;
552 }
553 }
554 return false;
555 }
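
The shift expression in the loop above tests whether any tracked page inside the queried part of the word is set; bits shifted past the top of the u64 are discarded. As a sketch:

#include <cstdint>

// True when 'word' has a set bit in [page_index, 64 - page_end_shift).
bool AnyPageSet(uint64_t word, uint64_t page_index, uint64_t page_end_shift) {
    return ((word >> page_index) << page_index) << page_end_shift != 0;
}
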
556
557 /**
558 * Returns a begin-end pair with the inclusive modified region
559 *
560 * @param offset Offset in bytes from the start of the buffer
561 * @param size Size in bytes of the region to query for modifications
562 */
563 template <Type type>
564 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
565 static_assert(type != Type::Untracked);
566
567 const u64* const untracked_words = Array<Type::Untracked>();
568 const u64* const state_words = Array<type>();
569 const u64 num_query_words = size / BYTES_PER_WORD + 1;
570 const u64 word_begin = offset / BYTES_PER_WORD;
571 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
572 const u64 page_base = offset / BYTES_PER_PAGE;
573 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
574 u64 begin = std::numeric_limits<u64>::max();
575 u64 end = 0;
576 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
577 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
578 const u64 word = state_words[word_index] & ~off_word;
579 if (word == 0) {
580 continue;
581 }
582 const u64 local_page_begin = std::countr_zero(word);
583 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
584 const u64 page_index = word_index * PAGES_PER_WORD;
585 const u64 page_begin = std::max(page_index + local_page_begin, page_base);
586 const u64 page_end = std::min(page_index + local_page_end, page_limit);
587 begin = std::min(begin, page_begin);
588 end = std::max(end, page_end);
589 }
590 static constexpr std::pair<u64, u64> EMPTY{0, 0};
591 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
592 }
593
594 /// Returns the number of words of the buffer
595 [[nodiscard]] size_t NumWords() const noexcept {
596 return words.NumWords();
597 }
598
599 /// Returns true when the buffer fits in the small vector optimization
600 [[nodiscard]] bool IsShort() const noexcept {
601 return words.IsShort();
602 }
603
604 RasterizerInterface* rasterizer = nullptr;
605 VAddr cpu_addr = 0; 111 VAddr cpu_addr = 0;
606 Words words;
607 BufferFlagBits flags{}; 112 BufferFlagBits flags{};
608 int stream_score = 0; 113 int stream_score = 0;
609 size_t lru_id = SIZE_MAX; 114 size_t lru_id = SIZE_MAX;
115 size_t size_bytes = 0;
610}; 116};
611 117
612} // namespace VideoCommon 118} // namespace VideoCommon
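
For reference, a compact model of the tracking layout this file implements, one bit per 4096-byte page and 64 pages per 64-bit word (constants as in the original; the helper is hypothetical):

#include <cstdint>
#include <utility>

constexpr uint64_t BYTES_PER_PAGE = 4096;
constexpr uint64_t PAGES_PER_WORD = 64;
constexpr uint64_t BYTES_PER_WORD = BYTES_PER_PAGE * PAGES_PER_WORD;

// Word index and bit index that track a byte offset into the buffer.
constexpr std::pair<uint64_t, uint64_t> Locate(uint64_t offset) {
    const uint64_t page = offset / BYTES_PER_PAGE;
    return {page / PAGES_PER_WORD, page % PAGES_PER_WORD};
}
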
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index a16308b60..40db243d2 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include "common/microprofile.h" 4#include "common/microprofile.h"
5 5
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index abdc593df..7975564b5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1,485 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <algorithm> 6#include <algorithm>
7#include <array>
8#include <memory> 7#include <memory>
9#include <mutex>
10#include <numeric> 8#include <numeric>
11#include <span>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/icl/interval_set.hpp>
16
17#include "common/common_types.h"
18#include "common/div_ceil.h"
19#include "common/literals.h"
20#include "common/lru_cache.h"
21#include "common/microprofile.h"
22#include "common/polyfill_ranges.h"
23#include "common/scratch_buffer.h"
24#include "common/settings.h"
25#include "core/memory.h"
26#include "video_core/buffer_cache/buffer_base.h"
27#include "video_core/control/channel_state_cache.h"
28#include "video_core/delayed_destruction_ring.h"
29#include "video_core/dirty_flags.h"
30#include "video_core/engines/draw_manager.h"
31#include "video_core/engines/kepler_compute.h"
32#include "video_core/engines/maxwell_3d.h"
33#include "video_core/memory_manager.h"
34#include "video_core/rasterizer_interface.h"
35#include "video_core/surface.h"
36#include "video_core/texture_cache/slot_vector.h"
37#include "video_core/texture_cache/types.h"
38 9
39namespace VideoCommon { 10#include "video_core/buffer_cache/buffer_cache_base.h"
40
41MICROPROFILE_DECLARE(GPU_PrepareBuffers);
42MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
43MICROPROFILE_DECLARE(GPU_DownloadMemory);
44
45using BufferId = SlotId;
46
47using VideoCore::Surface::PixelFormat;
48using namespace Common::Literals;
49
50constexpr u32 NUM_VERTEX_BUFFERS = 32;
51constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
52constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
53constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
54constexpr u32 NUM_STORAGE_BUFFERS = 16;
55constexpr u32 NUM_TEXTURE_BUFFERS = 16;
56constexpr u32 NUM_STAGES = 5;
57
58enum class ObtainBufferSynchronize : u32 {
59 NoSynchronize = 0,
60 FullSynchronize = 1,
61 SynchronizeNoDirty = 2,
62};
63
64enum class ObtainBufferOperation : u32 {
65 DoNothing = 0,
66 MarkAsWritten = 1,
67 DiscardWrite = 2,
68 MarkQuery = 3,
69};
70
71using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
72using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
73
74template <typename P>
75class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
76
77 // Page size for caching purposes.
78 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
79 static constexpr u32 YUZU_PAGEBITS = 16;
80 static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS;
81
82 static constexpr bool IS_OPENGL = P::IS_OPENGL;
83 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
84 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
85 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
86 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
87 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
88 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
89 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
90 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
91
92 static constexpr BufferId NULL_BUFFER_ID{0};
93
94 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
95 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
96 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
97
98 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
99
100 using Runtime = typename P::Runtime;
101 using Buffer = typename P::Buffer;
102
103 using IntervalSet = boost::icl::interval_set<VAddr>;
104 using IntervalType = typename IntervalSet::interval_type;
105
106 struct Empty {};
107
108 struct OverlapResult {
109 std::vector<BufferId> ids;
110 VAddr begin;
111 VAddr end;
112 bool has_stream_leap = false;
113 };
114
115 struct Binding {
116 VAddr cpu_addr{};
117 u32 size{};
118 BufferId buffer_id;
119 };
120
121 struct TextureBufferBinding : Binding {
122 PixelFormat format;
123 };
124
125 static constexpr Binding NULL_BINDING{
126 .cpu_addr = 0,
127 .size = 0,
128 .buffer_id = NULL_BUFFER_ID,
129 };
130
131public:
132 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
133
134 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
135 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
136
137 void TickFrame();
138
139 void WriteMemory(VAddr cpu_addr, u64 size);
140
141 void CachedWriteMemory(VAddr cpu_addr, u64 size);
142
143 void DownloadMemory(VAddr cpu_addr, u64 size);
144
145 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
146
147 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
148
149 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
150
151 void UpdateGraphicsBuffers(bool is_indexed);
152
153 void UpdateComputeBuffers();
154
155 void BindHostGeometryBuffers(bool is_indexed);
156
157 void BindHostStageBuffers(size_t stage);
158
159 void BindHostComputeBuffers();
160
161 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
162 const UniformBufferSizes* sizes);
163
164 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
165
166 void UnbindGraphicsStorageBuffers(size_t stage);
167
168 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
169 bool is_written);
170
171 void UnbindGraphicsTextureBuffers(size_t stage);
172
173 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
174 PixelFormat format, bool is_written, bool is_image);
175
176 void UnbindComputeStorageBuffers();
177
178 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
179 bool is_written);
180
181 void UnbindComputeTextureBuffers();
182
183 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
184 bool is_written, bool is_image);
185
186 void FlushCachedWrites();
187
188 /// Return true when there are uncommitted buffers to be downloaded
189 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
190
191 void AccumulateFlushes();
192
193 /// Return true when the caller should wait for async downloads
194 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
195
196 /// Commit asynchronous downloads
197 void CommitAsyncFlushes();
198 void CommitAsyncFlushesHigh();
199
200 /// Pop asynchronous downloads
201 void PopAsyncFlushes();
202
203 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
204
205 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
206
207 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
208 ObtainBufferSynchronize sync_info,
209 ObtainBufferOperation post_op);
210
211 /// Return true when a CPU region is modified from the GPU
212 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
213
214 /// Return true when a region is registered on the cache
215 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
216
217 /// Return true when a CPU region is modified from the CPU
218 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
219
220 void SetDrawIndirect(
221 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
222 current_draw_indirect = current_draw_indirect_;
223 }
224
225 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
226
227 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
228
229 std::recursive_mutex mutex;
230 Runtime& runtime;
231
232private:
233 template <typename Func>
234 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
235 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
236 const int disabled_bits = std::countr_zero(enabled_mask);
237 index += disabled_bits;
238 enabled_mask >>= disabled_bits;
239 func(index);
240 }
241 }
242
243 template <typename Func>
244 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
245 const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE);
246 for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) {
247 const BufferId buffer_id = page_table[page];
248 if (!buffer_id) {
249 ++page;
250 continue;
251 }
252 Buffer& buffer = slot_buffers[buffer_id];
253 func(buffer_id, buffer);
254
255 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
256 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
257 }
258 }
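
The walk above advances page by page until it hits a registered buffer, then jumps the cursor past that buffer's last page. The paging arithmetic, sketched with the 64 KiB caching page this cache uses:

#include <cstdint>

constexpr uint64_t PAGE_BITS = 16; // YUZU_PAGEBITS / CACHING_PAGEBITS
constexpr uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

uint64_t FirstPage(uint64_t addr) {
    return addr >> PAGE_BITS;
}
uint64_t EndPage(uint64_t addr, uint64_t size) {
    return (addr + size + PAGE_SIZE - 1) >> PAGE_BITS; // DivCeil
}
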
259
260 template <typename Func>
261 void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
262 const VAddr start_address = cpu_addr;
263 const VAddr end_address = start_address + size;
264 const VAddr search_base =
265 static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
266 const IntervalType search_interval{search_base, search_base + 1};
267 auto it = common_ranges.lower_bound(search_interval);
268 if (it == common_ranges.end()) {
269 it = common_ranges.begin();
270 }
271 for (; it != common_ranges.end(); it++) {
272 VAddr inter_addr_end = it->upper();
273 VAddr inter_addr = it->lower();
274 if (inter_addr >= end_address) {
275 break;
276 }
277 if (inter_addr_end <= start_address) {
278 continue;
279 }
280 if (inter_addr_end > end_address) {
281 inter_addr_end = end_address;
282 }
283 if (inter_addr < start_address) {
284 inter_addr = start_address;
285 }
286 func(inter_addr, inter_addr_end);
287 }
288 }
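
Each stored interval is clipped against the query window before the callback fires. The clamp in isolation (helper hypothetical):

#include <algorithm>
#include <cstdint>

// Calls func with the overlap of [lower, upper) and [start, end), if any.
template <typename Func>
void ClampAndCall(uint64_t lower, uint64_t upper, uint64_t start, uint64_t end,
                  Func&& func) {
    if (lower >= end || upper <= start) {
        return; // no overlap
    }
    func(std::max(lower, start), std::min(upper, end));
}
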
289
290 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
291 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
292 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
293 }
294
295 void RunGarbageCollector();
296
297 void BindHostIndexBuffer();
298
299 void BindHostVertexBuffers();
300
301 void BindHostDrawIndirectBuffers();
302
303 void BindHostGraphicsUniformBuffers(size_t stage);
304
305 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
306
307 void BindHostGraphicsStorageBuffers(size_t stage);
308
309 void BindHostGraphicsTextureBuffers(size_t stage);
310
311 void BindHostTransformFeedbackBuffers();
312
313 void BindHostComputeUniformBuffers();
314
315 void BindHostComputeStorageBuffers();
316
317 void BindHostComputeTextureBuffers();
318
319 void DoUpdateGraphicsBuffers(bool is_indexed);
320
321 void DoUpdateComputeBuffers();
322
323 void UpdateIndexBuffer();
324
325 void UpdateVertexBuffers();
326
327 void UpdateVertexBuffer(u32 index);
328
329 void UpdateDrawIndirect();
330
331 void UpdateUniformBuffers(size_t stage);
332
333 void UpdateStorageBuffers(size_t stage);
334
335 void UpdateTextureBuffers(size_t stage);
336
337 void UpdateTransformFeedbackBuffers();
338
339 void UpdateTransformFeedbackBuffer(u32 index);
340
341 void UpdateComputeUniformBuffers();
342
343 void UpdateComputeStorageBuffers();
344
345 void UpdateComputeTextureBuffers();
346
347 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
348
349 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
350
351 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
352
353 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
354
355 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
356
357 void Register(BufferId buffer_id);
358
359 void Unregister(BufferId buffer_id);
360
361 template <bool insert>
362 void ChangeRegister(BufferId buffer_id);
363
364 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
365
366 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
367
368 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
369
370 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
371 std::span<BufferCopy> copies);
372
373 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
374 std::span<const BufferCopy> copies);
375
376 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
377
378 void DownloadBufferMemory(Buffer& buffer_id);
379
380 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
381
382 void DeleteBuffer(BufferId buffer_id);
383
384 void NotifyBufferDeletion();
385
386 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
387 bool is_written = false) const;
388
389 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
390 PixelFormat format);
391
392 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
393
394 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
395
396 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
397
398 void ClearDownload(IntervalType subtract_interval);
399
400 VideoCore::RasterizerInterface& rasterizer;
401 Core::Memory::Memory& cpu_memory;
402
403 SlotVector<Buffer> slot_buffers;
404 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
405
406 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
407
408 u32 last_index_count = 0;
409
410 Binding index_buffer;
411 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
412 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
413 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
414 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
415 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
416 Binding count_buffer_binding;
417 Binding indirect_buffer_binding;
418
419 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
420 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
421 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
422
423 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
424 u32 enabled_compute_uniform_buffer_mask = 0;
425
426 const UniformBufferSizes* uniform_buffer_sizes{};
427 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
428
429 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
430 std::array<u32, NUM_STAGES> written_storage_buffers{};
431 u32 enabled_compute_storage_buffers = 0;
432 u32 written_compute_storage_buffers = 0;
433
434 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
435 std::array<u32, NUM_STAGES> written_texture_buffers{};
436 std::array<u32, NUM_STAGES> image_texture_buffers{};
437 u32 enabled_compute_texture_buffers = 0;
438 u32 written_compute_texture_buffers = 0;
439 u32 image_compute_texture_buffers = 0;
440
441 std::array<u32, 16> uniform_cache_hits{};
442 std::array<u32, 16> uniform_cache_shots{};
443
444 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
445
446 bool has_deleted_buffers = false;
447 11
448 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 12namespace VideoCommon {
449 dirty_uniform_buffers{};
450 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
451 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
452 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
453 uniform_buffer_binding_sizes{};
454
455 std::vector<BufferId> cached_write_buffer_ids;
456
457 IntervalSet uncommitted_ranges;
458 IntervalSet common_ranges;
459 std::deque<IntervalSet> committed_ranges;
460
461 Common::ScratchBuffer<u8> immediate_buffer_alloc;
462
463 struct LRUItemParams {
464 using ObjectType = BufferId;
465 using TickType = u64;
466 };
467 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
468 u64 frame_tick = 0;
469 u64 total_used_memory = 0;
470 u64 minimum_memory = 0;
471 u64 critical_memory = 0;
472 13
473 std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; 14using Core::Memory::YUZU_PAGESIZE;
474};
475 15
476template <class P> 16template <class P>
477BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, 17BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
478 Core::Memory::Memory& cpu_memory_, Runtime& runtime_) 18 Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
479 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { 19 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
20 rasterizer} {
480 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
481 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
482 common_ranges.clear(); 23 common_ranges.clear();
24 inline_buffer_id = NULL_BUFFER_ID;
25
26 active_async_buffers = !Settings::IsGPULevelHigh();
483 27
484 if (!runtime.CanReportMemoryUsage()) { 28 if (!runtime.CanReportMemoryUsage()) {
485 minimum_memory = DEFAULT_EXPECTED_MEMORY; 29 minimum_memory = DEFAULT_EXPECTED_MEMORY;
@@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() {
531 uniform_cache_hits[0] = 0; 75 uniform_cache_hits[0] = 0;
532 uniform_cache_shots[0] = 0; 76 uniform_cache_shots[0] = 0;
533 77
78 active_async_buffers = !Settings::IsGPULevelHigh();
79
534 const bool skip_preferred = hits * 256 < shots * 251; 80 const bool skip_preferred = hits * 256 < shots * 251;
535 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 81 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
536 82
@@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() {
543 } 89 }
544 ++frame_tick; 90 ++frame_tick;
545 delayed_destruction_ring.Tick(); 91 delayed_destruction_ring.Tick();
92
93 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
94 for (auto& buffer : async_buffers_death_ring) {
95 runtime.FreeDeferredStagingBuffer(buffer);
96 }
97 async_buffers_death_ring.clear();
98 }
546} 99}
547 100
548template <class P> 101template <class P>
549void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 102void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
550 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 103 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
551 buffer.MarkRegionAsCpuModified(cpu_addr, size); 104 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
552 }); 105 const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
106 ClearDownload(subtract_interval);
107 common_ranges.subtract(subtract_interval);
108 }
553} 109}
554 110
555template <class P> 111template <class P>
556void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 112void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
557 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 113 memory_tracker.CachedCpuWrite(cpu_addr, size);
558 if (!buffer.HasCachedWrites()) { 114 const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE),
559 cached_write_buffer_ids.push_back(buffer_id); 115 Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)};
560 } 116 cached_ranges.add(add_interval);
561 buffer.CachedCpuWrite(cpu_addr, size);
562 });
563} 117}
564 118
565template <class P> 119template <class P>
566void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 120void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
121 WaitOnAsyncFlushes(cpu_addr, size);
567 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 122 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
568 DownloadBufferMemory(buffer, cpu_addr, size); 123 DownloadBufferMemory(buffer, cpu_addr, size);
569 }); 124 });
570} 125}
571 126
572template <class P> 127template <class P>
128void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
129 bool must_wait = false;
130 ForEachInOverlapCounter(async_downloads, cpu_addr, size,
131 [&](VAddr, VAddr, int) { must_wait = true; });
132 bool must_release = false;
133 ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
134 if (must_release) {
135 std::function<void()> tmp([]() {});
136 rasterizer.SignalFence(std::move(tmp));
137 }
138 if (must_wait || must_release) {
139 rasterizer.ReleaseFences();
140 }
141}
142
143template <class P>
573void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { 144void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
145 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
574 uncommitted_ranges.subtract(subtract_interval); 146 uncommitted_ranges.subtract(subtract_interval);
147 pending_ranges.subtract(subtract_interval);
575 for (auto& interval_set : committed_ranges) { 148 for (auto& interval_set : committed_ranges) {
576 interval_set.subtract(subtract_interval); 149 interval_set.subtract(subtract_interval);
577 } 150 }
@@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
591 } 164 }
592 165
593 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; 166 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
167 WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
594 ClearDownload(subtract_interval); 168 ClearDownload(subtract_interval);
595 169
596 BufferId buffer_a; 170 BufferId buffer_a;
@@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
616 const VAddr diff = base_address - *cpu_src_address; 190 const VAddr diff = base_address - *cpu_src_address;
617 const VAddr new_base_address = *cpu_dest_address + diff; 191 const VAddr new_base_address = *cpu_dest_address + diff;
618 const IntervalType add_interval{new_base_address, new_base_address + size}; 192 const IntervalType add_interval{new_base_address, new_base_address + size};
619 uncommitted_ranges.add(add_interval);
620 tmp_intervals.push_back(add_interval); 193 tmp_intervals.push_back(add_interval);
194 uncommitted_ranges.add(add_interval);
195 pending_ranges.add(add_interval);
621 }; 196 };
622 ForEachWrittenRange(*cpu_src_address, amount, mirror); 197 ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
623 // This subtraction in this order is important for overlapping copies. 198 // This subtraction in this order is important for overlapping copies.
624 common_ranges.subtract(subtract_interval); 199 common_ranges.subtract(subtract_interval);
625 const bool has_new_downloads = tmp_intervals.size() != 0; 200 const bool has_new_downloads = tmp_intervals.size() != 0;
@@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
628 } 203 }
629 runtime.CopyBuffer(dest_buffer, src_buffer, copies); 204 runtime.CopyBuffer(dest_buffer, src_buffer, copies);
630 if (has_new_downloads) { 205 if (has_new_downloads) {
631 dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); 206 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
632 } 207 }
633 std::vector<u8> tmp_buffer(amount); 208 std::vector<u8> tmp_buffer(amount);
634 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 209 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
@@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
866 441
867template <class P> 442template <class P>
868void BufferCache<P>::FlushCachedWrites() { 443void BufferCache<P>::FlushCachedWrites() {
869 for (const BufferId buffer_id : cached_write_buffer_ids) {
870 slot_buffers[buffer_id].FlushCachedWrites();
871 }
872 cached_write_buffer_ids.clear(); 444 cached_write_buffer_ids.clear();
445 memory_tracker.FlushCachedWrites();
446 cached_ranges.clear();
873} 447}
874 448
875template <class P> 449template <class P>
@@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
879 453
880template <class P> 454template <class P>
881void BufferCache<P>::AccumulateFlushes() { 455void BufferCache<P>::AccumulateFlushes() {
882 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
883 uncommitted_ranges.clear();
884 return;
885 }
886 if (uncommitted_ranges.empty()) { 456 if (uncommitted_ranges.empty()) {
887 return; 457 return;
888 } 458 }
@@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() {
891 461
892template <class P> 462template <class P>
893bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { 463bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
894 return false; 464 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
465 return (!async_buffers.empty() && async_buffers.front().has_value());
466 } else {
467 return false;
468 }
895} 469}
896 470
897template <class P> 471template <class P>
@@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
899 AccumulateFlushes(); 473 AccumulateFlushes();
900 474
901 if (committed_ranges.empty()) { 475 if (committed_ranges.empty()) {
476 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
477 if (active_async_buffers) {
478 async_buffers.emplace_back(std::optional<Async_Buffer>{});
479 }
480 }
902 return; 481 return;
903 } 482 }
904 MICROPROFILE_SCOPE(GPU_DownloadMemory); 483 MICROPROFILE_SCOPE(GPU_DownloadMemory);
905 const bool is_accuracy_normal =
906 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
907 484
485 pending_ranges.clear();
908 auto it = committed_ranges.begin(); 486 auto it = committed_ranges.begin();
909 while (it != committed_ranges.end()) { 487 while (it != committed_ranges.end()) {
910 auto& current_intervals = *it; 488 auto& current_intervals = *it;
@@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
926 const std::size_t size = interval.upper() - interval.lower(); 504 const std::size_t size = interval.upper() - interval.lower();
927 const VAddr cpu_addr = interval.lower(); 505 const VAddr cpu_addr = interval.lower();
928 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 506 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
929 buffer.ForEachDownloadRangeAndClear( 507 const VAddr buffer_start = buffer.CpuAddr();
930 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 508 const VAddr buffer_end = buffer_start + buffer.SizeBytes();
931 if (is_accuracy_normal) { 509 const VAddr new_start = std::max(buffer_start, cpu_addr);
932 return; 510 const VAddr new_end = std::min(buffer_end, cpu_addr + size);
933 } 511 memory_tracker.ForEachDownloadRange(
512 new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
934 const VAddr buffer_addr = buffer.CpuAddr(); 513 const VAddr buffer_addr = buffer.CpuAddr();
935 const auto add_download = [&](VAddr start, VAddr end) { 514 const auto add_download = [&](VAddr start, VAddr end) {
936 const u64 new_offset = start - buffer_addr; 515 const u64 new_offset = start - buffer_addr;
@@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
944 buffer_id, 523 buffer_id,
945 }); 524 });
946 // Align up to avoid cache conflicts 525 // Align up to avoid cache conflicts
947 constexpr u64 align = 8ULL; 526 constexpr u64 align = 64ULL;
948 constexpr u64 mask = ~(align - 1ULL); 527 constexpr u64 mask = ~(align - 1ULL);
949 total_size_bytes += (new_size + align - 1) & mask; 528 total_size_bytes += (new_size + align - 1) & mask;
950 largest_copy = std::max(largest_copy, new_size); 529 largest_copy = std::max(largest_copy, new_size);
951 }; 530 };
952 531
953 const VAddr start_address = buffer_addr + range_offset; 532 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download);
954 const VAddr end_address = start_address + range_size;
955 ForEachWrittenRange(start_address, range_size, add_download);
956 const IntervalType subtract_interval{start_address, end_address};
957 common_ranges.subtract(subtract_interval);
958 }); 533 });
959 }); 534 });
960 } 535 }
961 } 536 }
962 committed_ranges.clear(); 537 committed_ranges.clear();
963 if (downloads.empty()) { 538 if (downloads.empty()) {
539 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
540 if (active_async_buffers) {
541 async_buffers.emplace_back(std::optional<Async_Buffer>{});
542 }
543 }
964 return; 544 return;
965 } 545 }
966 if constexpr (USE_MEMORY_MAPS) { 546 if (active_async_buffers) {
967 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 547 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
968 runtime.PreCopyBarrier(); 548 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
969 for (auto& [copy, buffer_id] : downloads) { 549 boost::container::small_vector<BufferCopy, 4> normalized_copies;
970 // Account for the staging buffer offset in the copy 550 IntervalSet new_async_range{};
971 copy.dst_offset += download_staging.offset; 551 runtime.PreCopyBarrier();
972 const std::array copies{copy}; 552 for (auto& [copy, buffer_id] : downloads) {
973 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); 553 copy.dst_offset += download_staging.offset;
974 } 554 const std::array copies{copy};
975 runtime.PostCopyBarrier(); 555 BufferCopy second_copy{copy};
976 runtime.Finish(); 556 Buffer& buffer = slot_buffers[buffer_id];
977 for (const auto& [copy, buffer_id] : downloads) { 557 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
978 const Buffer& buffer = slot_buffers[buffer_id]; 558 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
979 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 559 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
980 // Undo the modified offset 560 async_downloads += std::make_pair(base_interval, 1);
981 const u64 dst_offset = copy.dst_offset - download_staging.offset; 561 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
982 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 562 normalized_copies.push_back(second_copy);
983 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 563 }
564 runtime.PostCopyBarrier();
565 pending_downloads.emplace_back(std::move(normalized_copies));
566 async_buffers.emplace_back(download_staging);
567 } else {
568 committed_ranges.clear();
569 uncommitted_ranges.clear();
984 } 570 }
985 } else { 571 } else {
986 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 572 if constexpr (USE_MEMORY_MAPS) {
987 for (const auto& [copy, buffer_id] : downloads) { 573 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
988 Buffer& buffer = slot_buffers[buffer_id]; 574 runtime.PreCopyBarrier();
989 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 575 for (auto& [copy, buffer_id] : downloads) {
990 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 576 // Account for the staging buffer offset in the copy
991 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 577 copy.dst_offset += download_staging.offset;
578 const std::array copies{copy};
579 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
580 }
581 runtime.PostCopyBarrier();
582 runtime.Finish();
583 for (const auto& [copy, buffer_id] : downloads) {
584 const Buffer& buffer = slot_buffers[buffer_id];
585 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
586 // Undo the modified offset
587 const u64 dst_offset = copy.dst_offset - download_staging.offset;
588 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
589 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
590 }
591 } else {
592 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
593 for (const auto& [copy, buffer_id] : downloads) {
594 Buffer& buffer = slot_buffers[buffer_id];
595 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
596 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
597 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
598 }
992 } 599 }
993 } 600 }
994} 601}
995 602
996template <class P> 603template <class P>
997void BufferCache<P>::CommitAsyncFlushes() { 604void BufferCache<P>::CommitAsyncFlushes() {
998 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { 605 CommitAsyncFlushesHigh();
999 CommitAsyncFlushesHigh();
1000 } else {
1001 uncommitted_ranges.clear();
1002 committed_ranges.clear();
1003 }
1004} 606}
1005 607
1006template <class P> 608template <class P>
1007void BufferCache<P>::PopAsyncFlushes() {} 609void BufferCache<P>::PopAsyncFlushes() {
610 MICROPROFILE_SCOPE(GPU_DownloadMemory);
611 PopAsyncBuffers();
612}
1008 613
1009template <class P> 614template <class P>
1010bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 615void BufferCache<P>::PopAsyncBuffers() {
1011 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 616 if (async_buffers.empty()) {
1012 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 617 return;
1013 const BufferId image_id = page_table[page]; 618 }
1014 if (!image_id) { 619 if (!async_buffers.front().has_value()) {
1015 ++page; 620 async_buffers.pop_front();
1016 continue; 621 return;
1017 } 622 }
1018 Buffer& buffer = slot_buffers[image_id]; 623 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
1019 if (buffer.IsRegionGpuModified(addr, size)) { 624 auto& downloads = pending_downloads.front();
1020 return true; 625 auto& async_buffer = async_buffers.front();
626 u8* base = async_buffer->mapped_span.data();
627 const size_t base_offset = async_buffer->offset;
628 for (const auto& copy : downloads) {
629 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
630 const u64 dst_offset = copy.dst_offset - base_offset;
631 const u8* read_mapped_memory = base + dst_offset;
632 ForEachInOverlapCounter(
633 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
634 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
635 end - start);
636 if (count == 1) {
637 const IntervalType base_interval{start, end};
638 common_ranges.subtract(base_interval);
639 }
640 });
641 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
642 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
1021 } 643 }
1022 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 644 async_buffers_death_ring.emplace_back(*async_buffer);
1023 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 645 async_buffers.pop_front();
646 pending_downloads.pop_front();
1024 } 647 }
1025 return false; 648}
649
650template <class P>
651bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
652 bool is_dirty = false;
653 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
654 return is_dirty;
1026} 655}
1027 656
1028template <class P> 657template <class P>
1029bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 658bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1030 const VAddr end_addr = addr + size; 659 const VAddr end_addr = addr + size;
1031 const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); 660 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1032 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 661 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
1033 const BufferId buffer_id = page_table[page]; 662 const BufferId buffer_id = page_table[page];
1034 if (!buffer_id) { 663 if (!buffer_id) {
1035 ++page; 664 ++page;
@@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1041 if (buf_start_addr < end_addr && addr < buf_end_addr) { 670 if (buf_start_addr < end_addr && addr < buf_end_addr) {
1042 return true; 671 return true;
1043 } 672 }
1044 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 673 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1045 } 674 }
1046 return false; 675 return false;
1047} 676}
1048 677
1049template <class P> 678template <class P>
1050bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 679bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
1051 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 680 return memory_tracker.IsRegionCpuModified(addr, size);
1052 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
1053 const BufferId image_id = page_table[page];
1054 if (!image_id) {
1055 ++page;
1056 continue;
1057 }
1058 Buffer& buffer = slot_buffers[image_id];
1059 if (buffer.IsRegionCpuModified(addr, size)) {
1060 return true;
1061 }
1062 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
1063 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
1064 }
1065 return false;
1066} 681}
1067 682
1068template <class P> 683template <class P>
@@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
1072 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 687 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
1073 const u32 size = index_buffer.size; 688 const u32 size = index_buffer.size;
1074 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 689 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1075 if (!draw_state.inline_index_draw_indexes.empty()) { 690 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1076 if constexpr (USE_MEMORY_MAPS) { 691 if constexpr (USE_MEMORY_MAPS) {
1077 auto upload_staging = runtime.UploadStagingBuffer(size); 692 auto upload_staging = runtime.UploadStagingBuffer(size);
1078 std::array<BufferCopy, 1> copies{ 693 std::array<BufferCopy, 1> copies{
@@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
1155 TouchBuffer(buffer, binding.buffer_id); 770 TouchBuffer(buffer, binding.buffer_id);
1156 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 771 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
1157 size <= uniform_buffer_skip_cache_size && 772 size <= uniform_buffer_skip_cache_size &&
1158 !buffer.IsRegionGpuModified(cpu_addr, size); 773 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
1159 if (use_fast_buffer) { 774 if (use_fast_buffer) {
1160 if constexpr (IS_OPENGL) { 775 if constexpr (IS_OPENGL) {
1161 if (runtime.HasFastBufferSubData()) { 776 if (runtime.HasFastBufferSubData()) {
@@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() {
1378 // We have to check the dirty flags and the index count 993 // We have to check the dirty flags and the index count
1379 // The index count is currently changed without updating the dirty flags 994 // The index count is currently changed without updating the dirty flags
1380 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 995 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1381 const auto& index_array = draw_state.index_buffer; 996 const auto& index_buffer_ref = draw_state.index_buffer;
1382 auto& flags = maxwell3d->dirty.flags; 997 auto& flags = maxwell3d->dirty.flags;
1383 if (!flags[Dirty::IndexBuffer]) { 998 if (!flags[Dirty::IndexBuffer]) {
1384 return; 999 return;
1385 } 1000 }
1386 flags[Dirty::IndexBuffer] = false; 1001 flags[Dirty::IndexBuffer] = false;
1387 last_index_count = index_array.count; 1002 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1388 if (!draw_state.inline_index_draw_indexes.empty()) {
1389 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); 1003 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size());
1004 u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE);
1005 if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] {
1006 inline_buffer_id = CreateBuffer(0, buffer_size);
1007 }
1008 if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
1009 slot_buffers.erase(inline_buffer_id);
1010 inline_buffer_id = CreateBuffer(0, buffer_size);
1011 }
1390 index_buffer = Binding{ 1012 index_buffer = Binding{
1391 .cpu_addr = 0, 1013 .cpu_addr = 0,
1392 .size = inline_index_size, 1014 .size = inline_index_size,
1393 .buffer_id = CreateBuffer(0, inline_index_size), 1015 .buffer_id = inline_buffer_id,
1394 }; 1016 };
1395 return; 1017 return;
1396 } 1018 }
1397 const GPUVAddr gpu_addr_begin = index_array.StartAddress(); 1019
1398 const GPUVAddr gpu_addr_end = index_array.EndAddress(); 1020 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1021 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1399 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1022 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1400 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1023 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1401 const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); 1024 const u32 draw_size =
1025 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1402 const u32 size = std::min(address_size, draw_size); 1026 const u32 size = std::min(address_size, draw_size);
1403 if (size == 0 || !cpu_addr) { 1027 if (size == 0 || !cpu_addr) {
1404 index_buffer = NULL_BINDING; 1028 index_buffer = NULL_BINDING;
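The rewritten inline-index path above stops creating a fresh buffer on every inline draw: a persistent inline_buffer_id is kept, its size rounded up to whole caching pages, and the buffer is only recreated when a draw needs more space than the current allocation. A minimal sketch of this grow-only scratch-buffer pattern, assuming illustrative types rather than the yuzu runtime:

    #include <cstdint>
    #include <memory>

    constexpr std::uint32_t CACHING_PAGESIZE = 1u << 16; // 64 KiB, as above

    constexpr std::uint32_t AlignUp(std::uint32_t value, std::uint32_t align) {
        return (value + align - 1) & ~(align - 1); // align must be a power of two
    }

    struct Buffer { // stand-in for the real GPU buffer type
        explicit Buffer(std::uint32_t size) : size_bytes{size} {}
        std::uint32_t SizeBytes() const { return size_bytes; }
        std::uint32_t size_bytes;
    };

    class InlineIndexScratch {
    public:
        // Returns a buffer of at least `wanted` bytes, recreating it only on
        // growth; rounding to whole caching pages absorbs small size jitter,
        // so a stream of slightly varying inline draws settles on one allocation.
        Buffer& Obtain(std::uint32_t wanted) {
            const std::uint32_t rounded = AlignUp(wanted, CACHING_PAGESIZE);
            if (!buffer || buffer->SizeBytes() < rounded) {
                buffer = std::make_unique<Buffer>(rounded);
            }
            return *buffer;
        }

    private:
        std::unique_ptr<Buffer> buffer;
    };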
@@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1434 const GPUVAddr gpu_addr_begin = array.Address(); 1058 const GPUVAddr gpu_addr_begin = array.Address();
1435 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1059 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1436 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1060 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1437 u32 address_size = static_cast<u32>( 1061 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1438 std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); 1062 u32 size = address_size; // TODO: Analyze stride and number of vertices
1439 if (array.enable == 0 || address_size == 0 || !cpu_addr) { 1063 if (array.enable == 0 || size == 0 || !cpu_addr) {
1440 vertex_buffers[index] = NULL_BINDING; 1064 vertex_buffers[index] = NULL_BINDING;
1441 return; 1065 return;
1442 } 1066 }
1443 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1067 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1444 address_size = 1068 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1445 static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
1446 } 1069 }
1447 const u32 size = address_size; // TODO: Analyze stride and number of vertices
1448 vertex_buffers[index] = Binding{ 1070 vertex_buffers[index] = Binding{
1449 .cpu_addr = *cpu_addr, 1071 .cpu_addr = *cpu_addr,
1450 .size = size, 1072 .size = size,
@@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
1591 1213
1592template <class P> 1214template <class P>
1593void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1215void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
1594 Buffer& buffer = slot_buffers[buffer_id]; 1216 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
1595 buffer.MarkRegionAsGpuModified(cpu_addr, size); 1217
1218 if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
1219 SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
1220 }
1596 1221
1597 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1222 const IntervalType base_interval{cpu_addr, cpu_addr + size};
1598 common_ranges.add(base_interval); 1223 common_ranges.add(base_interval);
1599
1600 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
1601 if (!is_async) {
1602 return;
1603 }
1604 uncommitted_ranges.add(base_interval); 1224 uncommitted_ranges.add(base_interval);
1225 pending_ranges.add(base_interval);
1605} 1226}
1606 1227
1607template <class P> 1228template <class P>
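MarkWrittenBuffer now records every GPU write unconditionally: common_ranges remembers which CPU ranges the GPU owns, uncommitted_ranges and pending_ranges queue the same interval for the download paths, and a range that is still CPU-dirty is synchronized first so pending CPU data is not lost. The interval sets are boost::icl containers, which coalesce and split ranges automatically; a small self-contained demonstration of the semantics relied on here:

    #include <cstdint>
    #include <boost/icl/interval_set.hpp>

    using IntervalSet = boost::icl::interval_set<std::uint64_t>;
    using IntervalType = IntervalSet::interval_type;

    int main() {
        IntervalSet common_ranges;
        // Two overlapping GPU writes coalesce into a single interval.
        common_ranges.add(IntervalType::right_open(0x1000, 0x3000));
        common_ranges.add(IntervalType::right_open(0x2000, 0x5000));
        // common_ranges == {[0x1000, 0x5000)}
        // Completing a download of the first pages shrinks the bookkeeping.
        common_ranges.subtract(IntervalType::right_open(0x1000, 0x2000));
        // common_ranges == {[0x2000, 0x5000)}
        return common_ranges.iterative_size() == 1 ? 0 : 1;
    }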
@@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
1609 if (cpu_addr == 0) { 1230 if (cpu_addr == 0) {
1610 return NULL_BUFFER_ID; 1231 return NULL_BUFFER_ID;
1611 } 1232 }
1612 const u64 page = cpu_addr >> YUZU_PAGEBITS; 1233 const u64 page = cpu_addr >> CACHING_PAGEBITS;
1613 const BufferId buffer_id = page_table[page]; 1234 const BufferId buffer_id = page_table[page];
1614 if (!buffer_id) { 1235 if (!buffer_id) {
1615 return CreateBuffer(cpu_addr, size); 1236 return CreateBuffer(cpu_addr, size);
@@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1638 .has_stream_leap = has_stream_leap, 1259 .has_stream_leap = has_stream_leap,
1639 }; 1260 };
1640 } 1261 }
1641 for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); 1262 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1642 cpu_addr += YUZU_PAGESIZE) { 1263 cpu_addr += CACHING_PAGESIZE) {
1643 const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; 1264 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS];
1644 if (!overlap_id) { 1265 if (!overlap_id) {
1645 continue; 1266 continue;
1646 } 1267 }
@@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1666 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1287 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1667 has_stream_leap = true; 1288 has_stream_leap = true;
1668 if (expands_right) { 1289 if (expands_right) {
1669 begin -= YUZU_PAGESIZE * 256; 1290 begin -= CACHING_PAGESIZE * 256;
1670 cpu_addr = begin; 1291 cpu_addr = begin;
1671 } 1292 }
1672 if (expands_left) { 1293 if (expands_left) {
1673 end += YUZU_PAGESIZE * 256; 1294 end += CACHING_PAGESIZE * 256;
1674 } 1295 }
1675 } 1296 }
1676 } 1297 }
@@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1690 if (accumulate_stream_score) { 1311 if (accumulate_stream_score) {
1691 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); 1312 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
1692 } 1313 }
1693 std::vector<BufferCopy> copies; 1314 boost::container::small_vector<BufferCopy, 1> copies;
1694 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); 1315 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
1695 overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { 1316 copies.push_back(BufferCopy{
1696 copies.push_back(BufferCopy{ 1317 .src_offset = 0,
1697 .src_offset = begin, 1318 .dst_offset = dst_base_offset,
1698 .dst_offset = dst_base_offset + begin, 1319 .size = overlap.SizeBytes(),
1699 .size = range_size,
1700 });
1701 new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
1702 new_buffer.MarkRegionAsGpuModified(begin, range_size);
1703 }); 1320 });
1704 if (!copies.empty()) { 1321 runtime.CopyBuffer(new_buffer, overlap, copies);
1705 runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); 1322 DeleteBuffer(overlap_id, true);
1706 }
1707 DeleteBuffer(overlap_id);
1708} 1323}
1709 1324
1710template <class P> 1325template <class P>
1711BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1326BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1327 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE);
1328 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE);
1329 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr);
1712 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1330 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1713 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1331 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1714 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1332 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
@@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1718 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1336 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1719 } 1337 }
1720 Register(new_buffer_id); 1338 Register(new_buffer_id);
1721 TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); 1339 TouchBuffer(new_buffer, new_buffer_id);
1722 return new_buffer_id; 1340 return new_buffer_id;
1723} 1341}
1724 1342
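CreateBuffer now snaps every request outward to whole caching pages before resolving overlaps, so any two buffers touching the same 64 KiB page are forced to merge and each page_table entry maps to at most one buffer. A worked sketch of the arithmetic (Common::AlignUp and Common::AlignDown behave like these helpers for power-of-two alignments):

    #include <cassert>
    #include <cstdint>

    constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t align) {
        return value & ~(align - 1);
    }
    constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
        return (value + align - 1) & ~(align - 1);
    }

    int main() {
        constexpr std::uint64_t CACHING_PAGESIZE = 1ULL << 16; // 64 KiB
        const std::uint64_t cpu_addr = 0x12345678;
        const std::uint64_t wanted_end = cpu_addr + 0x100;
        const std::uint64_t begin = AlignDown(cpu_addr, CACHING_PAGESIZE);
        const std::uint64_t end = AlignUp(wanted_end, CACHING_PAGESIZE);
        // The 0x100-byte request grows to one full caching page.
        assert(begin == 0x12340000 && end == 0x12350000);
        return 0;
    }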
@@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1746 } 1364 }
1747 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1365 const VAddr cpu_addr_begin = buffer.CpuAddr();
1748 const VAddr cpu_addr_end = cpu_addr_begin + size; 1366 const VAddr cpu_addr_end = cpu_addr_begin + size;
1749 const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; 1367 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE;
1750 const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); 1368 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE);
1751 for (u64 page = page_begin; page != page_end; ++page) { 1369 for (u64 page = page_begin; page != page_end; ++page) {
1752 if constexpr (insert) { 1370 if constexpr (insert) {
1753 page_table[page] = buffer_id; 1371 page_table[page] = buffer_id;
@@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1766 1384
1767template <class P> 1385template <class P>
1768bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1386bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1769 if (buffer.CpuAddr() == 0) {
1770 return true;
1771 }
1772 return SynchronizeBufferImpl(buffer, cpu_addr, size); 1387 return SynchronizeBufferImpl(buffer, cpu_addr, size);
1773} 1388}
1774 1389
@@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1777 boost::container::small_vector<BufferCopy, 4> copies; 1392 boost::container::small_vector<BufferCopy, 4> copies;
1778 u64 total_size_bytes = 0; 1393 u64 total_size_bytes = 0;
1779 u64 largest_copy = 0; 1394 u64 largest_copy = 0;
1780 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { 1395 VAddr buffer_start = buffer.CpuAddr();
1396 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1781 copies.push_back(BufferCopy{ 1397 copies.push_back(BufferCopy{
1782 .src_offset = total_size_bytes, 1398 .src_offset = total_size_bytes,
1783 .dst_offset = range_offset, 1399 .dst_offset = cpu_addr_out - buffer_start,
1784 .size = range_size, 1400 .size = range_size,
1785 }); 1401 });
1786 total_size_bytes += range_size; 1402 total_size_bytes += range_size;
@@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1795} 1411}
1796 1412
1797template <class P> 1413template <class P>
1414bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1415 boost::container::small_vector<BufferCopy, 4> copies;
1416 u64 total_size_bytes = 0;
1417 u64 largest_copy = 0;
1418 IntervalSet found_sets{};
1419 auto make_copies = [&] {
1420 for (auto& interval : found_sets) {
1421 const std::size_t sub_size = interval.upper() - interval.lower();
1422 const VAddr cpu_addr_ = interval.lower();
1423 copies.push_back(BufferCopy{
1424 .src_offset = total_size_bytes,
1425 .dst_offset = cpu_addr_ - buffer.CpuAddr(),
1426 .size = sub_size,
1427 });
1428 total_size_bytes += sub_size;
1429 largest_copy = std::max(largest_copy, sub_size);
1430 }
1431 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1432 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1433 };
1434 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1435 const VAddr base_adr = cpu_addr_out;
1436 const VAddr end_adr = base_adr + range_size;
1437 const IntervalType add_interval{base_adr, end_adr};
1438 found_sets.add(add_interval);
1439 });
1440 if (found_sets.empty()) {
1441 return true;
1442 }
1443 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1444 auto it = common_ranges.lower_bound(search_interval);
1445 auto it_end = common_ranges.upper_bound(search_interval);
1446 if (it == common_ranges.end()) {
1447 make_copies();
1448 return false;
1449 }
1450 while (it != it_end) {
1451 found_sets.subtract(*it);
1452 it++;
1453 }
1454 make_copies();
1455 return false;
1456}
1457
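The new SynchronizeBufferNoModified collects the CPU-dirty ranges into found_sets and then subtracts every interval of common_ranges that overlaps the query, so bytes the GPU has already written are never clobbered by a stale CPU upload. The subtraction step in isolation, as a self-contained sketch:

    #include <cstdint>
    #include <boost/icl/interval_set.hpp>

    using IntervalSet = boost::icl::interval_set<std::uint64_t>;
    using IntervalType = IntervalSet::interval_type;

    int main() {
        IntervalSet found_sets; // CPU-dirty bytes queued for upload
        found_sets.add(IntervalType::right_open(0x0000, 0x4000));
        IntervalSet common_ranges; // bytes the GPU has written
        common_ranges.add(IntervalType::right_open(0x1000, 0x2000));
        for (const auto& gpu_owned : common_ranges) {
            found_sets.subtract(gpu_owned);
        }
        // found_sets == {[0x0000, 0x1000), [0x2000, 0x4000)}: only these
        // bytes are safe to copy from guest memory to the GPU buffer.
        return found_sets.iterative_size() == 2 ? 0 : 1;
    }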
1458template <class P>
1798void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1459void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1799 std::span<BufferCopy> copies) { 1460 std::span<BufferCopy> copies) {
1800 if constexpr (USE_MEMORY_MAPS) { 1461 if constexpr (USE_MEMORY_MAPS) {
@@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg
1805} 1466}
1806 1467
1807template <class P> 1468template <class P>
1808void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, 1469void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1809 std::span<const BufferCopy> copies) { 1470 [[maybe_unused]] u64 largest_copy,
1810 std::span<u8> immediate_buffer; 1471 [[maybe_unused]] std::span<const BufferCopy> copies) {
1811 for (const BufferCopy& copy : copies) { 1472 if constexpr (!USE_MEMORY_MAPS) {
1812 std::span<const u8> upload_span; 1473 std::span<u8> immediate_buffer;
1813 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1474 for (const BufferCopy& copy : copies) {
1814 if (IsRangeGranular(cpu_addr, copy.size)) { 1475 std::span<const u8> upload_span;
1815 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); 1476 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1816 } else { 1477 if (IsRangeGranular(cpu_addr, copy.size)) {
1817 if (immediate_buffer.empty()) { 1478 upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
1818 immediate_buffer = ImmediateBuffer(largest_copy); 1479 } else {
1480 if (immediate_buffer.empty()) {
1481 immediate_buffer = ImmediateBuffer(largest_copy);
1482 }
1483 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
1484 upload_span = immediate_buffer.subspan(0, copy.size);
1819 } 1485 }
1820 cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 1486 buffer.ImmediateUpload(copy.dst_offset, upload_span);
1821 upload_span = immediate_buffer.subspan(0, copy.size);
1822 } 1487 }
1823 buffer.ImmediateUpload(copy.dst_offset, upload_span);
1824 } 1488 }
1825} 1489}
1826 1490
1827template <class P> 1491template <class P>
1828void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, 1492void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
1829 std::span<BufferCopy> copies) { 1493 [[maybe_unused]] u64 total_size_bytes,
1830 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); 1494 [[maybe_unused]] std::span<BufferCopy> copies) {
1831 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1495 if constexpr (USE_MEMORY_MAPS) {
1832 for (BufferCopy& copy : copies) { 1496 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
1833 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1497 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1834 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1498 for (BufferCopy& copy : copies) {
1835 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1499 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1500 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1501 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
1836 1502
1837 // Apply the staging offset 1503 // Apply the staging offset
1838 copy.src_offset += upload_staging.offset; 1504 copy.src_offset += upload_staging.offset;
1505 }
1506 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1839 } 1507 }
1840 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1841} 1508}
1842 1509
1843template <class P> 1510template <class P>
@@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1847 if (!is_dirty) { 1514 if (!is_dirty) {
1848 return false; 1515 return false;
1849 } 1516 }
1850 if (!IsRegionGpuModified(dest_address, copy_size)) { 1517 VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
1518 VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
1519 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
1851 return false; 1520 return false;
1852 } 1521 }
1853 1522
@@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1886 boost::container::small_vector<BufferCopy, 1> copies; 1555 boost::container::small_vector<BufferCopy, 1> copies;
1887 u64 total_size_bytes = 0; 1556 u64 total_size_bytes = 0;
1888 u64 largest_copy = 0; 1557 u64 largest_copy = 0;
1889 buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { 1558 memory_tracker.ForEachDownloadRangeAndClear(
1890 const VAddr buffer_addr = buffer.CpuAddr(); 1559 cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1891 const auto add_download = [&](VAddr start, VAddr end) { 1560 const VAddr buffer_addr = buffer.CpuAddr();
1892 const u64 new_offset = start - buffer_addr; 1561 const auto add_download = [&](VAddr start, VAddr end) {
1893 const u64 new_size = end - start; 1562 const u64 new_offset = start - buffer_addr;
1894 copies.push_back(BufferCopy{ 1563 const u64 new_size = end - start;
1895 .src_offset = new_offset, 1564 copies.push_back(BufferCopy{
1896 .dst_offset = total_size_bytes, 1565 .src_offset = new_offset,
1897 .size = new_size, 1566 .dst_offset = total_size_bytes,
1898 }); 1567 .size = new_size,
1899 // Align up to avoid cache conflicts 1568 });
1900 constexpr u64 align = 256ULL; 1569 // Align up to avoid cache conflicts
1901 constexpr u64 mask = ~(align - 1ULL); 1570 constexpr u64 align = 64ULL;
1902 total_size_bytes += (new_size + align - 1) & mask; 1571 constexpr u64 mask = ~(align - 1ULL);
1903 largest_copy = std::max(largest_copy, new_size); 1572 total_size_bytes += (new_size + align - 1) & mask;
1904 }; 1573 largest_copy = std::max(largest_copy, new_size);
1905 1574 };
1906 const VAddr start_address = buffer_addr + range_offset; 1575
1907 const VAddr end_address = start_address + range_size; 1576 const VAddr start_address = cpu_addr_out;
1908 ForEachWrittenRange(start_address, range_size, add_download); 1577 const VAddr end_address = start_address + range_size;
1909 const IntervalType subtract_interval{start_address, end_address}; 1578 ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
1910 ClearDownload(subtract_interval); 1579 const IntervalType subtract_interval{start_address, end_address};
1911 common_ranges.subtract(subtract_interval); 1580 ClearDownload(subtract_interval);
1912 }); 1581 common_ranges.subtract(subtract_interval);
1582 });
1913 if (total_size_bytes == 0) { 1583 if (total_size_bytes == 0) {
1914 return; 1584 return;
1915 } 1585 }
@@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
1943} 1613}
1944 1614
1945template <class P> 1615template <class P>
1946void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { 1616void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
1947 const auto scalar_replace = [buffer_id](Binding& binding) { 1617 const auto scalar_replace = [buffer_id](Binding& binding) {
1948 if (binding.buffer_id == buffer_id) { 1618 if (binding.buffer_id == buffer_id) {
1949 binding.buffer_id = BufferId{}; 1619 binding.buffer_id = BufferId{};
@@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {

1962 std::erase(cached_write_buffer_ids, buffer_id); 1632 std::erase(cached_write_buffer_ids, buffer_id);
1963 1633
1964 // Mark the whole buffer as CPU written to stop tracking CPU writes 1634 // Mark the whole buffer as CPU written to stop tracking CPU writes
1965 Buffer& buffer = slot_buffers[buffer_id]; 1635 if (!do_not_mark) {
1966 buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); 1636 Buffer& buffer = slot_buffers[buffer_id];
1637 memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
1638 }
1967 1639
1968 Unregister(buffer_id); 1640 Unregister(buffer_id);
1969 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); 1641 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
@@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
2011 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1683 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
2012 return NULL_BINDING; 1684 return NULL_BINDING;
2013 } 1685 }
2014 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); 1686 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
2015 const Binding binding{ 1687 const Binding binding{
2016 .cpu_addr = *cpu_addr, 1688 .cpu_addr = *cpu_addr,
2017 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), 1689 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
new file mode 100644
index 000000000..656baa550
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -0,0 +1,580 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <array>
8#include <functional>
9#include <memory>
10#include <mutex>
11#include <numeric>
12#include <span>
13#include <unordered_map>
14#include <vector>
15
16#include <boost/container/small_vector.hpp>
17#define BOOST_NO_MT
18#include <boost/pool/detail/mutex.hpp>
19#undef BOOST_NO_MT
20#include <boost/icl/interval.hpp>
21#include <boost/icl/interval_base_set.hpp>
22#include <boost/icl/interval_set.hpp>
23#include <boost/icl/split_interval_map.hpp>
24#include <boost/pool/pool.hpp>
25#include <boost/pool/pool_alloc.hpp>
26#include <boost/pool/poolfwd.hpp>
27
28#include "common/common_types.h"
29#include "common/div_ceil.h"
30#include "common/literals.h"
31#include "common/lru_cache.h"
32#include "common/microprofile.h"
33#include "common/scope_exit.h"
34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h"
39#include "video_core/dirty_flags.h"
40#include "video_core/engines/draw_manager.h"
41#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h"
48
49namespace boost {
50template <typename T>
51class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
52}
53
54namespace VideoCommon {
55
56MICROPROFILE_DECLARE(GPU_PrepareBuffers);
57MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
58MICROPROFILE_DECLARE(GPU_DownloadMemory);
59
60using BufferId = SlotId;
61
62using VideoCore::Surface::PixelFormat;
63using namespace Common::Literals;
64
65constexpr u32 NUM_VERTEX_BUFFERS = 32;
66constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
67constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
68constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
69constexpr u32 NUM_STORAGE_BUFFERS = 16;
70constexpr u32 NUM_TEXTURE_BUFFERS = 16;
71constexpr u32 NUM_STAGES = 5;
72
73using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
74using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
75
76enum class ObtainBufferSynchronize : u32 {
77 NoSynchronize = 0,
78 FullSynchronize = 1,
79 SynchronizeNoDirty = 2,
80};
81
82enum class ObtainBufferOperation : u32 {
83 DoNothing = 0,
84 MarkAsWritten = 1,
85 DiscardWrite = 2,
86 MarkQuery = 3,
87};
88
89template <typename P>
90class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
91 // Page size for caching purposes.
92    // This is unrelated to the CPU page size and can be tuned to whatever size proves optimal.
93 static constexpr u32 CACHING_PAGEBITS = 16;
94 static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
95
96 static constexpr bool IS_OPENGL = P::IS_OPENGL;
97 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
98 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
99 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
100 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
101 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
102 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
103 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
104 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
105 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
106
107 static constexpr BufferId NULL_BUFFER_ID{0};
108
109 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
110 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
111 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
112
113 // Debug Flags.
114
115 static constexpr bool DISABLE_DOWNLOADS = true;
116
117 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
118
119 using Runtime = typename P::Runtime;
120 using Buffer = typename P::Buffer;
121 using Async_Buffer = typename P::Async_Buffer;
122 using MemoryTracker = typename P::MemoryTracker;
123
124 using IntervalCompare = std::less<VAddr>;
125 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>;
126 using IntervalAllocator = boost::fast_pool_allocator<VAddr>;
127 using IntervalSet = boost::icl::interval_set<VAddr>;
128 using IntervalType = typename IntervalSet::interval_type;
129
130 template <typename Type>
131 struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
132 // types
133 typedef counter_add_functor<Type> type;
134 typedef boost::icl::identity_based_inplace_combine<Type> base_type;
135
136 // public member functions
137 void operator()(Type& current, const Type& added) const {
138 current += added;
139 if (current < base_type::identity_element()) {
140 current = base_type::identity_element();
141 }
142 }
143
144 // public static functions
145        static void version(Type&) {}
146 };
147
148 using OverlapCombine = counter_add_functor<int>;
149 using OverlapSection = boost::icl::inter_section<int>;
150 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
151
152 struct Empty {};
153
154 struct OverlapResult {
155 std::vector<BufferId> ids;
156 VAddr begin;
157 VAddr end;
158 bool has_stream_leap = false;
159 };
160
161 struct Binding {
162 VAddr cpu_addr{};
163 u32 size{};
164 BufferId buffer_id;
165 };
166
167 struct TextureBufferBinding : Binding {
168 PixelFormat format;
169 };
170
171 static constexpr Binding NULL_BINDING{
172 .cpu_addr = 0,
173 .size = 0,
174 .buffer_id = NULL_BUFFER_ID,
175 };
176
177public:
178 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
179
180 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
181 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
182
183 void TickFrame();
184
185 void WriteMemory(VAddr cpu_addr, u64 size);
186
187 void CachedWriteMemory(VAddr cpu_addr, u64 size);
188
189 void DownloadMemory(VAddr cpu_addr, u64 size);
190
191 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
192
193 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
194
195 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
196
197 void UpdateGraphicsBuffers(bool is_indexed);
198
199 void UpdateComputeBuffers();
200
201 void BindHostGeometryBuffers(bool is_indexed);
202
203 void BindHostStageBuffers(size_t stage);
204
205 void BindHostComputeBuffers();
206
207 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
208 const UniformBufferSizes* sizes);
209
210 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
211
212 void UnbindGraphicsStorageBuffers(size_t stage);
213
214 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
215 bool is_written);
216
217 void UnbindGraphicsTextureBuffers(size_t stage);
218
219 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
220 PixelFormat format, bool is_written, bool is_image);
221
222 void UnbindComputeStorageBuffers();
223
224 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
225 bool is_written);
226
227 void UnbindComputeTextureBuffers();
228
229 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
230 bool is_written, bool is_image);
231
232 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
233 ObtainBufferSynchronize sync_info,
234 ObtainBufferOperation post_op);
235 void FlushCachedWrites();
236
237 /// Return true when there are uncommitted buffers to be downloaded
238 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
239
240 void AccumulateFlushes();
241
242 /// Return true when the caller should wait for async downloads
243 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
244
245 /// Commit asynchronous downloads
246 void CommitAsyncFlushes();
247 void CommitAsyncFlushesHigh();
248
249 /// Pop asynchronous downloads
250 void PopAsyncFlushes();
251 void PopAsyncBuffers();
252
253 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
254
255 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
256
257 /// Return true when a CPU region is modified from the GPU
258 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
259
260 /// Return true when a region is registered on the cache
261 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
262
263 /// Return true when a CPU region is modified from the CPU
264 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
265
266 void SetDrawIndirect(
267 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
268 current_draw_indirect = current_draw_indirect_;
269 }
270
271 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
272
273 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
274
275 std::recursive_mutex mutex;
276 Runtime& runtime;
277
278private:
279 template <typename Func>
280 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
281 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
282 const int disabled_bits = std::countr_zero(enabled_mask);
283 index += disabled_bits;
284 enabled_mask >>= disabled_bits;
285 func(index);
286 }
287 }
288
289 template <typename Func>
290 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
291 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE);
292 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) {
293 const BufferId buffer_id = page_table[page];
294 if (!buffer_id) {
295 ++page;
296 continue;
297 }
298 Buffer& buffer = slot_buffers[buffer_id];
299 func(buffer_id, buffer);
300
301 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
302 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
303 }
304 }
305
306 template <typename Func>
307 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) {
308 const VAddr start_address = cpu_addr;
309 const VAddr end_address = start_address + size;
310 const IntervalType search_interval{start_address, end_address};
311 auto it = current_range.lower_bound(search_interval);
312 if (it == current_range.end()) {
313 return;
314 }
315 auto end_it = current_range.upper_bound(search_interval);
316 for (; it != end_it; it++) {
317 VAddr inter_addr_end = it->upper();
318 VAddr inter_addr = it->lower();
319 if (inter_addr_end > end_address) {
320 inter_addr_end = end_address;
321 }
322 if (inter_addr < start_address) {
323 inter_addr = start_address;
324 }
325 func(inter_addr, inter_addr_end);
326 }
327 }
328
329 template <typename Func>
330 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
331 Func&& func) {
332 const VAddr start_address = cpu_addr;
333 const VAddr end_address = start_address + size;
334 const IntervalType search_interval{start_address, end_address};
335 auto it = current_range.lower_bound(search_interval);
336 if (it == current_range.end()) {
337 return;
338 }
339 auto end_it = current_range.upper_bound(search_interval);
340 for (; it != end_it; it++) {
341 auto& inter = it->first;
342 VAddr inter_addr_end = inter.upper();
343 VAddr inter_addr = inter.lower();
344 if (inter_addr_end > end_address) {
345 inter_addr_end = end_address;
346 }
347 if (inter_addr < start_address) {
348 inter_addr = start_address;
349 }
350 func(inter_addr, inter_addr_end, it->second);
351 }
352 }
353
354 void RemoveEachInOverlapCounter(OverlapCounter& current_range,
355 const IntervalType search_interval, int subtract_value) {
356 bool any_removals = false;
357 current_range.add(std::make_pair(search_interval, subtract_value));
358 do {
359 any_removals = false;
360 auto it = current_range.lower_bound(search_interval);
361 if (it == current_range.end()) {
362 return;
363 }
364 auto end_it = current_range.upper_bound(search_interval);
365 for (; it != end_it; it++) {
366 if (it->second <= 0) {
367 any_removals = true;
368 current_range.erase(it);
369 break;
370 }
371 }
372 } while (any_removals);
373 }
374
375 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
376 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
377 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
378 }
379
380 void RunGarbageCollector();
381
382 void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size);
383
384 void BindHostIndexBuffer();
385
386 void BindHostVertexBuffers();
387
388 void BindHostDrawIndirectBuffers();
389
390 void BindHostGraphicsUniformBuffers(size_t stage);
391
392 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
393
394 void BindHostGraphicsStorageBuffers(size_t stage);
395
396 void BindHostGraphicsTextureBuffers(size_t stage);
397
398 void BindHostTransformFeedbackBuffers();
399
400 void BindHostComputeUniformBuffers();
401
402 void BindHostComputeStorageBuffers();
403
404 void BindHostComputeTextureBuffers();
405
406 void DoUpdateGraphicsBuffers(bool is_indexed);
407
408 void DoUpdateComputeBuffers();
409
410 void UpdateIndexBuffer();
411
412 void UpdateVertexBuffers();
413
414 void UpdateVertexBuffer(u32 index);
415
416 void UpdateDrawIndirect();
417
418 void UpdateUniformBuffers(size_t stage);
419
420 void UpdateStorageBuffers(size_t stage);
421
422 void UpdateTextureBuffers(size_t stage);
423
424 void UpdateTransformFeedbackBuffers();
425
426 void UpdateTransformFeedbackBuffer(u32 index);
427
428 void UpdateComputeUniformBuffers();
429
430 void UpdateComputeStorageBuffers();
431
432 void UpdateComputeTextureBuffers();
433
434 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
435
436 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
437
438 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
439
440 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
441
442 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
443
444 void Register(BufferId buffer_id);
445
446 void Unregister(BufferId buffer_id);
447
448 template <bool insert>
449 void ChangeRegister(BufferId buffer_id);
450
451 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
452
453 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
454
455 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
456
457 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
458
459 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
460 std::span<BufferCopy> copies);
461
462 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
463 std::span<const BufferCopy> copies);
464
465 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
466
467 void DownloadBufferMemory(Buffer& buffer_id);
468
469 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
470
471 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
472
473 void NotifyBufferDeletion();
474
475 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
476 bool is_written) const;
477
478 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
479 PixelFormat format);
480
481 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
482
483 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
484
485 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
486
487 void ClearDownload(IntervalType subtract_interval);
488
489 VideoCore::RasterizerInterface& rasterizer;
490 Core::Memory::Memory& cpu_memory;
491
492 SlotVector<Buffer> slot_buffers;
493 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
494
495 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
496
497 u32 last_index_count = 0;
498
499 Binding index_buffer;
500 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
501 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
502 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
503 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
504 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
505 Binding count_buffer_binding;
506 Binding indirect_buffer_binding;
507
508 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
509 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
510 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
511
512 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
513 u32 enabled_compute_uniform_buffer_mask = 0;
514
515 const UniformBufferSizes* uniform_buffer_sizes{};
516 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
517
518 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
519 std::array<u32, NUM_STAGES> written_storage_buffers{};
520 u32 enabled_compute_storage_buffers = 0;
521 u32 written_compute_storage_buffers = 0;
522
523 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
524 std::array<u32, NUM_STAGES> written_texture_buffers{};
525 std::array<u32, NUM_STAGES> image_texture_buffers{};
526 u32 enabled_compute_texture_buffers = 0;
527 u32 written_compute_texture_buffers = 0;
528 u32 image_compute_texture_buffers = 0;
529
530 std::array<u32, 16> uniform_cache_hits{};
531 std::array<u32, 16> uniform_cache_shots{};
532
533 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
534
535 bool has_deleted_buffers = false;
536
537 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
538 dirty_uniform_buffers{};
539 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
540 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
541 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
542 uniform_buffer_binding_sizes{};
543
544 std::vector<BufferId> cached_write_buffer_ids;
545
546 MemoryTracker memory_tracker;
547 IntervalSet uncommitted_ranges;
548 IntervalSet common_ranges;
549 IntervalSet cached_ranges;
550 IntervalSet pending_ranges;
551 std::deque<IntervalSet> committed_ranges;
552
553 // Async Buffers
554 OverlapCounter async_downloads;
555 std::deque<std::optional<Async_Buffer>> async_buffers;
556 std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
557 std::optional<Async_Buffer> current_buffer;
558
559 std::deque<Async_Buffer> async_buffers_death_ring;
560
561 size_t immediate_buffer_capacity = 0;
562 Common::ScratchBuffer<u8> immediate_buffer_alloc;
563
564 struct LRUItemParams {
565 using ObjectType = BufferId;
566 using TickType = u64;
567 };
568 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
569 u64 frame_tick = 0;
570 u64 total_used_memory = 0;
571 u64 minimum_memory = 0;
572 u64 critical_memory = 0;
573 BufferId inline_buffer_id;
574
575 bool active_async_buffers = false;
576
577 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
578};
579
580} // namespace VideoCommon
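A quick sanity check of the page_table declaration closing the class above: 39 bits of guest address space over 64 KiB caching pages yields 2^23 entries, a flat 32 MiB array (assuming BufferId remains a 4-byte SlotId), bought in exchange for branch-free O(1) address-to-buffer lookup:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint32_t CACHING_PAGEBITS = 16;      // 64 KiB pages
        constexpr std::uint64_t ADDRESS_SPACE = 1ULL << 39; // 512 GiB of VA
        constexpr std::uint64_t ENTRIES = ADDRESS_SPACE >> CACHING_PAGEBITS;
        static_assert(ENTRIES == 8388608); // 2^23
        const std::uint64_t cpu_addr = 0x12345678;
        // At 4 bytes per entry the table occupies 32 MiB.
        std::printf("table: %llu MiB, page index for %#llx: %llu\n",
                    static_cast<unsigned long long>(ENTRIES * 4 >> 20),
                    static_cast<unsigned long long>(cpu_addr),
                    static_cast<unsigned long long>(cpu_addr >> CACHING_PAGEBITS));
        return 0;
    }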
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
new file mode 100644
index 000000000..4bc59017f
--- /dev/null
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -0,0 +1,271 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <deque>
9#include <limits>
10#include <type_traits>
11#include <unordered_set>
12#include <utility>
13
14#include "common/alignment.h"
15#include "common/common_types.h"
16#include "video_core/buffer_cache/word_manager.h"
17
18namespace VideoCommon {
19
20template <class RasterizerInterface>
21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39;
23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>;
30
31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {}
33 ~MemoryTrackerBase() = default;
34
35    /// Returns the inclusive CPU modified range in a begin-end pair
36 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
37 u64 query_size) noexcept {
38 return IteratePairs<true>(
39 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
40 return manager->template ModifiedRegion<Type::CPU>(offset, size);
41 });
42 }
43
44    /// Returns the inclusive GPU modified range in a begin-end pair
45 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
46 u64 query_size) noexcept {
47 return IteratePairs<false>(
48 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
49 return manager->template ModifiedRegion<Type::GPU>(offset, size);
50 });
51 }
52
53 /// Returns true if a region has been modified from the CPU
54 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
55 return IteratePages<true>(
56 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
57 return manager->template IsRegionModified<Type::CPU>(offset, size);
58 });
59 }
60
61 /// Returns true if a region has been modified from the GPU
62 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
63 return IteratePages<false>(
64 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
65 return manager->template IsRegionModified<Type::GPU>(offset, size);
66 });
67 }
68
69 /// Mark region as CPU modified, notifying the rasterizer about this change
70 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
71 IteratePages<true>(dirty_cpu_addr, query_size,
72 [](Manager* manager, u64 offset, size_t size) {
73 manager->template ChangeRegionState<Type::CPU, true>(
74 manager->GetCpuAddr() + offset, size);
75 });
76 }
77
78 /// Unmark region as CPU modified, notifying the rasterizer about this change
79 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
80 IteratePages<true>(dirty_cpu_addr, query_size,
81 [](Manager* manager, u64 offset, size_t size) {
82 manager->template ChangeRegionState<Type::CPU, false>(
83 manager->GetCpuAddr() + offset, size);
84 });
85 }
86
87 /// Mark region as modified from the host GPU
88 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
89 IteratePages<true>(dirty_cpu_addr, query_size,
90 [](Manager* manager, u64 offset, size_t size) {
91 manager->template ChangeRegionState<Type::GPU, true>(
92 manager->GetCpuAddr() + offset, size);
93 });
94 }
95
96 /// Unmark region as modified from the host GPU
97 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
98 IteratePages<true>(dirty_cpu_addr, query_size,
99 [](Manager* manager, u64 offset, size_t size) {
100 manager->template ChangeRegionState<Type::GPU, false>(
101 manager->GetCpuAddr() + offset, size);
102 });
103 }
104
105    /// Mark region as modified from the CPU,
106    /// but defer the marking until FlushCachedWrites is called.
107 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) {
108 IteratePages<true>(
109 dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) {
110 const VAddr cpu_address = manager->GetCpuAddr() + offset;
111 manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size);
112 cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS));
113 });
114 }
115
116    /// Flushes cached CPU writes and notifies the rasterizer about the deltas
117 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
118 IteratePages<false>(query_cpu_addr, query_size,
119 [](Manager* manager, [[maybe_unused]] u64 offset,
120 [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); });
121 }
122
123 void FlushCachedWrites() noexcept {
124 for (auto id : cached_pages) {
125 top_tier[id]->FlushCachedWrites();
126 }
127 cached_pages.clear();
128 }
129
130 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
131 template <typename Func>
132 void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
133 IteratePages<true>(query_cpu_range, query_size,
134 [&func](Manager* manager, u64 offset, size_t size) {
135 manager->template ForEachModifiedRange<Type::CPU, true>(
136 manager->GetCpuAddr() + offset, size, func);
137 });
138 }
139
140 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
141 template <typename Func>
142 void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) {
143 IteratePages<false>(query_cpu_range, query_size,
144 [&func, clear](Manager* manager, u64 offset, size_t size) {
145 if (clear) {
146 manager->template ForEachModifiedRange<Type::GPU, true>(
147 manager->GetCpuAddr() + offset, size, func);
148 } else {
149 manager->template ForEachModifiedRange<Type::GPU, false>(
150 manager->GetCpuAddr() + offset, size, func);
151 }
152 });
153 }
154
155 template <typename Func>
156 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) {
157 IteratePages<false>(query_cpu_range, query_size,
158 [&func](Manager* manager, u64 offset, size_t size) {
159 manager->template ForEachModifiedRange<Type::GPU, true>(
160 manager->GetCpuAddr() + offset, size, func);
161 });
162 }
163
164private:
165 template <bool create_region_on_fail, typename Func>
166 bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
167 using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
168 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
169 std::size_t remaining_size{size};
170 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
171 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
172 while (remaining_size > 0) {
173 const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
174 auto* manager{top_tier[page_index]};
175 if (manager) {
176 if constexpr (BOOL_BREAK) {
177 if (func(manager, page_offset, copy_amount)) {
178 return true;
179 }
180 } else {
181 func(manager, page_offset, copy_amount);
182 }
183 } else if constexpr (create_region_on_fail) {
184 CreateRegion(page_index);
185 manager = top_tier[page_index];
186 if constexpr (BOOL_BREAK) {
187 if (func(manager, page_offset, copy_amount)) {
188 return true;
189 }
190 } else {
191 func(manager, page_offset, copy_amount);
192 }
193 }
194 page_index++;
195 page_offset = 0;
196 remaining_size -= copy_amount;
197 }
198 return false;
199 }
200
201 template <bool create_region_on_fail, typename Func>
202 std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) {
203 std::size_t remaining_size{size};
204 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
205 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
206 u64 begin = std::numeric_limits<u64>::max();
207 u64 end = 0;
208 while (remaining_size > 0) {
209 const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
210 auto* manager{top_tier[page_index]};
211 const auto execute = [&] {
212 auto [new_begin, new_end] = func(manager, page_offset, copy_amount);
213 if (new_begin != 0 || new_end != 0) {
214 const u64 base_address = page_index << HIGHER_PAGE_BITS;
215 begin = std::min(new_begin + base_address, begin);
216 end = std::max(new_end + base_address, end);
217 }
218 };
219 if (manager) {
220 execute();
221 } else if constexpr (create_region_on_fail) {
222 CreateRegion(page_index);
223 manager = top_tier[page_index];
224 execute();
225 }
226 page_index++;
227 page_offset = 0;
228 remaining_size -= copy_amount;
229 }
230 if (begin < end) {
231 return std::make_pair(begin, end);
232 } else {
233 return std::make_pair(0ULL, 0ULL);
234 }
235 }
236
237 void CreateRegion(std::size_t page_index) {
238 const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
239 top_tier[page_index] = GetNewManager(base_cpu_addr);
240 }
241
242    Manager* GetNewManager(VAddr base_cpu_address) {
243        const auto on_return = [&] {
244            auto* new_manager = free_managers.front();
245            new_manager->SetCpuAddress(base_cpu_address);
246 free_managers.pop_front();
247 return new_manager;
248 };
249 if (!free_managers.empty()) {
250 return on_return();
251 }
252 manager_pool.emplace_back();
253 auto& last_pool = manager_pool.back();
254 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
255 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE);
256 free_managers.push_back(&last_pool[i]);
257 }
258 return on_return();
259 }
260
261 std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
262 std::deque<Manager*> free_managers;
263
264 std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
265
266 std::unordered_set<u32> cached_pages;
267
268 RasterizerInterface* rasterizer = nullptr;
269};
270
271} // namespace VideoCommon
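The tracker above is a two-tier structure: a flat directory of 2^(39-22) = 131072 top-tier pointers, each covering one 4 MiB span (HIGHER_PAGE_BITS = 22), with the per-page dirty bitmaps allocated lazily on first touch. The real implementation recycles WordManagers from a pooled free list in batches of MANAGER_POOL_SIZE; this sketch simplifies the allocation to unique_ptr to show just the directory shape:

    #include <array>
    #include <cstdint>
    #include <memory>

    constexpr std::size_t MAX_CPU_PAGE_BITS = 39;
    constexpr std::size_t HIGHER_PAGE_BITS = 22; // 4 MiB regions
    constexpr std::size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);

    struct Region { // stand-in for WordManager
        std::uint64_t base_addr;
    };

    class TwoTierDirectory {
    public:
        Region& GetOrCreate(std::uint64_t cpu_addr) {
            const std::size_t index = cpu_addr >> HIGHER_PAGE_BITS;
            auto& slot = top_tier[index];
            if (!slot) { // regions are created on first touch only
                slot = std::make_unique<Region>(
                    Region{static_cast<std::uint64_t>(index) << HIGHER_PAGE_BITS});
            }
            return *slot;
        }

    private:
        // 131072 pointers (~1 MiB); dirty-bit storage itself stays lazy.
        std::array<std::unique_ptr<Region>, NUM_HIGH_PAGES> top_tier{};
    };

    int main() {
        auto directory = std::make_unique<TwoTierDirectory>();
        return directory->GetOrCreate(0x12345678).base_addr == 0x12000000 ? 0 : 1;
    }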
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
new file mode 100644
index 000000000..a42455045
--- /dev/null
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -0,0 +1,462 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <limits>
9#include <span>
10#include <utility>
11
12#include "common/alignment.h"
13#include "common/common_funcs.h"
14#include "common/common_types.h"
15#include "common/div_ceil.h"
16#include "core/memory.h"
17
18namespace VideoCommon {
19
20constexpr u64 PAGES_PER_WORD = 64;
21constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
22constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
23
24enum class Type {
25 CPU,
26 GPU,
27 CachedCPU,
28 Untracked,
29};
30
31/// Tightly packed vector tracking modified pages, with a small-vector optimization
32template <size_t stack_words = 1>
33struct WordsArray {
34 /// Returns the pointer to the words state
35 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
36 return is_short ? stack.data() : heap;
37 }
38
39 /// Returns the pointer to the words state
40 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
41 return is_short ? stack.data() : heap;
42 }
43
44    std::array<u64, stack_words> stack{}; ///< Storage for small buffers
45    u64* heap;                            ///< Pointer to the storage of non-small buffers
46};
47
48template <size_t stack_words = 1>
49struct Words {
50 explicit Words() = default;
51 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
52 num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
53 if (IsShort()) {
54 cpu.stack.fill(~u64{0});
55 gpu.stack.fill(0);
56 cached_cpu.stack.fill(0);
57 untracked.stack.fill(~u64{0});
58 } else {
59 // Share allocation between CPU and GPU pages and set their default values
60 u64* const alloc = new u64[num_words * 4];
61 cpu.heap = alloc;
62 gpu.heap = alloc + num_words;
63 cached_cpu.heap = alloc + num_words * 2;
64 untracked.heap = alloc + num_words * 3;
65 std::fill_n(cpu.heap, num_words, ~u64{0});
66 std::fill_n(gpu.heap, num_words, 0);
67 std::fill_n(cached_cpu.heap, num_words, 0);
68 std::fill_n(untracked.heap, num_words, ~u64{0});
69 }
70        // Clean up trailing bits
71 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
72 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
73 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
74 const u64 last_word = (~u64{0} << shift) >> shift;
75 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
76 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
77 }
78
79 ~Words() {
80 Release();
81 }
82
83 Words& operator=(Words&& rhs) noexcept {
84 Release();
85 size_bytes = rhs.size_bytes;
86 num_words = rhs.num_words;
87 cpu = rhs.cpu;
88 gpu = rhs.gpu;
89 cached_cpu = rhs.cached_cpu;
90 untracked = rhs.untracked;
91 rhs.cpu.heap = nullptr;
92 return *this;
93 }
94
95 Words(Words&& rhs) noexcept
96 : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
97 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
98 rhs.cpu.heap = nullptr;
99 }
100
101 Words& operator=(const Words&) = delete;
102 Words(const Words&) = delete;
103
104 /// Returns true when the buffer fits in the small vector optimization
105 [[nodiscard]] bool IsShort() const noexcept {
106 return num_words <= stack_words;
107 }
108
110    /// Returns the number of words in the buffer
110 [[nodiscard]] size_t NumWords() const noexcept {
111 return num_words;
112 }
113
114 /// Release buffer resources
115 void Release() {
116 if (!IsShort()) {
117            // The CPU words pointer is the base of the shared heap allocation
118 delete[] cpu.heap;
119 }
120 }
121
122 template <Type type>
123 std::span<u64> Span() noexcept {
124 if constexpr (type == Type::CPU) {
125 return std::span<u64>(cpu.Pointer(IsShort()), num_words);
126 } else if constexpr (type == Type::GPU) {
127 return std::span<u64>(gpu.Pointer(IsShort()), num_words);
128 } else if constexpr (type == Type::CachedCPU) {
129 return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words);
130 } else if constexpr (type == Type::Untracked) {
131 return std::span<u64>(untracked.Pointer(IsShort()), num_words);
132 }
133 }
134
135 template <Type type>
136 std::span<const u64> Span() const noexcept {
137 if constexpr (type == Type::CPU) {
138 return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
139 } else if constexpr (type == Type::GPU) {
140 return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
141 } else if constexpr (type == Type::CachedCPU) {
142 return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words);
143 } else if constexpr (type == Type::Untracked) {
144 return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
145 }
146 }
147
148 u64 size_bytes = 0;
149 size_t num_words = 0;
150 WordsArray<stack_words> cpu;
151 WordsArray<stack_words> gpu;
152 WordsArray<stack_words> cached_cpu;
153 WordsArray<stack_words> untracked;
154};
155
156template <class RasterizerInterface, size_t stack_words = 1>
157class WordManager {
158public:
159 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes)
160 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {}
161
162 explicit WordManager() = default;
163
164 void SetCpuAddress(VAddr new_cpu_addr) {
165 cpu_addr = new_cpu_addr;
166 }
167
168 VAddr GetCpuAddr() const {
169 return cpu_addr;
170 }
171
172 static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
173 constexpr size_t number_bits = sizeof(u64) * 8;
174 const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
175 u64 bits = (word >> page_start) << page_start;
176 bits = (bits << limit_page_end) >> limit_page_end;
177 return bits;
178 }
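// Keeps only the bits in [page_start, page_end); two worked examples:
//   ExtractBits(~u64{0}, 2, 5) == 0b1'1100 (pages 2, 3 and 4 survive)
//   ExtractBits(0xFF, 0, 4) == 0x0F (pages 4 through 7 are masked off)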
179
180 static std::pair<size_t, size_t> GetWordPage(VAddr address) {
181 const size_t converted_address = static_cast<size_t>(address);
182 const size_t word_number = converted_address / BYTES_PER_WORD;
183 const size_t amount_pages = converted_address % BYTES_PER_WORD;
184 return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
185 }
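// With 4 KiB pages and 64-page words, GetWordPage(0x41000) == {1, 1}:
// byte 0x41000 lies one page into the second 256 KiB word.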
186
187 template <typename Func>
188 void IterateWords(size_t offset, size_t size, Func&& func) const {
189 using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
190 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
191 const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
192 const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
193 if (start >= SizeBytes() || end <= start) {
194 return;
195 }
196 auto [start_word, start_page] = GetWordPage(start);
197 auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
198 const size_t num_words = NumWords();
199 start_word = std::min(start_word, num_words);
200 end_word = std::min(end_word, num_words);
201 const size_t diff = end_word - start_word;
202 end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
203 end_word = std::min(end_word, num_words);
204 end_page += diff * PAGES_PER_WORD;
205 constexpr u64 base_mask{~0ULL};
206 for (size_t word_index = start_word; word_index < end_word; word_index++) {
207 const u64 mask = ExtractBits(base_mask, start_page, end_page);
208 start_page = 0;
209 end_page -= PAGES_PER_WORD;
210 if constexpr (BOOL_BREAK) {
211 if (func(word_index, mask)) {
212 return;
213 }
214 } else {
215 func(word_index, mask);
216 }
217 }
218 }
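// e.g. IterateWords(BYTES_PER_PAGE, 2 * BYTES_PER_PAGE, func) makes the single call
// func(0, 0b110): word 0, with pages 1 and 2 of that word selected in the mask.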
219
220 template <typename Func>
221 void IteratePages(u64 mask, Func&& func) const {
222 size_t offset = 0;
223 while (mask != 0) {
224 const size_t empty_bits = std::countr_zero(mask);
225 offset += empty_bits;
226 mask = mask >> empty_bits;
227
228 const size_t continuous_bits = std::countr_one(mask);
229 func(offset, continuous_bits);
230 mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
231 offset += continuous_bits;
232 }
233 }
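// e.g. for mask == 0b1'1100'0110 the callback sees each run of set bits in order:
// func(1, 2) followed by func(6, 3).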
234
235 /**
236 * Change the state of a range of pages
237 *
238 * @param dirty_addr Base address to mark or unmark as modified
239 * @param size Size in bytes to mark or unmark as modified
240 */
241 template <Type type, bool enable>
242 void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
243 std::span<u64> state_words = words.template Span<type>();
244 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
245 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
246 IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
247 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
248 NotifyRasterizer<!enable>(index, untracked_words[index], mask);
249 }
250 if constexpr (enable) {
251 state_words[index] |= mask;
252 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
253 untracked_words[index] |= mask;
254 }
255 if constexpr (type == Type::CPU) {
256 cached_words[index] &= ~mask;
257 }
258 } else {
259 if constexpr (type == Type::CPU) {
260 const u64 word = state_words[index] & mask;
261 cached_words[index] &= ~word;
262 }
263 state_words[index] &= ~mask;
264 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
265 untracked_words[index] &= ~mask;
266 }
267 }
268 });
269 }
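// e.g. marking one page as CPU-modified sets its CPU and untracked bits, clears its
// cached-CPU bit, and reports a -1 delta to the rasterizer if the page was tracked:
//   manager.ChangeRegionState<Type::CPU, true>(cpu_addr, BYTES_PER_PAGE);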
270
271 /**
272 * Loop over each modified page in the given range; when clear is set, turn off those
273 * bits and notify the rasterizer as needed. Call the given function on each such range.
274 *
275 * @param query_cpu_range Base CPU address to loop over
276 * @param size Size in bytes of the CPU range to loop over
277 * @param func Function to call for each modified region
278 */
279 template <Type type, bool clear, typename Func>
280 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
281 static_assert(type != Type::Untracked);
282
283 std::span<u64> state_words = words.template Span<type>();
284 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
285 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
286 const size_t offset = query_cpu_range - cpu_addr;
287 bool pending = false;
288 size_t pending_offset{};
289 size_t pending_pointer{};
290 const auto release = [&]() {
291 func(cpu_addr + pending_offset * BYTES_PER_PAGE,
292 (pending_pointer - pending_offset) * BYTES_PER_PAGE);
293 };
294 IterateWords(offset, size, [&](size_t index, u64 mask) {
295 const u64 word = state_words[index] & mask;
296 if constexpr (clear) {
297 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
298 NotifyRasterizer<true>(index, untracked_words[index], mask);
299 }
300 state_words[index] &= ~mask;
301 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
302 untracked_words[index] &= ~mask;
303 }
304 if constexpr (type == Type::CPU) {
305 cached_words[index] &= ~word;
306 }
307 }
308 const size_t base_offset = index * PAGES_PER_WORD;
309 IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
310 const auto reset = [&]() {
311 pending_offset = base_offset + pages_offset;
312 pending_pointer = base_offset + pages_offset + pages_size;
313 };
314 if (!pending) {
315 reset();
316 pending = true;
317 return;
318 }
319 if (pending_pointer == base_offset + pages_offset) {
320 pending_pointer += pages_size;
321 return;
322 }
323 release();
324 reset();
325 });
326 });
327 if (pending) {
328 release();
329 }
330 }
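// A typical flush visits the dirty GPU ranges and clears them in the same pass;
// DownloadRange here is a hypothetical callback, not part of this header:
//   manager.ForEachModifiedRange<Type::GPU, true>(addr, size, [&](VAddr start, u64 bytes) {
//       DownloadRange(start, bytes);
//   });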
331
332 /**
333 * Returns true when a region has been modified
334 *
335 * @param offset Offset in bytes from the start of the buffer
336 * @param size Size in bytes of the region to query for modifications
337 */
338 template <Type type>
339 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
340 static_assert(type != Type::Untracked);
341
342 const std::span<const u64> state_words = words.template Span<type>();
343 bool result = false;
344 IterateWords(offset, size, [&](size_t index, u64 mask) {
345 const u64 word = state_words[index] & mask;
346 if (word != 0) {
347 result = true;
348 return true;
349 }
350 return false;
351 });
352 return result;
353 }
354
355 /**
356 * Returns a pair of byte offsets [begin, end) bounding the modified region
357 *
358 * @param offset Offset in bytes from the start of the buffer
359 * @param size Size in bytes of the region to query for modifications
360 */
361 template <Type type>
362 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
363 static_assert(type != Type::Untracked);
364 const std::span<const u64> state_words = words.template Span<type>();
365 u64 begin = std::numeric_limits<u64>::max();
366 u64 end = 0;
367 IterateWords(offset, size, [&](size_t index, u64 mask) {
368 const u64 word = state_words[index] & mask;
369 if (word == 0) {
370 return;
371 }
372 const u64 local_page_begin = std::countr_zero(word);
373 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
374 const u64 page_index = index * PAGES_PER_WORD;
375 begin = std::min(begin, page_index + local_page_begin);
376 end = page_index + local_page_end;
377 });
378 static constexpr std::pair<u64, u64> EMPTY{0, 0};
379 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
380 }
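// e.g. if only page 3 of the buffer is dirty inside the queried range, this returns
// {3 * BYTES_PER_PAGE, 4 * BYTES_PER_PAGE}; a clean range yields {0, 0}.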
381
382 /// Returns the number of state words tracked by the manager
383 [[nodiscard]] size_t NumWords() const noexcept {
384 return words.NumWords();
385 }
386
387 /// Returns the size in bytes of the tracked buffer
388 [[nodiscard]] u64 SizeBytes() const noexcept {
389 return words.size_bytes;
390 }
391
392 /// Returns true when the buffer fits in the small vector optimization
393 [[nodiscard]] bool IsShort() const noexcept {
394 return words.IsShort();
395 }
396
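/// Flush deferred cached-CPU writes: each cached bit becomes CPU-dirty and untracked,
/// and the rasterizer stops counting the affected pages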
397 void FlushCachedWrites() noexcept {
398 const u64 num_words = NumWords();
399 u64* const cached_words = Array<Type::CachedCPU>();
400 u64* const untracked_words = Array<Type::Untracked>();
401 u64* const cpu_words = Array<Type::CPU>();
402 for (u64 word_index = 0; word_index < num_words; ++word_index) {
403 const u64 cached_bits = cached_words[word_index];
404 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
405 untracked_words[word_index] |= cached_bits;
406 cpu_words[word_index] |= cached_bits;
407 cached_words[word_index] = 0;
408 }
409 }
410
411private:
412 template <Type type>
413 u64* Array() noexcept {
414 if constexpr (type == Type::CPU) {
415 return words.cpu.Pointer(IsShort());
416 } else if constexpr (type == Type::GPU) {
417 return words.gpu.Pointer(IsShort());
418 } else if constexpr (type == Type::CachedCPU) {
419 return words.cached_cpu.Pointer(IsShort());
420 } else if constexpr (type == Type::Untracked) {
421 return words.untracked.Pointer(IsShort());
422 }
423 }
424
425 template <Type type>
426 const u64* Array() const noexcept {
427 if constexpr (type == Type::CPU) {
428 return words.cpu.Pointer(IsShort());
429 } else if constexpr (type == Type::GPU) {
430 return words.gpu.Pointer(IsShort());
431 } else if constexpr (type == Type::CachedCPU) {
432 return words.cached_cpu.Pointer(IsShort());
433 } else if constexpr (type == Type::Untracked) {
434 return words.untracked.Pointer(IsShort());
435 }
436 }
437
438 /**
439 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
440 *
441 * @param word_index Index of the word to report to the rasterizer
442 * @param current_bits Current state of the word
443 * @param new_bits New state of the word
444 *
445 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
446 */
447 template <bool add_to_rasterizer>
448 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
449 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
450 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
451 IteratePages(changed_bits, [&](size_t offset, size_t size) {
452 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
453 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1);
454 });
455 }
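// With add_to_rasterizer == true, changed_bits keeps only pages that were untracked and
// are now becoming tracked, reported with a +1 delta; the false case is the mirror
// image, reporting -1 for pages that were tracked and are now being released.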
456
457 VAddr cpu_addr = 0;
458 RasterizerInterface* rasterizer = nullptr;
459 Words<stack_words> words;
460};
461
462} // namespace VideoCommon
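The whole tracker is driven through a single rasterizer hook, UpdatePagesCachedCount. Below is a minimal usage sketch, assuming the Type enum and the BYTES_PER_PAGE/BYTES_PER_WORD constants declared earlier in word_manager.h; NullRasterizer, TrackerSketch and the upload lambda are illustrative stand-ins, not part of this change:

#include "video_core/buffer_cache/word_manager.h"

struct NullRasterizer {
    // The only interface WordManager requires: page runs arrive with a +1/-1 delta.
    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
};

void TrackerSketch() {
    NullRasterizer rasterizer;
    VideoCommon::WordManager<NullRasterizer> manager(0x1000'0000, rasterizer,
                                                     4 * VideoCommon::BYTES_PER_WORD);
    // A guest write marks the page CPU-dirty and untracked:
    manager.ChangeRegionState<VideoCommon::Type::CPU, true>(0x1000'0000,
                                                            VideoCommon::BYTES_PER_PAGE);
    // Uploading consumes the dirty range and re-tracks the pages behind it:
    manager.ForEachModifiedRange<VideoCommon::Type::CPU, true>(
        0x1000'0000, VideoCommon::BYTES_PER_PAGE,
        [](VAddr start, u64 bytes) { /* copy [start, start + bytes) to the GPU */ });
}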
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..18d3c3ac0 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -8,6 +8,7 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/buffer_cache/memory_tracker_base.h"
11#include "video_core/rasterizer_interface.h" 12#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_opengl/gl_device.h" 13#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -200,6 +201,8 @@ private:
200struct BufferCacheParams { 201struct BufferCacheParams {
201 using Runtime = OpenGL::BufferCacheRuntime; 202 using Runtime = OpenGL::BufferCacheRuntime;
202 using Buffer = OpenGL::Buffer; 203 using Buffer = OpenGL::Buffer;
204 using Async_Buffer = u32;
205 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
203 206
204 static constexpr bool IS_OPENGL = true; 207 static constexpr bool IS_OPENGL = true;
205 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; 208 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
@@ -208,6 +211,7 @@ struct BufferCacheParams {
208 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 211 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
209 static constexpr bool USE_MEMORY_MAPS = false; 212 static constexpr bool USE_MEMORY_MAPS = false;
210 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; 213 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
214 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
211}; 215};
212 216
213using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 217using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
new file mode 100644
index 000000000..f15ae8e25
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_opengl/gl_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>;
9}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 9cbcb3c8f..510602e8e 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
314 return staging_pool.Request(size, MemoryUsage::Upload); 314 return staging_pool.Request(size, MemoryUsage::Upload);
315} 315}
316 316
317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { 317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
318 return staging_pool.Request(size, MemoryUsage::Download); 318 return staging_pool.Request(size, MemoryUsage::Download, deferred);
319}
320
321void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
322 staging_pool.FreeDeferred(ref);
319} 323}
320 324
321u64 BufferCacheRuntime::GetDeviceLocalMemory() const { 325u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..879f1ed94 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,7 +3,8 @@
3 3
4#pragma once 4#pragma once
5 5
6#include "video_core/buffer_cache/buffer_cache.h" 6#include "video_core/buffer_cache/buffer_cache_base.h"
7#include "video_core/buffer_cache/memory_tracker_base.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
9#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -75,7 +76,9 @@ public:
75 76
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 77 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 78
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 79 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
80
81 void FreeDeferredStagingBuffer(StagingBufferRef& ref);
79 82
80 void PreCopyBarrier(); 83 void PreCopyBarrier();
81 84
@@ -142,6 +145,8 @@ private:
142struct BufferCacheParams { 145struct BufferCacheParams {
143 using Runtime = Vulkan::BufferCacheRuntime; 146 using Runtime = Vulkan::BufferCacheRuntime;
144 using Buffer = Vulkan::Buffer; 147 using Buffer = Vulkan::Buffer;
148 using Async_Buffer = Vulkan::StagingBufferRef;
149 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
145 150
146 static constexpr bool IS_OPENGL = false; 151 static constexpr bool IS_OPENGL = false;
147 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; 152 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
@@ -150,6 +155,7 @@ struct BufferCacheParams {
150 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 155 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
151 static constexpr bool USE_MEMORY_MAPS = true; 156 static constexpr bool USE_MEMORY_MAPS = true;
152 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; 157 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
158 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
153}; 159};
154 160
155using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 161using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
new file mode 100644
index 000000000..f9e271507
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_vulkan/vk_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>;
9}