summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2021-01-16 19:52:45 -0300
committer: ReinUsesLisp, 2021-02-13 02:15:29 -0300
commit: 9d8ca6cc4a6994e2fd7bf1628fee8615a45d2e45 (patch)
tree: 84d46b82f3bd0238f7d6311fd9a17e1210b0c230 /src
parent: Merge pull request #5877 from ameerj/res-limit-usage (diff)
download: yuzu-9d8ca6cc4a6994e2fd7bf1628fee8615a45d2e45.tar.gz
yuzu-9d8ca6cc4a6994e2fd7bf1628fee8615a45d2e45.tar.xz
yuzu-9d8ca6cc4a6994e2fd7bf1628fee8615a45d2e45.zip
buffer_base: Add support for cached CPU writes
Some games usually write memory pages currently used by the GPU, causing rendering issues (e.g. flashing geometry and shadows on Link's Awakening). To work around this issue, guest CPU writes are delayed until the command buffer finishes processing, but the pages are updated immediately. The overall behavior is:
- CPU writes are cached until they are flushed; they update the page state, but don't change the modification state. Cached writes stop pages from being flushed, in case games have meaningful data in them.
- Command processing writes (e.g. push constants) update the page state and are marked to the command processor as dirty. They don't remove the state of cached writes.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h 206
1 files changed, 145 insertions, 61 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..8a5e6a3e7 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
19 19
20enum class BufferFlagBits { 20enum class BufferFlagBits {
21 Picked = 1 << 0, 21 Picked = 1 << 0,
22 CachedWrites = 1 << 1,
22}; 23};
23DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) 24DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
24 25
@@ -40,7 +41,7 @@ class BufferBase {
40 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; 41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
41 42
42 /// Vector tracking modified pages tightly packed with small vector optimization 43 /// Vector tracking modified pages tightly packed with small vector optimization
43 union WrittenWords { 44 union WordsArray {
44 /// Returns the pointer to the words state 45 /// Returns the pointer to the words state
45 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { 46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
46 return is_short ? &stack : heap; 47 return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
55 u64* heap; ///< Not-small buffers pointer to the storage 56 u64* heap; ///< Not-small buffers pointer to the storage
56 }; 57 };
57 58
58 struct GpuCpuWords { 59 struct Words {
59 explicit GpuCpuWords() = default; 60 explicit Words() = default;
60 explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { 61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
61 if (IsShort()) { 62 if (IsShort()) {
62 cpu.stack = ~u64{0}; 63 cpu.stack = ~u64{0};
63 gpu.stack = 0; 64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
64 } else { 67 } else {
65 // Share allocation between CPU and GPU pages and set their default values 68 // Share allocation between CPU and GPU pages and set their default values
66 const size_t num_words = NumWords(); 69 const size_t num_words = NumWords();
67 u64* const alloc = new u64[num_words * 2]; 70 u64* const alloc = new u64[num_words * 4];
68 cpu.heap = alloc; 71 cpu.heap = alloc;
69 gpu.heap = alloc + num_words; 72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
70 std::fill_n(cpu.heap, num_words, ~u64{0}); 75 std::fill_n(cpu.heap, num_words, ~u64{0});
71 std::fill_n(gpu.heap, num_words, 0); 76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
72 } 79 }
73 // Clean up trailing bits 80 // Clean up trailing bits
74 const u64 last_local_page = 81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
75 Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); 82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
76 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; 83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
77 u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; 84 const u64 last_word = (~u64{0} << shift) >> shift;
78 last_word = (last_word << shift) >> shift; 85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
79 } 87 }
80 88
81 ~GpuCpuWords() { 89 ~Words() {
82 Release(); 90 Release();
83 } 91 }
84 92
85 GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { 93 Words& operator=(Words&& rhs) noexcept {
86 Release(); 94 Release();
87 size_bytes = rhs.size_bytes; 95 size_bytes = rhs.size_bytes;
88 cpu = rhs.cpu; 96 cpu = rhs.cpu;
89 gpu = rhs.gpu; 97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
90 rhs.cpu.heap = nullptr; 100 rhs.cpu.heap = nullptr;
91 return *this; 101 return *this;
92 } 102 }
93 103
94 GpuCpuWords(GpuCpuWords&& rhs) noexcept 104 Words(Words&& rhs) noexcept
95 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { 105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
96 rhs.cpu.heap = nullptr; 107 rhs.cpu.heap = nullptr;
97 } 108 }
98 109
99 GpuCpuWords& operator=(const GpuCpuWords&) = delete; 110 Words& operator=(const Words&) = delete;
100 GpuCpuWords(const GpuCpuWords&) = delete; 111 Words(const Words&) = delete;
101 112
102 /// Returns true when the buffer fits in the small vector optimization 113 /// Returns true when the buffer fits in the small vector optimization
103 [[nodiscard]] bool IsShort() const noexcept { 114 [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
118 } 129 }
119 130
120 u64 size_bytes = 0; 131 u64 size_bytes = 0;
121 WrittenWords cpu; 132 WordsArray cpu;
122 WrittenWords gpu; 133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
123 }; 143 };
124 144
125public: 145public:
@@ -132,68 +152,93 @@ public:
132 BufferBase& operator=(const BufferBase&) = delete; 152 BufferBase& operator=(const BufferBase&) = delete;
133 BufferBase(const BufferBase&) = delete; 153 BufferBase(const BufferBase&) = delete;
134 154
155 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default;
157
135 /// Returns the inclusive CPU modified range in a begin end pair 158 /// Returns the inclusive CPU modified range in a begin end pair
136 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, 159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
137 u64 query_size) const noexcept { 160 u64 query_size) const noexcept {
138 const u64 offset = query_cpu_addr - cpu_addr; 161 const u64 offset = query_cpu_addr - cpu_addr;
139 return ModifiedRegion<false>(offset, query_size); 162 return ModifiedRegion<Type::CPU>(offset, query_size);
140 } 163 }
141 164
142 /// Returns the inclusive GPU modified range in a begin end pair 165 /// Returns the inclusive GPU modified range in a begin end pair
143 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, 166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
144 u64 query_size) const noexcept { 167 u64 query_size) const noexcept {
145 const u64 offset = query_cpu_addr - cpu_addr; 168 const u64 offset = query_cpu_addr - cpu_addr;
146 return ModifiedRegion<true>(offset, query_size); 169 return ModifiedRegion<Type::GPU>(offset, query_size);
147 } 170 }
148 171
149 /// Returns true if a region has been modified from the CPU 172 /// Returns true if a region has been modified from the CPU
150 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
151 const u64 offset = query_cpu_addr - cpu_addr; 174 const u64 offset = query_cpu_addr - cpu_addr;
152 return IsRegionModified<false>(offset, query_size); 175 return IsRegionModified<Type::CPU>(offset, query_size);
153 } 176 }
154 177
155 /// Returns true if a region has been modified from the GPU 178 /// Returns true if a region has been modified from the GPU
156 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
157 const u64 offset = query_cpu_addr - cpu_addr; 180 const u64 offset = query_cpu_addr - cpu_addr;
158 return IsRegionModified<true>(offset, query_size); 181 return IsRegionModified<Type::GPU>(offset, query_size);
159 } 182 }
160 183
161 /// Mark region as CPU modified, notifying the rasterizer about this change 184 /// Mark region as CPU modified, notifying the rasterizer about this change
162 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
163 ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); 186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
164 } 187 }
165 188
166 /// Unmark region as CPU modified, notifying the rasterizer about this change 189 /// Unmark region as CPU modified, notifying the rasterizer about this change
167 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
168 ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); 191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
169 } 192 }
170 193
171 /// Mark region as modified from the host GPU 194 /// Mark region as modified from the host GPU
172 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
173 ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); 196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
174 } 197 }
175 198
176 /// Unmark region as modified from the host GPU 199 /// Unmark region as modified from the host GPU
177 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
178 ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); 201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU
205 /// but don't mark it as modified until FlushCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 const u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 }
179 } 224 }
180 225
181 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified 226 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
182 template <typename Func> 227 template <typename Func>
183 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { 228 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
184 ForEachModifiedRange<false, true>(query_cpu_range, size, func); 229 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
185 } 230 }
186 231
187 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 232 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
188 template <typename Func> 233 template <typename Func>
189 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { 234 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
190 ForEachModifiedRange<true, false>(query_cpu_range, size, func); 235 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
191 } 236 }
192 237
193 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 238 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
194 template <typename Func> 239 template <typename Func>
195 void ForEachDownloadRange(Func&& func) { 240 void ForEachDownloadRange(Func&& func) {
196 ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); 241 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
197 } 242 }
198 243
199 /// Mark buffer as picked 244 /// Mark buffer as picked
@@ -216,6 +261,11 @@ public:
216 return True(flags & BufferFlagBits::Picked); 261 return True(flags & BufferFlagBits::Picked);
217 } 262 }
218 263
264 /// Returns true when the buffer has pending cached writes
265 [[nodiscard]] bool HasCachedWrites() const noexcept {
266 return True(flags & BufferFlagBits::CachedWrites);
267 }
268
219 /// Returns the base CPU address of the buffer 269 /// Returns the base CPU address of the buffer
220 [[nodiscard]] VAddr CpuAddr() const noexcept { 270 [[nodiscard]] VAddr CpuAddr() const noexcept {
221 return cpu_addr; 271 return cpu_addr;
@@ -233,26 +283,48 @@ public:
233 } 283 }
234 284
235private: 285private:
286 template <Type type>
287 u64* Array() noexcept {
288 if constexpr (type == Type::CPU) {
289 return words.cpu.Pointer(IsShort());
290 } else if constexpr (type == Type::GPU) {
291 return words.gpu.Pointer(IsShort());
292 } else if constexpr (type == Type::CachedCPU) {
293 return words.cached_cpu.Pointer(IsShort());
294 } else if constexpr (type == Type::Untracked) {
295 return words.untracked.Pointer(IsShort());
296 }
297 }
298
299 template <Type type>
300 const u64* Array() const noexcept {
301 if constexpr (type == Type::CPU) {
302 return words.cpu.Pointer(IsShort());
303 } else if constexpr (type == Type::GPU) {
304 return words.gpu.Pointer(IsShort());
305 } else if constexpr (type == Type::CachedCPU) {
306 return words.cached_cpu.Pointer(IsShort());
307 } else if constexpr (type == Type::Untracked) {
308 return words.untracked.Pointer(IsShort());
309 }
310 }
311
236 /** 312 /**
237 * Change the state of a range of pages 313 * Change the state of a range of pages
238 * 314 *
239 * @param written_words Pages to be marked or unmarked as modified
240 * @param dirty_addr Base address to mark or unmark as modified 315 * @param dirty_addr Base address to mark or unmark as modified
241 * @param size Size in bytes to mark or unmark as modified 316 * @param size Size in bytes to mark or unmark as modified
242 *
243 * @tparam enable True when the bits will be set to one, false for zero
244 * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
245 */ 317 */
246 template <bool enable, bool notify_rasterizer> 318 template <Type type, bool enable>
247 void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, 319 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
248 s64 size) noexcept(!notify_rasterizer) {
249 const s64 difference = dirty_addr - cpu_addr; 320 const s64 difference = dirty_addr - cpu_addr;
250 const u64 offset = std::max<s64>(difference, 0); 321 const u64 offset = std::max<s64>(difference, 0);
251 size += std::min<s64>(difference, 0); 322 size += std::min<s64>(difference, 0);
252 if (offset >= SizeBytes() || size < 0) { 323 if (offset >= SizeBytes() || size < 0) {
253 return; 324 return;
254 } 325 }
255 u64* const state_words = written_words.Pointer(IsShort()); 326 u64* const untracked_words = Array<Type::Untracked>();
327 u64* const state_words = Array<type>();
256 const u64 offset_end = std::min(offset + size, SizeBytes()); 328 const u64 offset_end = std::min(offset + size, SizeBytes());
257 const u64 begin_page_index = offset / BYTES_PER_PAGE; 329 const u64 begin_page_index = offset / BYTES_PER_PAGE;
258 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; 330 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +340,19 @@ private:
268 u64 bits = ~u64{0}; 340 u64 bits = ~u64{0};
269 bits = (bits >> right_offset) << right_offset; 341 bits = (bits >> right_offset) << right_offset;
270 bits = (bits << left_offset) >> left_offset; 342 bits = (bits << left_offset) >> left_offset;
271 if constexpr (notify_rasterizer) { 343 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
272 NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); 344 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
273 } 345 }
274 if constexpr (enable) { 346 if constexpr (enable) {
275 state_words[word_index] |= bits; 347 state_words[word_index] |= bits;
348 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
349 untracked_words[word_index] |= bits;
350 }
276 } else { 351 } else {
277 state_words[word_index] &= ~bits; 352 state_words[word_index] &= ~bits;
353 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
354 untracked_words[word_index] &= ~bits;
355 }
278 } 356 }
279 page_index = 0; 357 page_index = 0;
280 ++word_index; 358 ++word_index;
@@ -291,7 +369,7 @@ private:
291 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 369 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
292 */ 370 */
293 template <bool add_to_rasterizer> 371 template <bool add_to_rasterizer>
294 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { 372 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
295 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 373 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
296 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 374 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
297 while (changed_bits != 0) { 375 while (changed_bits != 0) {
@@ -315,21 +393,20 @@ private:
315 * @param query_cpu_range Base CPU address to loop over 393 * @param query_cpu_range Base CPU address to loop over
316 * @param size Size in bytes of the CPU range to loop over 394 * @param size Size in bytes of the CPU range to loop over
317 * @param func Function to call for each turned off region 395 * @param func Function to call for each turned off region
318 *
319 * @tparam gpu True for host GPU pages, false for CPU pages
320 * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
321 */ 396 */
322 template <bool gpu, bool notify_rasterizer, typename Func> 397 template <Type type, typename Func>
323 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { 398 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
399 static_assert(type != Type::Untracked);
400
324 const s64 difference = query_cpu_range - cpu_addr; 401 const s64 difference = query_cpu_range - cpu_addr;
325 const u64 query_begin = std::max<s64>(difference, 0); 402 const u64 query_begin = std::max<s64>(difference, 0);
326 size += std::min<s64>(difference, 0); 403 size += std::min<s64>(difference, 0);
327 if (query_begin >= SizeBytes() || size < 0) { 404 if (query_begin >= SizeBytes() || size < 0) {
328 return; 405 return;
329 } 406 }
330 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 407 u64* const untracked_words = Array<Type::Untracked>();
408 u64* const state_words = Array<type>();
331 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); 409 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
332 u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
333 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; 410 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
334 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); 411 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
335 412
@@ -345,7 +422,8 @@ private:
345 const u64 word_index_end = std::distance(state_words, last_modified_word); 422 const u64 word_index_end = std::distance(state_words, last_modified_word);
346 423
347 const unsigned local_page_begin = std::countr_zero(*first_modified_word); 424 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
348 const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); 425 const unsigned local_page_end =
426 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
349 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; 427 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
350 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; 428 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
351 const u64 query_page_begin = query_begin / BYTES_PER_PAGE; 429 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@@ -371,11 +449,13 @@ private:
371 const u64 current_word = state_words[word_index] & bits; 449 const u64 current_word = state_words[word_index] & bits;
372 state_words[word_index] &= ~bits; 450 state_words[word_index] &= ~bits;
373 451
374 // Exclude CPU modified pages when visiting GPU pages 452 if constexpr (type == Type::CPU) {
375 const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); 453 const u64 current_bits = untracked_words[word_index] & bits;
376 if constexpr (notify_rasterizer) { 454 untracked_words[word_index] &= ~bits;
377 NotifyRasterizer<true>(word_index, word, ~u64{0}); 455 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
378 } 456 }
457 // Exclude CPU modified pages when visiting GPU pages
458 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
379 u64 page = page_begin; 459 u64 page = page_begin;
380 page_begin = 0; 460 page_begin = 0;
381 461
@@ -416,17 +496,20 @@ private:
416 * @param offset Offset in bytes from the start of the buffer 496 * @param offset Offset in bytes from the start of the buffer
417 * @param size Size in bytes of the region to query for modifications 497 * @param size Size in bytes of the region to query for modifications
418 */ 498 */
419 template <bool gpu> 499 template <Type type>
420 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 500 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
421 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 501 static_assert(type != Type::Untracked);
422 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 502
503 const u64* const untracked_words = Array<Type::Untracked>();
504 const u64* const state_words = Array<type>();
423 const u64 num_query_words = size / BYTES_PER_WORD + 1; 505 const u64 num_query_words = size / BYTES_PER_WORD + 1;
424 const u64 word_begin = offset / BYTES_PER_WORD; 506 const u64 word_begin = offset / BYTES_PER_WORD;
425 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 507 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
426 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 508 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
427 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 509 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
428 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 510 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
429 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 511 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
512 const u64 word = state_words[word_index] & ~off_word;
430 if (word == 0) { 513 if (word == 0) {
431 continue; 514 continue;
432 } 515 }
@@ -445,13 +528,13 @@ private:
445 * 528 *
446 * @param offset Offset in bytes from the start of the buffer 529 * @param offset Offset in bytes from the start of the buffer
447 * @param size Size in bytes of the region to query for modifications 530 * @param size Size in bytes of the region to query for modifications
448 *
449 * @tparam gpu True to query GPU modified pages, false for CPU pages
450 */ 531 */
451 template <bool gpu> 532 template <Type type>
452 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { 533 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
453 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 534 static_assert(type != Type::Untracked);
454 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 535
536 const u64* const untracked_words = Array<Type::Untracked>();
537 const u64* const state_words = Array<type>();
455 const u64 num_query_words = size / BYTES_PER_WORD + 1; 538 const u64 num_query_words = size / BYTES_PER_WORD + 1;
456 const u64 word_begin = offset / BYTES_PER_WORD; 539 const u64 word_begin = offset / BYTES_PER_WORD;
457 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 540 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +543,8 @@ private:
460 u64 begin = std::numeric_limits<u64>::max(); 543 u64 begin = std::numeric_limits<u64>::max();
461 u64 end = 0; 544 u64 end = 0;
462 for (u64 word_index = word_begin; word_index < word_end; ++word_index) { 545 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
463 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 546 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
547 const u64 word = state_words[word_index] & ~off_word;
464 if (word == 0) { 548 if (word == 0) {
465 continue; 549 continue;
466 } 550 }
@@ -488,7 +572,7 @@ private:
488 572
489 RasterizerInterface* rasterizer = nullptr; 573 RasterizerInterface* rasterizer = nullptr;
490 VAddr cpu_addr = 0; 574 VAddr cpu_addr = 0;
491 GpuCpuWords words; 575 Words words;
492 BufferFlagBits flags{}; 576 BufferFlagBits flags{};
493}; 577};
494 578