summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/buffer_cache/buffer_base.h206
1 files changed, 145 insertions, 61 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index ee8602ce9..8a5e6a3e7 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -19,6 +19,7 @@ namespace VideoCommon {
19 19
20enum class BufferFlagBits { 20enum class BufferFlagBits {
21 Picked = 1 << 0, 21 Picked = 1 << 0,
22 CachedWrites = 1 << 1,
22}; 23};
23DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) 24DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits)
24 25
@@ -40,7 +41,7 @@ class BufferBase {
40 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; 41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
41 42
42 /// Vector tracking modified pages tightly packed with small vector optimization 43 /// Vector tracking modified pages tightly packed with small vector optimization
43 union WrittenWords { 44 union WordsArray {
44 /// Returns the pointer to the words state 45 /// Returns the pointer to the words state
45 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { 46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
46 return is_short ? &stack : heap; 47 return is_short ? &stack : heap;
@@ -55,49 +56,59 @@ class BufferBase {
55 u64* heap; ///< Not-small buffers pointer to the storage 56 u64* heap; ///< Not-small buffers pointer to the storage
56 }; 57 };
57 58
58 struct GpuCpuWords { 59 struct Words {
59 explicit GpuCpuWords() = default; 60 explicit Words() = default;
60 explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { 61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
61 if (IsShort()) { 62 if (IsShort()) {
62 cpu.stack = ~u64{0}; 63 cpu.stack = ~u64{0};
63 gpu.stack = 0; 64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
64 } else { 67 } else {
65 // Share allocation between CPU and GPU pages and set their default values 68 // Share allocation between CPU and GPU pages and set their default values
66 const size_t num_words = NumWords(); 69 const size_t num_words = NumWords();
67 u64* const alloc = new u64[num_words * 2]; 70 u64* const alloc = new u64[num_words * 4];
68 cpu.heap = alloc; 71 cpu.heap = alloc;
69 gpu.heap = alloc + num_words; 72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
70 std::fill_n(cpu.heap, num_words, ~u64{0}); 75 std::fill_n(cpu.heap, num_words, ~u64{0});
71 std::fill_n(gpu.heap, num_words, 0); 76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
72 } 79 }
73 // Clean up trailing bits 80 // Clean up trailing bits
74 const u64 last_local_page = 81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
75 Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); 82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
76 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; 83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
77 u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; 84 const u64 last_word = (~u64{0} << shift) >> shift;
78 last_word = (last_word << shift) >> shift; 85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
79 } 87 }
80 88
81 ~GpuCpuWords() { 89 ~Words() {
82 Release(); 90 Release();
83 } 91 }
84 92
85 GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { 93 Words& operator=(Words&& rhs) noexcept {
86 Release(); 94 Release();
87 size_bytes = rhs.size_bytes; 95 size_bytes = rhs.size_bytes;
88 cpu = rhs.cpu; 96 cpu = rhs.cpu;
89 gpu = rhs.gpu; 97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
90 rhs.cpu.heap = nullptr; 100 rhs.cpu.heap = nullptr;
91 return *this; 101 return *this;
92 } 102 }
93 103
94 GpuCpuWords(GpuCpuWords&& rhs) noexcept 104 Words(Words&& rhs) noexcept
95 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { 105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
96 rhs.cpu.heap = nullptr; 107 rhs.cpu.heap = nullptr;
97 } 108 }
98 109
99 GpuCpuWords& operator=(const GpuCpuWords&) = delete; 110 Words& operator=(const Words&) = delete;
100 GpuCpuWords(const GpuCpuWords&) = delete; 111 Words(const Words&) = delete;
101 112
102 /// Returns true when the buffer fits in the small vector optimization 113 /// Returns true when the buffer fits in the small vector optimization
103 [[nodiscard]] bool IsShort() const noexcept { 114 [[nodiscard]] bool IsShort() const noexcept {
@@ -118,8 +129,17 @@ class BufferBase {
118 } 129 }
119 130
120 u64 size_bytes = 0; 131 u64 size_bytes = 0;
121 WrittenWords cpu; 132 WordsArray cpu;
122 WrittenWords gpu; 133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
123 }; 143 };
124 144
125public: 145public:
@@ -132,68 +152,93 @@ public:
132 BufferBase& operator=(const BufferBase&) = delete; 152 BufferBase& operator=(const BufferBase&) = delete;
133 BufferBase(const BufferBase&) = delete; 153 BufferBase(const BufferBase&) = delete;
134 154
155 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default;
157
135 /// Returns the inclusive CPU modified range in a begin end pair 158 /// Returns the inclusive CPU modified range in a begin end pair
136 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, 159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
137 u64 query_size) const noexcept { 160 u64 query_size) const noexcept {
138 const u64 offset = query_cpu_addr - cpu_addr; 161 const u64 offset = query_cpu_addr - cpu_addr;
139 return ModifiedRegion<false>(offset, query_size); 162 return ModifiedRegion<Type::CPU>(offset, query_size);
140 } 163 }
141 164
142 /// Returns the inclusive GPU modified range in a begin end pair 165 /// Returns the inclusive GPU modified range in a begin end pair
143 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, 166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
144 u64 query_size) const noexcept { 167 u64 query_size) const noexcept {
145 const u64 offset = query_cpu_addr - cpu_addr; 168 const u64 offset = query_cpu_addr - cpu_addr;
146 return ModifiedRegion<true>(offset, query_size); 169 return ModifiedRegion<Type::GPU>(offset, query_size);
147 } 170 }
148 171
149 /// Returns true if a region has been modified from the CPU 172 /// Returns true if a region has been modified from the CPU
150 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
151 const u64 offset = query_cpu_addr - cpu_addr; 174 const u64 offset = query_cpu_addr - cpu_addr;
152 return IsRegionModified<false>(offset, query_size); 175 return IsRegionModified<Type::CPU>(offset, query_size);
153 } 176 }
154 177
155 /// Returns true if a region has been modified from the GPU 178 /// Returns true if a region has been modified from the GPU
156 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { 179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
157 const u64 offset = query_cpu_addr - cpu_addr; 180 const u64 offset = query_cpu_addr - cpu_addr;
158 return IsRegionModified<true>(offset, query_size); 181 return IsRegionModified<Type::GPU>(offset, query_size);
159 } 182 }
160 183
161 /// Mark region as CPU modified, notifying the rasterizer about this change 184 /// Mark region as CPU modified, notifying the rasterizer about this change
162 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
163 ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); 186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
164 } 187 }
165 188
166 /// Unmark region as CPU modified, notifying the rasterizer about this change 189 /// Unmark region as CPU modified, notifying the rasterizer about this change
167 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { 190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
168 ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); 191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
169 } 192 }
170 193
171 /// Mark region as modified from the host GPU 194 /// Mark region as modified from the host GPU
172 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
173 ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); 196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
174 } 197 }
175 198
176 /// Unmark region as modified from the host GPU 199 /// Unmark region as modified from the host GPU
177 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { 200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
178 ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); 201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU
205 /// but don't mark it as modified until FlushCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes and notifies the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 const u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 }
179 } 224 }
180 225
181 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified 226 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
182 template <typename Func> 227 template <typename Func>
183 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { 228 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
184 ForEachModifiedRange<false, true>(query_cpu_range, size, func); 229 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
185 } 230 }
186 231
187 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 232 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
188 template <typename Func> 233 template <typename Func>
189 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { 234 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
190 ForEachModifiedRange<true, false>(query_cpu_range, size, func); 235 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
191 } 236 }
192 237
193 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified 238 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
194 template <typename Func> 239 template <typename Func>
195 void ForEachDownloadRange(Func&& func) { 240 void ForEachDownloadRange(Func&& func) {
196 ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); 241 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
197 } 242 }
198 243
199 /// Mark buffer as picked 244 /// Mark buffer as picked
@@ -216,6 +261,11 @@ public:
216 return True(flags & BufferFlagBits::Picked); 261 return True(flags & BufferFlagBits::Picked);
217 } 262 }
218 263
264 /// Returns true when the buffer has pending cached writes
265 [[nodiscard]] bool HasCachedWrites() const noexcept {
266 return True(flags & BufferFlagBits::CachedWrites);
267 }
268
219 /// Returns the base CPU address of the buffer 269 /// Returns the base CPU address of the buffer
220 [[nodiscard]] VAddr CpuAddr() const noexcept { 270 [[nodiscard]] VAddr CpuAddr() const noexcept {
221 return cpu_addr; 271 return cpu_addr;
@@ -233,26 +283,48 @@ public:
233 } 283 }
234 284
235private: 285private:
286 template <Type type>
287 u64* Array() noexcept {
288 if constexpr (type == Type::CPU) {
289 return words.cpu.Pointer(IsShort());
290 } else if constexpr (type == Type::GPU) {
291 return words.gpu.Pointer(IsShort());
292 } else if constexpr (type == Type::CachedCPU) {
293 return words.cached_cpu.Pointer(IsShort());
294 } else if constexpr (type == Type::Untracked) {
295 return words.untracked.Pointer(IsShort());
296 }
297 }
298
299 template <Type type>
300 const u64* Array() const noexcept {
301 if constexpr (type == Type::CPU) {
302 return words.cpu.Pointer(IsShort());
303 } else if constexpr (type == Type::GPU) {
304 return words.gpu.Pointer(IsShort());
305 } else if constexpr (type == Type::CachedCPU) {
306 return words.cached_cpu.Pointer(IsShort());
307 } else if constexpr (type == Type::Untracked) {
308 return words.untracked.Pointer(IsShort());
309 }
310 }
311
236 /** 312 /**
237 * Change the state of a range of pages 313 * Change the state of a range of pages
238 * 314 *
239 * @param written_words Pages to be marked or unmarked as modified
240 * @param dirty_addr Base address to mark or unmark as modified 315 * @param dirty_addr Base address to mark or unmark as modified
241 * @param size Size in bytes to mark or unmark as modified 316 * @param size Size in bytes to mark or unmark as modified
242 *
243 * @tparam enable True when the bits will be set to one, false for zero
244 * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes
245 */ 317 */
246 template <bool enable, bool notify_rasterizer> 318 template <Type type, bool enable>
247 void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, 319 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
248 s64 size) noexcept(!notify_rasterizer) {
249 const s64 difference = dirty_addr - cpu_addr; 320 const s64 difference = dirty_addr - cpu_addr;
250 const u64 offset = std::max<s64>(difference, 0); 321 const u64 offset = std::max<s64>(difference, 0);
251 size += std::min<s64>(difference, 0); 322 size += std::min<s64>(difference, 0);
252 if (offset >= SizeBytes() || size < 0) { 323 if (offset >= SizeBytes() || size < 0) {
253 return; 324 return;
254 } 325 }
255 u64* const state_words = written_words.Pointer(IsShort()); 326 u64* const untracked_words = Array<Type::Untracked>();
327 u64* const state_words = Array<type>();
256 const u64 offset_end = std::min(offset + size, SizeBytes()); 328 const u64 offset_end = std::min(offset + size, SizeBytes());
257 const u64 begin_page_index = offset / BYTES_PER_PAGE; 329 const u64 begin_page_index = offset / BYTES_PER_PAGE;
258 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; 330 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
@@ -268,13 +340,19 @@ private:
268 u64 bits = ~u64{0}; 340 u64 bits = ~u64{0};
269 bits = (bits >> right_offset) << right_offset; 341 bits = (bits >> right_offset) << right_offset;
270 bits = (bits << left_offset) >> left_offset; 342 bits = (bits << left_offset) >> left_offset;
271 if constexpr (notify_rasterizer) { 343 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
272 NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); 344 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
273 } 345 }
274 if constexpr (enable) { 346 if constexpr (enable) {
275 state_words[word_index] |= bits; 347 state_words[word_index] |= bits;
348 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
349 untracked_words[word_index] |= bits;
350 }
276 } else { 351 } else {
277 state_words[word_index] &= ~bits; 352 state_words[word_index] &= ~bits;
353 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
354 untracked_words[word_index] &= ~bits;
355 }
278 } 356 }
279 page_index = 0; 357 page_index = 0;
280 ++word_index; 358 ++word_index;
@@ -291,7 +369,7 @@ private:
291 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages 369 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
292 */ 370 */
293 template <bool add_to_rasterizer> 371 template <bool add_to_rasterizer>
294 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { 372 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
295 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; 373 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
296 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; 374 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
297 while (changed_bits != 0) { 375 while (changed_bits != 0) {
@@ -315,21 +393,20 @@ private:
315 * @param query_cpu_range Base CPU address to loop over 393 * @param query_cpu_range Base CPU address to loop over
316 * @param size Size in bytes of the CPU range to loop over 394 * @param size Size in bytes of the CPU range to loop over
317 * @param func Function to call for each turned off region 395 * @param func Function to call for each turned off region
318 *
319 * @tparam gpu True for host GPU pages, false for CPU pages
320 * @tparam notify_rasterizer True when the rasterizer should be notified about state changes
321 */ 396 */
322 template <bool gpu, bool notify_rasterizer, typename Func> 397 template <Type type, typename Func>
323 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { 398 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
399 static_assert(type != Type::Untracked);
400
324 const s64 difference = query_cpu_range - cpu_addr; 401 const s64 difference = query_cpu_range - cpu_addr;
325 const u64 query_begin = std::max<s64>(difference, 0); 402 const u64 query_begin = std::max<s64>(difference, 0);
326 size += std::min<s64>(difference, 0); 403 size += std::min<s64>(difference, 0);
327 if (query_begin >= SizeBytes() || size < 0) { 404 if (query_begin >= SizeBytes() || size < 0) {
328 return; 405 return;
329 } 406 }
330 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 407 u64* const untracked_words = Array<Type::Untracked>();
408 u64* const state_words = Array<type>();
331 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); 409 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
332 u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort());
333 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; 410 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
334 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); 411 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
335 412
@@ -345,7 +422,8 @@ private:
345 const u64 word_index_end = std::distance(state_words, last_modified_word); 422 const u64 word_index_end = std::distance(state_words, last_modified_word);
346 423
347 const unsigned local_page_begin = std::countr_zero(*first_modified_word); 424 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
348 const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); 425 const unsigned local_page_end =
426 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
349 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; 427 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
350 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; 428 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
351 const u64 query_page_begin = query_begin / BYTES_PER_PAGE; 429 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
@@ -371,11 +449,13 @@ private:
371 const u64 current_word = state_words[word_index] & bits; 449 const u64 current_word = state_words[word_index] & bits;
372 state_words[word_index] &= ~bits; 450 state_words[word_index] &= ~bits;
373 451
374 // Exclude CPU modified pages when visiting GPU pages 452 if constexpr (type == Type::CPU) {
375 const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); 453 const u64 current_bits = untracked_words[word_index] & bits;
376 if constexpr (notify_rasterizer) { 454 untracked_words[word_index] &= ~bits;
377 NotifyRasterizer<true>(word_index, word, ~u64{0}); 455 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
378 } 456 }
457 // Exclude CPU modified pages when visiting GPU pages
458 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
379 u64 page = page_begin; 459 u64 page = page_begin;
380 page_begin = 0; 460 page_begin = 0;
381 461
@@ -416,17 +496,20 @@ private:
416 * @param offset Offset in bytes from the start of the buffer 496 * @param offset Offset in bytes from the start of the buffer
417 * @param size Size in bytes of the region to query for modifications 497 * @param size Size in bytes of the region to query for modifications
418 */ 498 */
419 template <bool gpu> 499 template <Type type>
420 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 500 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
421 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 501 static_assert(type != Type::Untracked);
422 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 502
503 const u64* const untracked_words = Array<Type::Untracked>();
504 const u64* const state_words = Array<type>();
423 const u64 num_query_words = size / BYTES_PER_WORD + 1; 505 const u64 num_query_words = size / BYTES_PER_WORD + 1;
424 const u64 word_begin = offset / BYTES_PER_WORD; 506 const u64 word_begin = offset / BYTES_PER_WORD;
425 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 507 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
426 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 508 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
427 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 509 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
428 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 510 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
429 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 511 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
512 const u64 word = state_words[word_index] & ~off_word;
430 if (word == 0) { 513 if (word == 0) {
431 continue; 514 continue;
432 } 515 }
@@ -445,13 +528,13 @@ private:
445 * 528 *
446 * @param offset Offset in bytes from the start of the buffer 529 * @param offset Offset in bytes from the start of the buffer
447 * @param size Size in bytes of the region to query for modifications 530 * @param size Size in bytes of the region to query for modifications
448 *
449 * @tparam gpu True to query GPU modified pages, false for CPU pages
450 */ 531 */
451 template <bool gpu> 532 template <Type type>
452 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { 533 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
453 const u64* const cpu_words = words.cpu.Pointer(IsShort()); 534 static_assert(type != Type::Untracked);
454 const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); 535
536 const u64* const untracked_words = Array<Type::Untracked>();
537 const u64* const state_words = Array<type>();
455 const u64 num_query_words = size / BYTES_PER_WORD + 1; 538 const u64 num_query_words = size / BYTES_PER_WORD + 1;
456 const u64 word_begin = offset / BYTES_PER_WORD; 539 const u64 word_begin = offset / BYTES_PER_WORD;
457 const u64 word_end = std::min(word_begin + num_query_words, NumWords()); 540 const u64 word_end = std::min(word_begin + num_query_words, NumWords());
@@ -460,7 +543,8 @@ private:
460 u64 begin = std::numeric_limits<u64>::max(); 543 u64 begin = std::numeric_limits<u64>::max();
461 u64 end = 0; 544 u64 end = 0;
462 for (u64 word_index = word_begin; word_index < word_end; ++word_index) { 545 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
463 const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); 546 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
547 const u64 word = state_words[word_index] & ~off_word;
464 if (word == 0) { 548 if (word == 0) {
465 continue; 549 continue;
466 } 550 }
@@ -488,7 +572,7 @@ private:
488 572
489 RasterizerInterface* rasterizer = nullptr; 573 RasterizerInterface* rasterizer = nullptr;
490 VAddr cpu_addr = 0; 574 VAddr cpu_addr = 0;
491 GpuCpuWords words; 575 Words words;
492 BufferFlagBits flags{}; 576 BufferFlagBits flags{};
493}; 577};
494 578