diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 206 |
1 files changed, 145 insertions, 61 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index ee8602ce9..8a5e6a3e7 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -19,6 +19,7 @@ namespace VideoCommon { | |||
| 19 | 19 | ||
| 20 | enum class BufferFlagBits { | 20 | enum class BufferFlagBits { |
| 21 | Picked = 1 << 0, | 21 | Picked = 1 << 0, |
| 22 | CachedWrites = 1 << 1, | ||
| 22 | }; | 23 | }; |
| 23 | DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) | 24 | DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) |
| 24 | 25 | ||
| @@ -40,7 +41,7 @@ class BufferBase { | |||
| 40 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | 41 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; |
| 41 | 42 | ||
| 42 | /// Vector tracking modified pages tightly packed with small vector optimization | 43 | /// Vector tracking modified pages tightly packed with small vector optimization |
| 43 | union WrittenWords { | 44 | union WordsArray { |
| 44 | /// Returns the pointer to the words state | 45 | /// Returns the pointer to the words state |
| 45 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | 46 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { |
| 46 | return is_short ? &stack : heap; | 47 | return is_short ? &stack : heap; |
| @@ -55,49 +56,59 @@ class BufferBase { | |||
| 55 | u64* heap; ///< Not-small buffers pointer to the storage | 56 | u64* heap; ///< Not-small buffers pointer to the storage |
| 56 | }; | 57 | }; |
| 57 | 58 | ||
| 58 | struct GpuCpuWords { | 59 | struct Words { |
| 59 | explicit GpuCpuWords() = default; | 60 | explicit Words() = default; |
| 60 | explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { | 61 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { |
| 61 | if (IsShort()) { | 62 | if (IsShort()) { |
| 62 | cpu.stack = ~u64{0}; | 63 | cpu.stack = ~u64{0}; |
| 63 | gpu.stack = 0; | 64 | gpu.stack = 0; |
| 65 | cached_cpu.stack = 0; | ||
| 66 | untracked.stack = ~u64{0}; | ||
| 64 | } else { | 67 | } else { |
| 65 | // Share allocation between CPU and GPU pages and set their default values | 68 | // Share allocation between CPU and GPU pages and set their default values |
| 66 | const size_t num_words = NumWords(); | 69 | const size_t num_words = NumWords(); |
| 67 | u64* const alloc = new u64[num_words * 2]; | 70 | u64* const alloc = new u64[num_words * 4]; |
| 68 | cpu.heap = alloc; | 71 | cpu.heap = alloc; |
| 69 | gpu.heap = alloc + num_words; | 72 | gpu.heap = alloc + num_words; |
| 73 | cached_cpu.heap = alloc + num_words * 2; | ||
| 74 | untracked.heap = alloc + num_words * 3; | ||
| 70 | std::fill_n(cpu.heap, num_words, ~u64{0}); | 75 | std::fill_n(cpu.heap, num_words, ~u64{0}); |
| 71 | std::fill_n(gpu.heap, num_words, 0); | 76 | std::fill_n(gpu.heap, num_words, 0); |
| 77 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 78 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 72 | } | 79 | } |
| 73 | // Clean up trailing bits | 80 | // Clean up trailing bits |
| 74 | const u64 last_local_page = | 81 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; |
| 75 | Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); | 82 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); |
| 76 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | 83 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; |
| 77 | u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; | 84 | const u64 last_word = (~u64{0} << shift) >> shift; |
| 78 | last_word = (last_word << shift) >> shift; | 85 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; |
| 86 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 79 | } | 87 | } |
| 80 | 88 | ||
| 81 | ~GpuCpuWords() { | 89 | ~Words() { |
| 82 | Release(); | 90 | Release(); |
| 83 | } | 91 | } |
| 84 | 92 | ||
| 85 | GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { | 93 | Words& operator=(Words&& rhs) noexcept { |
| 86 | Release(); | 94 | Release(); |
| 87 | size_bytes = rhs.size_bytes; | 95 | size_bytes = rhs.size_bytes; |
| 88 | cpu = rhs.cpu; | 96 | cpu = rhs.cpu; |
| 89 | gpu = rhs.gpu; | 97 | gpu = rhs.gpu; |
| 98 | cached_cpu = rhs.cached_cpu; | ||
| 99 | untracked = rhs.untracked; | ||
| 90 | rhs.cpu.heap = nullptr; | 100 | rhs.cpu.heap = nullptr; |
| 91 | return *this; | 101 | return *this; |
| 92 | } | 102 | } |
| 93 | 103 | ||
| 94 | GpuCpuWords(GpuCpuWords&& rhs) noexcept | 104 | Words(Words&& rhs) noexcept |
| 95 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { | 105 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, |
| 106 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 96 | rhs.cpu.heap = nullptr; | 107 | rhs.cpu.heap = nullptr; |
| 97 | } | 108 | } |
| 98 | 109 | ||
| 99 | GpuCpuWords& operator=(const GpuCpuWords&) = delete; | 110 | Words& operator=(const Words&) = delete; |
| 100 | GpuCpuWords(const GpuCpuWords&) = delete; | 111 | Words(const Words&) = delete; |
| 101 | 112 | ||
| 102 | /// Returns true when the buffer fits in the small vector optimization | 113 | /// Returns true when the buffer fits in the small vector optimization |
| 103 | [[nodiscard]] bool IsShort() const noexcept { | 114 | [[nodiscard]] bool IsShort() const noexcept { |
| @@ -118,8 +129,17 @@ class BufferBase { | |||
| 118 | } | 129 | } |
| 119 | 130 | ||
| 120 | u64 size_bytes = 0; | 131 | u64 size_bytes = 0; |
| 121 | WrittenWords cpu; | 132 | WordsArray cpu; |
| 122 | WrittenWords gpu; | 133 | WordsArray gpu; |
| 134 | WordsArray cached_cpu; | ||
| 135 | WordsArray untracked; | ||
| 136 | }; | ||
| 137 | |||
| 138 | enum class Type { | ||
| 139 | CPU, | ||
| 140 | GPU, | ||
| 141 | CachedCPU, | ||
| 142 | Untracked, | ||
| 123 | }; | 143 | }; |
| 124 | 144 | ||
| 125 | public: | 145 | public: |
| @@ -132,68 +152,93 @@ public: | |||
| 132 | BufferBase& operator=(const BufferBase&) = delete; | 152 | BufferBase& operator=(const BufferBase&) = delete; |
| 133 | BufferBase(const BufferBase&) = delete; | 153 | BufferBase(const BufferBase&) = delete; |
| 134 | 154 | ||
| 155 | BufferBase& operator=(BufferBase&&) = default; | ||
| 156 | BufferBase(BufferBase&&) = default; | ||
| 157 | |||
| 135 | /// Returns the inclusive CPU modified range in a begin end pair | 158 | /// Returns the inclusive CPU modified range in a begin end pair |
| 136 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | 159 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, |
| 137 | u64 query_size) const noexcept { | 160 | u64 query_size) const noexcept { |
| 138 | const u64 offset = query_cpu_addr - cpu_addr; | 161 | const u64 offset = query_cpu_addr - cpu_addr; |
| 139 | return ModifiedRegion<false>(offset, query_size); | 162 | return ModifiedRegion<Type::CPU>(offset, query_size); |
| 140 | } | 163 | } |
| 141 | 164 | ||
| 142 | /// Returns the inclusive GPU modified range in a begin end pair | 165 | /// Returns the inclusive GPU modified range in a begin end pair |
| 143 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | 166 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, |
| 144 | u64 query_size) const noexcept { | 167 | u64 query_size) const noexcept { |
| 145 | const u64 offset = query_cpu_addr - cpu_addr; | 168 | const u64 offset = query_cpu_addr - cpu_addr; |
| 146 | return ModifiedRegion<true>(offset, query_size); | 169 | return ModifiedRegion<Type::GPU>(offset, query_size); |
| 147 | } | 170 | } |
| 148 | 171 | ||
| 149 | /// Returns true if a region has been modified from the CPU | 172 | /// Returns true if a region has been modified from the CPU |
| 150 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { |
| 151 | const u64 offset = query_cpu_addr - cpu_addr; | 174 | const u64 offset = query_cpu_addr - cpu_addr; |
| 152 | return IsRegionModified<false>(offset, query_size); | 175 | return IsRegionModified<Type::CPU>(offset, query_size); |
| 153 | } | 176 | } |
| 154 | 177 | ||
| 155 | /// Returns true if a region has been modified from the GPU | 178 | /// Returns true if a region has been modified from the GPU |
| 156 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | 179 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { |
| 157 | const u64 offset = query_cpu_addr - cpu_addr; | 180 | const u64 offset = query_cpu_addr - cpu_addr; |
| 158 | return IsRegionModified<true>(offset, query_size); | 181 | return IsRegionModified<Type::GPU>(offset, query_size); |
| 159 | } | 182 | } |
| 160 | 183 | ||
| 161 | /// Mark region as CPU modified, notifying the rasterizer about this change | 184 | /// Mark region as CPU modified, notifying the rasterizer about this change |
| 162 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | 185 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { |
| 163 | ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); | 186 | ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); |
| 164 | } | 187 | } |
| 165 | 188 | ||
| 166 | /// Unmark region as CPU modified, notifying the rasterizer about this change | 189 | /// Unmark region as CPU modified, notifying the rasterizer about this change |
| 167 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | 190 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { |
| 168 | ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); | 191 | ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); |
| 169 | } | 192 | } |
| 170 | 193 | ||
| 171 | /// Mark region as modified from the host GPU | 194 | /// Mark region as modified from the host GPU |
| 172 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | 195 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { |
| 173 | ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); | 196 | ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); |
| 174 | } | 197 | } |
| 175 | 198 | ||
| 176 | /// Unmark region as modified from the host GPU | 199 | /// Unmark region as modified from the host GPU |
| 177 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | 200 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { |
| 178 | ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); | 201 | ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); |
| 202 | } | ||
| 203 | |||
| 204 | /// Mark region as modified from the CPU | ||
| 205 | /// but don't mark it as modified until FlushCachedWrites is called. | ||
| 206 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||
| 207 | flags |= BufferFlagBits::CachedWrites; | ||
| 208 | ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Flushes cached CPU writes and notifies the rasterizer about the deltas | ||
| 212 | void FlushCachedWrites() noexcept { | ||
| 213 | flags &= ~BufferFlagBits::CachedWrites; | ||
| 214 | const u64 num_words = NumWords(); | ||
| 215 | const u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 216 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 217 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 218 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 219 | const u64 cached_bits = cached_words[word_index]; | ||
| 220 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 221 | untracked_words[word_index] |= cached_bits; | ||
| 222 | cpu_words[word_index] |= cached_bits; | ||
| 223 | } | ||
| 179 | } | 224 | } |
| 180 | 225 | ||
| 181 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | 226 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified |
| 182 | template <typename Func> | 227 | template <typename Func> |
| 183 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 228 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { |
| 184 | ForEachModifiedRange<false, true>(query_cpu_range, size, func); | 229 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func); |
| 185 | } | 230 | } |
| 186 | 231 | ||
| 187 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 232 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
| 188 | template <typename Func> | 233 | template <typename Func> |
| 189 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 234 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { |
| 190 | ForEachModifiedRange<true, false>(query_cpu_range, size, func); | 235 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func); |
| 191 | } | 236 | } |
| 192 | 237 | ||
| 193 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 238 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
| 194 | template <typename Func> | 239 | template <typename Func> |
| 195 | void ForEachDownloadRange(Func&& func) { | 240 | void ForEachDownloadRange(Func&& func) { |
| 196 | ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); | 241 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func); |
| 197 | } | 242 | } |
| 198 | 243 | ||
| 199 | /// Mark buffer as picked | 244 | /// Mark buffer as picked |
| @@ -216,6 +261,11 @@ public: | |||
| 216 | return True(flags & BufferFlagBits::Picked); | 261 | return True(flags & BufferFlagBits::Picked); |
| 217 | } | 262 | } |
| 218 | 263 | ||
| 264 | /// Returns true when the buffer has pending cached writes | ||
| 265 | [[nodiscard]] bool HasCachedWrites() const noexcept { | ||
| 266 | return True(flags & BufferFlagBits::CachedWrites); | ||
| 267 | } | ||
| 268 | |||
| 219 | /// Returns the base CPU address of the buffer | 269 | /// Returns the base CPU address of the buffer |
| 220 | [[nodiscard]] VAddr CpuAddr() const noexcept { | 270 | [[nodiscard]] VAddr CpuAddr() const noexcept { |
| 221 | return cpu_addr; | 271 | return cpu_addr; |
| @@ -233,26 +283,48 @@ public: | |||
| 233 | } | 283 | } |
| 234 | 284 | ||
| 235 | private: | 285 | private: |
| 286 | template <Type type> | ||
| 287 | u64* Array() noexcept { | ||
| 288 | if constexpr (type == Type::CPU) { | ||
| 289 | return words.cpu.Pointer(IsShort()); | ||
| 290 | } else if constexpr (type == Type::GPU) { | ||
| 291 | return words.gpu.Pointer(IsShort()); | ||
| 292 | } else if constexpr (type == Type::CachedCPU) { | ||
| 293 | return words.cached_cpu.Pointer(IsShort()); | ||
| 294 | } else if constexpr (type == Type::Untracked) { | ||
| 295 | return words.untracked.Pointer(IsShort()); | ||
| 296 | } | ||
| 297 | } | ||
| 298 | |||
| 299 | template <Type type> | ||
| 300 | const u64* Array() const noexcept { | ||
| 301 | if constexpr (type == Type::CPU) { | ||
| 302 | return words.cpu.Pointer(IsShort()); | ||
| 303 | } else if constexpr (type == Type::GPU) { | ||
| 304 | return words.gpu.Pointer(IsShort()); | ||
| 305 | } else if constexpr (type == Type::CachedCPU) { | ||
| 306 | return words.cached_cpu.Pointer(IsShort()); | ||
| 307 | } else if constexpr (type == Type::Untracked) { | ||
| 308 | return words.untracked.Pointer(IsShort()); | ||
| 309 | } | ||
| 310 | } | ||
| 311 | |||
| 236 | /** | 312 | /** |
| 237 | * Change the state of a range of pages | 313 | * Change the state of a range of pages |
| 238 | * | 314 | * |
| 239 | * @param written_words Pages to be marked or unmarked as modified | ||
| 240 | * @param dirty_addr Base address to mark or unmark as modified | 315 | * @param dirty_addr Base address to mark or unmark as modified |
| 241 | * @param size Size in bytes to mark or unmark as modified | 316 | * @param size Size in bytes to mark or unmark as modified |
| 242 | * | ||
| 243 | * @tparam enable True when the bits will be set to one, false for zero | ||
| 244 | * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes | ||
| 245 | */ | 317 | */ |
| 246 | template <bool enable, bool notify_rasterizer> | 318 | template <Type type, bool enable> |
| 247 | void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, | 319 | void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { |
| 248 | s64 size) noexcept(!notify_rasterizer) { | ||
| 249 | const s64 difference = dirty_addr - cpu_addr; | 320 | const s64 difference = dirty_addr - cpu_addr; |
| 250 | const u64 offset = std::max<s64>(difference, 0); | 321 | const u64 offset = std::max<s64>(difference, 0); |
| 251 | size += std::min<s64>(difference, 0); | 322 | size += std::min<s64>(difference, 0); |
| 252 | if (offset >= SizeBytes() || size < 0) { | 323 | if (offset >= SizeBytes() || size < 0) { |
| 253 | return; | 324 | return; |
| 254 | } | 325 | } |
| 255 | u64* const state_words = written_words.Pointer(IsShort()); | 326 | u64* const untracked_words = Array<Type::Untracked>(); |
| 327 | u64* const state_words = Array<type>(); | ||
| 256 | const u64 offset_end = std::min(offset + size, SizeBytes()); | 328 | const u64 offset_end = std::min(offset + size, SizeBytes()); |
| 257 | const u64 begin_page_index = offset / BYTES_PER_PAGE; | 329 | const u64 begin_page_index = offset / BYTES_PER_PAGE; |
| 258 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | 330 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; |
| @@ -268,13 +340,19 @@ private: | |||
| 268 | u64 bits = ~u64{0}; | 340 | u64 bits = ~u64{0}; |
| 269 | bits = (bits >> right_offset) << right_offset; | 341 | bits = (bits >> right_offset) << right_offset; |
| 270 | bits = (bits << left_offset) >> left_offset; | 342 | bits = (bits << left_offset) >> left_offset; |
| 271 | if constexpr (notify_rasterizer) { | 343 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { |
| 272 | NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); | 344 | NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); |
| 273 | } | 345 | } |
| 274 | if constexpr (enable) { | 346 | if constexpr (enable) { |
| 275 | state_words[word_index] |= bits; | 347 | state_words[word_index] |= bits; |
| 348 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 349 | untracked_words[word_index] |= bits; | ||
| 350 | } | ||
| 276 | } else { | 351 | } else { |
| 277 | state_words[word_index] &= ~bits; | 352 | state_words[word_index] &= ~bits; |
| 353 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 354 | untracked_words[word_index] &= ~bits; | ||
| 355 | } | ||
| 278 | } | 356 | } |
| 279 | page_index = 0; | 357 | page_index = 0; |
| 280 | ++word_index; | 358 | ++word_index; |
| @@ -291,7 +369,7 @@ private: | |||
| 291 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | 369 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages |
| 292 | */ | 370 | */ |
| 293 | template <bool add_to_rasterizer> | 371 | template <bool add_to_rasterizer> |
| 294 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { | 372 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |
| 295 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | 373 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; |
| 296 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 374 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 297 | while (changed_bits != 0) { | 375 | while (changed_bits != 0) { |
| @@ -315,21 +393,20 @@ private: | |||
| 315 | * @param query_cpu_range Base CPU address to loop over | 393 | * @param query_cpu_range Base CPU address to loop over |
| 316 | * @param size Size in bytes of the CPU range to loop over | 394 | * @param size Size in bytes of the CPU range to loop over |
| 317 | * @param func Function to call for each turned off region | 395 | * @param func Function to call for each turned off region |
| 318 | * | ||
| 319 | * @tparam gpu True for host GPU pages, false for CPU pages | ||
| 320 | * @tparam notify_rasterizer True when the rasterizer should be notified about state changes | ||
| 321 | */ | 396 | */ |
| 322 | template <bool gpu, bool notify_rasterizer, typename Func> | 397 | template <Type type, typename Func> |
| 323 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | 398 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { |
| 399 | static_assert(type != Type::Untracked); | ||
| 400 | |||
| 324 | const s64 difference = query_cpu_range - cpu_addr; | 401 | const s64 difference = query_cpu_range - cpu_addr; |
| 325 | const u64 query_begin = std::max<s64>(difference, 0); | 402 | const u64 query_begin = std::max<s64>(difference, 0); |
| 326 | size += std::min<s64>(difference, 0); | 403 | size += std::min<s64>(difference, 0); |
| 327 | if (query_begin >= SizeBytes() || size < 0) { | 404 | if (query_begin >= SizeBytes() || size < 0) { |
| 328 | return; | 405 | return; |
| 329 | } | 406 | } |
| 330 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 407 | u64* const untracked_words = Array<Type::Untracked>(); |
| 408 | u64* const state_words = Array<type>(); | ||
| 331 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | 409 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); |
| 332 | u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | ||
| 333 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | 410 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; |
| 334 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | 411 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); |
| 335 | 412 | ||
| @@ -345,7 +422,8 @@ private: | |||
| 345 | const u64 word_index_end = std::distance(state_words, last_modified_word); | 422 | const u64 word_index_end = std::distance(state_words, last_modified_word); |
| 346 | 423 | ||
| 347 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); | 424 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); |
| 348 | const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); | 425 | const unsigned local_page_end = |
| 426 | static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||
| 349 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | 427 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; |
| 350 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | 428 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; |
| 351 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | 429 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; |
| @@ -371,11 +449,13 @@ private: | |||
| 371 | const u64 current_word = state_words[word_index] & bits; | 449 | const u64 current_word = state_words[word_index] & bits; |
| 372 | state_words[word_index] &= ~bits; | 450 | state_words[word_index] &= ~bits; |
| 373 | 451 | ||
| 374 | // Exclude CPU modified pages when visiting GPU pages | 452 | if constexpr (type == Type::CPU) { |
| 375 | const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); | 453 | const u64 current_bits = untracked_words[word_index] & bits; |
| 376 | if constexpr (notify_rasterizer) { | 454 | untracked_words[word_index] &= ~bits; |
| 377 | NotifyRasterizer<true>(word_index, word, ~u64{0}); | 455 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); |
| 378 | } | 456 | } |
| 457 | // Exclude CPU modified pages when visiting GPU pages | ||
| 458 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||
| 379 | u64 page = page_begin; | 459 | u64 page = page_begin; |
| 380 | page_begin = 0; | 460 | page_begin = 0; |
| 381 | 461 | ||
| @@ -416,17 +496,20 @@ private: | |||
| 416 | * @param offset Offset in bytes from the start of the buffer | 496 | * @param offset Offset in bytes from the start of the buffer |
| 417 | * @param size Size in bytes of the region to query for modifications | 497 | * @param size Size in bytes of the region to query for modifications |
| 418 | */ | 498 | */ |
| 419 | template <bool gpu> | 499 | template <Type type> |
| 420 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | 500 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { |
| 421 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 501 | static_assert(type != Type::Untracked); |
| 422 | const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | 502 | |
| 503 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 504 | const u64* const state_words = Array<type>(); | ||
| 423 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 505 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 424 | const u64 word_begin = offset / BYTES_PER_WORD; | 506 | const u64 word_begin = offset / BYTES_PER_WORD; |
| 425 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | 507 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); |
| 426 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | 508 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); |
| 427 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | 509 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; |
| 428 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | 510 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { |
| 429 | const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | 511 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; |
| 512 | const u64 word = state_words[word_index] & ~off_word; | ||
| 430 | if (word == 0) { | 513 | if (word == 0) { |
| 431 | continue; | 514 | continue; |
| 432 | } | 515 | } |
| @@ -445,13 +528,13 @@ private: | |||
| 445 | * | 528 | * |
| 446 | * @param offset Offset in bytes from the start of the buffer | 529 | * @param offset Offset in bytes from the start of the buffer |
| 447 | * @param size Size in bytes of the region to query for modifications | 530 | * @param size Size in bytes of the region to query for modifications |
| 448 | * | ||
| 449 | * @tparam gpu True to query GPU modified pages, false for CPU pages | ||
| 450 | */ | 531 | */ |
| 451 | template <bool gpu> | 532 | template <Type type> |
| 452 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | 533 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { |
| 453 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 534 | static_assert(type != Type::Untracked); |
| 454 | const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | 535 | |
| 536 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 537 | const u64* const state_words = Array<type>(); | ||
| 455 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 538 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 456 | const u64 word_begin = offset / BYTES_PER_WORD; | 539 | const u64 word_begin = offset / BYTES_PER_WORD; |
| 457 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | 540 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); |
| @@ -460,7 +543,8 @@ private: | |||
| 460 | u64 begin = std::numeric_limits<u64>::max(); | 543 | u64 begin = std::numeric_limits<u64>::max(); |
| 461 | u64 end = 0; | 544 | u64 end = 0; |
| 462 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | 545 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { |
| 463 | const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | 546 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; |
| 547 | const u64 word = state_words[word_index] & ~off_word; | ||
| 464 | if (word == 0) { | 548 | if (word == 0) { |
| 465 | continue; | 549 | continue; |
| 466 | } | 550 | } |
| @@ -488,7 +572,7 @@ private: | |||
| 488 | 572 | ||
| 489 | RasterizerInterface* rasterizer = nullptr; | 573 | RasterizerInterface* rasterizer = nullptr; |
| 490 | VAddr cpu_addr = 0; | 574 | VAddr cpu_addr = 0; |
| 491 | GpuCpuWords words; | 575 | Words words; |
| 492 | BufferFlagBits flags{}; | 576 | BufferFlagBits flags{}; |
| 493 | }; | 577 | }; |
| 494 | 578 | ||