 src/common/range_sets.inc                        | 184
 src/video_core/buffer_cache/buffer_cache.h       | 250
 src/video_core/buffer_cache/buffer_cache_base.h  | 131
 src/video_core/renderer_opengl/gl_buffer_cache.h |   1
 src/video_core/renderer_vulkan/vk_buffer_cache.h |   1
 5 files changed, 206 insertions(+), 361 deletions(-)
diff --git a/src/common/range_sets.inc b/src/common/range_sets.inc
index fa55a68fb..705ebd4a1 100644
--- a/src/common/range_sets.inc
+++ b/src/common/range_sets.inc
@@ -6,9 +6,6 @@
 #include <limits>
 #include <utility>
 
-#define BOOST_NO_MT
-#include <boost/pool/detail/mutex.hpp>
-#undef BOOST_NO_MT
 #include <boost/icl/interval.hpp>
 #include <boost/icl/interval_base_set.hpp>
 #include <boost/icl/interval_map.hpp>
@@ -20,18 +17,16 @@
 
 #include "common/range_sets.h"
 
-namespace boost {
-template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
-}
-
 namespace Common {
 
 template <typename AddressType>
 struct RangeSet<AddressType>::RangeSetImpl {
+    template <class T>
+    using MyAllocator = boost::fast_pool_allocator<T, boost::default_user_allocator_new_delete,
+                                                   boost::details::pool::default_mutex, 1024, 2048>;
     using IntervalSet = boost::icl::interval_set<
         AddressType, std::less, ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, AddressType, std::less),
-        boost::fast_pool_allocator>;
+        MyAllocator>;
     using IntervalType = typename IntervalSet::interval_type;
 
     RangeSetImpl() = default;
@@ -49,18 +44,58 @@ struct RangeSet<AddressType>::RangeSetImpl {
         m_ranges_set.subtract(interval);
     }
 
+    template <typename Func>
+    void ForEach(Func&& func) const {
+        if (m_ranges_set.empty()) {
+            return;
+        }
+        auto it = m_ranges_set.begin();
+        auto end_it = m_ranges_set.end();
+        for (; it != end_it; it++) {
+            const AddressType inter_addr_end = it->upper();
+            const AddressType inter_addr = it->lower();
+            func(inter_addr, inter_addr_end);
+        }
+    }
+
+    template <typename Func>
+    void ForEachInRange(AddressType base_addr, size_t size, Func&& func) const {
+        if (m_ranges_set.empty()) {
+            return;
+        }
+        const AddressType start_address = base_addr;
+        const AddressType end_address = start_address + size;
+        const RangeSetImpl::IntervalType search_interval{start_address, end_address};
+        auto it = m_ranges_set.lower_bound(search_interval);
+        if (it == m_ranges_set.end()) {
+            return;
+        }
+        auto end_it = m_ranges_set.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            AddressType inter_addr_end = it->upper();
+            AddressType inter_addr = it->lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end);
+        }
+    }
+
     IntervalSet m_ranges_set;
 };
 
 template <typename AddressType>
 struct SplitRangeSet<AddressType>::SplitRangeSetImpl {
-
-    using IntervalSet =
-        boost::icl::split_interval_map<AddressType, s32, boost::icl::partial_enricher, std::less,
-                                       boost::icl::inplace_plus, boost::icl::inter_section,
-                                       ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, AddressType,
-                                                             std::less),
-                                       boost::fast_pool_allocator>;
+    template <class T>
+    using MyAllocator = boost::fast_pool_allocator<T, boost::default_user_allocator_new_delete,
+                                                   boost::details::pool::default_mutex, 1024, 2048>;
+    using IntervalSet = boost::icl::split_interval_map<
+        AddressType, s32, boost::icl::partial_enricher, std::less, boost::icl::inplace_plus,
+        boost::icl::inter_section,
+        ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, AddressType, std::less), MyAllocator>;
     using IntervalType = typename IntervalSet::interval_type;
 
     SplitRangeSetImpl() = default;
@@ -75,6 +110,9 @@ struct SplitRangeSet<AddressType>::SplitRangeSetImpl {
     template <bool has_on_delete, typename Func>
     void Subtract(AddressType base_address, size_t size, s32 amount,
                   [[maybe_unused]] Func&& on_delete) {
+        if (m_split_ranges_set.empty()) {
+            return;
+        }
         AddressType end_address = base_address + static_cast<AddressType>(size);
         IntervalType interval{base_address, end_address};
         bool any_removals = false;
@@ -101,6 +139,47 @@ struct SplitRangeSet<AddressType>::SplitRangeSetImpl {
         } while (any_removals);
     }
 
+    template <typename Func>
+    void ForEach(Func&& func) const {
+        if (m_split_ranges_set.empty()) {
+            return;
+        }
+        auto it = m_split_ranges_set.begin();
+        auto end_it = m_split_ranges_set.end();
+        for (; it != end_it; it++) {
+            const AddressType inter_addr_end = it->first.upper();
+            const AddressType inter_addr = it->first.lower();
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
+    template <typename Func>
+    void ForEachInRange(AddressType base_address, size_t size, Func&& func) const {
+        if (m_split_ranges_set.empty()) {
+            return;
+        }
+        const AddressType start_address = base_address;
+        const AddressType end_address = start_address + size;
+        const SplitRangeSetImpl::IntervalType search_interval{start_address, end_address};
+        auto it = m_split_ranges_set.lower_bound(search_interval);
+        if (it == m_split_ranges_set.end()) {
+            return;
+        }
+        auto end_it = m_split_ranges_set.upper_bound(search_interval);
+        for (; it != end_it; it++) {
+            auto& inter = it->first;
+            AddressType inter_addr_end = inter.upper();
+            AddressType inter_addr = inter.lower();
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
     IntervalSet m_split_ranges_set;
 };
 
@@ -146,41 +225,13 @@ bool RangeSet<AddressType>::Empty() const {
 template <typename AddressType>
 template <typename Func>
 void RangeSet<AddressType>::ForEach(Func&& func) const {
-    if (m_impl->m_ranges_set.empty()) {
-        return;
-    }
-    auto it = m_impl->m_ranges_set.begin();
-    auto end_it = m_impl->m_ranges_set.end();
-    for (; it != end_it; it++) {
-        const AddressType inter_addr_end = it->upper();
-        const AddressType inter_addr = it->lower();
-        func(inter_addr, inter_addr_end);
-    }
+    m_impl->ForEach(std::move(func));
 }
 
 template <typename AddressType>
 template <typename Func>
-void RangeSet<AddressType>::ForEachInRange(AddressType base_addr, size_t size, Func&& func) const {
-    auto& range_set = m_impl->m_ranges_set;
-    const AddressType start_address = base_addr;
-    const AddressType end_address = start_address + size;
-    const RangeSetImpl::IntervalType search_interval{start_address, end_address};
-    auto it = range_set.lower_bound(search_interval);
-    if (it == range_set.end()) {
-        return;
-    }
-    auto end_it = range_set.upper_bound(search_interval);
-    for (; it != end_it; it++) {
-        AddressType inter_addr_end = it->upper();
-        AddressType inter_addr = it->lower();
-        if (inter_addr_end > end_address) {
-            inter_addr_end = end_address;
-        }
-        if (inter_addr < start_address) {
-            inter_addr = start_address;
-        }
-        func(inter_addr, inter_addr_end);
-    }
+void RangeSet<AddressType>::ForEachInRange(AddressType base_address, size_t size, Func&& func) const {
+    m_impl->ForEachInRange(base_address, size, std::move(func));
 }
 
 template <typename AddressType>
@@ -209,18 +260,18 @@ void SplitRangeSet<AddressType>::Add(AddressType base_address, size_t size) {
 
 template <typename AddressType>
 void SplitRangeSet<AddressType>::Subtract(AddressType base_address, size_t size) {
-    m_impl->Subtract<false>(base_address, size, 1, [](AddressType, AddressType) {});
+    m_impl->template Subtract<false>(base_address, size, 1, [](AddressType, AddressType) {});
 }
 
 template <typename AddressType>
 template <typename Func>
 void SplitRangeSet<AddressType>::Subtract(AddressType base_address, size_t size, Func&& on_delete) {
-    m_impl->Subtract<true>(base_address, size, 1, on_delete);
+    m_impl->template Subtract<true, Func>(base_address, size, 1, std::move(on_delete));
 }
 
 template <typename AddressType>
 void SplitRangeSet<AddressType>::DeleteAll(AddressType base_address, size_t size) {
-    m_impl->Subtract<false>(base_address, size, std::numeric_limits<s32>::max(),
-                            [](AddressType, AddressType) {});
+    m_impl->template Subtract<false>(base_address, size, std::numeric_limits<s32>::max(),
+                                     [](AddressType, AddressType) {});
 }
 
@@ -237,43 +288,14 @@ bool SplitRangeSet<AddressType>::Empty() const {
 template <typename AddressType>
 template <typename Func>
 void SplitRangeSet<AddressType>::ForEach(Func&& func) const {
-    if (m_impl->m_split_ranges_set.empty()) {
-        return;
-    }
-    auto it = m_impl->m_split_ranges_set.begin();
-    auto end_it = m_impl->m_split_ranges_set.end();
-    for (; it != end_it; it++) {
-        const AddressType inter_addr_end = it->first.upper();
-        const AddressType inter_addr = it->first.lower();
-        func(inter_addr, inter_addr_end, it->second);
-    }
+    m_impl->ForEach(func);
 }
 
 template <typename AddressType>
 template <typename Func>
 void SplitRangeSet<AddressType>::ForEachInRange(AddressType base_address, size_t size,
                                                 Func&& func) const {
-    auto& range_set = m_impl->m_split_ranges_set;
-    const AddressType start_address = base_address;
-    const AddressType end_address = start_address + size;
-    const SplitRangeSetImpl::IntervalType search_interval{start_address, end_address};
-    auto it = range_set.lower_bound(search_interval);
-    if (it == range_set.end()) {
-        return;
-    }
-    auto end_it = range_set.upper_bound(search_interval);
-    for (; it != end_it; it++) {
-        auto& inter = it->first;
-        AddressType inter_addr_end = inter.upper();
-        AddressType inter_addr = inter.lower();
-        if (inter_addr_end > end_address) {
-            inter_addr_end = end_address;
-        }
-        if (inter_addr < start_address) {
-            inter_addr = start_address;
-        }
-        func(inter_addr, inter_addr_end, it->second);
-    }
+    m_impl->ForEachInRange(base_address, size, std::move(func));
 }
 
 } // namespace Common
\ No newline at end of file
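Note: every ForEach/ForEachInRange helper introduced above follows the same pattern: return early when the set is empty, find the first interval that can touch the queried window via lower_bound/upper_bound, and clamp each interval to the window before invoking the callback. Below is a minimal standalone sketch of that clamped walk, written directly against boost::icl with made-up addresses; it illustrates the pattern and is not code from this patch.

#include <algorithm>
#include <cstdint>
#include <iostream>

#include <boost/icl/interval_set.hpp>

int main() {
    using IntervalSet = boost::icl::interval_set<std::uint64_t>;
    using Interval = IntervalSet::interval_type;

    IntervalSet ranges;
    ranges.add(Interval::right_open(0x1000, 0x3000));
    ranges.add(Interval::right_open(0x8000, 0x9000));

    // Query window [0x2000, 0x8800): report every stored interval clipped to
    // the window, which is exactly what ForEachInRange passes to its callback.
    const std::uint64_t start = 0x2000, end = 0x8800;
    const auto search = Interval::right_open(start, end);
    auto it = ranges.lower_bound(search);
    const auto end_it = ranges.upper_bound(search);
    for (; it != end_it; ++it) {
        const std::uint64_t lower = std::max<std::uint64_t>(it->lower(), start);
        const std::uint64_t upper = std::min<std::uint64_t>(it->upper(), end);
        std::cout << std::hex << lower << '-' << upper << '\n'; // 2000-3000, 8000-8800
    }
    return 0;
}

SplitRangeSet::ForEachInRange is the same walk over a split_interval_map, with the per-interval counter (it->second) forwarded as a third callback argument.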
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b4bf369d1..6d3d933c5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -7,6 +7,7 @@
 #include <memory>
 #include <numeric>
 
+#include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
 #include "video_core/guest_memory.h"
 #include "video_core/host1x/gpu_device_memory_manager.h"
@@ -20,7 +21,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
-    common_ranges.clear();
+    gpu_modified_ranges.Clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
     if (!runtime.CanReportMemoryUsage()) {
@@ -44,6 +45,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
 }
 
 template <class P>
+BufferCache<P>::~BufferCache() = default;
+
+template <class P>
 void BufferCache<P>::RunGarbageCollector() {
     const bool aggressive_gc = total_used_memory >= critical_memory;
     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
@@ -96,20 +100,17 @@ void BufferCache<P>::TickFrame() {
     ++frame_tick;
     delayed_destruction_ring.Tick();
 
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        for (auto& buffer : async_buffers_death_ring) {
-            runtime.FreeDeferredStagingBuffer(buffer);
-        }
-        async_buffers_death_ring.clear();
+    for (auto& buffer : async_buffers_death_ring) {
+        runtime.FreeDeferredStagingBuffer(buffer);
     }
+    async_buffers_death_ring.clear();
 }
 
 template <class P>
 void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
     if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
-        const IntervalType subtract_interval{device_addr, device_addr + size};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
+        ClearDownload(device_addr, size);
+        gpu_modified_ranges.Subtract(device_addr, size);
     }
     memory_tracker.MarkRegionAsCpuModified(device_addr, size);
 }
@@ -174,11 +175,11 @@ void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
 }
 
 template <class P>
-void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
-    RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
-    uncommitted_ranges.subtract(subtract_interval);
-    for (auto& interval_set : committed_ranges) {
-        interval_set.subtract(subtract_interval);
+void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) {
+    async_downloads.DeleteAll(device_addr, size);
+    uncommitted_gpu_modified_ranges.Subtract(device_addr, size);
+    for (auto& interval_set : committed_gpu_modified_ranges) {
+        interval_set.Subtract(device_addr, size);
     }
 }
 
@@ -195,8 +196,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         return false;
     }
 
-    const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
-    ClearDownload(subtract_interval);
+    ClearDownload(*cpu_dest_address, amount);
 
     BufferId buffer_a;
     BufferId buffer_b;
@@ -215,21 +215,20 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         .size = amount,
     }};
 
-    boost::container::small_vector<IntervalType, 4> tmp_intervals;
+    boost::container::small_vector<std::pair<DAddr, size_t>, 4> tmp_intervals;
     auto mirror = [&](DAddr base_address, DAddr base_address_end) {
         const u64 size = base_address_end - base_address;
         const DAddr diff = base_address - *cpu_src_address;
         const DAddr new_base_address = *cpu_dest_address + diff;
-        const IntervalType add_interval{new_base_address, new_base_address + size};
-        tmp_intervals.push_back(add_interval);
-        uncommitted_ranges.add(add_interval);
+        tmp_intervals.push_back({new_base_address, size});
+        uncommitted_gpu_modified_ranges.Add(new_base_address, size);
     };
-    ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
+    gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror);
     // This subtraction in this order is important for overlapping copies.
-    common_ranges.subtract(subtract_interval);
+    gpu_modified_ranges.Subtract(*cpu_dest_address, amount);
     const bool has_new_downloads = tmp_intervals.size() != 0;
-    for (const IntervalType& add_interval : tmp_intervals) {
-        common_ranges.add(add_interval);
+    for (const auto& pair : tmp_intervals) {
+        gpu_modified_ranges.Add(pair.first, pair.second);
     }
     const auto& copy = copies[0];
     src_buffer.MarkUsage(copy.src_offset, copy.size);
@@ -257,9 +256,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
     }
 
     const size_t size = amount * sizeof(u32);
-    const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(*cpu_dst_address, size);
+    gpu_modified_ranges.Subtract(*cpu_dst_address, size);
 
     const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
     Buffer& dest_buffer = slot_buffers[buffer];
@@ -300,11 +298,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
         MarkWrittenBuffer(buffer_id, device_addr, size);
         break;
     case ObtainBufferOperation::DiscardWrite: {
-        DAddr device_addr_start = Common::AlignDown(device_addr, 64);
-        DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
-        IntervalType interval{device_addr_start, device_addr_end};
-        ClearDownload(interval);
-        common_ranges.subtract(interval);
+        const DAddr device_addr_start = Common::AlignDown(device_addr, 64);
+        const DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
+        const size_t new_size = device_addr_end - device_addr_start;
+        ClearDownload(device_addr_start, new_size);
+        gpu_modified_ranges.Subtract(device_addr_start, new_size);
         break;
     }
     default:
@@ -504,46 +502,40 @@ void BufferCache<P>::FlushCachedWrites() {
 
 template <class P>
 bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
-    return !uncommitted_ranges.empty() || !committed_ranges.empty();
+    return !uncommitted_gpu_modified_ranges.Empty() || !committed_gpu_modified_ranges.empty();
 }
 
 template <class P>
 void BufferCache<P>::AccumulateFlushes() {
-    if (uncommitted_ranges.empty()) {
+    if (uncommitted_gpu_modified_ranges.Empty()) {
         return;
     }
-    committed_ranges.emplace_back(std::move(uncommitted_ranges));
+    committed_gpu_modified_ranges.emplace_back(std::move(uncommitted_gpu_modified_ranges));
 }
 
 template <class P>
 bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        return (!async_buffers.empty() && async_buffers.front().has_value());
-    } else {
-        return false;
-    }
+    return (!async_buffers.empty() && async_buffers.front().has_value());
 }
 
 template <class P>
 void BufferCache<P>::CommitAsyncFlushesHigh() {
     AccumulateFlushes();
 
-    if (committed_ranges.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+    if (committed_gpu_modified_ranges.empty()) {
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
 
-    auto it = committed_ranges.begin();
-    while (it != committed_ranges.end()) {
+    auto it = committed_gpu_modified_ranges.begin();
+    while (it != committed_gpu_modified_ranges.end()) {
         auto& current_intervals = *it;
         auto next_it = std::next(it);
-        while (next_it != committed_ranges.end()) {
-            for (auto& interval : *next_it) {
-                current_intervals.subtract(interval);
-            }
+        while (next_it != committed_gpu_modified_ranges.end()) {
+            next_it->ForEach([&current_intervals](DAddr start, DAddr end) {
+                current_intervals.Subtract(start, end - start);
+            });
             next_it++;
         }
         it++;
@@ -552,10 +544,10 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    for (const IntervalSet& intervals : committed_ranges) {
-        for (auto& interval : intervals) {
-            const std::size_t size = interval.upper() - interval.lower();
-            const DAddr device_addr = interval.lower();
+    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
+        range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
+            const std::size_t size = interval_upper - interval_lower;
+            const DAddr device_addr = interval_lower;
             ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
                 const DAddr buffer_start = buffer.CpuAddr();
                 const DAddr buffer_end = buffer_start + buffer.SizeBytes();
@@ -583,77 +575,35 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                         largest_copy = std::max(largest_copy, new_size);
                     };
 
-                    ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
+                    gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
+                                                       add_download);
                 });
             });
-        }
+        });
     }
-    committed_ranges.clear();
+    committed_gpu_modified_ranges.clear();
     if (downloads.empty()) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            async_buffers.emplace_back(std::optional<Async_Buffer>{});
-        }
+        async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-        boost::container::small_vector<BufferCopy, 4> normalized_copies;
-        IntervalSet new_async_range{};
-        runtime.PreCopyBarrier();
-        for (auto& [copy, buffer_id] : downloads) {
-            copy.dst_offset += download_staging.offset;
-            const std::array copies{copy};
-            BufferCopy second_copy{copy};
-            Buffer& buffer = slot_buffers[buffer_id];
-            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-            DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
-            const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
-            async_downloads += std::make_pair(base_interval, 1);
-            buffer.MarkUsage(copy.src_offset, copy.size);
-            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-            normalized_copies.push_back(second_copy);
-        }
-        runtime.PostCopyBarrier();
-        pending_downloads.emplace_back(std::move(normalized_copies));
-        async_buffers.emplace_back(download_staging);
-    } else {
-        if (!Settings::IsGPULevelHigh()) {
-            committed_ranges.clear();
-            uncommitted_ranges.clear();
-        } else {
-            if constexpr (USE_MEMORY_MAPS) {
-                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-                runtime.PreCopyBarrier();
-                for (auto& [copy, buffer_id] : downloads) {
-                    // Have in mind the staging buffer offset for the copy
-                    copy.dst_offset += download_staging.offset;
-                    const std::array copies{copy};
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.MarkUsage(copy.src_offset, copy.size);
-                    runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-                }
-                runtime.PostCopyBarrier();
-                runtime.Finish();
-                for (const auto& [copy, buffer_id] : downloads) {
-                    const Buffer& buffer = slot_buffers[buffer_id];
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    // Undo the modified offset
-                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
-                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
-                }
-            } else {
-                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-                for (const auto& [copy, buffer_id] : downloads) {
-                    Buffer& buffer = slot_buffers[buffer_id];
-                    buffer.ImmediateDownload(copy.src_offset,
-                                             immediate_buffer.subspan(0, copy.size));
-                    const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
-                    device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
-                }
-            }
-        }
-    }
+    auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+    boost::container::small_vector<BufferCopy, 4> normalized_copies;
+    runtime.PreCopyBarrier();
+    for (auto& [copy, buffer_id] : downloads) {
+        copy.dst_offset += download_staging.offset;
+        const std::array copies{copy};
+        BufferCopy second_copy{copy};
+        Buffer& buffer = slot_buffers[buffer_id];
+        second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+        const DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
+        async_downloads.Add(orig_device_addr, copy.size);
+        buffer.MarkUsage(copy.src_offset, copy.size);
+        runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+        normalized_copies.push_back(second_copy);
+    }
+    runtime.PostCopyBarrier();
+    pending_downloads.emplace_back(std::move(normalized_copies));
+    async_buffers.emplace_back(download_staging);
 }
 
 template <class P>
@@ -676,37 +626,31 @@ void BufferCache<P>::PopAsyncBuffers() {
         async_buffers.pop_front();
         return;
     }
-    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-        auto& downloads = pending_downloads.front();
-        auto& async_buffer = async_buffers.front();
-        u8* base = async_buffer->mapped_span.data();
-        const size_t base_offset = async_buffer->offset;
-        for (const auto& copy : downloads) {
-            const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
-            const u64 dst_offset = copy.dst_offset - base_offset;
-            const u8* read_mapped_memory = base + dst_offset;
-            ForEachInOverlapCounter(
-                async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
-                    device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
-                                                   end - start);
-                    if (count == 1) {
-                        const IntervalType base_interval{start, end};
-                        common_ranges.subtract(base_interval);
-                    }
-                });
-            const IntervalType subtract_interval{device_addr, device_addr + copy.size};
-            RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
-        }
-        async_buffers_death_ring.emplace_back(*async_buffer);
-        async_buffers.pop_front();
-        pending_downloads.pop_front();
+    auto& downloads = pending_downloads.front();
+    auto& async_buffer = async_buffers.front();
+    u8* base = async_buffer->mapped_span.data();
+    const size_t base_offset = async_buffer->offset;
+    for (const auto& copy : downloads) {
+        const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
+        const u64 dst_offset = copy.dst_offset - base_offset;
+        const u8* read_mapped_memory = base + dst_offset;
+        async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
+            device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
+                                           end - start);
+        });
+        async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
+            gpu_modified_ranges.Subtract(start, end - start);
+        });
     }
+    async_buffers_death_ring.emplace_back(*async_buffer);
+    async_buffers.pop_front();
+    pending_downloads.pop_front();
 }
 
 template <class P>
 bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     bool is_dirty = false;
-    ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
+    gpu_modified_ranges.ForEachInRange(addr, size, [&](DAddr, DAddr) { is_dirty = true; });
     return is_dirty;
 }
 
@@ -1320,10 +1264,8 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
 template <class P>
 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
     memory_tracker.MarkRegionAsGpuModified(device_addr, size);
-
-    const IntervalType base_interval{device_addr, device_addr + size};
-    common_ranges.add(base_interval);
-    uncommitted_ranges.add(base_interval);
+    gpu_modified_ranges.Add(device_addr, size);
+    uncommitted_gpu_modified_ranges.Add(device_addr, size);
 }
 
 template <class P>
@@ -1600,9 +1542,8 @@ bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
 template <class P>
 void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
                                                 std::span<const u8> inlined_buffer) {
-    const IntervalType subtract_interval{dest_address, dest_address + copy_size};
-    ClearDownload(subtract_interval);
-    common_ranges.subtract(subtract_interval);
+    ClearDownload(dest_address, copy_size);
+    gpu_modified_ranges.Subtract(dest_address, copy_size);
 
     BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
     auto& buffer = slot_buffers[buffer_id];
@@ -1652,12 +1593,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
             largest_copy = std::max(largest_copy, new_size);
         };
 
-        const DAddr start_address = device_addr_out;
-        const DAddr end_address = start_address + range_size;
-        ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
-        const IntervalType subtract_interval{start_address, end_address};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
+        gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
+        ClearDownload(device_addr_out, range_size);
+        gpu_modified_ranges.Subtract(device_addr_out, range_size);
     });
     if (total_size_bytes == 0) {
         return;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 59124458d..448516651 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -13,25 +13,15 @@
 #include <unordered_map>
 #include <vector>
 
-#include <boost/container/small_vector.hpp>
-#define BOOST_NO_MT
-#include <boost/pool/detail/mutex.hpp>
-#undef BOOST_NO_MT
-#include <boost/icl/interval.hpp>
-#include <boost/icl/interval_base_set.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/icl/split_interval_map.hpp>
-#include <boost/pool/pool.hpp>
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/pool/poolfwd.hpp>
-
 #include "common/common_types.h"
 #include "common/div_ceil.h"
 #include "common/literals.h"
 #include "common/lru_cache.h"
 #include "common/microprofile.h"
+#include "common/range_sets.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "common/slot_vector.h"
 #include "video_core/buffer_cache/buffer_base.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
@@ -41,14 +31,8 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/surface.h"
-#include "common/slot_vector.h"
 #include "video_core/texture_cache/types.h"
 
-namespace boost {
-template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
-}
-
 namespace VideoCommon {
 
 MICROPROFILE_DECLARE(GPU_PrepareBuffers);
@@ -184,7 +168,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
     static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
 
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
@@ -202,34 +185,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     using Async_Buffer = typename P::Async_Buffer;
     using MemoryTracker = typename P::MemoryTracker;
 
-    using IntervalCompare = std::less<DAddr>;
-    using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
-    using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
-    using IntervalSet = boost::icl::interval_set<DAddr>;
-    using IntervalType = typename IntervalSet::interval_type;
-
-    template <typename Type>
-    struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
-        // types
-        typedef counter_add_functor<Type> type;
-        typedef boost::icl::identity_based_inplace_combine<Type> base_type;
-
-        // public member functions
-        void operator()(Type& current, const Type& added) const {
-            current += added;
-            if (current < base_type::identity_element()) {
-                current = base_type::identity_element();
-            }
-        }
-
-        // public static functions
-        static void version(Type&){};
-    };
-
-    using OverlapCombine = counter_add_functor<int>;
-    using OverlapSection = boost::icl::inter_section<int>;
-    using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
-
     struct OverlapResult {
         boost::container::small_vector<BufferId, 16> ids;
         DAddr begin;
@@ -240,6 +195,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
 public:
     explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
 
+    ~BufferCache();
+
     void TickFrame();
 
     void WriteMemory(DAddr device_addr, u64 size);
@@ -379,75 +336,6 @@ private:
         }
     }
 
-    template <typename Func>
-    void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            DAddr inter_addr_end = it->upper();
-            DAddr inter_addr = it->lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end);
-        }
-    }
-
-    template <typename Func>
-    void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
-                                 Func&& func) {
-        const DAddr start_address = device_addr;
-        const DAddr end_address = start_address + size;
-        const IntervalType search_interval{start_address, end_address};
-        auto it = current_range.lower_bound(search_interval);
-        if (it == current_range.end()) {
-            return;
-        }
-        auto end_it = current_range.upper_bound(search_interval);
-        for (; it != end_it; it++) {
-            auto& inter = it->first;
-            DAddr inter_addr_end = inter.upper();
-            DAddr inter_addr = inter.lower();
-            if (inter_addr_end > end_address) {
-                inter_addr_end = end_address;
-            }
-            if (inter_addr < start_address) {
-                inter_addr = start_address;
-            }
-            func(inter_addr, inter_addr_end, it->second);
-        }
-    }
-
-    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
-                                    const IntervalType search_interval, int subtract_value) {
-        bool any_removals = false;
-        current_range.add(std::make_pair(search_interval, subtract_value));
-        do {
-            any_removals = false;
-            auto it = current_range.lower_bound(search_interval);
-            if (it == current_range.end()) {
-                return;
-            }
-            auto end_it = current_range.upper_bound(search_interval);
-            for (; it != end_it; it++) {
-                if (it->second <= 0) {
-                    any_removals = true;
-                    current_range.erase(it);
-                    break;
-                }
-            }
-        } while (any_removals);
-    }
-
     static bool IsRangeGranular(DAddr device_addr, size_t size) {
         return (device_addr & ~Core::DEVICE_PAGEMASK) ==
                ((device_addr + size) & ~Core::DEVICE_PAGEMASK);
@@ -552,7 +440,7 @@ private:
 
     [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
 
-    void ClearDownload(IntervalType subtract_interval);
+    void ClearDownload(DAddr base_addr, u64 size);
 
     void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
                                     std::span<const u8> inlined_buffer);
@@ -567,13 +455,12 @@ private:
     u32 last_index_count = 0;
 
     MemoryTracker memory_tracker;
-    IntervalSet uncommitted_ranges;
-    IntervalSet common_ranges;
-    IntervalSet cached_ranges;
-    std::deque<IntervalSet> committed_ranges;
+    Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges;
+    Common::RangeSet<DAddr> gpu_modified_ranges;
+    std::deque<Common::RangeSet<DAddr>> committed_gpu_modified_ranges;
 
     // Async Buffers
-    OverlapCounter async_downloads;
+    Common::SplitRangeSet<DAddr> async_downloads;
     std::deque<std::optional<Async_Buffer>> async_buffers;
     std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
     std::optional<Async_Buffer> current_buffer;
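Note: the IntervalSet members and the OverlapCounter split map deleted above are what Common::RangeSet<DAddr> and Common::SplitRangeSet<DAddr> now wrap (see range_sets.inc earlier in this diff). The following standalone sketch shows the overlap-counting behaviour the removed OverlapCounter/RemoveEachInOverlapCounter pair provided, and that SplitRangeSet::Add/Subtract/DeleteAll now hide behind named calls; it uses boost::icl directly with illustrative values and is not part of the patch.

#include <cstdint>
#include <iostream>
#include <utility>

#include <boost/icl/split_interval_map.hpp>

int main() {
    using Counter = boost::icl::split_interval_map<std::uint64_t, int>;
    using Interval = Counter::interval_type;

    Counter async_downloads;
    // Two queued downloads overlap; the shared span carries a count of 2.
    async_downloads += std::make_pair(Interval::right_open(0x0000, 0x2000), 1);
    async_downloads += std::make_pair(Interval::right_open(0x1000, 0x3000), 1);

    // Force-removing [0x0000, 0x2000) adds a large negative count, then sweeps
    // away every non-positive entry - the loop the removed
    // RemoveEachInOverlapCounter ran, now wrapped by SplitRangeSet::DeleteAll.
    async_downloads += std::make_pair(Interval::right_open(0x0000, 0x2000), -1024);
    bool any_removals = true;
    while (any_removals) {
        any_removals = false;
        for (auto it = async_downloads.begin(); it != async_downloads.end(); ++it) {
            if (it->second <= 0) {
                async_downloads.erase(it);
                any_removals = true;
                break;
            }
        }
    }

    for (const auto& [interval, count] : async_downloads) {
        std::cout << std::hex << interval.lower() << '-' << interval.upper() << ": "
                  << std::dec << count << '\n'; // prints "2000-3000: 1"
    }
    return 0;
}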
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022275fd6..fd471e979 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -251,7 +251,6 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
 
     // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index ac14c9f86..efe960258 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -181,7 +181,6 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
     static constexpr bool USE_MEMORY_MAPS = true;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
-    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
     static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
 };
 