| author | 2023-12-25 07:32:16 +0100 |
|---|---|
| committer | 2024-01-18 21:12:30 -0500 |
| commit | 0a2536a0df1f4aea406f2132d3edda0430acc9d1 (patch) |
| tree | c0ad53890581c9c7e180c5ccb3b66e3c63e3ba64 /src/video_core/buffer_cache |
| parent | SMMU: Implement backing CPU page protect/unprotect (diff) |
SMMU: Initial adaptation to video_core.
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 3 |
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 450 |
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 98 |
| -rw-r--r-- | src/video_core/buffer_cache/memory_tracker_base.h | 18 |
| -rw-r--r-- | src/video_core/buffer_cache/word_manager.h | 24 |
5 files changed, 304 insertions, 289 deletions
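Summary of the API shift these hunks implement: the buffer cache stops depending on VideoCore::RasterizerInterface and Core::Memory::Memory and instead takes the new Tegra::MaxwellDeviceMemoryManager, with every VAddr in its interface becoming a DAddr. The sketch below only illustrates that distinction; the typedefs and the commented constructor comparison mirror the hunks that follow, and nothing else in it is part of this commit.

    #include <cstdint>

    // Both address types are plain 64-bit integers; the distinction is which
    // address space they index.
    using VAddr = std::uint64_t; // guest CPU virtual address (what the cache took before)
    using DAddr = std::uint64_t; // device address translated through the SMMU (what it takes now)

    static_assert(sizeof(VAddr) == sizeof(DAddr), "same width, different address space");

    // Constructor change carried by this commit (signatures copied from the buffer_cache.h hunk):
    //   before: BufferCache(VideoCore::RasterizerInterface& rasterizer_,
    //                       Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
    //   after:  BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);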
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 0bb3bf8ae..40e98e395 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -33,13 +33,12 @@ struct NullBufferParams {};
  *
  * The buffer size and address is forcefully aligned to CPU page boundaries.
  */
-template <class RasterizerInterface>
 class BufferBase {
 public:
     static constexpr u64 BASE_PAGE_BITS = 16;
     static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
 
-    explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_)
+    explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_)
         : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
 
     explicit BufferBase(NullBufferParams) {}
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 6d1fc3887..6fe2e8b93 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -8,16 +8,16 @@
 #include <numeric>
 
 #include "video_core/buffer_cache/buffer_cache_base.h"
+#include "video_core/guest_memory.h"
+#include "video_core/host1x/gpu_device_memory_manager.h"
 
 namespace VideoCommon {
 
 using Core::Memory::YUZU_PAGESIZE;
 
 template <class P>
-BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
-                                                                               rasterizer} {
+BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
+    : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -29,17 +29,17 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
         return;
     }
 
-    const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
-    const s64 min_spacing_expected = device_memory - 1_GiB;
-    const s64 min_spacing_critical = device_memory - 512_MiB;
-    const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
+    const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
+    const s64 min_spacing_expected = device_local_memory - 1_GiB;
+    const s64 min_spacing_critical = device_local_memory - 512_MiB;
+    const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD);
     const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
     const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
     minimum_memory = static_cast<u64>(
-        std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
+        std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
                  DEFAULT_EXPECTED_MEMORY));
     critical_memory = static_cast<u64>(
-        std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
+        std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
                  DEFAULT_CRITICAL_MEMORY));
 }
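The constructor hunk above only renames device_memory to device_local_memory; the budget arithmetic is unchanged. To make the expressions concrete, here is a standalone worked example with an assumed 8 GiB device-local heap and stand-in values for TARGET_THRESHOLD and the DEFAULT_* constants (the real values live in buffer_cache_base.h and are not shown in this diff):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main() {
        using s64 = std::int64_t;
        constexpr s64 GiB = s64{1} << 30;
        constexpr s64 MiB = s64{1} << 20;

        // Stand-in values; the real constants are declared in buffer_cache_base.h.
        const s64 device_local_memory = 8 * GiB;     // assumed VRAM heap size
        const s64 TARGET_THRESHOLD = 4 * GiB;        // assumed
        const s64 DEFAULT_EXPECTED_MEMORY = 1 * GiB; // assumed
        const s64 DEFAULT_CRITICAL_MEMORY = 2 * GiB; // assumed

        // Same arithmetic as the constructor hunk above.
        const s64 min_spacing_expected = device_local_memory - 1 * GiB;   // 7 GiB
        const s64 min_spacing_critical = device_local_memory - 512 * MiB; // 7.5 GiB
        const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); // 4 GiB
        const s64 min_vacancy_expected = (6 * mem_threshold) / 10; // 2.4 GiB
        const s64 min_vacancy_critical = (3 * mem_threshold) / 10; // 1.2 GiB

        const s64 minimum_memory = std::max(
            std::min(device_local_memory - min_vacancy_expected, min_spacing_expected),
            DEFAULT_EXPECTED_MEMORY); // min(5.6 GiB, 7 GiB) -> 5.6 GiB
        const s64 critical_memory = std::max(
            std::min(device_local_memory - min_vacancy_critical, min_spacing_critical),
            DEFAULT_CRITICAL_MEMORY); // min(6.8 GiB, 7.5 GiB) -> 6.8 GiB

        std::cout << "minimum_memory  = " << minimum_memory / double(GiB) << " GiB\n";
        std::cout << "critical_memory = " << critical_memory / double(GiB) << " GiB\n";
    }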
@@ -105,71 +105,72 @@ void BufferCache<P>::TickFrame() {
 }
 
 template <class P>
-void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
-    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
-        const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
+void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
+    if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
+        const IntervalType subtract_interval{device_addr, device_addr + size};
         ClearDownload(subtract_interval);
         common_ranges.subtract(subtract_interval);
     }
-    memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
+    memory_tracker.MarkRegionAsCpuModified(device_addr, size);
 }
 
 template <class P>
-void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
-    const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+void BufferCache<P>::CachedWriteMemory(DAddr device_addr, u64 size) {
+    const bool is_dirty = IsRegionRegistered(device_addr, size);
     if (!is_dirty) {
         return;
     }
-    VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
-    VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
+    DAddr aligned_start = Common::AlignDown(device_addr, YUZU_PAGESIZE);
+    DAddr aligned_end = Common::AlignUp(device_addr + size, YUZU_PAGESIZE);
     if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
-        WriteMemory(cpu_addr, size);
+        WriteMemory(device_addr, size);
         return;
     }
 
     tmp_buffer.resize_destructive(size);
-    cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
+    device_memory.ReadBlockUnsafe(device_addr, tmp_buffer.data(), size);
 
-    InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
+    InlineMemoryImplementation(device_addr, size, tmp_buffer);
 }
 
 template <class P>
-bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
-    const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+bool BufferCache<P>::OnCPUWrite(DAddr device_addr, u64 size) {
+    const bool is_dirty = IsRegionRegistered(device_addr, size);
     if (!is_dirty) {
         return false;
     }
-    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+    if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
         return true;
     }
-    WriteMemory(cpu_addr, size);
+    WriteMemory(device_addr, size);
     return false;
 }
 
 template <class P>
-std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr,
+std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DAddr device_addr,
                                                                               u64 size) {
     std::optional<VideoCore::RasterizerDownloadArea> area{};
     area.emplace();
-    VAddr cpu_addr_start_aligned = Common::AlignDown(cpu_addr, Core::Memory::YUZU_PAGESIZE);
-    VAddr cpu_addr_end_aligned = Common::AlignUp(cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
-    area->start_address = cpu_addr_start_aligned;
-    area->end_address = cpu_addr_end_aligned;
-    if (memory_tracker.IsRegionPreflushable(cpu_addr, size)) {
+    DAddr device_addr_start_aligned = Common::AlignDown(device_addr, Core::Memory::YUZU_PAGESIZE);
+    DAddr device_addr_end_aligned =
+        Common::AlignUp(device_addr + size, Core::Memory::YUZU_PAGESIZE);
+    area->start_address = device_addr_start_aligned;
+    area->end_address = device_addr_end_aligned;
+    if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
         area->preemtive = true;
         return area;
     };
-    area->preemtive =
-        !IsRegionGpuModified(cpu_addr_start_aligned, cpu_addr_end_aligned - cpu_addr_start_aligned);
-    memory_tracker.MarkRegionAsPreflushable(cpu_addr_start_aligned,
-                                            cpu_addr_end_aligned - cpu_addr_start_aligned);
+    area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
+                                           device_addr_end_aligned - device_addr_start_aligned);
+    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
+                                            device_addr_end_aligned - device_addr_start_aligned);
     return area;
 }
 
 template <class P>
-void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
-    ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
-        DownloadBufferMemory(buffer, cpu_addr, size);
+void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
+    ForEachBufferInRange(device_addr, size, [&](BufferId, Buffer& buffer) {
+        DownloadBufferMemory(buffer, device_addr, size);
     });
 }
 
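CachedWriteMemory and GetFlushArea both widen the incoming range to guest-page boundaries before querying the tracker; this diff only changes the address type they do it on. A small standalone sketch of that widening, using local helpers in place of Common::AlignDown/Common::AlignUp and an assumed 4 KiB YUZU_PAGESIZE:

    #include <cstdint>
    #include <iostream>

    using DAddr = std::uint64_t;

    // Local stand-ins for Common::AlignDown / Common::AlignUp (power-of-two sizes).
    constexpr DAddr align_down(DAddr value, DAddr size) { return value & ~(size - 1); }
    constexpr DAddr align_up(DAddr value, DAddr size) { return align_down(value + size - 1, size); }

    int main() {
        constexpr DAddr page_size = 0x1000; // assumed YUZU_PAGESIZE of 4 KiB
        const DAddr device_addr = 0x1234'5678;
        const std::uint64_t size = 0x300;

        // Same widening CachedWriteMemory/GetFlushArea perform before the
        // IsRegionGpuModified / preflush checks.
        const DAddr aligned_start = align_down(device_addr, page_size);    // 0x12345000
        const DAddr aligned_end = align_up(device_addr + size, page_size); // 0x12346000

        std::cout << std::hex << aligned_start << " .. " << aligned_end << '\n';
    }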
@@ -184,8 +185,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
+    const std::optional<DAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<DAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -216,10 +217,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     }};
 
     boost::container::small_vector<IntervalType, 4> tmp_intervals;
-    auto mirror = [&](VAddr base_address, VAddr base_address_end) {
+    auto mirror = [&](DAddr base_address, DAddr base_address_end) {
         const u64 size = base_address_end - base_address;
-        const VAddr diff = base_address - *cpu_src_address;
-        const VAddr new_base_address = *cpu_dest_address + diff;
+        const DAddr diff = base_address - *cpu_src_address;
+        const DAddr new_base_address = *cpu_dest_address + diff;
         const IntervalType add_interval{new_base_address, new_base_address + size};
         tmp_intervals.push_back(add_interval);
         uncommitted_ranges.add(add_interval);
@@ -239,15 +240,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
 
-    Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
-        cpu_memory, *cpu_src_address, amount, &tmp_buffer);
+    Tegra::Memory::DeviceGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
+        device_memory, *cpu_src_address, amount, &tmp_buffer);
     tmp.SetAddressAndSize(*cpu_dest_address, amount);
     return true;
 }
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
+    const std::optional<DAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
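DMACopy swaps Core::Memory::CpuGuestMemoryScoped for Tegra::Memory::DeviceGuestMemoryScoped: the same scoped read-then-write-back idiom, now driven by the device memory manager. The sketch below is a heavily simplified stand-in for that idiom, not the real class; every type and member in it is an assumption made for illustration only.

    #include <cstdint>
    #include <cstring>
    #include <vector>

    using DAddr = std::uint64_t;

    // Toy stand-in for the device memory manager interface the cache talks to.
    struct FakeDeviceMemory {
        std::vector<std::uint8_t> backing = std::vector<std::uint8_t>(1 << 20);
        void ReadBlockUnsafe(DAddr addr, void* dst, std::size_t len) {
            std::memcpy(dst, backing.data() + addr, len);
        }
        void WriteBlockUnsafe(DAddr addr, const void* src, std::size_t len) {
            std::memcpy(backing.data() + addr, src, len);
        }
    };

    // Sketch of the scoped guest-memory pattern: read a block on construction,
    // optionally retarget it, write it back on destruction.
    class ScopedGuestSpan {
    public:
        ScopedGuestSpan(FakeDeviceMemory& memory, DAddr addr, std::size_t len)
            : memory_{memory}, addr_{addr}, buffer_(len) {
            memory_.ReadBlockUnsafe(addr_, buffer_.data(), buffer_.size());
        }
        void SetAddressAndSize(DAddr addr, std::size_t len) {
            addr_ = addr;
            buffer_.resize(len);
        }
        ~ScopedGuestSpan() { memory_.WriteBlockUnsafe(addr_, buffer_.data(), buffer_.size()); }

    private:
        FakeDeviceMemory& memory_;
        DAddr addr_;
        std::vector<std::uint8_t> buffer_;
    };

    int main() {
        FakeDeviceMemory memory;
        // Mirrors the DMACopy usage: read `amount` bytes at the source, then point
        // the span at the destination so the destructor performs the copy.
        ScopedGuestSpan tmp{memory, /*src=*/0x1000, /*amount=*/256};
        tmp.SetAddressAndSize(/*dst=*/0x8000, 256);
    }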
@@ -273,23 +274,23 @@ template <class P>
 std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
                                                                  ObtainBufferSynchronize sync_info,
                                                                  ObtainBufferOperation post_op) {
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
-    if (!cpu_addr) {
+    const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    if (!device_addr) {
         return {&slot_buffers[NULL_BUFFER_ID], 0};
     }
-    return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op);
+    return ObtainCPUBuffer(*device_addr, size, sync_info, post_op);
 }
 
 template <class P>
 std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
-    VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
-    const BufferId buffer_id = FindBuffer(cpu_addr, size);
+    DAddr device_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
+    const BufferId buffer_id = FindBuffer(device_addr, size);
     Buffer& buffer = slot_buffers[buffer_id];
 
     // synchronize op
     switch (sync_info) {
     case ObtainBufferSynchronize::FullSynchronize:
-        SynchronizeBuffer(buffer, cpu_addr, size);
+        SynchronizeBuffer(buffer, device_addr, size);
         break;
     default:
         break;
@@ -297,12 +298,12 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
 
     switch (post_op) {
     case ObtainBufferOperation::MarkAsWritten:
-        MarkWrittenBuffer(buffer_id, cpu_addr, size);
+        MarkWrittenBuffer(buffer_id, device_addr, size);
         break;
     case ObtainBufferOperation::DiscardWrite: {
-        VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64);
-        VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64);
-        IntervalType interval{cpu_addr_start, cpu_addr_end};
+        DAddr device_addr_start = Common::AlignDown(device_addr, 64);
+        DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
+        IntervalType interval{device_addr_start, device_addr_end};
         ClearDownload(interval);
         common_ranges.subtract(interval);
         break;
@@ -311,15 +312,15 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
         break;
     }
 
-    return {&buffer, buffer.Offset(cpu_addr)};
+    return {&buffer, buffer.Offset(device_addr)};
 }
 
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
-        .cpu_addr = *cpu_addr,
+        .device_addr = *device_addr,
         .size = size,
         .buffer_id = BufferId{},
     };
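The `.device_addr = *device_addr` initializers in these hunks rely on the Binding aggregate in buffer_cache_base.h (touched by this commit but not shown in this listing) renaming its address field the same way. A reduced sketch of the shape those initializers assume; the exact declaration is an assumption here.

    #include <cstdint>

    using DAddr = std::uint64_t;
    struct BufferId { std::uint32_t index = 0; }; // simplified stand-in for the slot-vector id

    // Assumed shape of VideoCommon::Binding after this commit (previously `VAddr cpu_addr`).
    struct Binding {
        DAddr device_addr{};
        std::uint32_t size{};
        BufferId buffer_id{};
    };

    int main() {
        // Matches the initializer pattern used throughout the hunks above.
        const Binding binding{
            .device_addr = 0x1000,
            .size = 64,
            .buffer_id = BufferId{},
        };
        return binding.size == 64 ? 0 : 1;
    }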
@@ -555,16 +556,17 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     for (const IntervalSet& intervals : committed_ranges) {
         for (auto& interval : intervals) {
             const std::size_t size = interval.upper() - interval.lower();
-            const VAddr cpu_addr = interval.lower();
-            ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
-                const VAddr buffer_start = buffer.CpuAddr();
-                const VAddr buffer_end = buffer_start + buffer.SizeBytes();
-                const VAddr new_start = std::max(buffer_start, cpu_addr);
-                const VAddr new_end = std::min(buffer_end, cpu_addr + size);
+            const DAddr device_addr = interval.lower();
+            ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+                const DAddr buffer_start = buffer.CpuAddr();
+                const DAddr buffer_end = buffer_start + buffer.SizeBytes();
+                const DAddr new_start = std::max(buffer_start, device_addr);
+                const DAddr new_end = std::min(buffer_end, device_addr + size);
                 memory_tracker.ForEachDownloadRange(
-                    new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
-                        const VAddr buffer_addr = buffer.CpuAddr();
-                        const auto add_download = [&](VAddr start, VAddr end) {
+                    new_start, new_end - new_start, false,
+                    [&](u64 device_addr_out, u64 range_size) {
+                        const DAddr buffer_addr = buffer.CpuAddr();
+                        const auto add_download = [&](DAddr start, DAddr end) {
                             const u64 new_offset = start - buffer_addr;
                             const u64 new_size = end - start;
                             downloads.push_back({
@@ -582,7 +584,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                             largest_copy = std::max(largest_copy, new_size);
                         };
 
-                        ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download);
+                        ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
                     });
             });
         }
@@ -605,8 +607,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             BufferCopy second_copy{copy};
             Buffer& buffer = slot_buffers[buffer_id];
             second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-            VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
-            const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
+            DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
+            const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
             async_downloads += std::make_pair(base_interval, 1);
             buffer.MarkUsage(copy.src_offset, copy.size);
             runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
@@ -635,11 +637,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
         runtime.Finish();
         for (const auto& [copy, buffer_id] : downloads) {
             const Buffer& buffer = slot_buffers[buffer_id];
-            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+            const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
             // Undo the modified offset
             const u64 dst_offset = copy.dst_offset - download_staging.offset;
             const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-            cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+            device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
         }
     } else {
         const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
@@ -647,8 +649,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
             Buffer& buffer = slot_buffers[buffer_id];
             buffer.ImmediateDownload(copy.src_offset,
                                      immediate_buffer.subspan(0, copy.size));
-            const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+            const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
+            device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
         }
     }
 }
@@ -681,19 +683,19 @@ void BufferCache<P>::PopAsyncBuffers() {
             u8* base = async_buffer->mapped_span.data();
             const size_t base_offset = async_buffer->offset;
             for (const auto& copy : downloads) {
-                const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
+                const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
                 const u64 dst_offset = copy.dst_offset - base_offset;
                 const u8* read_mapped_memory = base + dst_offset;
                 ForEachInOverlapCounter(
-                    async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
-                        cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
-                                                    end - start);
+                    async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
+                        device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
+                                                       end - start);
                         if (count == 1) {
                             const IntervalType base_interval{start, end};
                             common_ranges.subtract(base_interval);
                         }
                     });
-                const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
+                const IntervalType subtract_interval{device_addr, device_addr + copy.size};
                 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
             }
             async_buffers_death_ring.emplace_back(*async_buffer);
@@ -703,15 +705,15 @@ void BufferCache<P>::PopAsyncBuffers() {
 }
 
 template <class P>
-bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     bool is_dirty = false;
-    ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
+    ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
     return is_dirty;
 }
 
 template <class P>
-bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
-    const VAddr end_addr = addr + size;
+bool BufferCache<P>::IsRegionRegistered(DAddr addr, size_t size) {
+    const DAddr end_addr = addr + size;
     const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
     for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
         const BufferId buffer_id = page_table[page];
@@ -720,8 +722,8 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
             continue;
         }
         Buffer& buffer = slot_buffers[buffer_id];
-        const VAddr buf_start_addr = buffer.CpuAddr();
-        const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
+        const DAddr buf_start_addr = buffer.CpuAddr();
+        const DAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
         if (buf_start_addr < end_addr && addr < buf_end_addr) {
             return true;
         }
@@ -731,7 +733,7 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
 }
 
 template <class P>
-bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
+bool BufferCache<P>::IsRegionCpuModified(DAddr addr, size_t size) {
     return memory_tracker.IsRegionCpuModified(addr, size);
 }
 
@@ -739,7 +741,7 @@ template <class P>
 void BufferCache<P>::BindHostIndexBuffer() {
     Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
     TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
-    const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
+    const u32 offset = buffer.Offset(channel_state->index_buffer.device_addr);
     const u32 size = channel_state->index_buffer.size;
     const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
     if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
@@ -754,7 +756,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
         }
     } else {
-        SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
+        SynchronizeBuffer(buffer, channel_state->index_buffer.device_addr, size);
     }
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
         const u32 new_offset =
@@ -777,7 +779,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         const Binding& binding = channel_state->vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
-        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+        SynchronizeBuffer(buffer, binding.device_addr, binding.size);
         if (!flags[Dirty::VertexBuffer0 + index]) {
             continue;
         }
@@ -797,7 +799,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         Buffer& buffer = slot_buffers[binding.buffer_id];
 
         const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         buffer.MarkUsage(offset, binding.size);
 
         host_bindings.buffers.push_back(&buffer);
@@ -814,7 +816,7 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
     const auto bind_buffer = [this](const Binding& binding) {
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
-        SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
+        SynchronizeBuffer(buffer, binding.device_addr, binding.size);
     };
     if (current_draw_indirect->include_count) {
         bind_buffer(channel_state->count_buffer_binding);
@@ -842,13 +844,13 @@ template <class P>
 void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
                                                    bool needs_bind) {
     const Binding& binding = channel_state->uniform_buffers[stage][index];
-    const VAddr cpu_addr = binding.cpu_addr;
+    const DAddr device_addr = binding.device_addr;
     const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer, binding.buffer_id);
     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
                                  size <= channel_state->uniform_buffer_skip_cache_size &&
-                                 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
+                                 !memory_tracker.IsRegionGpuModified(device_addr, size);
     if (use_fast_buffer) {
         if constexpr (IS_OPENGL) {
             if (runtime.HasFastBufferSubData()) {
@@ -862,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
                 runtime.BindFastUniformBuffer(stage, binding_index, size);
             }
-            const auto span = ImmediateBufferWithData(cpu_addr, size);
+            const auto span = ImmediateBufferWithData(device_addr, size);
             runtime.PushFastUniformBuffer(stage, binding_index, span);
             return;
         }
@@ -873,11 +875,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
         }
         // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
-        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
+        device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
         return;
     }
     // Classic cached path
-    const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
+    const bool sync_cached = SynchronizeBuffer(buffer, device_addr, size);
     if (sync_cached) {
         ++channel_state->uniform_cache_hits[0];
     }
@@ -892,7 +894,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     if (!needs_bind) {
         return;
     }
-    const u32 offset = buffer.Offset(cpu_addr);
+    const u32 offset = buffer.Offset(device_addr);
     if constexpr (IS_OPENGL) {
         // Fast buffer will be unbound
         channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
@@ -920,14 +922,14 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         buffer.MarkUsage(offset, size);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
 
         if (is_written) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+            MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
         }
 
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -945,14 +947,14 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
         const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         const u32 size = binding.size;
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
         const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
         if (is_written) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+            MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
         }
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         const PixelFormat format = binding.format;
         buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -982,11 +984,11 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
-        MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         buffer.MarkUsage(offset, size);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -1011,9 +1013,9 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size =
             std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         buffer.MarkUsage(offset, size);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
             runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size);
@@ -1032,15 +1034,15 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         buffer.MarkUsage(offset, size);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
 
         if (is_written) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+            MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
         }
 
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
@@ -1058,15 +1060,15 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
         const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         const u32 size = binding.size;
-        SynchronizeBuffer(buffer, binding.cpu_addr, size);
+        SynchronizeBuffer(buffer, binding.device_addr, size);
 
         const bool is_written =
             ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
         if (is_written) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+            MarkWrittenBuffer(binding.buffer_id, binding.device_addr, size);
         }
 
-        const u32 offset = buffer.Offset(binding.cpu_addr);
+        const u32 offset = buffer.Offset(binding.device_addr);
         const PixelFormat format = binding.format;
         buffer.MarkUsage(offset, size);
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1131,7 +1133,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
             inline_buffer_id = CreateBuffer(0, buffer_size);
         }
         channel_state->index_buffer = Binding{
-            .cpu_addr = 0,
+            .device_addr = 0,
             .size = inline_index_size,
             .buffer_id = inline_buffer_id,
         };
@@ -1140,19 +1142,19 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
     const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size =
         (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
-    if (size == 0 || !cpu_addr) {
+    if (size == 0 || !device_addr) {
         channel_state->index_buffer = NULL_BINDING;
         return;
     }
     channel_state->index_buffer = Binding{
-        .cpu_addr = *cpu_addr,
+        .device_addr = *device_addr,
         .size = size,
-        .buffer_id = FindBuffer(*cpu_addr, size),
+        .buffer_id = FindBuffer(*device_addr, size),
     };
 }
 
@@ -1178,19 +1180,19 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     const auto& limit = maxwell3d->regs.vertex_stream_limits[index];
     const GPUVAddr gpu_addr_begin = array.Address();
    const GPUVAddr gpu_addr_end = limit.Address() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     u32 size = address_size; // TODO: Analyze stride and number of vertices
-    if (array.enable == 0 || size == 0 || !cpu_addr) {
+    if (array.enable == 0 || size == 0 || !device_addr) {
         channel_state->vertex_buffers[index] = NULL_BINDING;
         return;
     }
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
     }
-    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+    const BufferId buffer_id = FindBuffer(*device_addr, size);
     channel_state->vertex_buffers[index] = Binding{
-        .cpu_addr = *cpu_addr,
+        .device_addr = *device_addr,
         .size = size,
         .buffer_id = buffer_id,
     };
@@ -1199,15 +1201,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
 template <class P>
 void BufferCache<P>::UpdateDrawIndirect() {
     const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
-        const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
+        const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+        if (!device_addr) {
            binding = NULL_BINDING;
             return;
         }
         binding = Binding{
-            .cpu_addr = *cpu_addr,
+            .device_addr = *device_addr,
             .size = static_cast<u32>(size),
-            .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
+            .buffer_id = FindBuffer(*device_addr, static_cast<u32>(size)),
         };
     };
     if (current_draw_indirect->include_count) {
@@ -1231,7 +1233,7 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
             channel_state->dirty_uniform_buffers[stage] |= 1U << index;
         }
         // Resolve buffer
        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
     });
 }
 
@@ -1240,7 +1242,7 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
     ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
         // Resolve buffer
         Binding& binding = channel_state->storage_buffers[stage][index];
-        const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        const BufferId buffer_id = FindBuffer(binding.device_addr, binding.size);
         binding.buffer_id = buffer_id;
     });
 }
@@ -1249,7 +1251,7 @@ template <class P>
 void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
     ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
         Binding& binding = channel_state->texture_buffers[stage][index];
-        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
     });
 }
 
@@ -1268,14 +1270,14 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
     const auto& binding = maxwell3d->regs.transform_feedback.buffers[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.start_offset;
     const u32 size = binding.size;
-    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
-    if (binding.enable == 0 || size == 0 || !cpu_addr) {
+    const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
+    if (binding.enable == 0 || size == 0 || !device_addr) {
         channel_state->transform_feedback_buffers[index] = NULL_BINDING;
         return;
     }
-    const BufferId buffer_id = FindBuffer(*cpu_addr, size);
+    const BufferId buffer_id = FindBuffer(*device_addr, size);
     channel_state->transform_feedback_buffers[index] = Binding{
-        .cpu_addr = *cpu_addr,
+        .device_addr = *device_addr,
         .size = size,
         .buffer_id = buffer_id,
     };
@@ -1289,13 +1291,13 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
         const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
-            if (cpu_addr) {
-                binding.cpu_addr = *cpu_addr;
+            const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
+            if (device_addr) {
+                binding.device_addr = *device_addr;
                 binding.size = cbuf.size;
             }
         }
-        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
     });
 }
 
@@ -1304,7 +1306,7 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
     ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
         // Resolve buffer
         Binding& binding = channel_state->compute_storage_buffers[index];
-        binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
+        binding.buffer_id = FindBuffer(binding.device_addr, binding.size);
     });
 }
 
| @@ -1312,45 +1314,63 @@ template <class P> | |||
| 1312 | void BufferCache<P>::UpdateComputeTextureBuffers() { | 1314 | void BufferCache<P>::UpdateComputeTextureBuffers() { |
| 1313 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { | 1315 | ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { |
| 1314 | Binding& binding = channel_state->compute_texture_buffers[index]; | 1316 | Binding& binding = channel_state->compute_texture_buffers[index]; |
| 1315 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1317 | binding.buffer_id = FindBuffer(binding.device_addr, binding.size); |
| 1316 | }); | 1318 | }); |
| 1317 | } | 1319 | } |
| 1318 | 1320 | ||
| 1319 | template <class P> | 1321 | template <class P> |
| 1320 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1322 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) { |
| 1321 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); | 1323 | memory_tracker.MarkRegionAsGpuModified(device_addr, size); |
| 1322 | 1324 | ||
| 1323 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1325 | const IntervalType base_interval{device_addr, device_addr + size}; |
| 1324 | common_ranges.add(base_interval); | 1326 | common_ranges.add(base_interval); |
| 1325 | uncommitted_ranges.add(base_interval); | 1327 | uncommitted_ranges.add(base_interval); |
| 1326 | } | 1328 | } |
| 1327 | 1329 | ||
| 1328 | template <class P> | 1330 | template <class P> |
| 1329 | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | 1331 | BufferId BufferCache<P>::FindBuffer(DAddr device_addr, u32 size) { |
| 1330 | if (cpu_addr == 0) { | 1332 | if (device_addr == 0) { |
| 1331 | return NULL_BUFFER_ID; | 1333 | return NULL_BUFFER_ID; |
| 1332 | } | 1334 | } |
| 1333 | const u64 page = cpu_addr >> CACHING_PAGEBITS; | 1335 | const u64 page = device_addr >> CACHING_PAGEBITS; |
| 1334 | const BufferId buffer_id = page_table[page]; | 1336 | const BufferId buffer_id = page_table[page]; |
| 1335 | if (!buffer_id) { | 1337 | if (!buffer_id) { |
| 1336 | return CreateBuffer(cpu_addr, size); | 1338 | return CreateBuffer(device_addr, size); |
| 1337 | } | 1339 | } |
| 1338 | const Buffer& buffer = slot_buffers[buffer_id]; | 1340 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 1339 | if (buffer.IsInBounds(cpu_addr, size)) { | 1341 | if (buffer.IsInBounds(device_addr, size)) { |
| 1340 | return buffer_id; | 1342 | return buffer_id; |
| 1341 | } | 1343 | } |
| 1342 | return CreateBuffer(cpu_addr, size); | 1344 | return CreateBuffer(device_addr, size); |
| 1343 | } | 1345 | } |
| 1344 | 1346 | ||
| 1345 | template <class P> | 1347 | template <class P> |
| 1346 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1348 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(DAddr device_addr, |
| 1347 | u32 wanted_size) { | 1349 | u32 wanted_size) { |
| 1348 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | 1350 | static constexpr int STREAM_LEAP_THRESHOLD = 16; |
| 1349 | boost::container::small_vector<BufferId, 16> overlap_ids; | 1351 | boost::container::small_vector<BufferId, 16> overlap_ids; |
| 1350 | VAddr begin = cpu_addr; | 1352 | DAddr begin = device_addr; |
| 1351 | VAddr end = cpu_addr + wanted_size; | 1353 | DAddr end = device_addr + wanted_size; |
| 1352 | int stream_score = 0; | 1354 | int stream_score = 0; |
| 1353 | bool has_stream_leap = false; | 1355 | bool has_stream_leap = false; |
| 1356 | auto expand_begin = [&](DAddr add_value) { | ||
| 1357 | static constexpr DAddr min_page = CACHING_PAGESIZE + Core::Memory::YUZU_PAGESIZE; | ||
| 1358 | if (add_value > begin - min_page) { | ||
| 1359 | begin = min_page; | ||
| 1360 | device_addr = Core::Memory::YUZU_PAGESIZE; | ||
| 1361 | return; | ||
| 1362 | } | ||
| 1363 | begin -= add_value; | ||
| 1364 | device_addr = begin - CACHING_PAGESIZE; | ||
| 1365 | }; | ||
| 1366 | auto expand_end = [&](DAddr add_value) { | ||
| 1367 | static constexpr DAddr max_page = 1ULL << Tegra::MaxwellDeviceMemoryManager::AS_BITS; | ||
| 1368 | if (add_value > max_page - end) { | ||
| 1369 | end = max_page; | ||
| 1370 | return; | ||
| 1371 | } | ||
| 1372 | end += add_value; | ||
| 1373 | }; | ||
| 1354 | if (begin == 0) { | 1374 | if (begin == 0) { |
| 1355 | return OverlapResult{ | 1375 | return OverlapResult{ |
| 1356 | .ids = std::move(overlap_ids), | 1376 | .ids = std::move(overlap_ids), |
| @@ -1359,9 +1379,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1359 | .has_stream_leap = has_stream_leap, | 1379 | .has_stream_leap = has_stream_leap, |
| 1360 | }; | 1380 | }; |
| 1361 | } | 1381 | } |
| 1362 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); | 1382 | for (; device_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1363 | cpu_addr += CACHING_PAGESIZE) { | 1383 | device_addr += CACHING_PAGESIZE) { |
| 1364 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; | 1384 | const BufferId overlap_id = page_table[device_addr >> CACHING_PAGEBITS]; |
| 1365 | if (!overlap_id) { | 1385 | if (!overlap_id) { |
| 1366 | continue; | 1386 | continue; |
| 1367 | } | 1387 | } |
| @@ -1371,12 +1391,12 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1371 | } | 1391 | } |
| 1372 | overlap_ids.push_back(overlap_id); | 1392 | overlap_ids.push_back(overlap_id); |
| 1373 | overlap.Pick(); | 1393 | overlap.Pick(); |
| 1374 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | 1394 | const DAddr overlap_device_addr = overlap.CpuAddr(); |
| 1375 | const bool expands_left = overlap_cpu_addr < begin; | 1395 | const bool expands_left = overlap_device_addr < begin; |
| 1376 | if (expands_left) { | 1396 | if (expands_left) { |
| 1377 | begin = overlap_cpu_addr; | 1397 | begin = overlap_device_addr; |
| 1378 | } | 1398 | } |
| 1379 | const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); | 1399 | const DAddr overlap_end = overlap_device_addr + overlap.SizeBytes(); |
| 1380 | const bool expands_right = overlap_end > end; | 1400 | const bool expands_right = overlap_end > end; |
| 1381 | if (overlap_end > end) { | 1401 | if (overlap_end > end) { |
| 1382 | end = overlap_end; | 1402 | end = overlap_end; |
| @@ -1387,11 +1407,10 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1387 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1407 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1388 | has_stream_leap = true; | 1408 | has_stream_leap = true; |
| 1389 | if (expands_right) { | 1409 | if (expands_right) { |
| 1390 | begin -= CACHING_PAGESIZE * 256; | 1410 | expand_begin(CACHING_PAGESIZE * 128); |
| 1391 | cpu_addr = begin - CACHING_PAGESIZE; | ||
| 1392 | } | 1411 | } |
| 1393 | if (expands_left) { | 1412 | if (expands_left) { |
| 1394 | end += CACHING_PAGESIZE * 256; | 1413 | expand_end(CACHING_PAGESIZE * 128); |
| 1395 | } | 1414 | } |
| 1396 | } | 1415 | } |
| 1397 | } | 1416 | } |
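Editor's note: the new expand_begin/expand_end lambdas replace the raw begin/end arithmetic of the old code so the stream-leap growth can neither underflow below the first caching page nor run past the SMMU address space. A self-contained sketch of the same clamping, using stand-in constants for YUZU_PAGESIZE, CACHING_PAGESIZE and the address-space limit:

    #include <cstdint>

    using DAddr = std::uint64_t;

    // Stand-in constants; the real values come from core memory and the
    // Maxwell device memory manager.
    constexpr DAddr kPageSize = 0x1000;            // YUZU_PAGESIZE stand-in
    constexpr DAddr kCachingPageSize = 0x10000;    // CACHING_PAGESIZE stand-in
    constexpr DAddr kAddressSpaceEnd = 1ULL << 34; // 1ULL << AS_BITS stand-in

    // Grow the range start downwards, clamping so it never drops below the
    // first caching page; the page-walk cursor is pulled back alongside it.
    void ExpandBegin(DAddr& begin, DAddr& cursor, DAddr add_value) {
        constexpr DAddr min_page = kCachingPageSize + kPageSize;
        if (begin < min_page + add_value) { // guard written to avoid unsigned underflow
            begin = min_page;
            cursor = kPageSize;
            return;
        }
        begin -= add_value;
        cursor = begin - kCachingPageSize;
    }

    // Grow the range end upwards, clamping at the top of the device address space.
    void ExpandEnd(DAddr& end, DAddr add_value) {
        if (add_value > kAddressSpaceEnd - end) {
            end = kAddressSpaceEnd;
            return;
        }
        end += add_value;
    }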
| @@ -1424,13 +1443,13 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1424 | } | 1443 | } |
| 1425 | 1444 | ||
| 1426 | template <class P> | 1445 | template <class P> |
| 1427 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1446 | BufferId BufferCache<P>::CreateBuffer(DAddr device_addr, u32 wanted_size) { |
| 1428 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | 1447 | DAddr device_addr_end = Common::AlignUp(device_addr + wanted_size, CACHING_PAGESIZE); |
| 1429 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | 1448 | device_addr = Common::AlignDown(device_addr, CACHING_PAGESIZE); |
| 1430 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | 1449 | wanted_size = static_cast<u32>(device_addr_end - device_addr); |
| 1431 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1450 | const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); |
| 1432 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1451 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1433 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1452 | const BufferId new_buffer_id = slot_buffers.insert(runtime, overlap.begin, size); |
| 1434 | auto& new_buffer = slot_buffers[new_buffer_id]; | 1453 | auto& new_buffer = slot_buffers[new_buffer_id]; |
| 1435 | const size_t size_bytes = new_buffer.SizeBytes(); | 1454 | const size_t size_bytes = new_buffer.SizeBytes(); |
| 1436 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); | 1455 | runtime.ClearBuffer(new_buffer, 0, size_bytes, 0); |
| @@ -1465,10 +1484,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1465 | total_used_memory -= Common::AlignUp(size, 1024); | 1484 | total_used_memory -= Common::AlignUp(size, 1024); |
| 1466 | lru_cache.Free(buffer.getLRUID()); | 1485 | lru_cache.Free(buffer.getLRUID()); |
| 1467 | } | 1486 | } |
| 1468 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1487 | const DAddr device_addr_begin = buffer.CpuAddr(); |
| 1469 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1488 | const DAddr device_addr_end = device_addr_begin + size; |
| 1470 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; | 1489 | const u64 page_begin = device_addr_begin / CACHING_PAGESIZE; |
| 1471 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); | 1490 | const u64 page_end = Common::DivCeil(device_addr_end, CACHING_PAGESIZE); |
| 1472 | for (u64 page = page_begin; page != page_end; ++page) { | 1491 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1473 | if constexpr (insert) { | 1492 | if constexpr (insert) { |
| 1474 | page_table[page] = buffer_id; | 1493 | page_table[page] = buffer_id; |
| @@ -1486,15 +1505,15 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1486 | } | 1505 | } |
| 1487 | 1506 | ||
| 1488 | template <class P> | 1507 | template <class P> |
| 1489 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1508 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) { |
| 1490 | boost::container::small_vector<BufferCopy, 4> copies; | 1509 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1491 | u64 total_size_bytes = 0; | 1510 | u64 total_size_bytes = 0; |
| 1492 | u64 largest_copy = 0; | 1511 | u64 largest_copy = 0; |
| 1493 | VAddr buffer_start = buffer.CpuAddr(); | 1512 | DAddr buffer_start = buffer.CpuAddr(); |
| 1494 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1513 | memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1495 | copies.push_back(BufferCopy{ | 1514 | copies.push_back(BufferCopy{ |
| 1496 | .src_offset = total_size_bytes, | 1515 | .src_offset = total_size_bytes, |
| 1497 | .dst_offset = cpu_addr_out - buffer_start, | 1516 | .dst_offset = device_addr_out - buffer_start, |
| 1498 | .size = range_size, | 1517 | .size = range_size, |
| 1499 | }); | 1518 | }); |
| 1500 | total_size_bytes += range_size; | 1519 | total_size_bytes += range_size; |
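Editor's note: SynchronizeBuffer walks the dirty upload ranges reported by the memory tracker and packs them back-to-back into staging space, while destination offsets stay relative to the buffer's base device address. A small sketch of that gathering step, with an illustrative DirtyRange input standing in for the tracker callback:

    #include <cstdint>
    #include <vector>

    using u64 = std::uint64_t;

    struct BufferCopy {
        u64 src_offset; // running offset into the staging upload
        u64 dst_offset; // offset into the destination buffer
        u64 size;
    };

    struct DirtyRange {
        u64 device_addr;
        u64 size;
    };

    // Pack each dirty range directly behind the previous one in staging memory.
    std::vector<BufferCopy> GatherUploadCopies(u64 buffer_start,
                                               const std::vector<DirtyRange>& dirty) {
        std::vector<BufferCopy> copies;
        u64 total_size_bytes = 0;
        for (const DirtyRange& range : dirty) {
            copies.push_back({total_size_bytes, range.device_addr - buffer_start, range.size});
            total_size_bytes += range.size;
        }
        return copies;
    }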
| @@ -1526,14 +1545,14 @@ void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1526 | std::span<u8> immediate_buffer; | 1545 | std::span<u8> immediate_buffer; |
| 1527 | for (const BufferCopy& copy : copies) { | 1546 | for (const BufferCopy& copy : copies) { |
| 1528 | std::span<const u8> upload_span; | 1547 | std::span<const u8> upload_span; |
| 1529 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1548 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1530 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1549 | if (IsRangeGranular(device_addr, copy.size)) { |
| 1531 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1550 | upload_span = std::span(device_memory.GetPointer<u8>(device_addr), copy.size); |
| 1532 | } else { | 1551 | } else { |
| 1533 | if (immediate_buffer.empty()) { | 1552 | if (immediate_buffer.empty()) { |
| 1534 | immediate_buffer = ImmediateBuffer(largest_copy); | 1553 | immediate_buffer = ImmediateBuffer(largest_copy); |
| 1535 | } | 1554 | } |
| 1536 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1555 | device_memory.ReadBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); |
| 1537 | upload_span = immediate_buffer.subspan(0, copy.size); | 1556 | upload_span = immediate_buffer.subspan(0, copy.size); |
| 1538 | } | 1557 | } |
| 1539 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | 1558 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| @@ -1550,8 +1569,8 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1550 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1569 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1551 | for (BufferCopy& copy : copies) { | 1570 | for (BufferCopy& copy : copies) { |
| 1552 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1571 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1553 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1572 | const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1554 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1573 | device_memory.ReadBlockUnsafe(device_addr, src_pointer, copy.size); |
| 1555 | 1574 | ||
| 1556 | // Apply the staging offset | 1575 | // Apply the staging offset |
| 1557 | copy.src_offset += upload_staging.offset; | 1576 | copy.src_offset += upload_staging.offset; |
| @@ -1562,14 +1581,14 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, | |||
| 1562 | } | 1581 | } |
| 1563 | 1582 | ||
| 1564 | template <class P> | 1583 | template <class P> |
| 1565 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | 1584 | bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size, |
| 1566 | std::span<const u8> inlined_buffer) { | 1585 | std::span<const u8> inlined_buffer) { |
| 1567 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); | 1586 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); |
| 1568 | if (!is_dirty) { | 1587 | if (!is_dirty) { |
| 1569 | return false; | 1588 | return false; |
| 1570 | } | 1589 | } |
| 1571 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); | 1590 | DAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); |
| 1572 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | 1591 | DAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); |
| 1573 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | 1592 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { |
| 1574 | return false; | 1593 | return false; |
| 1575 | } | 1594 | } |
| @@ -1580,7 +1599,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1580 | } | 1599 | } |
| 1581 | 1600 | ||
| 1582 | template <class P> | 1601 | template <class P> |
| 1583 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 1602 | void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 1584 | std::span<const u8> inlined_buffer) { | 1603 | std::span<const u8> inlined_buffer) { |
| 1585 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | 1604 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |
| 1586 | ClearDownload(subtract_interval); | 1605 | ClearDownload(subtract_interval); |
| @@ -1612,14 +1631,14 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | |||
| 1612 | } | 1631 | } |
| 1613 | 1632 | ||
| 1614 | template <class P> | 1633 | template <class P> |
| 1615 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | 1634 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 size) { |
| 1616 | boost::container::small_vector<BufferCopy, 1> copies; | 1635 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1617 | u64 total_size_bytes = 0; | 1636 | u64 total_size_bytes = 0; |
| 1618 | u64 largest_copy = 0; | 1637 | u64 largest_copy = 0; |
| 1619 | memory_tracker.ForEachDownloadRangeAndClear( | 1638 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1620 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | 1639 | device_addr, size, [&](u64 device_addr_out, u64 range_size) { |
| 1621 | const VAddr buffer_addr = buffer.CpuAddr(); | 1640 | const DAddr buffer_addr = buffer.CpuAddr(); |
| 1622 | const auto add_download = [&](VAddr start, VAddr end) { | 1641 | const auto add_download = [&](DAddr start, DAddr end) { |
| 1623 | const u64 new_offset = start - buffer_addr; | 1642 | const u64 new_offset = start - buffer_addr; |
| 1624 | const u64 new_size = end - start; | 1643 | const u64 new_size = end - start; |
| 1625 | copies.push_back(BufferCopy{ | 1644 | copies.push_back(BufferCopy{ |
| @@ -1634,8 +1653,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1634 | largest_copy = std::max(largest_copy, new_size); | 1653 | largest_copy = std::max(largest_copy, new_size); |
| 1635 | }; | 1654 | }; |
| 1636 | 1655 | ||
| 1637 | const VAddr start_address = cpu_addr_out; | 1656 | const DAddr start_address = device_addr_out; |
| 1638 | const VAddr end_address = start_address + range_size; | 1657 | const DAddr end_address = start_address + range_size; |
| 1639 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); | 1658 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1640 | const IntervalType subtract_interval{start_address, end_address}; | 1659 | const IntervalType subtract_interval{start_address, end_address}; |
| 1641 | ClearDownload(subtract_interval); | 1660 | ClearDownload(subtract_interval); |
| @@ -1658,18 +1677,18 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1658 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); | 1677 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true); |
| 1659 | runtime.Finish(); | 1678 | runtime.Finish(); |
| 1660 | for (const BufferCopy& copy : copies) { | 1679 | for (const BufferCopy& copy : copies) { |
| 1661 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1680 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1662 | // Undo the modified offset | 1681 | // Undo the modified offset |
| 1663 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 1682 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 1664 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | 1683 | const u8* copy_mapped_memory = mapped_memory + dst_offset; |
| 1665 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | 1684 | device_memory.WriteBlockUnsafe(copy_device_addr, copy_mapped_memory, copy.size); |
| 1666 | } | 1685 | } |
| 1667 | } else { | 1686 | } else { |
| 1668 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 1687 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); |
| 1669 | for (const BufferCopy& copy : copies) { | 1688 | for (const BufferCopy& copy : copies) { |
| 1670 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 1689 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); |
| 1671 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | 1690 | const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset; |
| 1672 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | 1691 | device_memory.WriteBlockUnsafe(copy_device_addr, immediate_buffer.data(), copy.size); |
| 1673 | } | 1692 | } |
| 1674 | } | 1693 | } |
| 1675 | } | 1694 | } |
| @@ -1758,20 +1777,20 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1758 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); | 1777 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); |
| 1759 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; | 1778 | const u32 aligned_size = static_cast<u32>(gpu_addr - aligned_gpu_addr) + size; |
| 1760 | 1779 | ||
| 1761 | const std::optional<VAddr> aligned_cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); | 1780 | const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); |
| 1762 | if (!aligned_cpu_addr || size == 0) { | 1781 | if (!aligned_device_addr || size == 0) { |
| 1763 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1782 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 1764 | return NULL_BINDING; | 1783 | return NULL_BINDING; |
| 1765 | } | 1784 | } |
| 1766 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1785 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1767 | ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", | 1786 | ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}", |
| 1768 | cbuf_index); | 1787 | cbuf_index); |
| 1769 | // The end address used for size calculation does not need to be aligned | 1788 | // The end address used for size calculation does not need to be aligned |
| 1770 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1789 | const DAddr cpu_end = Common::AlignUp(*device_addr + size, Core::Memory::YUZU_PAGESIZE); |
| 1771 | 1790 | ||
| 1772 | const Binding binding{ | 1791 | const Binding binding{ |
| 1773 | .cpu_addr = *aligned_cpu_addr, | 1792 | .device_addr = *aligned_device_addr, |
| 1774 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_cpu_addr), | 1793 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *aligned_device_addr), |
| 1775 | .buffer_id = BufferId{}, | 1794 | .buffer_id = BufferId{}, |
| 1776 | }; | 1795 | }; |
| 1777 | return binding; | 1796 | return binding; |
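Editor's note: StorageBufferBinding aligns the constant-buffer address down to the required alignment and compensates the size; read-only bindings are additionally padded out to the end of the guest page they touch. A sketch of just that arithmetic, collapsed to a single address space (the real code translates GPU addresses through gpu_memory first) and assuming power-of-two alignments:

    #include <cstdint>

    using u32 = std::uint32_t;
    using u64 = std::uint64_t;

    constexpr u64 AlignDown(u64 value, u64 alignment) {
        return value & ~(alignment - 1);
    }
    constexpr u64 AlignUp(u64 value, u64 alignment) {
        return (value + alignment - 1) & ~(alignment - 1);
    }

    struct BindingExtent {
        u64 aligned_addr;
        u32 size;
    };

    // Aligning the start down grows the size by the bytes skipped; read-only
    // bindings are padded out to the next guest page boundary.
    constexpr BindingExtent ComputeExtent(u64 addr, u32 size, u64 alignment, u64 page_size,
                                          bool is_written) {
        const u64 aligned_addr = AlignDown(addr, alignment);
        const u32 aligned_size = static_cast<u32>(addr - aligned_addr) + size;
        const u64 padded_end = AlignUp(addr + size, page_size);
        const u32 padded_size = static_cast<u32>(padded_end - aligned_addr);
        return {aligned_addr, is_written ? aligned_size : padded_size};
    }

    static_assert(ComputeExtent(0x1234, 0x100, 0x100, 0x1000, true).size == 0x134);
    static_assert(ComputeExtent(0x1234, 0x100, 0x100, 0x1000, false).size == 0xE00);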
| @@ -1780,15 +1799,15 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1780 | template <class P> | 1799 | template <class P> |
| 1781 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 1800 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 1782 | PixelFormat format) { | 1801 | PixelFormat format) { |
| 1783 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1802 | const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1784 | TextureBufferBinding binding; | 1803 | TextureBufferBinding binding; |
| 1785 | if (!cpu_addr || size == 0) { | 1804 | if (!device_addr || size == 0) { |
| 1786 | binding.cpu_addr = 0; | 1805 | binding.device_addr = 0; |
| 1787 | binding.size = 0; | 1806 | binding.size = 0; |
| 1788 | binding.buffer_id = NULL_BUFFER_ID; | 1807 | binding.buffer_id = NULL_BUFFER_ID; |
| 1789 | binding.format = PixelFormat::Invalid; | 1808 | binding.format = PixelFormat::Invalid; |
| 1790 | } else { | 1809 | } else { |
| 1791 | binding.cpu_addr = *cpu_addr; | 1810 | binding.device_addr = *device_addr; |
| 1792 | binding.size = size; | 1811 | binding.size = size; |
| 1793 | binding.buffer_id = BufferId{}; | 1812 | binding.buffer_id = BufferId{}; |
| 1794 | binding.format = format; | 1813 | binding.format = format; |
| @@ -1797,14 +1816,14 @@ TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, | |||
| 1797 | } | 1816 | } |
| 1798 | 1817 | ||
| 1799 | template <class P> | 1818 | template <class P> |
| 1800 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | 1819 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(DAddr device_addr, size_t size) { |
| 1801 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | 1820 | u8* const base_pointer = device_memory.GetPointer<u8>(device_addr); |
| 1802 | if (IsRangeGranular(cpu_addr, size) || | 1821 | if (IsRangeGranular(device_addr, size) || |
| 1803 | base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { | 1822 | base_pointer + size == device_memory.GetPointer<u8>(device_addr + size)) { |
| 1804 | return std::span(base_pointer, size); | 1823 | return std::span(base_pointer, size); |
| 1805 | } else { | 1824 | } else { |
| 1806 | const std::span<u8> span = ImmediateBuffer(size); | 1825 | const std::span<u8> span = ImmediateBuffer(size); |
| 1807 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | 1826 | device_memory.ReadBlockUnsafe(device_addr, span.data(), size); |
| 1808 | return span; | 1827 | return span; |
| 1809 | } | 1828 | } |
| 1810 | } | 1829 | } |
| @@ -1828,13 +1847,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) | |||
| 1828 | template <class P> | 1847 | template <class P> |
| 1829 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { | 1848 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { |
| 1830 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; | 1849 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; |
| 1831 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); | 1850 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.device_addr)); |
| 1832 | } | 1851 | } |
| 1833 | 1852 | ||
| 1834 | template <class P> | 1853 | template <class P> |
| 1835 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { | 1854 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { |
| 1836 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; | 1855 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; |
| 1837 | return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); | 1856 | return std::make_pair(&buffer, |
| 1857 | buffer.Offset(channel_state->indirect_buffer_binding.device_addr)); | ||
| 1838 | } | 1858 | } |
| 1839 | 1859 | ||
| 1840 | } // namespace VideoCommon | 1860 | } // namespace VideoCommon |
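Editor's note: under the new scheme FindBuffer, Register and Unregister all key off the same flat page table: the device address is shifted by the caching page bits, the slot either names an existing buffer or triggers CreateBuffer, and registration stamps the buffer id over every caching page the buffer spans. A condensed sketch of that lookup path, with a hypothetical std::unordered_map and std::vector standing in for the fixed array and the slot vector:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using DAddr = std::uint64_t;
    using BufferId = std::uint32_t; // 0 acts as the null id, as in the cache

    constexpr std::uint64_t kCachingPageBits = 16; // assumed CACHING_PAGEBITS

    struct FakeBuffer {
        DAddr addr;
        std::uint64_t size;
        bool IsInBounds(DAddr query, std::uint64_t query_size) const {
            return query >= addr && query + query_size <= addr + size;
        }
    };

    // Illustrative state only; the real cache uses a fixed std::array sized to
    // the 34-bit device address space and a SlotVector of Buffer objects.
    std::unordered_map<std::uint64_t, BufferId> page_table;
    std::vector<FakeBuffer> buffers{{0, 0}}; // index 0 reserved for the null id

    BufferId CreateBuffer(DAddr device_addr, std::uint64_t size) {
        const BufferId id = static_cast<BufferId>(buffers.size());
        buffers.push_back({device_addr, size});
        for (std::uint64_t page = device_addr >> kCachingPageBits;
             page <= (device_addr + size - 1) >> kCachingPageBits; ++page) {
            page_table[page] = id; // Register(): stamp every caching page
        }
        return id;
    }

    BufferId FindBuffer(DAddr device_addr, std::uint64_t size) {
        if (device_addr == 0) {
            return 0;
        }
        const auto it = page_table.find(device_addr >> kCachingPageBits);
        if (it == page_table.end()) {
            return CreateBuffer(device_addr, size);
        }
        if (buffers[it->second].IsInBounds(device_addr, size)) {
            return it->second;
        }
        return CreateBuffer(device_addr, size);
    }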
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index d6d696d8c..4074003e4 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include "common/microprofile.h" | 32 | #include "common/microprofile.h" |
| 33 | #include "common/scope_exit.h" | 33 | #include "common/scope_exit.h" |
| 34 | #include "common/settings.h" | 34 | #include "common/settings.h" |
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | 35 | #include "video_core/buffer_cache/buffer_base.h" |
| 37 | #include "video_core/control/channel_state_cache.h" | 36 | #include "video_core/control/channel_state_cache.h" |
| 38 | #include "video_core/delayed_destruction_ring.h" | 37 | #include "video_core/delayed_destruction_ring.h" |
| @@ -41,7 +40,6 @@ | |||
| 41 | #include "video_core/engines/kepler_compute.h" | 40 | #include "video_core/engines/kepler_compute.h" |
| 42 | #include "video_core/engines/maxwell_3d.h" | 41 | #include "video_core/engines/maxwell_3d.h" |
| 43 | #include "video_core/memory_manager.h" | 42 | #include "video_core/memory_manager.h" |
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | 43 | #include "video_core/surface.h" |
| 46 | #include "video_core/texture_cache/slot_vector.h" | 44 | #include "video_core/texture_cache/slot_vector.h" |
| 47 | #include "video_core/texture_cache/types.h" | 45 | #include "video_core/texture_cache/types.h" |
| @@ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0}; | |||
| 94 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | 92 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); |
| 95 | 93 | ||
| 96 | struct Binding { | 94 | struct Binding { |
| 97 | VAddr cpu_addr{}; | 95 | DAddr device_addr{}; |
| 98 | u32 size{}; | 96 | u32 size{}; |
| 99 | BufferId buffer_id; | 97 | BufferId buffer_id; |
| 100 | }; | 98 | }; |
| @@ -104,7 +102,7 @@ struct TextureBufferBinding : Binding { | |||
| 104 | }; | 102 | }; |
| 105 | 103 | ||
| 106 | static constexpr Binding NULL_BINDING{ | 104 | static constexpr Binding NULL_BINDING{ |
| 107 | .cpu_addr = 0, | 105 | .device_addr = 0, |
| 108 | .size = 0, | 106 | .size = 0, |
| 109 | .buffer_id = NULL_BUFFER_ID, | 107 | .buffer_id = NULL_BUFFER_ID, |
| 110 | }; | 108 | }; |
| @@ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 204 | using Async_Buffer = typename P::Async_Buffer; | 202 | using Async_Buffer = typename P::Async_Buffer; |
| 205 | using MemoryTracker = typename P::MemoryTracker; | 203 | using MemoryTracker = typename P::MemoryTracker; |
| 206 | 204 | ||
| 207 | using IntervalCompare = std::less<VAddr>; | 205 | using IntervalCompare = std::less<DAddr>; |
| 208 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | 206 | using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; |
| 209 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | 207 | using IntervalAllocator = boost::fast_pool_allocator<DAddr>; |
| 210 | using IntervalSet = boost::icl::interval_set<VAddr>; | 208 | using IntervalSet = boost::icl::interval_set<DAddr>; |
| 211 | using IntervalType = typename IntervalSet::interval_type; | 209 | using IntervalType = typename IntervalSet::interval_type; |
| 212 | 210 | ||
| 213 | template <typename Type> | 211 | template <typename Type> |
| @@ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 230 | 228 | ||
| 231 | using OverlapCombine = counter_add_functor<int>; | 229 | using OverlapCombine = counter_add_functor<int>; |
| 232 | using OverlapSection = boost::icl::inter_section<int>; | 230 | using OverlapSection = boost::icl::inter_section<int>; |
| 233 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 231 | using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; |
| 234 | 232 | ||
| 235 | struct OverlapResult { | 233 | struct OverlapResult { |
| 236 | boost::container::small_vector<BufferId, 16> ids; | 234 | boost::container::small_vector<BufferId, 16> ids; |
| 237 | VAddr begin; | 235 | DAddr begin; |
| 238 | VAddr end; | 236 | DAddr end; |
| 239 | bool has_stream_leap = false; | 237 | bool has_stream_leap = false; |
| 240 | }; | 238 | }; |
| 241 | 239 | ||
| 242 | public: | 240 | public: |
| 243 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 241 | explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); |
| 244 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 245 | 242 | ||
| 246 | void TickFrame(); | 243 | void TickFrame(); |
| 247 | 244 | ||
| 248 | void WriteMemory(VAddr cpu_addr, u64 size); | 245 | void WriteMemory(DAddr device_addr, u64 size); |
| 249 | 246 | ||
| 250 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 247 | void CachedWriteMemory(DAddr device_addr, u64 size); |
| 251 | 248 | ||
| 252 | bool OnCPUWrite(VAddr cpu_addr, u64 size); | 249 | bool OnCPUWrite(DAddr device_addr, u64 size); |
| 253 | 250 | ||
| 254 | void DownloadMemory(VAddr cpu_addr, u64 size); | 251 | void DownloadMemory(DAddr device_addr, u64 size); |
| 255 | 252 | ||
| 256 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 253 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size); |
| 257 | 254 | ||
| 258 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | 255 | bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |
| 259 | 256 | ||
| 260 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | 257 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 261 | 258 | ||
| @@ -300,7 +297,7 @@ public: | |||
| 300 | ObtainBufferSynchronize sync_info, | 297 | ObtainBufferSynchronize sync_info, |
| 301 | ObtainBufferOperation post_op); | 298 | ObtainBufferOperation post_op); |
| 302 | 299 | ||
| 303 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | 300 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size, |
| 304 | ObtainBufferSynchronize sync_info, | 301 | ObtainBufferSynchronize sync_info, |
| 305 | ObtainBufferOperation post_op); | 302 | ObtainBufferOperation post_op); |
| 306 | void FlushCachedWrites(); | 303 | void FlushCachedWrites(); |
| @@ -326,13 +323,13 @@ public: | |||
| 326 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | 323 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); |
| 327 | 324 | ||
| 328 | /// Return true when a CPU region is modified from the GPU | 325 | /// Return true when a CPU region is modified from the GPU |
| 329 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 326 | [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); |
| 330 | 327 | ||
| 331 | /// Return true when a region is registered on the cache | 328 | /// Return true when a region is registered on the cache |
| 332 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | 329 | [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size); |
| 333 | 330 | ||
| 334 | /// Return true when a CPU region is modified from the CPU | 331 | /// Return true when a CPU region is modified from the CPU |
| 335 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 332 | [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size); |
| 336 | 333 | ||
| 337 | void SetDrawIndirect( | 334 | void SetDrawIndirect( |
| 338 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | 335 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { |
| @@ -366,9 +363,9 @@ private: | |||
| 366 | } | 363 | } |
| 367 | 364 | ||
| 368 | template <typename Func> | 365 | template <typename Func> |
| 369 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | 366 | void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) { |
| 370 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | 367 | const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); |
| 371 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | 368 | for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { |
| 372 | const BufferId buffer_id = page_table[page]; | 369 | const BufferId buffer_id = page_table[page]; |
| 373 | if (!buffer_id) { | 370 | if (!buffer_id) { |
| 374 | ++page; | 371 | ++page; |
| @@ -377,15 +374,15 @@ private: | |||
| 377 | Buffer& buffer = slot_buffers[buffer_id]; | 374 | Buffer& buffer = slot_buffers[buffer_id]; |
| 378 | func(buffer_id, buffer); | 375 | func(buffer_id, buffer); |
| 379 | 376 | ||
| 380 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 377 | const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); |
| 381 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | 378 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 382 | } | 379 | } |
| 383 | } | 380 | } |
| 384 | 381 | ||
| 385 | template <typename Func> | 382 | template <typename Func> |
| 386 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | 383 | void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { |
| 387 | const VAddr start_address = cpu_addr; | 384 | const DAddr start_address = device_addr; |
| 388 | const VAddr end_address = start_address + size; | 385 | const DAddr end_address = start_address + size; |
| 389 | const IntervalType search_interval{start_address, end_address}; | 386 | const IntervalType search_interval{start_address, end_address}; |
| 390 | auto it = current_range.lower_bound(search_interval); | 387 | auto it = current_range.lower_bound(search_interval); |
| 391 | if (it == current_range.end()) { | 388 | if (it == current_range.end()) { |
| @@ -393,8 +390,8 @@ private: | |||
| 393 | } | 390 | } |
| 394 | auto end_it = current_range.upper_bound(search_interval); | 391 | auto end_it = current_range.upper_bound(search_interval); |
| 395 | for (; it != end_it; it++) { | 392 | for (; it != end_it; it++) { |
| 396 | VAddr inter_addr_end = it->upper(); | 393 | DAddr inter_addr_end = it->upper(); |
| 397 | VAddr inter_addr = it->lower(); | 394 | DAddr inter_addr = it->lower(); |
| 398 | if (inter_addr_end > end_address) { | 395 | if (inter_addr_end > end_address) { |
| 399 | inter_addr_end = end_address; | 396 | inter_addr_end = end_address; |
| 400 | } | 397 | } |
| @@ -406,10 +403,10 @@ private: | |||
| 406 | } | 403 | } |
| 407 | 404 | ||
| 408 | template <typename Func> | 405 | template <typename Func> |
| 409 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | 406 | void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, |
| 410 | Func&& func) { | 407 | Func&& func) { |
| 411 | const VAddr start_address = cpu_addr; | 408 | const DAddr start_address = device_addr; |
| 412 | const VAddr end_address = start_address + size; | 409 | const DAddr end_address = start_address + size; |
| 413 | const IntervalType search_interval{start_address, end_address}; | 410 | const IntervalType search_interval{start_address, end_address}; |
| 414 | auto it = current_range.lower_bound(search_interval); | 411 | auto it = current_range.lower_bound(search_interval); |
| 415 | if (it == current_range.end()) { | 412 | if (it == current_range.end()) { |
| @@ -418,8 +415,8 @@ private: | |||
| 418 | auto end_it = current_range.upper_bound(search_interval); | 415 | auto end_it = current_range.upper_bound(search_interval); |
| 419 | for (; it != end_it; it++) { | 416 | for (; it != end_it; it++) { |
| 420 | auto& inter = it->first; | 417 | auto& inter = it->first; |
| 421 | VAddr inter_addr_end = inter.upper(); | 418 | DAddr inter_addr_end = inter.upper(); |
| 422 | VAddr inter_addr = inter.lower(); | 419 | DAddr inter_addr = inter.lower(); |
| 423 | if (inter_addr_end > end_address) { | 420 | if (inter_addr_end > end_address) { |
| 424 | inter_addr_end = end_address; | 421 | inter_addr_end = end_address; |
| 425 | } | 422 | } |
| @@ -451,9 +448,9 @@ private: | |||
| 451 | } while (any_removals); | 448 | } while (any_removals); |
| 452 | } | 449 | } |
| 453 | 450 | ||
| 454 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | 451 | static bool IsRangeGranular(DAddr device_addr, size_t size) { |
| 455 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | 452 | return (device_addr & ~Core::Memory::YUZU_PAGEMASK) == |
| 456 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | 453 | ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK); |
| 457 | } | 454 | } |
| 458 | 455 | ||
| 459 | void RunGarbageCollector(); | 456 | void RunGarbageCollector(); |
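Editor's note: IsRangeGranular above decides whether a copy can be serviced through one raw host pointer: the start address and the one-past-the-end address must fall on the same guest page. A tiny standalone version of the same mask test, assuming a 4 KiB page in place of YUZU_PAGESIZE:

    #include <cstddef>
    #include <cstdint>

    using DAddr = std::uint64_t;

    constexpr DAddr kPageMask = 0xFFF; // 4 KiB guest page assumed

    // True when the first byte and the one-past-the-end address share a page,
    // so a single pointer obtained for `addr` covers the whole range.
    constexpr bool IsRangeGranular(DAddr addr, std::size_t size) {
        return (addr & ~kPageMask) == ((addr + size) & ~kPageMask);
    }

    static_assert(IsRangeGranular(0x1000, 0x800));
    static_assert(!IsRangeGranular(0x1F00, 0x200));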
| @@ -508,15 +505,15 @@ private: | |||
| 508 | 505 | ||
| 509 | void UpdateComputeTextureBuffers(); | 506 | void UpdateComputeTextureBuffers(); |
| 510 | 507 | ||
| 511 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | 508 | void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size); |
| 512 | 509 | ||
| 513 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | 510 | [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size); |
| 514 | 511 | ||
| 515 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | 512 | [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size); |
| 516 | 513 | ||
| 517 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | 514 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); |
| 518 | 515 | ||
| 519 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | 516 | [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size); |
| 520 | 517 | ||
| 521 | void Register(BufferId buffer_id); | 518 | void Register(BufferId buffer_id); |
| 522 | 519 | ||
| @@ -527,7 +524,7 @@ private: | |||
| 527 | 524 | ||
| 528 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | 525 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; |
| 529 | 526 | ||
| 530 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 527 | bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size); |
| 531 | 528 | ||
| 532 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 529 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 533 | std::span<BufferCopy> copies); | 530 | std::span<BufferCopy> copies); |
| @@ -539,7 +536,7 @@ private: | |||
| 539 | 536 | ||
| 540 | void DownloadBufferMemory(Buffer& buffer_id); | 537 | void DownloadBufferMemory(Buffer& buffer_id); |
| 541 | 538 | ||
| 542 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | 539 | void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size); |
| 543 | 540 | ||
| 544 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | 541 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); |
| 545 | 542 | ||
| @@ -549,7 +546,7 @@ private: | |||
| 549 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | 546 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 550 | PixelFormat format); | 547 | PixelFormat format); |
| 551 | 548 | ||
| 552 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | 549 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size); |
| 553 | 550 | ||
| 554 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | 551 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); |
| 555 | 552 | ||
| @@ -557,11 +554,10 @@ private: | |||
| 557 | 554 | ||
| 558 | void ClearDownload(IntervalType subtract_interval); | 555 | void ClearDownload(IntervalType subtract_interval); |
| 559 | 556 | ||
| 560 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 557 | void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, |
| 561 | std::span<const u8> inlined_buffer); | 558 | std::span<const u8> inlined_buffer); |
| 562 | 559 | ||
| 563 | VideoCore::RasterizerInterface& rasterizer; | 560 | Tegra::MaxwellDeviceMemoryManager& device_memory; |
| 564 | Core::Memory::Memory& cpu_memory; | ||
| 565 | 561 | ||
| 566 | SlotVector<Buffer> slot_buffers; | 562 | SlotVector<Buffer> slot_buffers; |
| 567 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 563 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| @@ -598,7 +594,7 @@ private: | |||
| 598 | u64 critical_memory = 0; | 594 | u64 critical_memory = 0; |
| 599 | BufferId inline_buffer_id; | 595 | BufferId inline_buffer_id; |
| 600 | 596 | ||
| 601 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 597 | std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table; |
| 602 | Common::ScratchBuffer<u8> tmp_buffer; | 598 | Common::ScratchBuffer<u8> tmp_buffer; |
| 603 | }; | 599 | }; |
| 604 | 600 | ||
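Editor's note: the page_table member shrinks with the move to device addresses, since it only has to span the SMMU's 34-bit space instead of the 39-bit CPU address space. A quick check of what that buys, assuming a 16-bit caching page granularity and a 32-bit BufferId handle (both assumptions, not taken from this diff):

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint64_t caching_page_bits = 16; // assumed CACHING_PAGEBITS
        constexpr std::uint64_t handle_bytes = 4;       // assumed sizeof(BufferId)
        constexpr std::uint64_t old_entries = (1ULL << 39) >> caching_page_bits;
        constexpr std::uint64_t new_entries = (1ULL << 34) >> caching_page_bits;
        std::printf("old: %llu entries (%llu MiB)\n",
                    static_cast<unsigned long long>(old_entries),
                    static_cast<unsigned long long>((old_entries * handle_bytes) >> 20));
        std::printf("new: %llu entries (%llu MiB)\n",
                    static_cast<unsigned long long>(new_entries),
                    static_cast<unsigned long long>((new_entries * handle_bytes) >> 20));
    }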
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index 6c1c8287b..c95eed1f6 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -17,19 +17,19 @@ | |||
| 17 | 17 | ||
| 18 | namespace VideoCommon { | 18 | namespace VideoCommon { |
| 19 | 19 | ||
| 20 | template <class RasterizerInterface> | 20 | template <typename DeviceTracker> |
| 21 | class MemoryTrackerBase { | 21 | class MemoryTrackerBase { |
| 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 39; | 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 34; |
| 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; | 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; |
| 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; |
| 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; |
| 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); |
| 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; | 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; |
| 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; |
| 29 | using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | 29 | using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>; |
| 30 | 30 | ||
| 31 | public: | 31 | public: |
| 32 | MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | 32 | MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {} |
| 33 | ~MemoryTrackerBase() = default; | 33 | ~MemoryTrackerBase() = default; |
| 34 | 34 | ||
| 35 | /// Returns the inclusive CPU modified range in a begin end pair | 35 | /// Returns the inclusive CPU modified range in a begin end pair |
| @@ -74,7 +74,7 @@ public: | |||
| 74 | }); | 74 | }); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | /// Mark region as CPU modified, notifying the rasterizer about this change | 77 | /// Mark region as CPU modified, notifying the device_tracker about this change |
| 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 78 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 79 | IteratePages<true>(dirty_cpu_addr, query_size, | 79 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 80 | [](Manager* manager, u64 offset, size_t size) { | 80 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -83,7 +83,7 @@ public: | |||
| 83 | }); | 83 | }); |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /// Unmark region as CPU modified, notifying the rasterizer about this change | 86 | /// Unmark region as CPU modified, notifying the device_tracker about this change |
| 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | 87 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |
| 88 | IteratePages<true>(dirty_cpu_addr, query_size, | 88 | IteratePages<true>(dirty_cpu_addr, query_size, |
| 89 | [](Manager* manager, u64 offset, size_t size) { | 89 | [](Manager* manager, u64 offset, size_t size) { |
| @@ -139,7 +139,7 @@ public: | |||
| 139 | }); | 139 | }); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | /// Flushes cached CPU writes, and notify the rasterizer about the deltas | 142 | /// Flushes cached CPU writes, and notify the device_tracker about the deltas |
| 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | 143 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { |
| 144 | IteratePages<false>(query_cpu_addr, query_size, | 144 | IteratePages<false>(query_cpu_addr, query_size, |
| 145 | [](Manager* manager, [[maybe_unused]] u64 offset, | 145 | [](Manager* manager, [[maybe_unused]] u64 offset, |
| @@ -280,7 +280,7 @@ private: | |||
| 280 | manager_pool.emplace_back(); | 280 | manager_pool.emplace_back(); |
| 281 | auto& last_pool = manager_pool.back(); | 281 | auto& last_pool = manager_pool.back(); |
| 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | 282 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { |
| 283 | new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | 283 | new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE); |
| 284 | free_managers.push_back(&last_pool[i]); | 284 | free_managers.push_back(&last_pool[i]); |
| 285 | } | 285 | } |
| 286 | return on_return(); | 286 | return on_return(); |
| @@ -293,7 +293,7 @@ private: | |||
| 293 | 293 | ||
| 294 | std::unordered_set<u32> cached_pages; | 294 | std::unordered_set<u32> cached_pages; |
| 295 | 295 | ||
| 296 | RasterizerInterface* rasterizer = nullptr; | 296 | DeviceTracker* device_tracker = nullptr; |
| 297 | }; | 297 | }; |
| 298 | 298 | ||
| 299 | } // namespace VideoCommon | 299 | } // namespace VideoCommon |
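Editor's note: MemoryTrackerBase splits the (now 34-bit) tracked space into 4 MiB "higher pages", each owned by one lazily allocated WordManager, so only 2^(34-22) = 4096 top-level slots are needed. A back-of-the-envelope sketch of that two-level address split, using the constants visible in the hunk above:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t MAX_CPU_PAGE_BITS = 34; // device address space bits
    constexpr std::size_t HIGHER_PAGE_BITS = 22;  // 4 MiB per top-level slot
    constexpr std::size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
    constexpr std::size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);

    static_assert(HIGHER_PAGE_SIZE == 4ULL << 20);
    static_assert(NUM_HIGH_PAGES == 4096);

    // Mapping a device address to its top-level slot and its in-slot offset.
    constexpr std::size_t HighPageIndex(std::uint64_t device_addr) {
        return device_addr >> HIGHER_PAGE_BITS;
    }
    constexpr std::uint64_t HighPageOffset(std::uint64_t device_addr) {
        return device_addr & (HIGHER_PAGE_SIZE - 1);
    }

    static_assert(HighPageIndex(0x1234'5678) == 0x48);
    static_assert(HighPageOffset(0x1234'5678) == 0x345678);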
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index a336bde41..56ab4f5f1 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -163,11 +163,11 @@ struct Words { | |||
| 163 | WordsArray<stack_words> preflushable; | 163 | WordsArray<stack_words> preflushable; |
| 164 | }; | 164 | }; |
| 165 | 165 | ||
| 166 | template <class RasterizerInterface, size_t stack_words = 1> | 166 | template <class DeviceTracker, size_t stack_words = 1> |
| 167 | class WordManager { | 167 | class WordManager { |
| 168 | public: | 168 | public: |
| 169 | explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | 169 | explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes) |
| 170 | : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | 170 | : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {} |
| 171 | 171 | ||
| 172 | explicit WordManager() = default; | 172 | explicit WordManager() = default; |
| 173 | 173 | ||
| @@ -279,7 +279,7 @@ public: | |||
| 279 | } | 279 | } |
| 280 | 280 | ||
| 281 | /** | 281 | /** |
| 282 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | 282 | * Loop over each page in the given range, turn off those bits and notify the tracker if |
| 283 | * needed. Call the given function on each turned off range. | 283 | * needed. Call the given function on each turned off range. |
| 284 | * | 284 | * |
| 285 | * @param query_cpu_range Base CPU address to loop over | 285 | * @param query_cpu_range Base CPU address to loop over |
| @@ -459,26 +459,26 @@ private: | |||
| 459 | } | 459 | } |
| 460 | 460 | ||
| 461 | /** | 461 | /** |
| 462 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | 462 | * Notify tracker about changes in the CPU tracking state of a word in the buffer |
| 463 | * | 463 | * |
| 464 | * @param word_index Index to the word to notify to the rasterizer | 464 | * @param word_index Index to the word to notify to the tracker |
| 465 | * @param current_bits Current state of the word | 465 | * @param current_bits Current state of the word |
| 466 | * @param new_bits New state of the word | 466 | * @param new_bits New state of the word |
| 467 | * | 467 | * |
| 468 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | 468 | * @tparam add_to_tracker True when the tracker should start tracking the new pages |
| 469 | */ | 469 | */ |
| 470 | template <bool add_to_rasterizer> | 470 | template <bool add_to_tracker> |
| 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | 471 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |
| 472 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | 472 | u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; |
| 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 473 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | 474 | IteratePages(changed_bits, [&](size_t offset, size_t size) { |
| 475 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | 475 | tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, |
| 476 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | 476 | size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1); |
| 477 | }); | 477 | }); |
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | VAddr cpu_addr = 0; | 480 | VAddr cpu_addr = 0; |
| 481 | RasterizerInterface* rasterizer = nullptr; | 481 | DeviceTracker* tracker = nullptr; |
| 482 | Words<stack_words> words; | 482 | Words<stack_words> words; |
| 483 | }; | 483 | }; |
| 484 | 484 | ||
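Editor's note: the renamed NotifyRasterizer above still selects which page bits are reported to the tracker before calling UpdatePagesCachedCount: when adding it keeps new_bits masked by current_bits, when removing it keeps new_bits masked by ~current_bits. A minimal reproduction of that selection rule:

    #include <bitset>
    #include <cstdint>
    #include <iostream>

    // Same selection rule as the hunk above: pick the subset of new_bits that
    // actually changes tracking state for the add or remove direction.
    std::uint64_t ChangedBits(bool add_to_tracker, std::uint64_t current_bits,
                              std::uint64_t new_bits) {
        return (add_to_tracker ? current_bits : ~current_bits) & new_bits;
    }

    int main() {
        const std::uint64_t current = 0b1100;
        const std::uint64_t requested = 0b1010;
        std::cout << std::bitset<4>(ChangedBits(true, current, requested)) << '\n';  // 1000
        std::cout << std::bitset<4>(ChangedBits(false, current, requested)) << '\n'; // 0010
    }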