diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 30 |
1 files changed, 20 insertions, 10 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5f5a59bba..4def8f076 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -329,6 +329,8 @@ private: | |||
| 329 | 329 | ||
| 330 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | 330 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; |
| 331 | 331 | ||
| 332 | void ClearDownload(IntervalType subtract_interval); | ||
| 333 | |||
| 332 | VideoCore::RasterizerInterface& rasterizer; | 334 | VideoCore::RasterizerInterface& rasterizer; |
| 333 | Tegra::Engines::Maxwell3D& maxwell3d; | 335 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 334 | Tegra::Engines::KeplerCompute& kepler_compute; | 336 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -468,6 +470,14 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | |||
| 468 | } | 470 | } |
| 469 | 471 | ||
| 470 | template <class P> | 472 | template <class P> |
| 473 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | ||
| 474 | uncommitted_ranges.subtract(subtract_interval); | ||
| 475 | for (auto& interval_set : committed_ranges) { | ||
| 476 | interval_set.subtract(subtract_interval); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | template <class P> | ||
| 471 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | 481 | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { |
| 472 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); | 482 | const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); |
| 473 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); | 483 | const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); |
| @@ -481,10 +491,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 481 | } | 491 | } |
| 482 | 492 | ||
| 483 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | 493 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |
| 484 | uncommitted_ranges.subtract(subtract_interval); | 494 | ClearDownload(subtract_interval); |
| 485 | for (auto& interval_set : committed_ranges) { | ||
| 486 | interval_set.subtract(subtract_interval); | ||
| 487 | } | ||
| 488 | 495 | ||
| 489 | BufferId buffer_a; | 496 | BufferId buffer_a; |
| 490 | BufferId buffer_b; | 497 | BufferId buffer_b; |
| @@ -496,7 +503,6 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 496 | auto& src_buffer = slot_buffers[buffer_a]; | 503 | auto& src_buffer = slot_buffers[buffer_a]; |
| 497 | auto& dest_buffer = slot_buffers[buffer_b]; | 504 | auto& dest_buffer = slot_buffers[buffer_b]; |
| 498 | SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); | 505 | SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); |
| 499 | SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount)); | ||
| 500 | std::array copies{BufferCopy{ | 506 | std::array copies{BufferCopy{ |
| 501 | .src_offset = src_buffer.Offset(*cpu_src_address), | 507 | .src_offset = src_buffer.Offset(*cpu_src_address), |
| 502 | .dst_offset = dest_buffer.Offset(*cpu_dest_address), | 508 | .dst_offset = dest_buffer.Offset(*cpu_dest_address), |
| @@ -515,12 +521,17 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 515 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 521 | ForEachWrittenRange(*cpu_src_address, amount, mirror); |
| 516 | // This subtraction in this order is important for overlapping copies. | 522 | // This subtraction in this order is important for overlapping copies. |
| 517 | common_ranges.subtract(subtract_interval); | 523 | common_ranges.subtract(subtract_interval); |
| 524 | bool atleast_1_download = tmp_intervals.size() != 0; | ||
| 518 | for (const IntervalType add_interval : tmp_intervals) { | 525 | for (const IntervalType add_interval : tmp_intervals) { |
| 519 | common_ranges.add(add_interval); | 526 | common_ranges.add(add_interval); |
| 520 | } | 527 | } |
| 521 | 528 | ||
| 529 | if (dest_buffer.HasCachedWrites()) { | ||
| 530 | dest_buffer.FlushCachedWrites(); | ||
| 531 | } | ||
| 522 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 532 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 523 | if (IsRegionGpuModified(*cpu_src_address, amount)) { | 533 | dest_buffer.UnmarkRegionAsCpuModified(*cpu_dest_address, amount); |
| 534 | if (atleast_1_download) { | ||
| 524 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 535 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 525 | } | 536 | } |
| 526 | std::vector<u8> tmp_buffer(amount); | 537 | std::vector<u8> tmp_buffer(amount); |
| @@ -541,10 +552,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | |||
| 541 | } | 552 | } |
| 542 | 553 | ||
| 543 | const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; | 554 | const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + amount * sizeof(u32)}; |
| 544 | uncommitted_ranges.subtract(subtract_interval); | 555 | ClearDownload(subtract_interval); |
| 545 | for (auto& interval_set : committed_ranges) { | ||
| 546 | interval_set.subtract(subtract_interval); | ||
| 547 | } | ||
| 548 | common_ranges.subtract(subtract_interval); | 556 | common_ranges.subtract(subtract_interval); |
| 549 | 557 | ||
| 550 | const size_t size = amount * sizeof(u32); | 558 | const size_t size = amount * sizeof(u32); |
| @@ -557,6 +565,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | |||
| 557 | auto& dest_buffer = slot_buffers[buffer]; | 565 | auto& dest_buffer = slot_buffers[buffer]; |
| 558 | const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); | 566 | const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr()); |
| 559 | runtime.ClearBuffer(dest_buffer, offset, size, value); | 567 | runtime.ClearBuffer(dest_buffer, offset, size, value); |
| 568 | dest_buffer.UnmarkRegionAsCpuModified(*cpu_dst_address, size); | ||
| 560 | return true; | 569 | return true; |
| 561 | } | 570 | } |
| 562 | 571 | ||
| @@ -1482,6 +1491,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1482 | const VAddr end_address = start_address + range_size; | 1491 | const VAddr end_address = start_address + range_size; |
| 1483 | ForEachWrittenRange(start_address, range_size, add_download); | 1492 | ForEachWrittenRange(start_address, range_size, add_download); |
| 1484 | const IntervalType subtract_interval{start_address, end_address}; | 1493 | const IntervalType subtract_interval{start_address, end_address}; |
| 1494 | ClearDownload(subtract_interval); | ||
| 1485 | common_ranges.subtract(subtract_interval); | 1495 | common_ranges.subtract(subtract_interval); |
| 1486 | }); | 1496 | }); |
| 1487 | if (total_size_bytes == 0) { | 1497 | if (total_size_bytes == 0) { |