diff options
| author | 2023-04-30 17:14:06 +0200 | |
|---|---|---|
| committer | 2023-05-07 23:46:12 +0200 | |
| commit | c6cac2ffaad4ac27f35cea25022d9c59c7ecfbf4 (patch) | |
| tree | 0d71092cfad84e47a193917028200e2fc216f5e6 /src/core/memory.cpp | |
| parent | Merge pull request #10097 from german77/nfp_full (diff) | |
| download | yuzu-c6cac2ffaad4ac27f35cea25022d9c59c7ecfbf4.tar.gz yuzu-c6cac2ffaad4ac27f35cea25022d9c59c7ecfbf4.tar.xz yuzu-c6cac2ffaad4ac27f35cea25022d9c59c7ecfbf4.zip | |
GPU: Add Reactive flushing
Diffstat (limited to 'src/core/memory.cpp')
| -rw-r--r-- | src/core/memory.cpp | 27 |
1 file changed, 21 insertions, 6 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a9667463f..7b79cb8bc 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -13,10 +13,13 @@ | |||
| 13 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 14 | #include "core/core.h" | 14 | #include "core/core.h" |
| 15 | #include "core/device_memory.h" | 15 | #include "core/device_memory.h" |
| 16 | #include "core/hardware_properties.h" | ||
| 16 | #include "core/hle/kernel/k_page_table.h" | 17 | #include "core/hle/kernel/k_page_table.h" |
| 17 | #include "core/hle/kernel/k_process.h" | 18 | #include "core/hle/kernel/k_process.h" |
| 18 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 19 | #include "video_core/gpu.h" | 20 | #include "video_core/gpu.h" |
| 21 | #include "video_core/rasterizer_download_area.h" | ||
| 22 | |||
| 20 | 23 | ||
| 21 | namespace Core::Memory { | 24 | namespace Core::Memory { |
| 22 | 25 | ||
| @@ -243,7 +246,7 @@ struct Memory::Impl { | |||
| 243 | [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount, | 246 | [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount, |
| 244 | const u8* const host_ptr) { | 247 | const u8* const host_ptr) { |
| 245 | if constexpr (!UNSAFE) { | 248 | if constexpr (!UNSAFE) { |
| 246 | system.GPU().FlushRegion(GetInteger(current_vaddr), copy_amount); | 249 | HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount); |
| 247 | } | 250 | } |
| 248 | std::memcpy(dest_buffer, host_ptr, copy_amount); | 251 | std::memcpy(dest_buffer, host_ptr, copy_amount); |
| 249 | }, | 252 | }, |
| @@ -334,7 +337,7 @@ struct Memory::Impl { | |||
| 334 | }, | 337 | }, |
| 335 | [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount, | 338 | [&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount, |
| 336 | u8* const host_ptr) { | 339 | u8* const host_ptr) { |
| 337 | system.GPU().FlushRegion(GetInteger(current_vaddr), copy_amount); | 340 | HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount); |
| 338 | WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount); | 341 | WriteBlockImpl<false>(process, dest_addr, host_ptr, copy_amount); |
| 339 | }, | 342 | }, |
| 340 | [&](const std::size_t copy_amount) { | 343 | [&](const std::size_t copy_amount) { |
| @@ -373,7 +376,7 @@ struct Memory::Impl { | |||
| 373 | const std::size_t block_size) { | 376 | const std::size_t block_size) { |
| 374 | // dc ivac: Invalidate to point of coherency | 377 | // dc ivac: Invalidate to point of coherency |
| 375 | // GPU flush -> CPU invalidate | 378 | // GPU flush -> CPU invalidate |
| 376 | system.GPU().FlushRegion(GetInteger(current_vaddr), block_size); | 379 | HandleRasterizerDownload(GetInteger(current_vaddr), block_size); |
| 377 | }; | 380 | }; |
| 378 | return PerformCacheOperation(process, dest_addr, size, on_rasterizer); | 381 | return PerformCacheOperation(process, dest_addr, size, on_rasterizer); |
| 379 | } | 382 | } |
| @@ -462,8 +465,7 @@ struct Memory::Impl { | |||
| 462 | } | 465 | } |
| 463 | 466 | ||
| 464 | if (Settings::IsFastmemEnabled()) { | 467 | if (Settings::IsFastmemEnabled()) { |
| 465 | const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; | 468 | system.DeviceMemory().buffer.Protect(vaddr, size, !cached, !cached); |
| 466 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); | ||
| 467 | } | 469 | } |
| 468 | 470 | ||
| 469 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU | 471 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU |
| @@ -651,7 +653,9 @@ struct Memory::Impl { | |||
| 651 | LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, | 653 | LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, |
| 652 | GetInteger(vaddr)); | 654 | GetInteger(vaddr)); |
| 653 | }, | 655 | }, |
| 654 | [&]() { system.GPU().FlushRegion(GetInteger(vaddr), sizeof(T)); }); | 656 | [&]() { |
| 657 | HandleRasterizerDownload(GetInteger(vaddr), sizeof(T)); | ||
| 658 | }); | ||
| 655 | if (ptr) { | 659 | if (ptr) { |
| 656 | std::memcpy(&result, ptr, sizeof(T)); | 660 | std::memcpy(&result, ptr, sizeof(T)); |
| 657 | } | 661 | } |
| @@ -712,7 +716,18 @@ struct Memory::Impl { | |||
| 712 | return true; | 716 | return true; |
| 713 | } | 717 | } |
| 714 | 718 | ||
| 719 | void HandleRasterizerDownload(VAddr address, size_t size) { | ||
| 720 | const size_t core = system.GetCurrentHostThreadID(); | ||
| 721 | auto& current_area = rasterizer_areas[core]; | ||
| 722 | const VAddr end_address = address + size; | ||
| 723 | if (current_area.start_address <= address && end_address <= current_area.end_address) [[likely]] { | ||
| 724 | return; | ||
| 725 | } | ||
| 726 | current_area = system.GPU().OnCPURead(address, size); | ||
| 727 | } | ||
| 728 | |||
| 715 | Common::PageTable* current_page_table = nullptr; | 729 | Common::PageTable* current_page_table = nullptr; |
| 730 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | ||
| 716 | Core::System& system; | 731 | Core::System& system; |
| 717 | }; | 732 | }; |
| 718 | 733 | ||