diff options
| author | 2024-01-22 10:55:39 -0500 | |
|---|---|---|
| committer | 2024-01-22 10:55:39 -0500 | |
| commit | 8bd10473d60503c7acddc399604a51b9c9947541 (patch) | |
| tree | f713f84942681321fca27ba028e31d6c74a09013 /src/core/memory.cpp | |
| parent | Merge pull request #12747 from t895/homescreen-widget (diff) | |
| parent | device_memory_manager: use unique_lock for update (diff) | |
| download | yuzu-8bd10473d60503c7acddc399604a51b9c9947541.tar.gz yuzu-8bd10473d60503c7acddc399604a51b9c9947541.tar.xz yuzu-8bd10473d60503c7acddc399604a51b9c9947541.zip | |
Merge pull request #12579 from FernandoS27/smmu
Core: Implement Device Mapping & GPU SMMU
Diffstat (limited to 'src/core/memory.cpp')
| -rw-r--r-- | src/core/memory.cpp | 108 |
1 files changed, 60 insertions, 48 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 8176a41be..1c218566f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -24,6 +24,8 @@ | |||
| 24 | #include "core/hle/kernel/k_process.h" | 24 | #include "core/hle/kernel/k_process.h" |
| 25 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 26 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 27 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||
| 28 | #include "video_core/host1x/host1x.h" | ||
| 27 | #include "video_core/rasterizer_download_area.h" | 29 | #include "video_core/rasterizer_download_area.h" |
| 28 | 30 | ||
| 29 | namespace Core::Memory { | 31 | namespace Core::Memory { |
| @@ -637,17 +639,6 @@ struct Memory::Impl { | |||
| 637 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), | 639 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), |
| 638 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | 640 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); |
| 639 | 641 | ||
| 640 | // During boot, current_page_table might not be set yet, in which case we need not flush | ||
| 641 | if (system.IsPoweredOn()) { | ||
| 642 | auto& gpu = system.GPU(); | ||
| 643 | for (u64 i = 0; i < size; i++) { | ||
| 644 | const auto page = base + i; | ||
| 645 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | ||
| 646 | gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | } | ||
| 650 | |||
| 651 | const auto end = base + size; | 642 | const auto end = base + size; |
| 652 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | 643 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| 653 | base + page_table.pointers.size()); | 644 | base + page_table.pointers.size()); |
| @@ -811,21 +802,33 @@ struct Memory::Impl { | |||
| 811 | return true; | 802 | return true; |
| 812 | } | 803 | } |
| 813 | 804 | ||
| 814 | void HandleRasterizerDownload(VAddr address, size_t size) { | 805 | void HandleRasterizerDownload(VAddr v_address, size_t size) { |
| 806 | const auto* p = GetPointerImpl( | ||
| 807 | v_address, []() {}, []() {}); | ||
| 808 | if (!gpu_device_memory) [[unlikely]] { | ||
| 809 | gpu_device_memory = &system.Host1x().MemoryManager(); | ||
| 810 | } | ||
| 815 | const size_t core = system.GetCurrentHostThreadID(); | 811 | const size_t core = system.GetCurrentHostThreadID(); |
| 816 | auto& current_area = rasterizer_read_areas[core]; | 812 | auto& current_area = rasterizer_read_areas[core]; |
| 817 | const VAddr end_address = address + size; | 813 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 818 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 814 | const DAddr end_address = address + size; |
| 819 | [[likely]] { | 815 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 820 | return; | 816 | [[likely]] { |
| 821 | } | 817 | return; |
| 822 | current_area = system.GPU().OnCPURead(address, size); | 818 | } |
| 819 | current_area = system.GPU().OnCPURead(address, size); | ||
| 820 | }); | ||
| 823 | } | 821 | } |
| 824 | 822 | ||
| 825 | void HandleRasterizerWrite(VAddr address, size_t size) { | 823 | void HandleRasterizerWrite(VAddr v_address, size_t size) { |
| 824 | const auto* p = GetPointerImpl( | ||
| 825 | v_address, []() {}, []() {}); | ||
| 826 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; | 826 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 827 | const size_t core = std::min(system.GetCurrentHostThreadID(), | 827 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 828 | sys_core); // any other calls threads go to syscore. | 828 | sys_core); // any other calls threads go to syscore. |
| 829 | if (!gpu_device_memory) [[unlikely]] { | ||
| 830 | gpu_device_memory = &system.Host1x().MemoryManager(); | ||
| 831 | } | ||
| 829 | // Guard on sys_core; | 832 | // Guard on sys_core; |
| 830 | if (core == sys_core) [[unlikely]] { | 833 | if (core == sys_core) [[unlikely]] { |
| 831 | sys_core_guard.lock(); | 834 | sys_core_guard.lock(); |
| @@ -835,36 +838,53 @@ struct Memory::Impl { | |||
| 835 | sys_core_guard.unlock(); | 838 | sys_core_guard.unlock(); |
| 836 | } | 839 | } |
| 837 | }); | 840 | }); |
| 838 | auto& current_area = rasterizer_write_areas[core]; | 841 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 839 | VAddr subaddress = address >> YUZU_PAGEBITS; | 842 | auto& current_area = rasterizer_write_areas[core]; |
| 840 | bool do_collection = current_area.last_address == subaddress; | 843 | PAddr subaddress = address >> YUZU_PAGEBITS; |
| 841 | if (!do_collection) [[unlikely]] { | 844 | bool do_collection = current_area.last_address == subaddress; |
| 842 | do_collection = system.GPU().OnCPUWrite(address, size); | 845 | if (!do_collection) [[unlikely]] { |
| 843 | if (!do_collection) { | 846 | do_collection = system.GPU().OnCPUWrite(address, size); |
| 844 | return; | 847 | if (!do_collection) { |
| 848 | return; | ||
| 849 | } | ||
| 850 | current_area.last_address = subaddress; | ||
| 845 | } | 851 | } |
| 846 | current_area.last_address = subaddress; | 852 | gpu_dirty_managers[core].Collect(address, size); |
| 847 | } | 853 | }); |
| 848 | gpu_dirty_managers[core].Collect(address, size); | ||
| 849 | } | 854 | } |
| 850 | 855 | ||
| 851 | struct GPUDirtyState { | 856 | struct GPUDirtyState { |
| 852 | VAddr last_address; | 857 | PAddr last_address; |
| 853 | }; | 858 | }; |
| 854 | 859 | ||
| 855 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | 860 | void InvalidateGPUMemory(u8* p, size_t size) { |
| 856 | system.GPU().InvalidateRegion(GetInteger(dest_addr), size); | 861 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 857 | } | 862 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 858 | 863 | sys_core); // any other calls threads go to syscore. | |
| 859 | void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | 864 | if (!gpu_device_memory) [[unlikely]] { |
| 860 | system.GPU().FlushRegion(GetInteger(dest_addr), size); | 865 | gpu_device_memory = &system.Host1x().MemoryManager(); |
| 866 | } | ||
| 867 | // Guard on sys_core; | ||
| 868 | if (core == sys_core) [[unlikely]] { | ||
| 869 | sys_core_guard.lock(); | ||
| 870 | } | ||
| 871 | SCOPE_EXIT({ | ||
| 872 | if (core == sys_core) [[unlikely]] { | ||
| 873 | sys_core_guard.unlock(); | ||
| 874 | } | ||
| 875 | }); | ||
| 876 | auto& gpu = system.GPU(); | ||
| 877 | gpu_device_memory->ApplyOpOnPointer( | ||
| 878 | p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); }); | ||
| 861 | } | 879 | } |
| 862 | 880 | ||
| 863 | Core::System& system; | 881 | Core::System& system; |
| 882 | Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{}; | ||
| 864 | Common::PageTable* current_page_table = nullptr; | 883 | Common::PageTable* current_page_table = nullptr; |
| 865 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | 884 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> |
| 866 | rasterizer_read_areas{}; | 885 | rasterizer_read_areas{}; |
| 867 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | 886 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; |
| 887 | std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{}; | ||
| 868 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | 888 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; |
| 869 | std::mutex sys_core_guard; | 889 | std::mutex sys_core_guard; |
| 870 | 890 | ||
| @@ -1059,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) | |||
| 1059 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); | 1079 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); |
| 1060 | } | 1080 | } |
| 1061 | 1081 | ||
| 1062 | void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1063 | impl->InvalidateRegion(dest_addr, size); | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1067 | impl->FlushRegion(dest_addr, size); | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | 1082 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { |
| 1071 | [[maybe_unused]] bool mapped = true; | 1083 | [[maybe_unused]] bool mapped = true; |
| 1072 | [[maybe_unused]] bool rasterizer = false; | 1084 | [[maybe_unused]] bool rasterizer = false; |
| @@ -1078,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | |||
| 1078 | GetInteger(vaddr)); | 1090 | GetInteger(vaddr)); |
| 1079 | mapped = false; | 1091 | mapped = false; |
| 1080 | }, | 1092 | }, |
| 1081 | [&] { | 1093 | [&] { rasterizer = true; }); |
| 1082 | impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); | 1094 | if (rasterizer) { |
| 1083 | rasterizer = true; | 1095 | impl->InvalidateGPUMemory(ptr, size); |
| 1084 | }); | 1096 | } |
| 1085 | 1097 | ||
| 1086 | #ifdef __linux__ | 1098 | #ifdef __linux__ |
| 1087 | if (!rasterizer && mapped) { | 1099 | if (!rasterizer && mapped) { |