diff options
| author | 2023-06-28 19:32:50 +0200 | |
|---|---|---|
| committer | 2023-06-28 21:32:45 +0200 | |
| commit | da440da9f54cc860f3c69da685a415d5ec9d7b64 (patch) | |
| tree | 5a7a4a56462244970e1356a723e6a8a77477f820 /src/core | |
| parent | MemoryTracking: Initial setup of atomic writes. (diff) | |
| download | yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.gz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.xz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.zip | |
Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
Diffstat (limited to 'src/core')
| -rw-r--r-- | src/core/core.cpp | 8 | ||||
| -rw-r--r-- | src/core/gpu_dirty_memory_manager.h | 14 | ||||
| -rw-r--r-- | src/core/memory.cpp | 39 | ||||
| -rw-r--r-- | src/core/memory.h | 6 |
4 files changed, 54 insertions, 13 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index deefeb301..9e3eb3795 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
| @@ -55,7 +55,6 @@ | |||
| 55 | #include "video_core/renderer_base.h" | 55 | #include "video_core/renderer_base.h" |
| 56 | #include "video_core/video_core.h" | 56 | #include "video_core/video_core.h" |
| 57 | 57 | ||
| 58 | |||
| 59 | MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); | 58 | MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); |
| 60 | MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); | 59 | MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); |
| 61 | MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); | 60 | MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); |
| @@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 132 | struct System::Impl { | 131 | struct System::Impl { |
| 133 | explicit Impl(System& system) | 132 | explicit Impl(System& system) |
| 134 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, | 133 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, |
| 135 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} | 134 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, |
| 135 | gpu_dirty_memory_write_manager{} { | ||
| 136 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 137 | } | ||
| 136 | 138 | ||
| 137 | void Initialize(System& system) { | 139 | void Initialize(System& system) { |
| 138 | device_memory = std::make_unique<Core::DeviceMemory>(); | 140 | device_memory = std::make_unique<Core::DeviceMemory>(); |
| @@ -236,6 +238,8 @@ struct System::Impl { | |||
| 236 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). | 238 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). |
| 237 | ReinitializeIfNecessary(system); | 239 | ReinitializeIfNecessary(system); |
| 238 | 240 | ||
| 241 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 242 | |||
| 239 | kernel.Initialize(); | 243 | kernel.Initialize(); |
| 240 | cpu_manager.Initialize(); | 244 | cpu_manager.Initialize(); |
| 241 | 245 | ||
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9c3d41d11..789b7530f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
| @@ -1,3 +1,6 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 1 | #pragma once | 4 | #pragma once |
| 2 | 5 | ||
| 3 | #include <atomic> | 6 | #include <atomic> |
| @@ -59,8 +62,7 @@ public: | |||
| 59 | mask = mask >> empty_bits; | 62 | mask = mask >> empty_bits; |
| 60 | 63 | ||
| 61 | const size_t continuous_bits = std::countr_one(mask); | 64 | const size_t continuous_bits = std::countr_one(mask); |
| 62 | callback((transform.address << Memory::YUZU_PAGEBITS) + offset, | 65 | callback((transform.address << page_bits) + offset, continuous_bits << align_bits); |
| 63 | continuous_bits << align_bits); | ||
| 64 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | 66 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |
| 65 | offset += continuous_bits << align_bits; | 67 | offset += continuous_bits << align_bits; |
| 66 | } | 68 | } |
| @@ -74,6 +76,10 @@ private: | |||
| 74 | u64 mask; | 76 | u64 mask; |
| 75 | }; | 77 | }; |
| 76 | 78 | ||
| 79 | constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; | ||
| 80 | constexpr static size_t page_size = 1ULL << page_bits; | ||
| 81 | constexpr static size_t page_mask = page_size - 1; | ||
| 82 | |||
| 77 | constexpr static size_t align_bits = 6U; | 83 | constexpr static size_t align_bits = 6U; |
| 78 | constexpr static size_t align_size = 1U << align_bits; | 84 | constexpr static size_t align_size = 1U << align_bits; |
| 79 | constexpr static size_t align_mask = align_size - 1; | 85 | constexpr static size_t align_mask = align_size - 1; |
| @@ -94,11 +100,11 @@ private: | |||
| 94 | } | 100 | } |
| 95 | 101 | ||
| 96 | TransformAddress BuildTransform(VAddr address, size_t size) { | 102 | TransformAddress BuildTransform(VAddr address, size_t size) { |
| 97 | const size_t minor_address = address & Memory::YUZU_PAGEMASK; | 103 | const size_t minor_address = address & page_mask; |
| 98 | const size_t minor_bit = minor_address >> align_bits; | 104 | const size_t minor_bit = minor_address >> align_bits; |
| 99 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | 105 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |
| 100 | TransformAddress result{}; | 106 | TransformAddress result{}; |
| 101 | result.address = address >> Memory::YUZU_PAGEBITS; | 107 | result.address = address >> page_bits; |
| 102 | result.mask = CreateMask<u64>(top_bit, minor_bit); | 108 | result.mask = CreateMask<u64>(top_bit, minor_bit); |
| 103 | return result; | 109 | return result; |
| 104 | } | 110 | } |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 60b246bdd..257406f09 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <span> | ||
| 6 | 7 | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/atomic_ops.h" | 9 | #include "common/atomic_ops.h" |
| @@ -679,7 +680,7 @@ struct Memory::Impl { | |||
| 679 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | 680 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, |
| 680 | GetInteger(vaddr), static_cast<u64>(data)); | 681 | GetInteger(vaddr), static_cast<u64>(data)); |
| 681 | }, | 682 | }, |
| 682 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 683 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 683 | if (ptr) { | 684 | if (ptr) { |
| 684 | std::memcpy(ptr, &data, sizeof(T)); | 685 | std::memcpy(ptr, &data, sizeof(T)); |
| 685 | } | 686 | } |
| @@ -693,7 +694,7 @@ struct Memory::Impl { | |||
| 693 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | 694 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", |
| 694 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | 695 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); |
| 695 | }, | 696 | }, |
| 696 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 697 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 697 | if (ptr) { | 698 | if (ptr) { |
| 698 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | 699 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); |
| 699 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 700 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -708,7 +709,7 @@ struct Memory::Impl { | |||
| 708 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | 709 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", |
| 709 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | 710 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); |
| 710 | }, | 711 | }, |
| 711 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); | 712 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); |
| 712 | if (ptr) { | 713 | if (ptr) { |
| 713 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | 714 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); |
| 714 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 715 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -718,7 +719,7 @@ struct Memory::Impl { | |||
| 718 | 719 | ||
| 719 | void HandleRasterizerDownload(VAddr address, size_t size) { | 720 | void HandleRasterizerDownload(VAddr address, size_t size) { |
| 720 | const size_t core = system.GetCurrentHostThreadID(); | 721 | const size_t core = system.GetCurrentHostThreadID(); |
| 721 | auto& current_area = rasterizer_areas[core]; | 722 | auto& current_area = rasterizer_read_areas[core]; |
| 722 | const VAddr end_address = address + size; | 723 | const VAddr end_address = address + size; |
| 723 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 724 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 724 | [[likely]] { | 725 | [[likely]] { |
| @@ -727,9 +728,31 @@ struct Memory::Impl { | |||
| 727 | current_area = system.GPU().OnCPURead(address, size); | 728 | current_area = system.GPU().OnCPURead(address, size); |
| 728 | } | 729 | } |
| 729 | 730 | ||
| 730 | Common::PageTable* current_page_table = nullptr; | 731 | void HandleRasterizerWrite(VAddr address, size_t size) { |
| 731 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | 732 | const size_t core = system.GetCurrentHostThreadID(); |
| 733 | auto& current_area = rasterizer_write_areas[core]; | ||
| 734 | VAddr subaddress = address >> YUZU_PAGEBITS; | ||
| 735 | bool do_collection = current_area.last_address == subaddress; | ||
| 736 | if (!do_collection) [[unlikely]] { | ||
| 737 | do_collection = system.GPU().OnCPUWrite(address, size); | ||
| 738 | if (!do_collection) { | ||
| 739 | return; | ||
| 740 | } | ||
| 741 | current_area.last_address = subaddress; | ||
| 742 | } | ||
| 743 | gpu_dirty_managers[core].Collect(address, size); | ||
| 744 | } | ||
| 745 | |||
| 746 | struct GPUDirtyState { | ||
| 747 | VAddr last_address; | ||
| 748 | }; | ||
| 749 | |||
| 732 | Core::System& system; | 750 | Core::System& system; |
| 751 | Common::PageTable* current_page_table = nullptr; | ||
| 752 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||
| 753 | rasterizer_read_areas{}; | ||
| 754 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||
| 755 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||
| 733 | }; | 756 | }; |
| 734 | 757 | ||
| 735 | Memory::Memory(Core::System& system_) : system{system_} { | 758 | Memory::Memory(Core::System& system_) : system{system_} { |
| @@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | |||
| 877 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | 900 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); |
| 878 | } | 901 | } |
| 879 | 902 | ||
| 903 | void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||
| 904 | impl->gpu_dirty_managers = managers; | ||
| 905 | } | ||
| 906 | |||
| 880 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | 907 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { |
| 881 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | 908 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); |
| 882 | } | 909 | } |
diff --git a/src/core/memory.h b/src/core/memory.h
index 72a0be813..ea01824f8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <span> | ||
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include "common/typed_address.h" | 10 | #include "common/typed_address.h" |
| 10 | #include "core/hle/result.h" | 11 | #include "core/hle/result.h" |
| @@ -15,7 +16,8 @@ struct PageTable; | |||
| 15 | 16 | ||
| 16 | namespace Core { | 17 | namespace Core { |
| 17 | class System; | 18 | class System; |
| 18 | } | 19 | class GPUDirtyMemoryManager; |
| 20 | } // namespace Core | ||
| 19 | 21 | ||
| 20 | namespace Kernel { | 22 | namespace Kernel { |
| 21 | class PhysicalMemory; | 23 | class PhysicalMemory; |
| @@ -458,6 +460,8 @@ public: | |||
| 458 | */ | 460 | */ |
| 459 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | 461 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |
| 460 | 462 | ||
| 463 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||
| 464 | |||
| 461 | private: | 465 | private: |
| 462 | Core::System& system; | 466 | Core::System& system; |
| 463 | 467 | ||