diff options
| author | 2023-06-28 19:32:50 +0200 | |
|---|---|---|
| committer | 2023-06-28 21:32:45 +0200 | |
| commit | da440da9f54cc860f3c69da685a415d5ec9d7b64 (patch) | |
| tree | 5a7a4a56462244970e1356a723e6a8a77477f820 /src/core/memory.cpp | |
| parent | MemoryTracking: Initial setup of atomic writes. (diff) | |
| download | yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.gz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.xz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.zip | |
Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
Diffstat (limited to 'src/core/memory.cpp')
| -rw-r--r-- | src/core/memory.cpp | 39 |
1 file changed, 33 insertions, 6 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 60b246bdd..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <span> | ||
| 6 | 7 | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/atomic_ops.h" | 9 | #include "common/atomic_ops.h" |
| @@ -679,7 +680,7 @@ struct Memory::Impl { | |||
| 679 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | 680 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, |
| 680 | GetInteger(vaddr), static_cast<u64>(data)); | 681 | GetInteger(vaddr), static_cast<u64>(data)); |
| 681 | }, | 682 | }, |
| 682 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 683 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 683 | if (ptr) { | 684 | if (ptr) { |
| 684 | std::memcpy(ptr, &data, sizeof(T)); | 685 | std::memcpy(ptr, &data, sizeof(T)); |
| 685 | } | 686 | } |
| @@ -693,7 +694,7 @@ struct Memory::Impl { | |||
| 693 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | 694 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", |
| 694 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | 695 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); |
| 695 | }, | 696 | }, |
| 696 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 697 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 697 | if (ptr) { | 698 | if (ptr) { |
| 698 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | 699 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); |
| 699 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 700 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -708,7 +709,7 @@ struct Memory::Impl { | |||
| 708 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | 709 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", |
| 709 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | 710 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); |
| 710 | }, | 711 | }, |
| 711 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); | 712 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); |
| 712 | if (ptr) { | 713 | if (ptr) { |
| 713 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | 714 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); |
| 714 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 715 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -718,7 +719,7 @@ struct Memory::Impl { | |||
| 718 | 719 | ||
| 719 | void HandleRasterizerDownload(VAddr address, size_t size) { | 720 | void HandleRasterizerDownload(VAddr address, size_t size) { |
| 720 | const size_t core = system.GetCurrentHostThreadID(); | 721 | const size_t core = system.GetCurrentHostThreadID(); |
| 721 | auto& current_area = rasterizer_areas[core]; | 722 | auto& current_area = rasterizer_read_areas[core]; |
| 722 | const VAddr end_address = address + size; | 723 | const VAddr end_address = address + size; |
| 723 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 724 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 724 | [[likely]] { | 725 | [[likely]] { |
| @@ -727,9 +728,31 @@ struct Memory::Impl { | |||
| 727 | current_area = system.GPU().OnCPURead(address, size); | 728 | current_area = system.GPU().OnCPURead(address, size); |
| 728 | } | 729 | } |
| 729 | 730 | ||
| 730 | Common::PageTable* current_page_table = nullptr; | 731 | void HandleRasterizerWrite(VAddr address, size_t size) { |
| 731 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | 732 | const size_t core = system.GetCurrentHostThreadID(); |
| 733 | auto& current_area = rasterizer_write_areas[core]; | ||
| 734 | VAddr subaddress = address >> YUZU_PAGEBITS; | ||
| 735 | bool do_collection = current_area.last_address == subaddress; | ||
| 736 | if (!do_collection) [[unlikely]] { | ||
| 737 | do_collection = system.GPU().OnCPUWrite(address, size); | ||
| 738 | if (!do_collection) { | ||
| 739 | return; | ||
| 740 | } | ||
| 741 | current_area.last_address = subaddress; | ||
| 742 | } | ||
| 743 | gpu_dirty_managers[core].Collect(address, size); | ||
| 744 | } | ||
| 745 | |||
| 746 | struct GPUDirtyState { | ||
| 747 | VAddr last_address; | ||
| 748 | }; | ||
| 749 | |||
| 732 | Core::System& system; | 750 | Core::System& system; |
| 751 | Common::PageTable* current_page_table = nullptr; | ||
| 752 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||
| 753 | rasterizer_read_areas{}; | ||
| 754 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||
| 755 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||
| 733 | }; | 756 | }; |
| 734 | 757 | ||
| 735 | Memory::Memory(Core::System& system_) : system{system_} { | 758 | Memory::Memory(Core::System& system_) : system{system_} { |
| @@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | |||
| 877 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | 900 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); |
| 878 | } | 901 | } |
| 879 | 902 | ||
| 903 | void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||
| 904 | impl->gpu_dirty_managers = managers; | ||
| 905 | } | ||
| 906 | |||
| 880 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | 907 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { |
| 881 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | 908 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); |
| 882 | } | 909 | } |