summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-06-28 19:32:50 +0200
committer: Fernando Sahmkow, 2023-06-28 21:32:45 +0200
commit: da440da9f54cc860f3c69da685a415d5ec9d7b64 (patch)
tree: 5a7a4a56462244970e1356a723e6a8a77477f820 /src/core
parent: MemoryTracking: Initial setup of atomic writes. (diff)
download: yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.gz
yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.xz
yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.zip
Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/core.cpp | 8
-rw-r--r-- src/core/gpu_dirty_memory_manager.h | 14
-rw-r--r-- src/core/memory.cpp | 39
-rw-r--r-- src/core/memory.h | 6
4 files changed, 54 insertions, 13 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index deefeb301..9e3eb3795 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -55,7 +55,6 @@
55#include "video_core/renderer_base.h" 55#include "video_core/renderer_base.h"
56#include "video_core/video_core.h" 56#include "video_core/video_core.h"
57 57
58
59MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); 58MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
60MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); 59MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
61MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); 60MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
@@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
132struct System::Impl { 131struct System::Impl {
133 explicit Impl(System& system) 132 explicit Impl(System& system)
134 : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, 133 : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
135 cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} 134 cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
135 gpu_dirty_memory_write_manager{} {
136 memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
137 }
136 138
137 void Initialize(System& system) { 139 void Initialize(System& system) {
138 device_memory = std::make_unique<Core::DeviceMemory>(); 140 device_memory = std::make_unique<Core::DeviceMemory>();
@@ -236,6 +238,8 @@ struct System::Impl {
236 // Setting changes may require a full system reinitialization (e.g., disabling multicore). 238 // Setting changes may require a full system reinitialization (e.g., disabling multicore).
237 ReinitializeIfNecessary(system); 239 ReinitializeIfNecessary(system);
238 240
241 memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
242
239 kernel.Initialize(); 243 kernel.Initialize();
240 cpu_manager.Initialize(); 244 cpu_manager.Initialize();
241 245
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9c3d41d11..789b7530f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -1,3 +1,6 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
1#pragma once 4#pragma once
2 5
3#include <atomic> 6#include <atomic>
@@ -59,8 +62,7 @@ public:
59 mask = mask >> empty_bits; 62 mask = mask >> empty_bits;
60 63
61 const size_t continuous_bits = std::countr_one(mask); 64 const size_t continuous_bits = std::countr_one(mask);
62 callback((transform.address << Memory::YUZU_PAGEBITS) + offset, 65 callback((transform.address << page_bits) + offset, continuous_bits << align_bits);
63 continuous_bits << align_bits);
64 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; 66 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
65 offset += continuous_bits << align_bits; 67 offset += continuous_bits << align_bits;
66 } 68 }
@@ -74,6 +76,10 @@ private:
74 u64 mask; 76 u64 mask;
75 }; 77 };
76 78
79 constexpr static size_t page_bits = Memory::YUZU_PAGEBITS;
80 constexpr static size_t page_size = 1ULL << page_bits;
81 constexpr static size_t page_mask = page_size - 1;
82
77 constexpr static size_t align_bits = 6U; 83 constexpr static size_t align_bits = 6U;
78 constexpr static size_t align_size = 1U << align_bits; 84 constexpr static size_t align_size = 1U << align_bits;
79 constexpr static size_t align_mask = align_size - 1; 85 constexpr static size_t align_mask = align_size - 1;
@@ -94,11 +100,11 @@ private:
94 } 100 }
95 101
96 TransformAddress BuildTransform(VAddr address, size_t size) { 102 TransformAddress BuildTransform(VAddr address, size_t size) {
97 const size_t minor_address = address & Memory::YUZU_PAGEMASK; 103 const size_t minor_address = address & page_mask;
98 const size_t minor_bit = minor_address >> align_bits; 104 const size_t minor_bit = minor_address >> align_bits;
99 const size_t top_bit = (minor_address + size + align_mask) >> align_bits; 105 const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
100 TransformAddress result{}; 106 TransformAddress result{};
101 result.address = address >> Memory::YUZU_PAGEBITS; 107 result.address = address >> page_bits;
102 result.mask = CreateMask<u64>(top_bit, minor_bit); 108 result.mask = CreateMask<u64>(top_bit, minor_bit);
103 return result; 109 return result;
104 } 110 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 60b246bdd..257406f09 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,6 +3,7 @@
3 3
4#include <algorithm> 4#include <algorithm>
5#include <cstring> 5#include <cstring>
6#include <span>
6 7
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/atomic_ops.h" 9#include "common/atomic_ops.h"
@@ -679,7 +680,7 @@ struct Memory::Impl {
679 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, 680 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
680 GetInteger(vaddr), static_cast<u64>(data)); 681 GetInteger(vaddr), static_cast<u64>(data));
681 }, 682 },
682 [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); 683 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
683 if (ptr) { 684 if (ptr) {
684 std::memcpy(ptr, &data, sizeof(T)); 685 std::memcpy(ptr, &data, sizeof(T));
685 } 686 }
@@ -693,7 +694,7 @@ struct Memory::Impl {
693 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", 694 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
694 sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); 695 sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
695 }, 696 },
696 [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); 697 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
697 if (ptr) { 698 if (ptr) {
698 const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); 699 const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
699 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); 700 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -708,7 +709,7 @@ struct Memory::Impl {
708 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", 709 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
709 GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); 710 GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
710 }, 711 },
711 [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); 712 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
712 if (ptr) { 713 if (ptr) {
713 const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); 714 const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
714 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); 715 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -718,7 +719,7 @@ struct Memory::Impl {
718 719
719 void HandleRasterizerDownload(VAddr address, size_t size) { 720 void HandleRasterizerDownload(VAddr address, size_t size) {
720 const size_t core = system.GetCurrentHostThreadID(); 721 const size_t core = system.GetCurrentHostThreadID();
721 auto& current_area = rasterizer_areas[core]; 722 auto& current_area = rasterizer_read_areas[core];
722 const VAddr end_address = address + size; 723 const VAddr end_address = address + size;
723 if (current_area.start_address <= address && end_address <= current_area.end_address) 724 if (current_area.start_address <= address && end_address <= current_area.end_address)
724 [[likely]] { 725 [[likely]] {
@@ -727,9 +728,31 @@ struct Memory::Impl {
727 current_area = system.GPU().OnCPURead(address, size); 728 current_area = system.GPU().OnCPURead(address, size);
728 } 729 }
729 730
730 Common::PageTable* current_page_table = nullptr; 731 void HandleRasterizerWrite(VAddr address, size_t size) {
731 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; 732 const size_t core = system.GetCurrentHostThreadID();
733 auto& current_area = rasterizer_write_areas[core];
734 VAddr subaddress = address >> YUZU_PAGEBITS;
735 bool do_collection = current_area.last_address == subaddress;
736 if (!do_collection) [[unlikely]] {
737 do_collection = system.GPU().OnCPUWrite(address, size);
738 if (!do_collection) {
739 return;
740 }
741 current_area.last_address = subaddress;
742 }
743 gpu_dirty_managers[core].Collect(address, size);
744 }
745
746 struct GPUDirtyState {
747 VAddr last_address;
748 };
749
732 Core::System& system; 750 Core::System& system;
751 Common::PageTable* current_page_table = nullptr;
752 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
753 rasterizer_read_areas{};
754 std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
755 std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
733}; 756};
734 757
735Memory::Memory(Core::System& system_) : system{system_} { 758Memory::Memory(Core::System& system_) : system{system_} {
@@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
877 impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); 900 impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
878} 901}
879 902
903void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
904 impl->gpu_dirty_managers = managers;
905}
906
880Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { 907Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
881 return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); 908 return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
882} 909}
diff --git a/src/core/memory.h b/src/core/memory.h
index 72a0be813..ea01824f8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,6 +5,7 @@
5 5
6#include <cstddef> 6#include <cstddef>
7#include <memory> 7#include <memory>
8#include <span>
8#include <string> 9#include <string>
9#include "common/typed_address.h" 10#include "common/typed_address.h"
10#include "core/hle/result.h" 11#include "core/hle/result.h"
@@ -15,7 +16,8 @@ struct PageTable;
15 16
16namespace Core { 17namespace Core {
17class System; 18class System;
18} 19class GPUDirtyMemoryManager;
20} // namespace Core
19 21
20namespace Kernel { 22namespace Kernel {
21class PhysicalMemory; 23class PhysicalMemory;
@@ -458,6 +460,8 @@ public:
458 */ 460 */
459 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); 461 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
460 462
463 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
464
461private: 465private:
462 Core::System& system; 466 Core::System& system;
463 467