diff options
| author | 2023-06-29 12:24:56 +0200 | |
|---|---|---|
| committer | 2023-06-29 12:25:12 +0200 | |
| commit | 0e6b559c98e3dee54c3c9eaef2d3e59f3871882d (patch) | |
| tree | 7bbbb964bab0734f2031ca528a50422ac2b84eac | |
| parent | Memory Tracking: Optimize tracking to only use atomic writes when contested w... (diff) | |
| download | yuzu-0e6b559c98e3dee54c3c9eaef2d3e59f3871882d.tar.gz yuzu-0e6b559c98e3dee54c3c9eaef2d3e59f3871882d.tar.xz yuzu-0e6b559c98e3dee54c3c9eaef2d3e59f3871882d.zip | |
Memory Tracker: Use 64 bit atomics instead of 128 bits
| -rw-r--r-- | src/core/gpu_dirty_memory_manager.h | 22 |
1 file changed, 13 insertions, 9 deletions
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 789b7530f..9687531e8 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
| @@ -16,7 +16,10 @@ namespace Core { | |||
| 16 | 16 | ||
| 17 | class GPUDirtyMemoryManager { | 17 | class GPUDirtyMemoryManager { |
| 18 | public: | 18 | public: |
| 19 | GPUDirtyMemoryManager() : current{default_transform} {} | 19 | GPUDirtyMemoryManager() : current{default_transform} { |
| 20 | back_buffer.reserve(256); | ||
| 21 | front_buffer.reserve(256); | ||
| 22 | } | ||
| 20 | 23 | ||
| 21 | ~GPUDirtyMemoryManager() = default; | 24 | ~GPUDirtyMemoryManager() = default; |
| 22 | 25 | ||
| @@ -62,7 +65,8 @@ public: | |||
| 62 | mask = mask >> empty_bits; | 65 | mask = mask >> empty_bits; |
| 63 | 66 | ||
| 64 | const size_t continuous_bits = std::countr_one(mask); | 67 | const size_t continuous_bits = std::countr_one(mask); |
| 65 | callback((transform.address << page_bits) + offset, continuous_bits << align_bits); | 68 | callback((static_cast<VAddr>(transform.address) << page_bits) + offset, |
| 69 | continuous_bits << align_bits); | ||
| 66 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | 70 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |
| 67 | offset += continuous_bits << align_bits; | 71 | offset += continuous_bits << align_bits; |
| 68 | } | 72 | } |
| @@ -71,19 +75,19 @@ public: | |||
| 71 | } | 75 | } |
| 72 | 76 | ||
| 73 | private: | 77 | private: |
| 74 | struct alignas(16) TransformAddress { | 78 | struct alignas(8) TransformAddress { |
| 75 | VAddr address; | 79 | u32 address; |
| 76 | u64 mask; | 80 | u32 mask; |
| 77 | }; | 81 | }; |
| 78 | 82 | ||
| 79 | constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; | 83 | constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; |
| 80 | constexpr static size_t page_size = 1ULL << page_bits; | 84 | constexpr static size_t page_size = 1ULL << page_bits; |
| 81 | constexpr static size_t page_mask = page_size - 1; | 85 | constexpr static size_t page_mask = page_size - 1; |
| 82 | 86 | ||
| 83 | constexpr static size_t align_bits = 6U; | 87 | constexpr static size_t align_bits = 6U; |
| 84 | constexpr static size_t align_size = 1U << align_bits; | 88 | constexpr static size_t align_size = 1U << align_bits; |
| 85 | constexpr static size_t align_mask = align_size - 1; | 89 | constexpr static size_t align_mask = align_size - 1; |
| 86 | constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL}; | 90 | constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; |
| 87 | 91 | ||
| 88 | bool IsValid(VAddr address) { | 92 | bool IsValid(VAddr address) { |
| 89 | return address < (1ULL << 39); | 93 | return address < (1ULL << 39); |
| @@ -104,8 +108,8 @@ private: | |||
| 104 | const size_t minor_bit = minor_address >> align_bits; | 108 | const size_t minor_bit = minor_address >> align_bits; |
| 105 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | 109 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |
| 106 | TransformAddress result{}; | 110 | TransformAddress result{}; |
| 107 | result.address = address >> page_bits; | 111 | result.address = static_cast<u32>(address >> page_bits); |
| 108 | result.mask = CreateMask<u64>(top_bit, minor_bit); | 112 | result.mask = CreateMask<u32>(top_bit, minor_bit); |
| 109 | return result; | 113 | return result; |
| 110 | } | 114 | } |
| 111 | 115 | ||