-rw-r--r--   src/core/core.cpp                                |  26
-rw-r--r--   src/core/core.h                                  |  11
-rw-r--r--   src/core/gpu_dirty_memory_manager.h              | 112
-rw-r--r--   src/core/memory.cpp                              |   7
-rw-r--r--   src/video_core/buffer_cache/buffer_cache.h       |  26
-rw-r--r--   src/video_core/buffer_cache/buffer_cache_base.h  |   2
-rw-r--r--   src/video_core/gpu.cpp                           |   4
-rw-r--r--   src/video_core/renderer_vulkan/vk_rasterizer.cpp |   9
8 files changed, 183 insertions, 14 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b74fd0a58..deefeb301 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,6 +27,7 @@
 #include "core/file_sys/savedata_factory.h"
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
+#include "core/gpu_dirty_memory_manager.h"
 #include "core/hid/hid_core.h"
 #include "core/hle/kernel/k_memory_manager.h"
 #include "core/hle/kernel/k_process.h"
@@ -54,6 +55,7 @@
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
+
 MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
@@ -540,6 +542,9 @@ struct System::Impl {
 
     std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
     std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
+
+    std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
+        gpu_dirty_memory_write_manager{};
 };
 
 System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -629,10 +634,31 @@ void System::PrepareReschedule(const u32 core_index) {
     impl->kernel.PrepareReschedule(core_index);
 }
 
+Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
+    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
+                                                    ? core
+                                                    : Core::Hardware::NUM_CPU_CORES - 1];
+}
+
+/// Provides a constant reference to the current GPU dirty memory manager.
+const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
+    const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+    return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
+                                                    ? core
+                                                    : Core::Hardware::NUM_CPU_CORES - 1];
+}
+
 size_t System::GetCurrentHostThreadID() const {
     return impl->kernel.GetCurrentHostThreadID();
 }
 
+void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
+    for (auto& manager : impl->gpu_dirty_memory_write_manager) {
+        manager.Gather(callback);
+    }
+}
+
 PerfStatsResults System::GetAndResetPerfStats() {
     return impl->GetAndResetPerfStats();
 }
diff --git a/src/core/core.h b/src/core/core.h
index 93afc9303..14b2f7785 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -108,9 +108,10 @@ class CpuManager;
 class Debugger;
 class DeviceMemory;
 class ExclusiveMonitor;
-class SpeedLimiter;
+class GPUDirtyMemoryManager;
 class PerfStats;
 class Reporter;
+class SpeedLimiter;
 class TelemetrySession;
 
 struct PerfStatsResults;
@@ -225,6 +226,14 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule(u32 core_index);
 
+    /// Provides a reference to the current GPU dirty memory manager.
+    [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
+
+    /// Provides a constant reference to the current GPU dirty memory manager.
+    [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
+
+    void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
+
     [[nodiscard]] size_t GetCurrentHostThreadID() const;
 
     /// Gets and resets core performance statistics
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
new file mode 100644
index 000000000..9c3d41d11
--- /dev/null
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <atomic>
+#include <bit>
+#include <functional>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include "core/memory.h"
+
+namespace Core {
+
+class GPUDirtyMemoryManager {
+public:
+    GPUDirtyMemoryManager() : current{default_transform} {}
+
+    ~GPUDirtyMemoryManager() = default;
+
+    void Collect(VAddr address, size_t size) {
+        TransformAddress t = BuildTransform(address, size);
+        TransformAddress tmp, original;
+        do {
+            tmp = current.load(std::memory_order_acquire);
+            original = tmp;
+            if (tmp.address != t.address) {
+                if (IsValid(tmp.address)) {
+                    std::scoped_lock lk(guard);
+                    back_buffer.emplace_back(tmp);
+                    current.exchange(t, std::memory_order_relaxed);
+                    return;
+                }
+                tmp.address = t.address;
+                tmp.mask = 0;
+            }
+            if ((tmp.mask | t.mask) == tmp.mask) {
+                return;
+            }
+            tmp.mask |= t.mask;
+        } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
+                                                std::memory_order_relaxed));
+    }
+
+    void Gather(std::function<void(VAddr, size_t)>& callback) {
+        {
+            std::scoped_lock lk(guard);
+            TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
+            front_buffer.swap(back_buffer);
+            if (IsValid(t.address)) {
+                front_buffer.emplace_back(t);
+            }
+        }
+        for (auto& transform : front_buffer) {
+            size_t offset = 0;
+            u64 mask = transform.mask;
+            while (mask != 0) {
+                const size_t empty_bits = std::countr_zero(mask);
+                offset += empty_bits << align_bits;
+                mask = mask >> empty_bits;
+
+                const size_t continuous_bits = std::countr_one(mask);
+                callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
+                         continuous_bits << align_bits);
+                mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
+                offset += continuous_bits << align_bits;
+            }
+        }
+        front_buffer.clear();
+    }
+
+private:
+    struct alignas(16) TransformAddress {
+        VAddr address;
+        u64 mask;
+    };
+
+    constexpr static size_t align_bits = 6U;
+    constexpr static size_t align_size = 1U << align_bits;
+    constexpr static size_t align_mask = align_size - 1;
+    constexpr static TransformAddress default_transform = {.address = ~0ULL, .mask = 0ULL};
+
+    bool IsValid(VAddr address) {
+        return address < (1ULL << 39);
+    }
+
+    template <typename T>
+    T CreateMask(size_t top_bit, size_t minor_bit) {
+        T mask = ~T(0);
+        mask <<= (sizeof(T) * 8 - top_bit);
+        mask >>= (sizeof(T) * 8 - top_bit);
+        mask >>= minor_bit;
+        mask <<= minor_bit;
+        return mask;
+    }
+
+    TransformAddress BuildTransform(VAddr address, size_t size) {
+        const size_t minor_address = address & Memory::YUZU_PAGEMASK;
+        const size_t minor_bit = minor_address >> align_bits;
+        const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
+        TransformAddress result{};
+        result.address = address >> Memory::YUZU_PAGEBITS;
+        result.mask = CreateMask<u64>(top_bit, minor_bit);
+        return result;
+    }
+
+    std::atomic<TransformAddress> current{};
+    std::mutex guard;
+    std::vector<TransformAddress> back_buffer;
+    std::vector<TransformAddress> front_buffer;
+};
+
+} // namespace Core
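
The snippet below is not part of the commit; it is a minimal usage sketch of the new manager, assuming a standalone translation unit where VAddr and the u64 page constants come in through "core/gpu_dirty_memory_manager.h". It illustrates the intended split: writer threads call Collect() per guest write (one bit per 64-byte block of a 4 KiB page, since align_bits is 6 and YUZU_PAGEBITS is 12), and a consumer later drains merged ranges via Gather().

// Illustrative sketch only; Example() and its addresses are hypothetical.
#include <cstdio>
#include <functional>
#include "core/gpu_dirty_memory_manager.h"

void Example() {
    Core::GPUDirtyMemoryManager manager;

    // CPU write path: record two writes landing on the same 4 KiB page.
    manager.Collect(0x8004000, 4);    // marks one 64-byte block (bit 0)
    manager.Collect(0x8004040, 128);  // marks the next two blocks (bits 1-2)

    // GPU side: drain everything collected since the last Gather().
    std::function<void(VAddr, size_t)> callback = [](VAddr addr, size_t size) {
        std::printf("dirty range: 0x%llx, %zu bytes\n",
                    static_cast<unsigned long long>(addr), size);
    };
    // Adjacent blocks are merged, so this reports one range of 192 bytes.
    manager.Gather(callback);
}
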
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 514ba0d66..60b246bdd 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -13,6 +13,7 @@
 #include "common/swap.h"
 #include "core/core.h"
 #include "core/device_memory.h"
+#include "core/gpu_dirty_memory_manager.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
@@ -678,7 +679,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
                           GetInteger(vaddr), static_cast<u64>(data));
             },
-            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
         if (ptr) {
             std::memcpy(ptr, &data, sizeof(T));
         }
@@ -692,7 +693,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
                           sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
             },
-            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); });
+            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
         if (ptr) {
             const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -707,7 +708,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
                           GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
             },
-            [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); });
+            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
         if (ptr) {
             const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 58a45ab67..9239ad862 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -115,7 +115,21 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
 
 template <class P>
 void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
-    memory_tracker.CachedCpuWrite(cpu_addr, size);
+    const bool is_dirty = IsRegionRegistered(cpu_addr, size);
+    if (!is_dirty) {
+        return;
+    }
+    VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
+    VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
+    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
+        WriteMemory(cpu_addr, size);
+        return;
+    }
+
+    tmp_buffer.resize_destructive(size);
+    cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
+
+    InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
 }
 
 template <class P>
@@ -1553,6 +1567,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
         return false;
     }
 
+    InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
+
+    return true;
+}
+
+template <class P>
+void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+                                                std::span<const u8> inlined_buffer) {
     const IntervalType subtract_interval{dest_address, dest_address + copy_size};
     ClearDownload(subtract_interval);
     common_ranges.subtract(subtract_interval);
@@ -1574,8 +1596,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }
-
-    return true;
 }
 
 template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index fe6068cfe..4d9bab7f7 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -543,6 +543,8 @@ private:
 
     void ClearDownload(IntervalType subtract_interval);
 
+    void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
+
     VideoCore::RasterizerInterface& rasterizer;
     Core::Memory::Memory& cpu_memory;
 
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index db385076d..f823a1e2b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,7 +95,9 @@ struct GPU::Impl {
 
     /// Synchronizes CPU writes with Host GPU memory.
     void InvalidateGPUCache() {
-        rasterizer->InvalidateGPUCache();
+        std::function<void(VAddr, size_t)> callback_writes(
+            [this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); });
+        system.GatherGPUDirtyMemory(callback_writes);
     }
 
     /// Signal the ending of command list.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f7c0d939a..a63a29e61 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -570,7 +570,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
-    pipeline_cache.OnCPUWrite(addr, size);
+
     {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
@@ -579,14 +579,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.CachedWriteMemory(addr, size);
     }
+    pipeline_cache.InvalidateRegion(addr, size);
 }
 
 void RasterizerVulkan::InvalidateGPUCache() {
-    pipeline_cache.SyncGuestHost();
-    {
-        std::scoped_lock lock{buffer_cache.mutex};
-        buffer_cache.FlushCachedWrites();
-    }
+    gpu.InvalidateGPUCache();
 }
 
 void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
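
Taken together, the change replaces eager per-write cache invalidation with deferred gathering of dirty ranges. The sketch below is a simplified, illustrative summary of that flow, not a copy of any one function from the commit; the helper names OnGuestWrite and OnGpuNeedsCoherence are hypothetical and exist only to show where the hot and cold paths sit.

// Illustrative sketch of the resulting write-tracking flow.
#include <functional>
#include "core/core.h"
#include "video_core/rasterizer_interface.h"

void OnGuestWrite(Core::System& system, VAddr vaddr, size_t size) {
    // Hot path (memory.cpp): the write is only recorded per host core.
    system.CurrentGPUDirtyMemoryManager().Collect(vaddr, size);
}

void OnGpuNeedsCoherence(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
    // Cold path (gpu.cpp / vk_rasterizer.cpp): drain every per-core manager and
    // invalidate only the ranges that were actually written since the last drain.
    std::function<void(VAddr, size_t)> callback =
        [&rasterizer](VAddr addr, size_t size) { rasterizer.OnCPUWrite(addr, size); };
    system.GatherGPUDirtyMemory(callback);
}
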