summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/core.cpp32
-rw-r--r--src/core/core.h11
-rw-r--r--src/core/gpu_dirty_memory_manager.h122
-rw-r--r--src/core/memory.cpp40
-rw-r--r--src/core/memory.h6
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h39
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h5
-rw-r--r--src/video_core/fence_manager.h2
-rw-r--r--src/video_core/gpu.cpp12
-rw-r--r--src/video_core/gpu.h4
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp5
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp35
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/shader_cache.cpp2
-rw-r--r--src/video_core/shader_cache.h2
20 files changed, 329 insertions, 41 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index b74fd0a58..9e3eb3795 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,6 +27,7 @@
27#include "core/file_sys/savedata_factory.h" 27#include "core/file_sys/savedata_factory.h"
28#include "core/file_sys/vfs_concat.h" 28#include "core/file_sys/vfs_concat.h"
29#include "core/file_sys/vfs_real.h" 29#include "core/file_sys/vfs_real.h"
30#include "core/gpu_dirty_memory_manager.h"
30#include "core/hid/hid_core.h" 31#include "core/hid/hid_core.h"
31#include "core/hle/kernel/k_memory_manager.h" 32#include "core/hle/kernel/k_memory_manager.h"
32#include "core/hle/kernel/k_process.h" 33#include "core/hle/kernel/k_process.h"
@@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
130struct System::Impl { 131struct System::Impl {
131 explicit Impl(System& system) 132 explicit Impl(System& system)
132 : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, 133 : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
133 cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} 134 cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
135 gpu_dirty_memory_write_manager{} {
136 memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
137 }
134 138
135 void Initialize(System& system) { 139 void Initialize(System& system) {
136 device_memory = std::make_unique<Core::DeviceMemory>(); 140 device_memory = std::make_unique<Core::DeviceMemory>();
@@ -234,6 +238,8 @@ struct System::Impl {
234 // Setting changes may require a full system reinitialization (e.g., disabling multicore). 238 // Setting changes may require a full system reinitialization (e.g., disabling multicore).
235 ReinitializeIfNecessary(system); 239 ReinitializeIfNecessary(system);
236 240
241 memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
242
237 kernel.Initialize(); 243 kernel.Initialize();
238 cpu_manager.Initialize(); 244 cpu_manager.Initialize();
239 245
@@ -540,6 +546,9 @@ struct System::Impl {
540 546
541 std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; 547 std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
542 std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; 548 std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{};
549
550 std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES>
551 gpu_dirty_memory_write_manager{};
543}; 552};
544 553
545System::System() : impl{std::make_unique<Impl>(*this)} {} 554System::System() : impl{std::make_unique<Impl>(*this)} {}
@@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) {
629 impl->kernel.PrepareReschedule(core_index); 638 impl->kernel.PrepareReschedule(core_index);
630} 639}
631 640
641Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() {
642 const std::size_t core = impl->kernel.GetCurrentHostThreadID();
643 return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
644 ? core
645 : Core::Hardware::NUM_CPU_CORES - 1];
646}
647
648/// Provides a constant reference to the current gou dirty memory manager.
649const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const {
650 const std::size_t core = impl->kernel.GetCurrentHostThreadID();
651 return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES
652 ? core
653 : Core::Hardware::NUM_CPU_CORES - 1];
654}
655
632size_t System::GetCurrentHostThreadID() const { 656size_t System::GetCurrentHostThreadID() const {
633 return impl->kernel.GetCurrentHostThreadID(); 657 return impl->kernel.GetCurrentHostThreadID();
634} 658}
635 659
660void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) {
661 for (auto& manager : impl->gpu_dirty_memory_write_manager) {
662 manager.Gather(callback);
663 }
664}
665
636PerfStatsResults System::GetAndResetPerfStats() { 666PerfStatsResults System::GetAndResetPerfStats() {
637 return impl->GetAndResetPerfStats(); 667 return impl->GetAndResetPerfStats();
638} 668}
diff --git a/src/core/core.h b/src/core/core.h
index 93afc9303..14b2f7785 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -108,9 +108,10 @@ class CpuManager;
108class Debugger; 108class Debugger;
109class DeviceMemory; 109class DeviceMemory;
110class ExclusiveMonitor; 110class ExclusiveMonitor;
111class SpeedLimiter; 111class GPUDirtyMemoryManager;
112class PerfStats; 112class PerfStats;
113class Reporter; 113class Reporter;
114class SpeedLimiter;
114class TelemetrySession; 115class TelemetrySession;
115 116
116struct PerfStatsResults; 117struct PerfStatsResults;
@@ -225,6 +226,14 @@ public:
225 /// Prepare the core emulation for a reschedule 226 /// Prepare the core emulation for a reschedule
226 void PrepareReschedule(u32 core_index); 227 void PrepareReschedule(u32 core_index);
227 228
229 /// Provides a reference to the current GPU dirty memory manager.
230 [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager();
231
232 /// Provides a constant reference to the current GPU dirty memory manager.
233 [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const;
234
235 void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback);
236
228 [[nodiscard]] size_t GetCurrentHostThreadID() const; 237 [[nodiscard]] size_t GetCurrentHostThreadID() const;
229 238
230 /// Gets and resets core performance statistics 239 /// Gets and resets core performance statistics
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
new file mode 100644
index 000000000..9687531e8
--- /dev/null
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -0,0 +1,122 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <bit>
8#include <functional>
9#include <mutex>
10#include <utility>
11#include <vector>
12
13#include "core/memory.h"
14
15namespace Core {
16
17class GPUDirtyMemoryManager {
18public:
19 GPUDirtyMemoryManager() : current{default_transform} {
20 back_buffer.reserve(256);
21 front_buffer.reserve(256);
22 }
23
24 ~GPUDirtyMemoryManager() = default;
25
26 void Collect(VAddr address, size_t size) {
27 TransformAddress t = BuildTransform(address, size);
28 TransformAddress tmp, original;
29 do {
30 tmp = current.load(std::memory_order_acquire);
31 original = tmp;
32 if (tmp.address != t.address) {
33 if (IsValid(tmp.address)) {
34 std::scoped_lock lk(guard);
35 back_buffer.emplace_back(tmp);
36 current.exchange(t, std::memory_order_relaxed);
37 return;
38 }
39 tmp.address = t.address;
40 tmp.mask = 0;
41 }
42 if ((tmp.mask | t.mask) == tmp.mask) {
43 return;
44 }
45 tmp.mask |= t.mask;
46 } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release,
47 std::memory_order_relaxed));
48 }
49
50 void Gather(std::function<void(VAddr, size_t)>& callback) {
51 {
52 std::scoped_lock lk(guard);
53 TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed);
54 front_buffer.swap(back_buffer);
55 if (IsValid(t.address)) {
56 front_buffer.emplace_back(t);
57 }
58 }
59 for (auto& transform : front_buffer) {
60 size_t offset = 0;
61 u64 mask = transform.mask;
62 while (mask != 0) {
63 const size_t empty_bits = std::countr_zero(mask);
64 offset += empty_bits << align_bits;
65 mask = mask >> empty_bits;
66
67 const size_t continuous_bits = std::countr_one(mask);
68 callback((static_cast<VAddr>(transform.address) << page_bits) + offset,
69 continuous_bits << align_bits);
70 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
71 offset += continuous_bits << align_bits;
72 }
73 }
74 front_buffer.clear();
75 }
76
77private:
78 struct alignas(8) TransformAddress {
79 u32 address;
80 u32 mask;
81 };
82
83 constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1;
84 constexpr static size_t page_size = 1ULL << page_bits;
85 constexpr static size_t page_mask = page_size - 1;
86
87 constexpr static size_t align_bits = 6U;
88 constexpr static size_t align_size = 1U << align_bits;
89 constexpr static size_t align_mask = align_size - 1;
90 constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U};
91
92 bool IsValid(VAddr address) {
93 return address < (1ULL << 39);
94 }
95
96 template <typename T>
97 T CreateMask(size_t top_bit, size_t minor_bit) {
98 T mask = ~T(0);
99 mask <<= (sizeof(T) * 8 - top_bit);
100 mask >>= (sizeof(T) * 8 - top_bit);
101 mask >>= minor_bit;
102 mask <<= minor_bit;
103 return mask;
104 }
105
106 TransformAddress BuildTransform(VAddr address, size_t size) {
107 const size_t minor_address = address & page_mask;
108 const size_t minor_bit = minor_address >> align_bits;
109 const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
110 TransformAddress result{};
111 result.address = static_cast<u32>(address >> page_bits);
112 result.mask = CreateMask<u32>(top_bit, minor_bit);
113 return result;
114 }
115
116 std::atomic<TransformAddress> current{};
117 std::mutex guard;
118 std::vector<TransformAddress> back_buffer;
119 std::vector<TransformAddress> front_buffer;
120};
121
122} // namespace Core
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 514ba0d66..257406f09 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,6 +3,7 @@
3 3
4#include <algorithm> 4#include <algorithm>
5#include <cstring> 5#include <cstring>
6#include <span>
6 7
7#include "common/assert.h" 8#include "common/assert.h"
8#include "common/atomic_ops.h" 9#include "common/atomic_ops.h"
@@ -13,6 +14,7 @@
13#include "common/swap.h" 14#include "common/swap.h"
14#include "core/core.h" 15#include "core/core.h"
15#include "core/device_memory.h" 16#include "core/device_memory.h"
17#include "core/gpu_dirty_memory_manager.h"
16#include "core/hardware_properties.h" 18#include "core/hardware_properties.h"
17#include "core/hle/kernel/k_page_table.h" 19#include "core/hle/kernel/k_page_table.h"
18#include "core/hle/kernel/k_process.h" 20#include "core/hle/kernel/k_process.h"
@@ -678,7 +680,7 @@ struct Memory::Impl {
678 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, 680 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
679 GetInteger(vaddr), static_cast<u64>(data)); 681 GetInteger(vaddr), static_cast<u64>(data));
680 }, 682 },
681 [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); 683 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
682 if (ptr) { 684 if (ptr) {
683 std::memcpy(ptr, &data, sizeof(T)); 685 std::memcpy(ptr, &data, sizeof(T));
684 } 686 }
@@ -692,7 +694,7 @@ struct Memory::Impl {
692 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", 694 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
693 sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); 695 sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
694 }, 696 },
695 [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); 697 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
696 if (ptr) { 698 if (ptr) {
697 const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); 699 const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
698 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); 700 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -707,7 +709,7 @@ struct Memory::Impl {
707 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", 709 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
708 GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); 710 GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
709 }, 711 },
710 [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); 712 [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
711 if (ptr) { 713 if (ptr) {
712 const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); 714 const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
713 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); 715 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -717,7 +719,7 @@ struct Memory::Impl {
717 719
718 void HandleRasterizerDownload(VAddr address, size_t size) { 720 void HandleRasterizerDownload(VAddr address, size_t size) {
719 const size_t core = system.GetCurrentHostThreadID(); 721 const size_t core = system.GetCurrentHostThreadID();
720 auto& current_area = rasterizer_areas[core]; 722 auto& current_area = rasterizer_read_areas[core];
721 const VAddr end_address = address + size; 723 const VAddr end_address = address + size;
722 if (current_area.start_address <= address && end_address <= current_area.end_address) 724 if (current_area.start_address <= address && end_address <= current_area.end_address)
723 [[likely]] { 725 [[likely]] {
@@ -726,9 +728,31 @@ struct Memory::Impl {
726 current_area = system.GPU().OnCPURead(address, size); 728 current_area = system.GPU().OnCPURead(address, size);
727 } 729 }
728 730
729 Common::PageTable* current_page_table = nullptr; 731 void HandleRasterizerWrite(VAddr address, size_t size) {
730 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; 732 const size_t core = system.GetCurrentHostThreadID();
733 auto& current_area = rasterizer_write_areas[core];
734 VAddr subaddress = address >> YUZU_PAGEBITS;
735 bool do_collection = current_area.last_address == subaddress;
736 if (!do_collection) [[unlikely]] {
737 do_collection = system.GPU().OnCPUWrite(address, size);
738 if (!do_collection) {
739 return;
740 }
741 current_area.last_address = subaddress;
742 }
743 gpu_dirty_managers[core].Collect(address, size);
744 }
745
746 struct GPUDirtyState {
747 VAddr last_address;
748 };
749
731 Core::System& system; 750 Core::System& system;
751 Common::PageTable* current_page_table = nullptr;
752 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
753 rasterizer_read_areas{};
754 std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
755 std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
732}; 756};
733 757
734Memory::Memory(Core::System& system_) : system{system_} { 758Memory::Memory(Core::System& system_) : system{system_} {
@@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
876 impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); 900 impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
877} 901}
878 902
903void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
904 impl->gpu_dirty_managers = managers;
905}
906
879Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { 907Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
880 return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); 908 return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
881} 909}
diff --git a/src/core/memory.h b/src/core/memory.h
index 72a0be813..ea01824f8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,6 +5,7 @@
5 5
6#include <cstddef> 6#include <cstddef>
7#include <memory> 7#include <memory>
8#include <span>
8#include <string> 9#include <string>
9#include "common/typed_address.h" 10#include "common/typed_address.h"
10#include "core/hle/result.h" 11#include "core/hle/result.h"
@@ -15,7 +16,8 @@ struct PageTable;
15 16
16namespace Core { 17namespace Core {
17class System; 18class System;
18} 19class GPUDirtyMemoryManager;
20} // namespace Core
19 21
20namespace Kernel { 22namespace Kernel {
21class PhysicalMemory; 23class PhysicalMemory;
@@ -458,6 +460,8 @@ public:
458 */ 460 */
459 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); 461 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
460 462
463 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
464
461private: 465private:
462 Core::System& system; 466 Core::System& system;
463 467
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 58a45ab67..b5ed3380f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
115 115
116template <class P> 116template <class P>
117void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 117void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
118 memory_tracker.CachedCpuWrite(cpu_addr, size); 118 const bool is_dirty = IsRegionRegistered(cpu_addr, size);
119 if (!is_dirty) {
120 return;
121 }
122 VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE);
123 VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE);
124 if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
125 WriteMemory(cpu_addr, size);
126 return;
127 }
128
129 tmp_buffer.resize_destructive(size);
130 cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size);
131
132 InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
133}
134
135template <class P>
136bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) {
137 const bool is_dirty = IsRegionRegistered(cpu_addr, size);
138 if (!is_dirty) {
139 return false;
140 }
141 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
142 return true;
143 }
144 WriteMemory(cpu_addr, size);
145 return false;
119} 146}
120 147
121template <class P> 148template <class P>
@@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1553 return false; 1580 return false;
1554 } 1581 }
1555 1582
1583 InlineMemoryImplementation(dest_address, copy_size, inlined_buffer);
1584
1585 return true;
1586}
1587
1588template <class P>
1589void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
1590 std::span<const u8> inlined_buffer) {
1556 const IntervalType subtract_interval{dest_address, dest_address + copy_size}; 1591 const IntervalType subtract_interval{dest_address, dest_address + copy_size};
1557 ClearDownload(subtract_interval); 1592 ClearDownload(subtract_interval);
1558 common_ranges.subtract(subtract_interval); 1593 common_ranges.subtract(subtract_interval);
@@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
1574 } else { 1609 } else {
1575 buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); 1610 buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
1576 } 1611 }
1577
1578 return true;
1579} 1612}
1580 1613
1581template <class P> 1614template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index fe6068cfe..460fc7551 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -245,6 +245,8 @@ public:
245 245
246 void CachedWriteMemory(VAddr cpu_addr, u64 size); 246 void CachedWriteMemory(VAddr cpu_addr, u64 size);
247 247
248 bool OnCPUWrite(VAddr cpu_addr, u64 size);
249
248 void DownloadMemory(VAddr cpu_addr, u64 size); 250 void DownloadMemory(VAddr cpu_addr, u64 size);
249 251
250 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); 252 std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
@@ -543,6 +545,9 @@ private:
543 545
544 void ClearDownload(IntervalType subtract_interval); 546 void ClearDownload(IntervalType subtract_interval);
545 547
548 void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
549 std::span<const u8> inlined_buffer);
550
546 VideoCore::RasterizerInterface& rasterizer; 551 VideoCore::RasterizerInterface& rasterizer;
547 Core::Memory::Memory& cpu_memory; 552 Core::Memory::Memory& cpu_memory;
548 553
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 35d699bbf..ab20ff30f 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -69,7 +69,6 @@ public:
69 } 69 }
70 70
71 void SignalFence(std::function<void()>&& func) { 71 void SignalFence(std::function<void()>&& func) {
72 rasterizer.InvalidateGPUCache();
73 bool delay_fence = Settings::IsGPULevelHigh(); 72 bool delay_fence = Settings::IsGPULevelHigh();
74 if constexpr (!can_async_check) { 73 if constexpr (!can_async_check) {
75 TryReleasePendingFences<false>(); 74 TryReleasePendingFences<false>();
@@ -96,6 +95,7 @@ public:
96 guard.unlock(); 95 guard.unlock();
97 cv.notify_all(); 96 cv.notify_all();
98 } 97 }
98 rasterizer.InvalidateGPUCache();
99 } 99 }
100 100
101 void SignalSyncPoint(u32 value) { 101 void SignalSyncPoint(u32 value) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index db385076d..c192e33b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,7 +95,9 @@ struct GPU::Impl {
95 95
96 /// Synchronizes CPU writes with Host GPU memory. 96 /// Synchronizes CPU writes with Host GPU memory.
97 void InvalidateGPUCache() { 97 void InvalidateGPUCache() {
98 rasterizer->InvalidateGPUCache(); 98 std::function<void(VAddr, size_t)> callback_writes(
99 [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
100 system.GatherGPUDirtyMemory(callback_writes);
99 } 101 }
100 102
101 /// Signal the ending of command list. 103 /// Signal the ending of command list.
@@ -299,6 +301,10 @@ struct GPU::Impl {
299 gpu_thread.InvalidateRegion(addr, size); 301 gpu_thread.InvalidateRegion(addr, size);
300 } 302 }
301 303
304 bool OnCPUWrite(VAddr addr, u64 size) {
305 return rasterizer->OnCPUWrite(addr, size);
306 }
307
302 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 308 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
303 void FlushAndInvalidateRegion(VAddr addr, u64 size) { 309 void FlushAndInvalidateRegion(VAddr addr, u64 size) {
304 gpu_thread.FlushAndInvalidateRegion(addr, size); 310 gpu_thread.FlushAndInvalidateRegion(addr, size);
@@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
561 impl->InvalidateRegion(addr, size); 567 impl->InvalidateRegion(addr, size);
562} 568}
563 569
570bool GPU::OnCPUWrite(VAddr addr, u64 size) {
571 return impl->OnCPUWrite(addr, size);
572}
573
564void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { 574void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
565 impl->FlushAndInvalidateRegion(addr, size); 575 impl->FlushAndInvalidateRegion(addr, size);
566} 576}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e49c40cf2..ba2838b89 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -250,6 +250,10 @@ public:
250 /// Notify rasterizer that any caches of the specified region should be invalidated 250 /// Notify rasterizer that any caches of the specified region should be invalidated
251 void InvalidateRegion(VAddr addr, u64 size); 251 void InvalidateRegion(VAddr addr, u64 size);
252 252
253 /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
254 /// sensitive, false otherwise
255 bool OnCPUWrite(VAddr addr, u64 size);
256
253 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 257 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
254 void FlushAndInvalidateRegion(VAddr addr, u64 size); 258 void FlushAndInvalidateRegion(VAddr addr, u64 size);
255 259
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 889144f38..2f0f9f593 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
47 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { 47 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
48 rasterizer->FlushRegion(flush->addr, flush->size); 48 rasterizer->FlushRegion(flush->addr, flush->size);
49 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { 49 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
50 rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); 50 rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
51 } else { 51 } else {
52 ASSERT(false); 52 ASSERT(false);
53 } 53 }
@@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
102} 102}
103 103
104void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 104void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
105 rasterizer->OnCPUWrite(addr, size); 105 rasterizer->OnCacheInvalidation(addr, size);
106} 106}
107 107
108void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 108void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 109 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
110 rasterizer->OnCPUWrite(addr, size); 110 rasterizer->OnCacheInvalidation(addr, size);
111} 111}
112 112
113u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { 113u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 7566a8c4e..cb8029a4f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -109,7 +109,9 @@ public:
109 } 109 }
110 110
111 /// Notify rasterizer that any caches of the specified region are desync with guest 111 /// Notify rasterizer that any caches of the specified region are desync with guest
112 virtual void OnCPUWrite(VAddr addr, u64 size) = 0; 112 virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
113
114 virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
113 115
114 /// Sync memory between guest and host. 116 /// Sync memory between guest and host.
115 virtual void InvalidateGPUCache() = 0; 117 virtual void InvalidateGPUCache() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index bf2ce4c49..92ecf6682 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
47 return false; 47 return false;
48} 48}
49void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 49void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
50void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} 50bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
51 return false;
52}
53void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
51VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { 54VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
52 VideoCore::RasterizerDownloadArea new_area{ 55 VideoCore::RasterizerDownloadArea new_area{
53 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), 56 .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index a8d35d2c1..93b9a6971 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -53,7 +53,8 @@ public:
53 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 53 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
54 void InvalidateRegion(VAddr addr, u64 size, 54 void InvalidateRegion(VAddr addr, u64 size,
55 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 55 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
56 void OnCPUWrite(VAddr addr, u64 size) override; 56 void OnCacheInvalidation(VAddr addr, u64 size) override;
57 bool OnCPUWrite(VAddr addr, u64 size) override;
57 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 58 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
58 void InvalidateGPUCache() override; 59 void InvalidateGPUCache() override;
59 void UnmapMemory(VAddr addr, u64 size) override; 60 void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index edf527f2d..aadd6967c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
485 } 485 }
486} 486}
487 487
488void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { 488bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
489 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
490 if (addr == 0 || size == 0) {
491 return false;
492 }
493
494 {
495 std::scoped_lock lock{buffer_cache.mutex};
496 if (buffer_cache.OnCPUWrite(addr, size)) {
497 return true;
498 }
499 }
500
501 {
502 std::scoped_lock lock{texture_cache.mutex};
503 texture_cache.WriteMemory(addr, size);
504 }
505
506 shader_cache.InvalidateRegion(addr, size);
507 return false;
508}
509
510void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
489 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 511 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
490 if (addr == 0 || size == 0) { 512 if (addr == 0 || size == 0) {
491 return; 513 return;
492 } 514 }
493 shader_cache.OnCPUWrite(addr, size);
494 { 515 {
495 std::scoped_lock lock{texture_cache.mutex}; 516 std::scoped_lock lock{texture_cache.mutex};
496 texture_cache.WriteMemory(addr, size); 517 texture_cache.WriteMemory(addr, size);
@@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
499 std::scoped_lock lock{buffer_cache.mutex}; 520 std::scoped_lock lock{buffer_cache.mutex};
500 buffer_cache.CachedWriteMemory(addr, size); 521 buffer_cache.CachedWriteMemory(addr, size);
501 } 522 }
523 shader_cache.InvalidateRegion(addr, size);
502} 524}
503 525
504void RasterizerOpenGL::InvalidateGPUCache() { 526void RasterizerOpenGL::InvalidateGPUCache() {
505 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 527 gpu.InvalidateGPUCache();
506 shader_cache.SyncGuestHost();
507 {
508 std::scoped_lock lock{buffer_cache.mutex};
509 buffer_cache.FlushCachedWrites();
510 }
511} 528}
512 529
513void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { 530void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
@@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
519 std::scoped_lock lock{buffer_cache.mutex}; 536 std::scoped_lock lock{buffer_cache.mutex};
520 buffer_cache.WriteMemory(addr, size); 537 buffer_cache.WriteMemory(addr, size);
521 } 538 }
522 shader_cache.OnCPUWrite(addr, size); 539 shader_cache.OnCacheInvalidation(addr, size);
523} 540}
524 541
525void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { 542void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a73ad15c1..8eda2ddba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -98,7 +98,8 @@ public:
98 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; 98 VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
99 void InvalidateRegion(VAddr addr, u64 size, 99 void InvalidateRegion(VAddr addr, u64 size,
100 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 100 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
101 void OnCPUWrite(VAddr addr, u64 size) override; 101 void OnCacheInvalidation(VAddr addr, u64 size) override;
102 bool OnCPUWrite(VAddr addr, u64 size) override;
102 void InvalidateGPUCache() override; 103 void InvalidateGPUCache() override;
103 void UnmapMemory(VAddr addr, u64 size) override; 104 void UnmapMemory(VAddr addr, u64 size) override;
104 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 105 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f7c0d939a..456bb040e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
566 } 566 }
567} 567}
568 568
569void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { 569bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
570 if (addr == 0 || size == 0) {
571 return false;
572 }
573
574 {
575 std::scoped_lock lock{buffer_cache.mutex};
576 if (buffer_cache.OnCPUWrite(addr, size)) {
577 return true;
578 }
579 }
580
581 {
582 std::scoped_lock lock{texture_cache.mutex};
583 texture_cache.WriteMemory(addr, size);
584 }
585
586 pipeline_cache.InvalidateRegion(addr, size);
587 return false;
588}
589
590void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
570 if (addr == 0 || size == 0) { 591 if (addr == 0 || size == 0) {
571 return; 592 return;
572 } 593 }
573 pipeline_cache.OnCPUWrite(addr, size); 594
574 { 595 {
575 std::scoped_lock lock{texture_cache.mutex}; 596 std::scoped_lock lock{texture_cache.mutex};
576 texture_cache.WriteMemory(addr, size); 597 texture_cache.WriteMemory(addr, size);
@@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
579 std::scoped_lock lock{buffer_cache.mutex}; 600 std::scoped_lock lock{buffer_cache.mutex};
580 buffer_cache.CachedWriteMemory(addr, size); 601 buffer_cache.CachedWriteMemory(addr, size);
581 } 602 }
603 pipeline_cache.InvalidateRegion(addr, size);
582} 604}
583 605
584void RasterizerVulkan::InvalidateGPUCache() { 606void RasterizerVulkan::InvalidateGPUCache() {
585 pipeline_cache.SyncGuestHost(); 607 gpu.InvalidateGPUCache();
586 {
587 std::scoped_lock lock{buffer_cache.mutex};
588 buffer_cache.FlushCachedWrites();
589 }
590} 608}
591 609
592void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { 610void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
@@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
598 std::scoped_lock lock{buffer_cache.mutex}; 616 std::scoped_lock lock{buffer_cache.mutex};
599 buffer_cache.WriteMemory(addr, size); 617 buffer_cache.WriteMemory(addr, size);
600 } 618 }
601 pipeline_cache.OnCPUWrite(addr, size); 619 pipeline_cache.OnCacheInvalidation(addr, size);
602} 620}
603 621
604void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { 622void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b39710b3c..73257d964 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,7 +96,8 @@ public:
96 void InvalidateRegion(VAddr addr, u64 size, 96 void InvalidateRegion(VAddr addr, u64 size,
97 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 97 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
98 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; 98 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
99 void OnCPUWrite(VAddr addr, u64 size) override; 99 void OnCacheInvalidation(VAddr addr, u64 size) override;
100 bool OnCPUWrite(VAddr addr, u64 size) override;
100 void InvalidateGPUCache() override; 101 void InvalidateGPUCache() override;
101 void UnmapMemory(VAddr addr, u64 size) override; 102 void UnmapMemory(VAddr addr, u64 size) override;
102 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; 103 void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 4db948b6d..01701201d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
24 RemovePendingShaders(); 24 RemovePendingShaders();
25} 25}
26 26
27void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { 27void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
28 std::scoped_lock lock{invalidation_mutex}; 28 std::scoped_lock lock{invalidation_mutex};
29 InvalidatePagesInRegion(addr, size); 29 InvalidatePagesInRegion(addr, size);
30} 30}
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f3cc4c70b..de8e08002 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -62,7 +62,7 @@ public:
62 /// @brief Unmarks a memory region as cached and marks it for removal 62 /// @brief Unmarks a memory region as cached and marks it for removal
63 /// @param addr Start address of the CPU write operation 63 /// @param addr Start address of the CPU write operation
64 /// @param size Number of bytes of the CPU write operation 64 /// @param size Number of bytes of the CPU write operation
65 void OnCPUWrite(VAddr addr, size_t size); 65 void OnCacheInvalidation(VAddr addr, size_t size);
66 66
67 /// @brief Flushes delayed removal operations 67 /// @brief Flushes delayed removal operations
68 void SyncGuestHost(); 68 void SyncGuestHost();