diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/core.cpp | 32 | ||||
| -rw-r--r-- | src/core/core.h | 11 | ||||
| -rw-r--r-- | src/core/gpu_dirty_memory_manager.h | 122 | ||||
| -rw-r--r-- | src/core/memory.cpp | 40 | ||||
| -rw-r--r-- | src/core/memory.h | 6 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 39 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 5 | ||||
| -rw-r--r-- | src/video_core/fence_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader_cache.h | 2 |
20 files changed, 329 insertions, 41 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index b74fd0a58..9e3eb3795 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "core/file_sys/savedata_factory.h" | 27 | #include "core/file_sys/savedata_factory.h" |
| 28 | #include "core/file_sys/vfs_concat.h" | 28 | #include "core/file_sys/vfs_concat.h" |
| 29 | #include "core/file_sys/vfs_real.h" | 29 | #include "core/file_sys/vfs_real.h" |
| 30 | #include "core/gpu_dirty_memory_manager.h" | ||
| 30 | #include "core/hid/hid_core.h" | 31 | #include "core/hid/hid_core.h" |
| 31 | #include "core/hle/kernel/k_memory_manager.h" | 32 | #include "core/hle/kernel/k_memory_manager.h" |
| 32 | #include "core/hle/kernel/k_process.h" | 33 | #include "core/hle/kernel/k_process.h" |
| @@ -130,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 130 | struct System::Impl { | 131 | struct System::Impl { |
| 131 | explicit Impl(System& system) | 132 | explicit Impl(System& system) |
| 132 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, | 133 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, |
| 133 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} | 134 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, |
| 135 | gpu_dirty_memory_write_manager{} { | ||
| 136 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 137 | } | ||
| 134 | 138 | ||
| 135 | void Initialize(System& system) { | 139 | void Initialize(System& system) { |
| 136 | device_memory = std::make_unique<Core::DeviceMemory>(); | 140 | device_memory = std::make_unique<Core::DeviceMemory>(); |
| @@ -234,6 +238,8 @@ struct System::Impl { | |||
| 234 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). | 238 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). |
| 235 | ReinitializeIfNecessary(system); | 239 | ReinitializeIfNecessary(system); |
| 236 | 240 | ||
| 241 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 242 | |||
| 237 | kernel.Initialize(); | 243 | kernel.Initialize(); |
| 238 | cpu_manager.Initialize(); | 244 | cpu_manager.Initialize(); |
| 239 | 245 | ||
| @@ -540,6 +546,9 @@ struct System::Impl { | |||
| 540 | 546 | ||
| 541 | std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; | 547 | std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; |
| 542 | std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; | 548 | std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_cpu{}; |
| 549 | |||
| 550 | std::array<Core::GPUDirtyMemoryManager, Core::Hardware::NUM_CPU_CORES> | ||
| 551 | gpu_dirty_memory_write_manager{}; | ||
| 543 | }; | 552 | }; |
| 544 | 553 | ||
| 545 | System::System() : impl{std::make_unique<Impl>(*this)} {} | 554 | System::System() : impl{std::make_unique<Impl>(*this)} {} |
| @@ -629,10 +638,31 @@ void System::PrepareReschedule(const u32 core_index) { | |||
| 629 | impl->kernel.PrepareReschedule(core_index); | 638 | impl->kernel.PrepareReschedule(core_index); |
| 630 | } | 639 | } |
| 631 | 640 | ||
| 641 | Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() { | ||
| 642 | const std::size_t core = impl->kernel.GetCurrentHostThreadID(); | ||
| 643 | return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES | ||
| 644 | ? core | ||
| 645 | : Core::Hardware::NUM_CPU_CORES - 1]; | ||
| 646 | } | ||
| 647 | |||
| 648 | /// Provides a constant reference to the current GPU dirty memory manager. | ||
| 649 | const Core::GPUDirtyMemoryManager& System::CurrentGPUDirtyMemoryManager() const { | ||
| 650 | const std::size_t core = impl->kernel.GetCurrentHostThreadID(); | ||
| 651 | return impl->gpu_dirty_memory_write_manager[core < Core::Hardware::NUM_CPU_CORES | ||
| 652 | ? core | ||
| 653 | : Core::Hardware::NUM_CPU_CORES - 1]; | ||
| 654 | } | ||
| 655 | |||
| 632 | size_t System::GetCurrentHostThreadID() const { | 656 | size_t System::GetCurrentHostThreadID() const { |
| 633 | return impl->kernel.GetCurrentHostThreadID(); | 657 | return impl->kernel.GetCurrentHostThreadID(); |
| 634 | } | 658 | } |
| 635 | 659 | ||
| 660 | void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | ||
| 661 | for (auto& manager : impl->gpu_dirty_memory_write_manager) { | ||
| 662 | manager.Gather(callback); | ||
| 663 | } | ||
| 664 | } | ||
| 665 | |||
| 636 | PerfStatsResults System::GetAndResetPerfStats() { | 666 | PerfStatsResults System::GetAndResetPerfStats() { |
| 637 | return impl->GetAndResetPerfStats(); | 667 | return impl->GetAndResetPerfStats(); |
| 638 | } | 668 | } |
diff --git a/src/core/core.h b/src/core/core.h index 93afc9303..14b2f7785 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -108,9 +108,10 @@ class CpuManager; | |||
| 108 | class Debugger; | 108 | class Debugger; |
| 109 | class DeviceMemory; | 109 | class DeviceMemory; |
| 110 | class ExclusiveMonitor; | 110 | class ExclusiveMonitor; |
| 111 | class SpeedLimiter; | 111 | class GPUDirtyMemoryManager; |
| 112 | class PerfStats; | 112 | class PerfStats; |
| 113 | class Reporter; | 113 | class Reporter; |
| 114 | class SpeedLimiter; | ||
| 114 | class TelemetrySession; | 115 | class TelemetrySession; |
| 115 | 116 | ||
| 116 | struct PerfStatsResults; | 117 | struct PerfStatsResults; |
| @@ -225,6 +226,14 @@ public: | |||
| 225 | /// Prepare the core emulation for a reschedule | 226 | /// Prepare the core emulation for a reschedule |
| 226 | void PrepareReschedule(u32 core_index); | 227 | void PrepareReschedule(u32 core_index); |
| 227 | 228 | ||
| 229 | /// Provides a reference to the current GPU dirty memory manager. | ||
| 230 | [[nodiscard]] Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager(); | ||
| 231 | |||
| 232 | /// Provides a constant reference to the current GPU dirty memory manager. | ||
| 233 | [[nodiscard]] const Core::GPUDirtyMemoryManager& CurrentGPUDirtyMemoryManager() const; | ||
| 234 | |||
| 235 | void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); | ||
| 236 | |||
| 228 | [[nodiscard]] size_t GetCurrentHostThreadID() const; | 237 | [[nodiscard]] size_t GetCurrentHostThreadID() const; |
| 229 | 238 | ||
| 230 | /// Gets and resets core performance statistics | 239 | /// Gets and resets core performance statistics |
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h new file mode 100644 index 000000000..9687531e8 --- /dev/null +++ b/src/core/gpu_dirty_memory_manager.h | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <atomic> | ||
| 7 | #include <bit> | ||
| 8 | #include <functional> | ||
| 9 | #include <mutex> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "core/memory.h" | ||
| 14 | |||
| 15 | namespace Core { | ||
| 16 | |||
| 17 | class GPUDirtyMemoryManager { | ||
| 18 | public: | ||
| 19 | GPUDirtyMemoryManager() : current{default_transform} { | ||
| 20 | back_buffer.reserve(256); | ||
| 21 | front_buffer.reserve(256); | ||
| 22 | } | ||
| 23 | |||
| 24 | ~GPUDirtyMemoryManager() = default; | ||
| 25 | |||
| 26 | void Collect(VAddr address, size_t size) { | ||
| 27 | TransformAddress t = BuildTransform(address, size); | ||
| 28 | TransformAddress tmp, original; | ||
| 29 | do { | ||
| 30 | tmp = current.load(std::memory_order_acquire); | ||
| 31 | original = tmp; | ||
| 32 | if (tmp.address != t.address) { | ||
| 33 | if (IsValid(tmp.address)) { | ||
| 34 | std::scoped_lock lk(guard); | ||
| 35 | back_buffer.emplace_back(tmp); | ||
| 36 | current.exchange(t, std::memory_order_relaxed); | ||
| 37 | return; | ||
| 38 | } | ||
| 39 | tmp.address = t.address; | ||
| 40 | tmp.mask = 0; | ||
| 41 | } | ||
| 42 | if ((tmp.mask | t.mask) == tmp.mask) { | ||
| 43 | return; | ||
| 44 | } | ||
| 45 | tmp.mask |= t.mask; | ||
| 46 | } while (!current.compare_exchange_weak(original, tmp, std::memory_order_release, | ||
| 47 | std::memory_order_relaxed)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void Gather(std::function<void(VAddr, size_t)>& callback) { | ||
| 51 | { | ||
| 52 | std::scoped_lock lk(guard); | ||
| 53 | TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); | ||
| 54 | front_buffer.swap(back_buffer); | ||
| 55 | if (IsValid(t.address)) { | ||
| 56 | front_buffer.emplace_back(t); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | for (auto& transform : front_buffer) { | ||
| 60 | size_t offset = 0; | ||
| 61 | u64 mask = transform.mask; | ||
| 62 | while (mask != 0) { | ||
| 63 | const size_t empty_bits = std::countr_zero(mask); | ||
| 64 | offset += empty_bits << align_bits; | ||
| 65 | mask = mask >> empty_bits; | ||
| 66 | |||
| 67 | const size_t continuous_bits = std::countr_one(mask); | ||
| 68 | callback((static_cast<VAddr>(transform.address) << page_bits) + offset, | ||
| 69 | continuous_bits << align_bits); | ||
| 70 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | ||
| 71 | offset += continuous_bits << align_bits; | ||
| 72 | } | ||
| 73 | } | ||
| 74 | front_buffer.clear(); | ||
| 75 | } | ||
| 76 | |||
| 77 | private: | ||
| 78 | struct alignas(8) TransformAddress { | ||
| 79 | u32 address; | ||
| 80 | u32 mask; | ||
| 81 | }; | ||
| 82 | |||
| 83 | constexpr static size_t page_bits = Memory::YUZU_PAGEBITS - 1; | ||
| 84 | constexpr static size_t page_size = 1ULL << page_bits; | ||
| 85 | constexpr static size_t page_mask = page_size - 1; | ||
| 86 | |||
| 87 | constexpr static size_t align_bits = 6U; | ||
| 88 | constexpr static size_t align_size = 1U << align_bits; | ||
| 89 | constexpr static size_t align_mask = align_size - 1; | ||
| 90 | constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; | ||
| 91 | |||
| 92 | bool IsValid(VAddr address) { | ||
| 93 | return address < (1ULL << 39); | ||
| 94 | } | ||
| 95 | |||
| 96 | template <typename T> | ||
| 97 | T CreateMask(size_t top_bit, size_t minor_bit) { | ||
| 98 | T mask = ~T(0); | ||
| 99 | mask <<= (sizeof(T) * 8 - top_bit); | ||
| 100 | mask >>= (sizeof(T) * 8 - top_bit); | ||
| 101 | mask >>= minor_bit; | ||
| 102 | mask <<= minor_bit; | ||
| 103 | return mask; | ||
| 104 | } | ||
| 105 | |||
| 106 | TransformAddress BuildTransform(VAddr address, size_t size) { | ||
| 107 | const size_t minor_address = address & page_mask; | ||
| 108 | const size_t minor_bit = minor_address >> align_bits; | ||
| 109 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | ||
| 110 | TransformAddress result{}; | ||
| 111 | result.address = static_cast<u32>(address >> page_bits); | ||
| 112 | result.mask = CreateMask<u32>(top_bit, minor_bit); | ||
| 113 | return result; | ||
| 114 | } | ||
| 115 | |||
| 116 | std::atomic<TransformAddress> current{}; | ||
| 117 | std::mutex guard; | ||
| 118 | std::vector<TransformAddress> back_buffer; | ||
| 119 | std::vector<TransformAddress> front_buffer; | ||
| 120 | }; | ||
| 121 | |||
| 122 | } // namespace Core | ||
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 514ba0d66..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <span> | ||
| 6 | 7 | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/atomic_ops.h" | 9 | #include "common/atomic_ops.h" |
| @@ -13,6 +14,7 @@ | |||
| 13 | #include "common/swap.h" | 14 | #include "common/swap.h" |
| 14 | #include "core/core.h" | 15 | #include "core/core.h" |
| 15 | #include "core/device_memory.h" | 16 | #include "core/device_memory.h" |
| 17 | #include "core/gpu_dirty_memory_manager.h" | ||
| 16 | #include "core/hardware_properties.h" | 18 | #include "core/hardware_properties.h" |
| 17 | #include "core/hle/kernel/k_page_table.h" | 19 | #include "core/hle/kernel/k_page_table.h" |
| 18 | #include "core/hle/kernel/k_process.h" | 20 | #include "core/hle/kernel/k_process.h" |
| @@ -678,7 +680,7 @@ struct Memory::Impl { | |||
| 678 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | 680 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, |
| 679 | GetInteger(vaddr), static_cast<u64>(data)); | 681 | GetInteger(vaddr), static_cast<u64>(data)); |
| 680 | }, | 682 | }, |
| 681 | [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); | 683 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 682 | if (ptr) { | 684 | if (ptr) { |
| 683 | std::memcpy(ptr, &data, sizeof(T)); | 685 | std::memcpy(ptr, &data, sizeof(T)); |
| 684 | } | 686 | } |
| @@ -692,7 +694,7 @@ struct Memory::Impl { | |||
| 692 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | 694 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", |
| 693 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | 695 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); |
| 694 | }, | 696 | }, |
| 695 | [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(T)); }); | 697 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 696 | if (ptr) { | 698 | if (ptr) { |
| 697 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | 699 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); |
| 698 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 700 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -707,7 +709,7 @@ struct Memory::Impl { | |||
| 707 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | 709 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", |
| 708 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | 710 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); |
| 709 | }, | 711 | }, |
| 710 | [&]() { system.GPU().InvalidateRegion(GetInteger(vaddr), sizeof(u128)); }); | 712 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); |
| 711 | if (ptr) { | 713 | if (ptr) { |
| 712 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | 714 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); |
| 713 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 715 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -717,7 +719,7 @@ struct Memory::Impl { | |||
| 717 | 719 | ||
| 718 | void HandleRasterizerDownload(VAddr address, size_t size) { | 720 | void HandleRasterizerDownload(VAddr address, size_t size) { |
| 719 | const size_t core = system.GetCurrentHostThreadID(); | 721 | const size_t core = system.GetCurrentHostThreadID(); |
| 720 | auto& current_area = rasterizer_areas[core]; | 722 | auto& current_area = rasterizer_read_areas[core]; |
| 721 | const VAddr end_address = address + size; | 723 | const VAddr end_address = address + size; |
| 722 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 724 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 723 | [[likely]] { | 725 | [[likely]] { |
| @@ -726,9 +728,31 @@ struct Memory::Impl { | |||
| 726 | current_area = system.GPU().OnCPURead(address, size); | 728 | current_area = system.GPU().OnCPURead(address, size); |
| 727 | } | 729 | } |
| 728 | 730 | ||
| 729 | Common::PageTable* current_page_table = nullptr; | 731 | void HandleRasterizerWrite(VAddr address, size_t size) { |
| 730 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | 732 | const size_t core = system.GetCurrentHostThreadID(); |
| 733 | auto& current_area = rasterizer_write_areas[core]; | ||
| 734 | VAddr subaddress = address >> YUZU_PAGEBITS; | ||
| 735 | bool do_collection = current_area.last_address == subaddress; | ||
| 736 | if (!do_collection) [[unlikely]] { | ||
| 737 | do_collection = system.GPU().OnCPUWrite(address, size); | ||
| 738 | if (!do_collection) { | ||
| 739 | return; | ||
| 740 | } | ||
| 741 | current_area.last_address = subaddress; | ||
| 742 | } | ||
| 743 | gpu_dirty_managers[core].Collect(address, size); | ||
| 744 | } | ||
| 745 | |||
| 746 | struct GPUDirtyState { | ||
| 747 | VAddr last_address; | ||
| 748 | }; | ||
| 749 | |||
| 731 | Core::System& system; | 750 | Core::System& system; |
| 751 | Common::PageTable* current_page_table = nullptr; | ||
| 752 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||
| 753 | rasterizer_read_areas{}; | ||
| 754 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||
| 755 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||
| 732 | }; | 756 | }; |
| 733 | 757 | ||
| 734 | Memory::Memory(Core::System& system_) : system{system_} { | 758 | Memory::Memory(Core::System& system_) : system{system_} { |
| @@ -876,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | |||
| 876 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | 900 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); |
| 877 | } | 901 | } |
| 878 | 902 | ||
| 903 | void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||
| 904 | impl->gpu_dirty_managers = managers; | ||
| 905 | } | ||
| 906 | |||
| 879 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | 907 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { |
| 880 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | 908 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); |
| 881 | } | 909 | } |
diff --git a/src/core/memory.h b/src/core/memory.h index 72a0be813..ea01824f8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <span> | ||
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include "common/typed_address.h" | 10 | #include "common/typed_address.h" |
| 10 | #include "core/hle/result.h" | 11 | #include "core/hle/result.h" |
| @@ -15,7 +16,8 @@ struct PageTable; | |||
| 15 | 16 | ||
| 16 | namespace Core { | 17 | namespace Core { |
| 17 | class System; | 18 | class System; |
| 18 | } | 19 | class GPUDirtyMemoryManager; |
| 20 | } // namespace Core | ||
| 19 | 21 | ||
| 20 | namespace Kernel { | 22 | namespace Kernel { |
| 21 | class PhysicalMemory; | 23 | class PhysicalMemory; |
| @@ -458,6 +460,8 @@ public: | |||
| 458 | */ | 460 | */ |
| 459 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | 461 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |
| 460 | 462 | ||
| 463 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||
| 464 | |||
| 461 | private: | 465 | private: |
| 462 | Core::System& system; | 466 | Core::System& system; |
| 463 | 467 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 58a45ab67..b5ed3380f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -115,7 +115,34 @@ void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | |||
| 115 | 115 | ||
| 116 | template <class P> | 116 | template <class P> |
| 117 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 117 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { |
| 118 | memory_tracker.CachedCpuWrite(cpu_addr, size); | 118 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); |
| 119 | if (!is_dirty) { | ||
| 120 | return; | ||
| 121 | } | ||
| 122 | VAddr aligned_start = Common::AlignDown(cpu_addr, YUZU_PAGESIZE); | ||
| 123 | VAddr aligned_end = Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE); | ||
| 124 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | ||
| 125 | WriteMemory(cpu_addr, size); | ||
| 126 | return; | ||
| 127 | } | ||
| 128 | |||
| 129 | tmp_buffer.resize_destructive(size); | ||
| 130 | cpu_memory.ReadBlockUnsafe(cpu_addr, tmp_buffer.data(), size); | ||
| 131 | |||
| 132 | InlineMemoryImplementation(cpu_addr, size, tmp_buffer); | ||
| 133 | } | ||
| 134 | |||
| 135 | template <class P> | ||
| 136 | bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | ||
| 137 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | ||
| 138 | if (!is_dirty) { | ||
| 139 | return false; | ||
| 140 | } | ||
| 141 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | ||
| 142 | return true; | ||
| 143 | } | ||
| 144 | WriteMemory(cpu_addr, size); | ||
| 145 | return false; | ||
| 119 | } | 146 | } |
| 120 | 147 | ||
| 121 | template <class P> | 148 | template <class P> |
| @@ -1553,6 +1580,14 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1553 | return false; | 1580 | return false; |
| 1554 | } | 1581 | } |
| 1555 | 1582 | ||
| 1583 | InlineMemoryImplementation(dest_address, copy_size, inlined_buffer); | ||
| 1584 | |||
| 1585 | return true; | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | template <class P> | ||
| 1589 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||
| 1590 | std::span<const u8> inlined_buffer) { | ||
| 1556 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | 1591 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |
| 1557 | ClearDownload(subtract_interval); | 1592 | ClearDownload(subtract_interval); |
| 1558 | common_ranges.subtract(subtract_interval); | 1593 | common_ranges.subtract(subtract_interval); |
| @@ -1574,8 +1609,6 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1574 | } else { | 1609 | } else { |
| 1575 | buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); | 1610 | buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); |
| 1576 | } | 1611 | } |
| 1577 | |||
| 1578 | return true; | ||
| 1579 | } | 1612 | } |
| 1580 | 1613 | ||
| 1581 | template <class P> | 1614 | template <class P> |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index fe6068cfe..460fc7551 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -245,6 +245,8 @@ public: | |||
| 245 | 245 | ||
| 246 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 246 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| 247 | 247 | ||
| 248 | bool OnCPUWrite(VAddr cpu_addr, u64 size); | ||
| 249 | |||
| 248 | void DownloadMemory(VAddr cpu_addr, u64 size); | 250 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 249 | 251 | ||
| 250 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 252 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); |
| @@ -543,6 +545,9 @@ private: | |||
| 543 | 545 | ||
| 544 | void ClearDownload(IntervalType subtract_interval); | 546 | void ClearDownload(IntervalType subtract_interval); |
| 545 | 547 | ||
| 548 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||
| 549 | std::span<const u8> inlined_buffer); | ||
| 550 | |||
| 546 | VideoCore::RasterizerInterface& rasterizer; | 551 | VideoCore::RasterizerInterface& rasterizer; |
| 547 | Core::Memory::Memory& cpu_memory; | 552 | Core::Memory::Memory& cpu_memory; |
| 548 | 553 | ||
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 35d699bbf..ab20ff30f 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -69,7 +69,6 @@ public: | |||
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void SignalFence(std::function<void()>&& func) { | 71 | void SignalFence(std::function<void()>&& func) { |
| 72 | rasterizer.InvalidateGPUCache(); | ||
| 73 | bool delay_fence = Settings::IsGPULevelHigh(); | 72 | bool delay_fence = Settings::IsGPULevelHigh(); |
| 74 | if constexpr (!can_async_check) { | 73 | if constexpr (!can_async_check) { |
| 75 | TryReleasePendingFences<false>(); | 74 | TryReleasePendingFences<false>(); |
| @@ -96,6 +95,7 @@ public: | |||
| 96 | guard.unlock(); | 95 | guard.unlock(); |
| 97 | cv.notify_all(); | 96 | cv.notify_all(); |
| 98 | } | 97 | } |
| 98 | rasterizer.InvalidateGPUCache(); | ||
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | void SignalSyncPoint(u32 value) { | 101 | void SignalSyncPoint(u32 value) { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index db385076d..c192e33b2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -95,7 +95,9 @@ struct GPU::Impl { | |||
| 95 | 95 | ||
| 96 | /// Synchronizes CPU writes with Host GPU memory. | 96 | /// Synchronizes CPU writes with Host GPU memory. |
| 97 | void InvalidateGPUCache() { | 97 | void InvalidateGPUCache() { |
| 98 | rasterizer->InvalidateGPUCache(); | 98 | std::function<void(VAddr, size_t)> callback_writes( |
| 99 | [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||
| 100 | system.GatherGPUDirtyMemory(callback_writes); | ||
| 99 | } | 101 | } |
| 100 | 102 | ||
| 101 | /// Signal the ending of command list. | 103 | /// Signal the ending of command list. |
| @@ -299,6 +301,10 @@ struct GPU::Impl { | |||
| 299 | gpu_thread.InvalidateRegion(addr, size); | 301 | gpu_thread.InvalidateRegion(addr, size); |
| 300 | } | 302 | } |
| 301 | 303 | ||
| 304 | bool OnCPUWrite(VAddr addr, u64 size) { | ||
| 305 | return rasterizer->OnCPUWrite(addr, size); | ||
| 306 | } | ||
| 307 | |||
| 302 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 308 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 303 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { | 309 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 304 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 310 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| @@ -561,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) { | |||
| 561 | impl->InvalidateRegion(addr, size); | 567 | impl->InvalidateRegion(addr, size); |
| 562 | } | 568 | } |
| 563 | 569 | ||
| 570 | bool GPU::OnCPUWrite(VAddr addr, u64 size) { | ||
| 571 | return impl->OnCPUWrite(addr, size); | ||
| 572 | } | ||
| 573 | |||
| 564 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 574 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 565 | impl->FlushAndInvalidateRegion(addr, size); | 575 | impl->FlushAndInvalidateRegion(addr, size); |
| 566 | } | 576 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e49c40cf2..ba2838b89 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -250,6 +250,10 @@ public: | |||
| 250 | /// Notify rasterizer that any caches of the specified region should be invalidated | 250 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 251 | void InvalidateRegion(VAddr addr, u64 size); | 251 | void InvalidateRegion(VAddr addr, u64 size); |
| 252 | 252 | ||
| 253 | /// Notify rasterizer that the CPU is trying to write this area. It returns true if the area is | ||
| 254 | /// sensitive, false otherwise | ||
| 255 | bool OnCPUWrite(VAddr addr, u64 size); | ||
| 256 | |||
| 253 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 254 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 258 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 255 | 259 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 889144f38..2f0f9f593 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, | |||
| 47 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | 47 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { |
| 48 | rasterizer->FlushRegion(flush->addr, flush->size); | 48 | rasterizer->FlushRegion(flush->addr, flush->size); |
| 49 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | 49 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 50 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | 50 | rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); |
| 51 | } else { | 51 | } else { |
| 52 | ASSERT(false); | 52 | ASSERT(false); |
| 53 | } | 53 | } |
| @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() { | |||
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 105 | rasterizer->OnCPUWrite(addr, size); | 105 | rasterizer->OnCacheInvalidation(addr, size); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 109 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 110 | rasterizer->OnCPUWrite(addr, size); | 110 | rasterizer->OnCacheInvalidation(addr, size); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | 113 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 7566a8c4e..cb8029a4f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -109,7 +109,9 @@ public: | |||
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /// Notify rasterizer that any caches of the specified region are desync with guest | 111 | /// Notify rasterizer that any caches of the specified region are desync with guest |
| 112 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | 112 | virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; |
| 113 | |||
| 114 | virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | ||
| 113 | 115 | ||
| 114 | /// Sync memory between guest and host. | 116 | /// Sync memory between guest and host. |
| 115 | virtual void InvalidateGPUCache() = 0; | 117 | virtual void InvalidateGPUCache() = 0; |
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index bf2ce4c49..92ecf6682 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp | |||
| 47 | return false; | 47 | return false; |
| 48 | } | 48 | } |
| 49 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 49 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} |
| 50 | void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} | 50 | bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { |
| 51 | return false; | ||
| 52 | } | ||
| 53 | void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | ||
| 51 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | 54 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { |
| 52 | VideoCore::RasterizerDownloadArea new_area{ | 55 | VideoCore::RasterizerDownloadArea new_area{ |
| 53 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 56 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), |
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a8d35d2c1..93b9a6971 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -53,7 +53,8 @@ public: | |||
| 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 54 | void InvalidateRegion(VAddr addr, u64 size, | 54 | void InvalidateRegion(VAddr addr, u64 size, |
| 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 56 | void OnCPUWrite(VAddr addr, u64 size) override; | 56 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 57 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 57 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 58 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |
| 58 | void InvalidateGPUCache() override; | 59 | void InvalidateGPUCache() override; |
| 59 | void UnmapMemory(VAddr addr, u64 size) override; | 60 | void UnmapMemory(VAddr addr, u64 size) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index edf527f2d..aadd6967c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 485 | } | 485 | } |
| 486 | } | 486 | } |
| 487 | 487 | ||
| 488 | void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | 488 | bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { |
| 489 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 490 | if (addr == 0 || size == 0) { | ||
| 491 | return false; | ||
| 492 | } | ||
| 493 | |||
| 494 | { | ||
| 495 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 496 | if (buffer_cache.OnCPUWrite(addr, size)) { | ||
| 497 | return true; | ||
| 498 | } | ||
| 499 | } | ||
| 500 | |||
| 501 | { | ||
| 502 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 503 | texture_cache.WriteMemory(addr, size); | ||
| 504 | } | ||
| 505 | |||
| 506 | shader_cache.InvalidateRegion(addr, size); | ||
| 507 | return false; | ||
| 508 | } | ||
| 509 | |||
| 510 | void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | ||
| 489 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 511 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 490 | if (addr == 0 || size == 0) { | 512 | if (addr == 0 || size == 0) { |
| 491 | return; | 513 | return; |
| 492 | } | 514 | } |
| 493 | shader_cache.OnCPUWrite(addr, size); | ||
| 494 | { | 515 | { |
| 495 | std::scoped_lock lock{texture_cache.mutex}; | 516 | std::scoped_lock lock{texture_cache.mutex}; |
| 496 | texture_cache.WriteMemory(addr, size); | 517 | texture_cache.WriteMemory(addr, size); |
| @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 499 | std::scoped_lock lock{buffer_cache.mutex}; | 520 | std::scoped_lock lock{buffer_cache.mutex}; |
| 500 | buffer_cache.CachedWriteMemory(addr, size); | 521 | buffer_cache.CachedWriteMemory(addr, size); |
| 501 | } | 522 | } |
| 523 | shader_cache.InvalidateRegion(addr, size); | ||
| 502 | } | 524 | } |
| 503 | 525 | ||
| 504 | void RasterizerOpenGL::InvalidateGPUCache() { | 526 | void RasterizerOpenGL::InvalidateGPUCache() { |
| 505 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 527 | gpu.InvalidateGPUCache(); |
| 506 | shader_cache.SyncGuestHost(); | ||
| 507 | { | ||
| 508 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 509 | buffer_cache.FlushCachedWrites(); | ||
| 510 | } | ||
| 511 | } | 528 | } |
| 512 | 529 | ||
| 513 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 530 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { |
| @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | |||
| 519 | std::scoped_lock lock{buffer_cache.mutex}; | 536 | std::scoped_lock lock{buffer_cache.mutex}; |
| 520 | buffer_cache.WriteMemory(addr, size); | 537 | buffer_cache.WriteMemory(addr, size); |
| 521 | } | 538 | } |
| 522 | shader_cache.OnCPUWrite(addr, size); | 539 | shader_cache.OnCacheInvalidation(addr, size); |
| 523 | } | 540 | } |
| 524 | 541 | ||
| 525 | void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | 542 | void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a73ad15c1..8eda2ddba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -98,7 +98,8 @@ public: | |||
| 98 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 98 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |
| 99 | void InvalidateRegion(VAddr addr, u64 size, | 99 | void InvalidateRegion(VAddr addr, u64 size, |
| 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 101 | void OnCPUWrite(VAddr addr, u64 size) override; | 101 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 102 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 102 | void InvalidateGPUCache() override; | 103 | void InvalidateGPUCache() override; |
| 103 | void UnmapMemory(VAddr addr, u64 size) override; | 104 | void UnmapMemory(VAddr addr, u64 size) override; |
| 104 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f7c0d939a..456bb040e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -566,11 +566,32 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||
| 566 | } | 566 | } |
| 567 | } | 567 | } |
| 568 | 568 | ||
| 569 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 569 | bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { |
| 570 | if (addr == 0 || size == 0) { | ||
| 571 | return false; | ||
| 572 | } | ||
| 573 | |||
| 574 | { | ||
| 575 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 576 | if (buffer_cache.OnCPUWrite(addr, size)) { | ||
| 577 | return true; | ||
| 578 | } | ||
| 579 | } | ||
| 580 | |||
| 581 | { | ||
| 582 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 583 | texture_cache.WriteMemory(addr, size); | ||
| 584 | } | ||
| 585 | |||
| 586 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 587 | return false; | ||
| 588 | } | ||
| 589 | |||
| 590 | void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | ||
| 570 | if (addr == 0 || size == 0) { | 591 | if (addr == 0 || size == 0) { |
| 571 | return; | 592 | return; |
| 572 | } | 593 | } |
| 573 | pipeline_cache.OnCPUWrite(addr, size); | 594 | |
| 574 | { | 595 | { |
| 575 | std::scoped_lock lock{texture_cache.mutex}; | 596 | std::scoped_lock lock{texture_cache.mutex}; |
| 576 | texture_cache.WriteMemory(addr, size); | 597 | texture_cache.WriteMemory(addr, size); |
| @@ -579,14 +600,11 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 579 | std::scoped_lock lock{buffer_cache.mutex}; | 600 | std::scoped_lock lock{buffer_cache.mutex}; |
| 580 | buffer_cache.CachedWriteMemory(addr, size); | 601 | buffer_cache.CachedWriteMemory(addr, size); |
| 581 | } | 602 | } |
| 603 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 582 | } | 604 | } |
| 583 | 605 | ||
| 584 | void RasterizerVulkan::InvalidateGPUCache() { | 606 | void RasterizerVulkan::InvalidateGPUCache() { |
| 585 | pipeline_cache.SyncGuestHost(); | 607 | gpu.InvalidateGPUCache(); |
| 586 | { | ||
| 587 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 588 | buffer_cache.FlushCachedWrites(); | ||
| 589 | } | ||
| 590 | } | 608 | } |
| 591 | 609 | ||
| 592 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 610 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { |
| @@ -598,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | |||
| 598 | std::scoped_lock lock{buffer_cache.mutex}; | 616 | std::scoped_lock lock{buffer_cache.mutex}; |
| 599 | buffer_cache.WriteMemory(addr, size); | 617 | buffer_cache.WriteMemory(addr, size); |
| 600 | } | 618 | } |
| 601 | pipeline_cache.OnCPUWrite(addr, size); | 619 | pipeline_cache.OnCacheInvalidation(addr, size); |
| 602 | } | 620 | } |
| 603 | 621 | ||
| 604 | void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | 622 | void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b39710b3c..73257d964 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -96,7 +96,8 @@ public: | |||
| 96 | void InvalidateRegion(VAddr addr, u64 size, | 96 | void InvalidateRegion(VAddr addr, u64 size, |
| 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 98 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | 98 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; |
| 99 | void OnCPUWrite(VAddr addr, u64 size) override; | 99 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 100 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 100 | void InvalidateGPUCache() override; | 101 | void InvalidateGPUCache() override; |
| 101 | void UnmapMemory(VAddr addr, u64 size) override; | 102 | void UnmapMemory(VAddr addr, u64 size) override; |
| 102 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 103 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 4db948b6d..01701201d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | |||
| 24 | RemovePendingShaders(); | 24 | RemovePendingShaders(); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | 27 | void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) { |
| 28 | std::scoped_lock lock{invalidation_mutex}; | 28 | std::scoped_lock lock{invalidation_mutex}; |
| 29 | InvalidatePagesInRegion(addr, size); | 29 | InvalidatePagesInRegion(addr, size); |
| 30 | } | 30 | } |
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index f3cc4c70b..de8e08002 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -62,7 +62,7 @@ public: | |||
| 62 | /// @brief Unmarks a memory region as cached and marks it for removal | 62 | /// @brief Unmarks a memory region as cached and marks it for removal |
| 63 | /// @param addr Start address of the CPU write operation | 63 | /// @param addr Start address of the CPU write operation |
| 64 | /// @param size Number of bytes of the CPU write operation | 64 | /// @param size Number of bytes of the CPU write operation |
| 65 | void OnCPUWrite(VAddr addr, size_t size); | 65 | void OnCacheInvalidation(VAddr addr, size_t size); |
| 66 | 66 | ||
| 67 | /// @brief Flushes delayed removal operations | 67 | /// @brief Flushes delayed removal operations |
| 68 | void SyncGuestHost(); | 68 | void SyncGuestHost(); |