diff options
| author | 2023-06-28 19:32:50 +0200 | |
|---|---|---|
| committer | 2023-06-28 21:32:45 +0200 | |
| commit | da440da9f54cc860f3c69da685a415d5ec9d7b64 (patch) | |
| tree | 5a7a4a56462244970e1356a723e6a8a77477f820 | |
| parent | MemoryTracking: Initial setup of atomic writes. (diff) | |
| download | yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.gz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.tar.xz yuzu-da440da9f54cc860f3c69da685a415d5ec9d7b64.zip | |
Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
| -rw-r--r-- | src/core/core.cpp | 8 | ||||
| -rw-r--r-- | src/core/gpu_dirty_memory_manager.h | 14 | ||||
| -rw-r--r-- | src/core/memory.cpp | 39 | ||||
| -rw-r--r-- | src/core/memory.h | 6 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 15 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache_base.h | 5 | ||||
| -rw-r--r-- | src/video_core/fence_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_null/null_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader_cache.h | 2 |
19 files changed, 153 insertions, 38 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index deefeb301..9e3eb3795 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -55,7 +55,6 @@ | |||
| 55 | #include "video_core/renderer_base.h" | 55 | #include "video_core/renderer_base.h" |
| 56 | #include "video_core/video_core.h" | 56 | #include "video_core/video_core.h" |
| 57 | 57 | ||
| 58 | |||
| 59 | MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); | 58 | MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); |
| 60 | MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); | 59 | MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); |
| 61 | MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); | 60 | MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); |
| @@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 132 | struct System::Impl { | 131 | struct System::Impl { |
| 133 | explicit Impl(System& system) | 132 | explicit Impl(System& system) |
| 134 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, | 133 | : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, |
| 135 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} | 134 | cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, |
| 135 | gpu_dirty_memory_write_manager{} { | ||
| 136 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 137 | } | ||
| 136 | 138 | ||
| 137 | void Initialize(System& system) { | 139 | void Initialize(System& system) { |
| 138 | device_memory = std::make_unique<Core::DeviceMemory>(); | 140 | device_memory = std::make_unique<Core::DeviceMemory>(); |
| @@ -236,6 +238,8 @@ struct System::Impl { | |||
| 236 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). | 238 | // Setting changes may require a full system reinitialization (e.g., disabling multicore). |
| 237 | ReinitializeIfNecessary(system); | 239 | ReinitializeIfNecessary(system); |
| 238 | 240 | ||
| 241 | memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||
| 242 | |||
| 239 | kernel.Initialize(); | 243 | kernel.Initialize(); |
| 240 | cpu_manager.Initialize(); | 244 | cpu_manager.Initialize(); |
| 241 | 245 | ||
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h index 9c3d41d11..789b7530f 100644 --- a/src/core/gpu_dirty_memory_manager.h +++ b/src/core/gpu_dirty_memory_manager.h | |||
| @@ -1,3 +1,6 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 1 | #pragma once | 4 | #pragma once |
| 2 | 5 | ||
| 3 | #include <atomic> | 6 | #include <atomic> |
| @@ -59,8 +62,7 @@ public: | |||
| 59 | mask = mask >> empty_bits; | 62 | mask = mask >> empty_bits; |
| 60 | 63 | ||
| 61 | const size_t continuous_bits = std::countr_one(mask); | 64 | const size_t continuous_bits = std::countr_one(mask); |
| 62 | callback((transform.address << Memory::YUZU_PAGEBITS) + offset, | 65 | callback((transform.address << page_bits) + offset, continuous_bits << align_bits); |
| 63 | continuous_bits << align_bits); | ||
| 64 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | 66 | mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |
| 65 | offset += continuous_bits << align_bits; | 67 | offset += continuous_bits << align_bits; |
| 66 | } | 68 | } |
| @@ -74,6 +76,10 @@ private: | |||
| 74 | u64 mask; | 76 | u64 mask; |
| 75 | }; | 77 | }; |
| 76 | 78 | ||
| 79 | constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; | ||
| 80 | constexpr static size_t page_size = 1ULL << page_bits; | ||
| 81 | constexpr static size_t page_mask = page_size - 1; | ||
| 82 | |||
| 77 | constexpr static size_t align_bits = 6U; | 83 | constexpr static size_t align_bits = 6U; |
| 78 | constexpr static size_t align_size = 1U << align_bits; | 84 | constexpr static size_t align_size = 1U << align_bits; |
| 79 | constexpr static size_t align_mask = align_size - 1; | 85 | constexpr static size_t align_mask = align_size - 1; |
| @@ -94,11 +100,11 @@ private: | |||
| 94 | } | 100 | } |
| 95 | 101 | ||
| 96 | TransformAddress BuildTransform(VAddr address, size_t size) { | 102 | TransformAddress BuildTransform(VAddr address, size_t size) { |
| 97 | const size_t minor_address = address & Memory::YUZU_PAGEMASK; | 103 | const size_t minor_address = address & page_mask; |
| 98 | const size_t minor_bit = minor_address >> align_bits; | 104 | const size_t minor_bit = minor_address >> align_bits; |
| 99 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | 105 | const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |
| 100 | TransformAddress result{}; | 106 | TransformAddress result{}; |
| 101 | result.address = address >> Memory::YUZU_PAGEBITS; | 107 | result.address = address >> page_bits; |
| 102 | result.mask = CreateMask<u64>(top_bit, minor_bit); | 108 | result.mask = CreateMask<u64>(top_bit, minor_bit); |
| 103 | return result; | 109 | return result; |
| 104 | } | 110 | } |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 60b246bdd..257406f09 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <span> | ||
| 6 | 7 | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/atomic_ops.h" | 9 | #include "common/atomic_ops.h" |
| @@ -679,7 +680,7 @@ struct Memory::Impl { | |||
| 679 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | 680 | LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, |
| 680 | GetInteger(vaddr), static_cast<u64>(data)); | 681 | GetInteger(vaddr), static_cast<u64>(data)); |
| 681 | }, | 682 | }, |
| 682 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 683 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 683 | if (ptr) { | 684 | if (ptr) { |
| 684 | std::memcpy(ptr, &data, sizeof(T)); | 685 | std::memcpy(ptr, &data, sizeof(T)); |
| 685 | } | 686 | } |
| @@ -693,7 +694,7 @@ struct Memory::Impl { | |||
| 693 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | 694 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", |
| 694 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | 695 | sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); |
| 695 | }, | 696 | }, |
| 696 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); | 697 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); |
| 697 | if (ptr) { | 698 | if (ptr) { |
| 698 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | 699 | const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); |
| 699 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 700 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -708,7 +709,7 @@ struct Memory::Impl { | |||
| 708 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | 709 | LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", |
| 709 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | 710 | GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); |
| 710 | }, | 711 | }, |
| 711 | [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); | 712 | [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); |
| 712 | if (ptr) { | 713 | if (ptr) { |
| 713 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | 714 | const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); |
| 714 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | 715 | return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |
| @@ -718,7 +719,7 @@ struct Memory::Impl { | |||
| 718 | 719 | ||
| 719 | void HandleRasterizerDownload(VAddr address, size_t size) { | 720 | void HandleRasterizerDownload(VAddr address, size_t size) { |
| 720 | const size_t core = system.GetCurrentHostThreadID(); | 721 | const size_t core = system.GetCurrentHostThreadID(); |
| 721 | auto& current_area = rasterizer_areas[core]; | 722 | auto& current_area = rasterizer_read_areas[core]; |
| 722 | const VAddr end_address = address + size; | 723 | const VAddr end_address = address + size; |
| 723 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 724 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 724 | [[likely]] { | 725 | [[likely]] { |
| @@ -727,9 +728,31 @@ struct Memory::Impl { | |||
| 727 | current_area = system.GPU().OnCPURead(address, size); | 728 | current_area = system.GPU().OnCPURead(address, size); |
| 728 | } | 729 | } |
| 729 | 730 | ||
| 730 | Common::PageTable* current_page_table = nullptr; | 731 | void HandleRasterizerWrite(VAddr address, size_t size) { |
| 731 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; | 732 | const size_t core = system.GetCurrentHostThreadID(); |
| 733 | auto& current_area = rasterizer_write_areas[core]; | ||
| 734 | VAddr subaddress = address >> YUZU_PAGEBITS; | ||
| 735 | bool do_collection = current_area.last_address == subaddress; | ||
| 736 | if (!do_collection) [[unlikely]] { | ||
| 737 | do_collection = system.GPU().OnCPUWrite(address, size); | ||
| 738 | if (!do_collection) { | ||
| 739 | return; | ||
| 740 | } | ||
| 741 | current_area.last_address = subaddress; | ||
| 742 | } | ||
| 743 | gpu_dirty_managers[core].Collect(address, size); | ||
| 744 | } | ||
| 745 | |||
| 746 | struct GPUDirtyState { | ||
| 747 | VAddr last_address; | ||
| 748 | }; | ||
| 749 | |||
| 732 | Core::System& system; | 750 | Core::System& system; |
| 751 | Common::PageTable* current_page_table = nullptr; | ||
| 752 | std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||
| 753 | rasterizer_read_areas{}; | ||
| 754 | std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||
| 755 | std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||
| 733 | }; | 756 | }; |
| 734 | 757 | ||
| 735 | Memory::Memory(Core::System& system_) : system{system_} { | 758 | Memory::Memory(Core::System& system_) : system{system_} { |
| @@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | |||
| 877 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | 900 | impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); |
| 878 | } | 901 | } |
| 879 | 902 | ||
| 903 | void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||
| 904 | impl->gpu_dirty_managers = managers; | ||
| 905 | } | ||
| 906 | |||
| 880 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | 907 | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { |
| 881 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | 908 | return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); |
| 882 | } | 909 | } |
diff --git a/src/core/memory.h b/src/core/memory.h index 72a0be813..ea01824f8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <span> | ||
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include "common/typed_address.h" | 10 | #include "common/typed_address.h" |
| 10 | #include "core/hle/result.h" | 11 | #include "core/hle/result.h" |
| @@ -15,7 +16,8 @@ struct PageTable; | |||
| 15 | 16 | ||
| 16 | namespace Core { | 17 | namespace Core { |
| 17 | class System; | 18 | class System; |
| 18 | } | 19 | class GPUDirtyMemoryManager; |
| 20 | } // namespace Core | ||
| 19 | 21 | ||
| 20 | namespace Kernel { | 22 | namespace Kernel { |
| 21 | class PhysicalMemory; | 23 | class PhysicalMemory; |
| @@ -458,6 +460,8 @@ public: | |||
| 458 | */ | 460 | */ |
| 459 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | 461 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |
| 460 | 462 | ||
| 463 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||
| 464 | |||
| 461 | private: | 465 | private: |
| 462 | Core::System& system; | 466 | Core::System& system; |
| 463 | 467 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9239ad862..b5ed3380f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -133,6 +133,19 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | template <class P> | 135 | template <class P> |
| 136 | bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | ||
| 137 | const bool is_dirty = IsRegionRegistered(cpu_addr, size); | ||
| 138 | if (!is_dirty) { | ||
| 139 | return false; | ||
| 140 | } | ||
| 141 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | ||
| 142 | return true; | ||
| 143 | } | ||
| 144 | WriteMemory(cpu_addr, size); | ||
| 145 | return false; | ||
| 146 | } | ||
| 147 | |||
| 148 | template <class P> | ||
| 136 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, | 149 | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, |
| 137 | u64 size) { | 150 | u64 size) { |
| 138 | std::optional<VideoCore::RasterizerDownloadArea> area{}; | 151 | std::optional<VideoCore::RasterizerDownloadArea> area{}; |
| @@ -1574,7 +1587,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1574 | 1587 | ||
| 1575 | template <class P> | 1588 | template <class P> |
| 1576 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | 1589 | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, |
| 1577 | std::span<const u8> inlined_buffer) { | 1590 | std::span<const u8> inlined_buffer) { |
| 1578 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | 1591 | const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |
| 1579 | ClearDownload(subtract_interval); | 1592 | ClearDownload(subtract_interval); |
| 1580 | common_ranges.subtract(subtract_interval); | 1593 | common_ranges.subtract(subtract_interval); |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 4d9bab7f7..460fc7551 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -245,6 +245,8 @@ public: | |||
| 245 | 245 | ||
| 246 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 246 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| 247 | 247 | ||
| 248 | bool OnCPUWrite(VAddr cpu_addr, u64 size); | ||
| 249 | |||
| 248 | void DownloadMemory(VAddr cpu_addr, u64 size); | 250 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 249 | 251 | ||
| 250 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | 252 | std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); |
| @@ -543,7 +545,8 @@ private: | |||
| 543 | 545 | ||
| 544 | void ClearDownload(IntervalType subtract_interval); | 546 | void ClearDownload(IntervalType subtract_interval); |
| 545 | 547 | ||
| 546 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | 548 | void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, |
| 549 | std::span<const u8> inlined_buffer); | ||
| 547 | 550 | ||
| 548 | VideoCore::RasterizerInterface& rasterizer; | 551 | VideoCore::RasterizerInterface& rasterizer; |
| 549 | Core::Memory::Memory& cpu_memory; | 552 | Core::Memory::Memory& cpu_memory; |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 35d699bbf..ab20ff30f 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -69,7 +69,6 @@ public: | |||
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void SignalFence(std::function<void()>&& func) { | 71 | void SignalFence(std::function<void()>&& func) { |
| 72 | rasterizer.InvalidateGPUCache(); | ||
| 73 | bool delay_fence = Settings::IsGPULevelHigh(); | 72 | bool delay_fence = Settings::IsGPULevelHigh(); |
| 74 | if constexpr (!can_async_check) { | 73 | if constexpr (!can_async_check) { |
| 75 | TryReleasePendingFences<false>(); | 74 | TryReleasePendingFences<false>(); |
| @@ -96,6 +95,7 @@ public: | |||
| 96 | guard.unlock(); | 95 | guard.unlock(); |
| 97 | cv.notify_all(); | 96 | cv.notify_all(); |
| 98 | } | 97 | } |
| 98 | rasterizer.InvalidateGPUCache(); | ||
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | void SignalSyncPoint(u32 value) { | 101 | void SignalSyncPoint(u32 value) { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index f823a1e2b..c192e33b2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -96,7 +96,7 @@ struct GPU::Impl { | |||
| 96 | /// Synchronizes CPU writes with Host GPU memory. | 96 | /// Synchronizes CPU writes with Host GPU memory. |
| 97 | void InvalidateGPUCache() { | 97 | void InvalidateGPUCache() { |
| 98 | std::function<void(VAddr, size_t)> callback_writes( | 98 | std::function<void(VAddr, size_t)> callback_writes( |
| 99 | [this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); }); | 99 | [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); |
| 100 | system.GatherGPUDirtyMemory(callback_writes); | 100 | system.GatherGPUDirtyMemory(callback_writes); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| @@ -301,6 +301,10 @@ struct GPU::Impl { | |||
| 301 | gpu_thread.InvalidateRegion(addr, size); | 301 | gpu_thread.InvalidateRegion(addr, size); |
| 302 | } | 302 | } |
| 303 | 303 | ||
| 304 | bool OnCPUWrite(VAddr addr, u64 size) { | ||
| 305 | return rasterizer->OnCPUWrite(addr, size); | ||
| 306 | } | ||
| 307 | |||
| 304 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 308 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 305 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { | 309 | void FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 306 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 310 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| @@ -563,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) { | |||
| 563 | impl->InvalidateRegion(addr, size); | 567 | impl->InvalidateRegion(addr, size); |
| 564 | } | 568 | } |
| 565 | 569 | ||
| 570 | bool GPU::OnCPUWrite(VAddr addr, u64 size) { | ||
| 571 | return impl->OnCPUWrite(addr, size); | ||
| 572 | } | ||
| 573 | |||
| 566 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 574 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 567 | impl->FlushAndInvalidateRegion(addr, size); | 575 | impl->FlushAndInvalidateRegion(addr, size); |
| 568 | } | 576 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e49c40cf2..ba2838b89 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -250,6 +250,10 @@ public: | |||
| 250 | /// Notify rasterizer that any caches of the specified region should be invalidated | 250 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 251 | void InvalidateRegion(VAddr addr, u64 size); | 251 | void InvalidateRegion(VAddr addr, u64 size); |
| 252 | 252 | ||
| 253 | /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is | ||
| 254 | /// sensitive, false otherwise | ||
| 255 | bool OnCPUWrite(VAddr addr, u64 size); | ||
| 256 | |||
| 253 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 257 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 254 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 258 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 255 | 259 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 889144f38..2f0f9f593 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, | |||
| 47 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | 47 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { |
| 48 | rasterizer->FlushRegion(flush->addr, flush->size); | 48 | rasterizer->FlushRegion(flush->addr, flush->size); |
| 49 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | 49 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 50 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | 50 | rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); |
| 51 | } else { | 51 | } else { |
| 52 | ASSERT(false); | 52 | ASSERT(false); |
| 53 | } | 53 | } |
| @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() { | |||
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 105 | rasterizer->OnCPUWrite(addr, size); | 105 | rasterizer->OnCacheInvalidation(addr, size); |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 108 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 109 | // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important | 109 | // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important |
| 110 | rasterizer->OnCPUWrite(addr, size); | 110 | rasterizer->OnCacheInvalidation(addr, size); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | 113 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 7566a8c4e..cb8029a4f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -109,7 +109,9 @@ public: | |||
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest | 111 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest |
| 112 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | 112 | virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; |
| 113 | |||
| 114 | virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | ||
| 113 | 115 | ||
| 114 | /// Sync memory between guest and host. | 116 | /// Sync memory between guest and host. |
| 115 | virtual void InvalidateGPUCache() = 0; | 117 | virtual void InvalidateGPUCache() = 0; |
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index bf2ce4c49..92ecf6682 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp | |||
| 47 | return false; | 47 | return false; |
| 48 | } | 48 | } |
| 49 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 49 | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} |
| 50 | void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} | 50 | bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { |
| 51 | return false; | ||
| 52 | } | ||
| 53 | void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | ||
| 51 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | 54 | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { |
| 52 | VideoCore::RasterizerDownloadArea new_area{ | 55 | VideoCore::RasterizerDownloadArea new_area{ |
| 53 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | 56 | .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), |
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a8d35d2c1..93b9a6971 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -53,7 +53,8 @@ public: | |||
| 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 53 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 54 | void InvalidateRegion(VAddr addr, u64 size, | 54 | void InvalidateRegion(VAddr addr, u64 size, |
| 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 55 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 56 | void OnCPUWrite(VAddr addr, u64 size) override; | 56 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 57 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 57 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 58 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |
| 58 | void InvalidateGPUCache() override; | 59 | void InvalidateGPUCache() override; |
| 59 | void UnmapMemory(VAddr addr, u64 size) override; | 60 | void UnmapMemory(VAddr addr, u64 size) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index edf527f2d..aadd6967c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 485 | } | 485 | } |
| 486 | } | 486 | } |
| 487 | 487 | ||
| 488 | void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | 488 | bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { |
| 489 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 490 | if (addr == 0 || size == 0) { | ||
| 491 | return false; | ||
| 492 | } | ||
| 493 | |||
| 494 | { | ||
| 495 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 496 | if (buffer_cache.OnCPUWrite(addr, size)) { | ||
| 497 | return true; | ||
| 498 | } | ||
| 499 | } | ||
| 500 | |||
| 501 | { | ||
| 502 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 503 | texture_cache.WriteMemory(addr, size); | ||
| 504 | } | ||
| 505 | |||
| 506 | shader_cache.InvalidateRegion(addr, size); | ||
| 507 | return false; | ||
| 508 | } | ||
| 509 | |||
| 510 | void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | ||
| 489 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 511 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 490 | if (addr == 0 || size == 0) { | 512 | if (addr == 0 || size == 0) { |
| 491 | return; | 513 | return; |
| 492 | } | 514 | } |
| 493 | shader_cache.OnCPUWrite(addr, size); | ||
| 494 | { | 515 | { |
| 495 | std::scoped_lock lock{texture_cache.mutex}; | 516 | std::scoped_lock lock{texture_cache.mutex}; |
| 496 | texture_cache.WriteMemory(addr, size); | 517 | texture_cache.WriteMemory(addr, size); |
| @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 499 | std::scoped_lock lock{buffer_cache.mutex}; | 520 | std::scoped_lock lock{buffer_cache.mutex}; |
| 500 | buffer_cache.CachedWriteMemory(addr, size); | 521 | buffer_cache.CachedWriteMemory(addr, size); |
| 501 | } | 522 | } |
| 523 | shader_cache.InvalidateRegion(addr, size); | ||
| 502 | } | 524 | } |
| 503 | 525 | ||
| 504 | void RasterizerOpenGL::InvalidateGPUCache() { | 526 | void RasterizerOpenGL::InvalidateGPUCache() { |
| 505 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 527 | gpu.InvalidateGPUCache(); |
| 506 | shader_cache.SyncGuestHost(); | ||
| 507 | { | ||
| 508 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 509 | buffer_cache.FlushCachedWrites(); | ||
| 510 | } | ||
| 511 | } | 528 | } |
| 512 | 529 | ||
| 513 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 530 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { |
| @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | |||
| 519 | std::scoped_lock lock{buffer_cache.mutex}; | 536 | std::scoped_lock lock{buffer_cache.mutex}; |
| 520 | buffer_cache.WriteMemory(addr, size); | 537 | buffer_cache.WriteMemory(addr, size); |
| 521 | } | 538 | } |
| 522 | shader_cache.OnCPUWrite(addr, size); | 539 | shader_cache.OnCacheInvalidation(addr, size); |
| 523 | } | 540 | } |
| 524 | 541 | ||
| 525 | void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | 542 | void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a73ad15c1..8eda2ddba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -98,7 +98,8 @@ public: | |||
| 98 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | 98 | VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |
| 99 | void InvalidateRegion(VAddr addr, u64 size, | 99 | void InvalidateRegion(VAddr addr, u64 size, |
| 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 100 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 101 | void OnCPUWrite(VAddr addr, u64 size) override; | 101 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 102 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 102 | void InvalidateGPUCache() override; | 103 | void InvalidateGPUCache() override; |
| 103 | void UnmapMemory(VAddr addr, u64 size) override; | 104 | void UnmapMemory(VAddr addr, u64 size) override; |
| 104 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 105 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a63a29e61..456bb040e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -566,7 +566,28 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||
| 566 | } | 566 | } |
| 567 | } | 567 | } |
| 568 | 568 | ||
| 569 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 569 | bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { |
| 570 | if (addr == 0 || size == 0) { | ||
| 571 | return false; | ||
| 572 | } | ||
| 573 | |||
| 574 | { | ||
| 575 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 576 | if (buffer_cache.OnCPUWrite(addr, size)) { | ||
| 577 | return true; | ||
| 578 | } | ||
| 579 | } | ||
| 580 | |||
| 581 | { | ||
| 582 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 583 | texture_cache.WriteMemory(addr, size); | ||
| 584 | } | ||
| 585 | |||
| 586 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 587 | return false; | ||
| 588 | } | ||
| 589 | |||
| 590 | void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | ||
| 570 | if (addr == 0 || size == 0) { | 591 | if (addr == 0 || size == 0) { |
| 571 | return; | 592 | return; |
| 572 | } | 593 | } |
| @@ -595,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | |||
| 595 | std::scoped_lock lock{buffer_cache.mutex}; | 616 | std::scoped_lock lock{buffer_cache.mutex}; |
| 596 | buffer_cache.WriteMemory(addr, size); | 617 | buffer_cache.WriteMemory(addr, size); |
| 597 | } | 618 | } |
| 598 | pipeline_cache.OnCPUWrite(addr, size); | 619 | pipeline_cache.OnCacheInvalidation(addr, size); |
| 599 | } | 620 | } |
| 600 | 621 | ||
| 601 | void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | 622 | void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b39710b3c..73257d964 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -96,7 +96,8 @@ public: | |||
| 96 | void InvalidateRegion(VAddr addr, u64 size, | 96 | void InvalidateRegion(VAddr addr, u64 size, |
| 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 98 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | 98 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; |
| 99 | void OnCPUWrite(VAddr addr, u64 size) override; | 99 | void OnCacheInvalidation(VAddr addr, u64 size) override; |
| 100 | bool OnCPUWrite(VAddr addr, u64 size) override; | ||
| 100 | void InvalidateGPUCache() override; | 101 | void InvalidateGPUCache() override; |
| 101 | void UnmapMemory(VAddr addr, u64 size) override; | 102 | void UnmapMemory(VAddr addr, u64 size) override; |
| 102 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 103 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 4db948b6d..01701201d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | |||
| 24 | RemovePendingShaders(); | 24 | RemovePendingShaders(); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | 27 | void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) { |
| 28 | std::scoped_lock lock{invalidation_mutex}; | 28 | std::scoped_lock lock{invalidation_mutex}; |
| 29 | InvalidatePagesInRegion(addr, size); | 29 | InvalidatePagesInRegion(addr, size); |
| 30 | } | 30 | } |
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index f3cc4c70b..de8e08002 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -62,7 +62,7 @@ public: | |||
| 62 | /// @brief Unmarks a memory region as cached and marks it for removal | 62 | /// @brief Unmarks a memory region as cached and marks it for removal |
| 63 | /// @param addr Start address of the CPU write operation | 63 | /// @param addr Start address of the CPU write operation |
| 64 | /// @param size Number of bytes of the CPU write operation | 64 | /// @param size Number of bytes of the CPU write operation |
| 65 | void OnCPUWrite(VAddr addr, size_t size); | 65 | void OnCacheInvalidation(VAddr addr, size_t size); |
| 66 | 66 | ||
| 67 | /// @brief Flushes delayed removal operations | 67 | /// @brief Flushes delayed removal operations |
| 68 | void SyncGuestHost(); | 68 | void SyncGuestHost(); |