diff options
| author | 2022-03-25 12:09:40 -0400 | |
|---|---|---|
| committer | 2022-03-25 12:09:40 -0400 | |
| commit | da46d924e9953a6934c43e33e068023b405ecbcb (patch) | |
| tree | 2a5ad33409771c951997186c5665b60a96fa4205 /src | |
| parent | Merge pull request #7720 from FernandoS27/yfc-gc (diff) | |
| parent | Memory: Don't protect reads on Normal accuracy. (diff) | |
| download | yuzu-da46d924e9953a6934c43e33e068023b405ecbcb.tar.gz yuzu-da46d924e9953a6934c43e33e068023b405ecbcb.tar.xz yuzu-da46d924e9953a6934c43e33e068023b405ecbcb.zip | |
Merge pull request #8080 from FernandoS27/yo-momma-so-fat-that
Memory GPU <-> CPU: reduce infighting in the texture cache by adding CPU Cached memory.
Diffstat (limited to '')
| -rw-r--r-- | src/core/memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 41 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 11 |
6 files changed, 65 insertions, 4 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 28d30eee2..3fed51400 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -322,7 +322,7 @@ struct Memory::Impl { | |||
| 322 | } | 322 | } |
| 323 | 323 | ||
| 324 | if (Settings::IsFastmemEnabled()) { | 324 | if (Settings::IsFastmemEnabled()) { |
| 325 | const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; | 325 | const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; |
| 326 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); | 326 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); |
| 327 | } | 327 | } |
| 328 | 328 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4d632d211..7e06d0069 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -352,7 +352,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 352 | shader_cache.OnCPUWrite(addr, size); | 352 | shader_cache.OnCPUWrite(addr, size); |
| 353 | { | 353 | { |
| 354 | std::scoped_lock lock{texture_cache.mutex}; | 354 | std::scoped_lock lock{texture_cache.mutex}; |
| 355 | texture_cache.WriteMemory(addr, size); | 355 | texture_cache.CachedWriteMemory(addr, size); |
| 356 | } | 356 | } |
| 357 | { | 357 | { |
| 358 | std::scoped_lock lock{buffer_cache.mutex}; | 358 | std::scoped_lock lock{buffer_cache.mutex}; |
| @@ -364,6 +364,10 @@ void RasterizerOpenGL::SyncGuestHost() { | |||
| 364 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 364 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 365 | shader_cache.SyncGuestHost(); | 365 | shader_cache.SyncGuestHost(); |
| 366 | { | 366 | { |
| 367 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 368 | texture_cache.FlushCachedWrites(); | ||
| 369 | } | ||
| 370 | { | ||
| 367 | std::scoped_lock lock{buffer_cache.mutex}; | 371 | std::scoped_lock lock{buffer_cache.mutex}; |
| 368 | buffer_cache.FlushCachedWrites(); | 372 | buffer_cache.FlushCachedWrites(); |
| 369 | } | 373 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa87d37f8..dd6e0027e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -408,7 +408,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 408 | pipeline_cache.OnCPUWrite(addr, size); | 408 | pipeline_cache.OnCPUWrite(addr, size); |
| 409 | { | 409 | { |
| 410 | std::scoped_lock lock{texture_cache.mutex}; | 410 | std::scoped_lock lock{texture_cache.mutex}; |
| 411 | texture_cache.WriteMemory(addr, size); | 411 | texture_cache.CachedWriteMemory(addr, size); |
| 412 | } | 412 | } |
| 413 | { | 413 | { |
| 414 | std::scoped_lock lock{buffer_cache.mutex}; | 414 | std::scoped_lock lock{buffer_cache.mutex}; |
| @@ -419,6 +419,10 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 419 | void RasterizerVulkan::SyncGuestHost() { | 419 | void RasterizerVulkan::SyncGuestHost() { |
| 420 | pipeline_cache.SyncGuestHost(); | 420 | pipeline_cache.SyncGuestHost(); |
| 421 | { | 421 | { |
| 422 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 423 | texture_cache.FlushCachedWrites(); | ||
| 424 | } | ||
| 425 | { | ||
| 422 | std::scoped_lock lock{buffer_cache.mutex}; | 426 | std::scoped_lock lock{buffer_cache.mutex}; |
| 423 | buffer_cache.FlushCachedWrites(); | 427 | buffer_cache.FlushCachedWrites(); |
| 424 | } | 428 | } |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index dd0106432..cc7999027 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -39,6 +39,9 @@ enum class ImageFlagBits : u32 { | |||
| 39 | Rescaled = 1 << 13, | 39 | Rescaled = 1 << 13, |
| 40 | CheckingRescalable = 1 << 14, | 40 | CheckingRescalable = 1 << 14, |
| 41 | IsRescalable = 1 << 15, | 41 | IsRescalable = 1 << 15, |
| 42 | |||
| 43 | // Cached CPU | ||
| 44 | CachedCpuModified = 1 << 16, ///< Contents have been modified from the CPU | ||
| 42 | }; | 45 | }; |
| 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 46 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 44 | 47 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index efc1c4525..099b2ae1b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -438,6 +438,23 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 438 | } | 438 | } |
| 439 | 439 | ||
| 440 | template <class P> | 440 | template <class P> |
| 441 | void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) { | ||
| 442 | const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE); | ||
| 443 | const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE); | ||
| 444 | ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) { | ||
| 445 | if (True(image.flags & ImageFlagBits::CachedCpuModified)) { | ||
| 446 | return; | ||
| 447 | } | ||
| 448 | image.flags |= ImageFlagBits::CachedCpuModified; | ||
| 449 | cached_cpu_invalidate.insert(image_id); | ||
| 450 | |||
| 451 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 452 | UntrackImage(image, image_id); | ||
| 453 | } | ||
| 454 | }); | ||
| 455 | } | ||
| 456 | |||
| 457 | template <class P> | ||
| 441 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 458 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 442 | std::vector<ImageId> images; | 459 | std::vector<ImageId> images; |
| 443 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | 460 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { |
| @@ -495,6 +512,18 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | |||
| 495 | } | 512 | } |
| 496 | 513 | ||
| 497 | template <class P> | 514 | template <class P> |
| 515 | void TextureCache<P>::FlushCachedWrites() { | ||
| 516 | for (ImageId image_id : cached_cpu_invalidate) { | ||
| 517 | Image& image = slot_images[image_id]; | ||
| 518 | if (True(image.flags & ImageFlagBits::CachedCpuModified)) { | ||
| 519 | image.flags &= ~ImageFlagBits::CachedCpuModified; | ||
| 520 | image.flags |= ImageFlagBits::CpuModified; | ||
| 521 | } | ||
| 522 | } | ||
| 523 | cached_cpu_invalidate.clear(); | ||
| 524 | } | ||
| 525 | |||
| 526 | template <class P> | ||
| 498 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 527 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 499 | const Tegra::Engines::Fermi2D::Surface& src, | 528 | const Tegra::Engines::Fermi2D::Surface& src, |
| 500 | const Tegra::Engines::Fermi2D::Config& copy) { | 529 | const Tegra::Engines::Fermi2D::Config& copy) { |
| @@ -1560,6 +1589,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1560 | template <class P> | 1589 | template <class P> |
| 1561 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | 1590 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { |
| 1562 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 1591 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1592 | if (True(image.flags & ImageFlagBits::CachedCpuModified)) { | ||
| 1593 | return; | ||
| 1594 | } | ||
| 1563 | image.flags |= ImageFlagBits::Tracked; | 1595 | image.flags |= ImageFlagBits::Tracked; |
| 1564 | if (False(image.flags & ImageFlagBits::Sparse)) { | 1596 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1565 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 1597 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| @@ -1616,6 +1648,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) { | |||
| 1616 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1648 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1617 | } | 1649 | } |
| 1618 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | 1650 | total_used_memory -= Common::AlignUp(tentative_size, 1024); |
| 1651 | if (True(image.flags & ImageFlagBits::CachedCpuModified)) { | ||
| 1652 | cached_cpu_invalidate.erase(image_id); | ||
| 1653 | } | ||
| 1619 | const GPUVAddr gpu_addr = image.gpu_addr; | 1654 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 1620 | const auto alloc_it = image_allocs_table.find(gpu_addr); | 1655 | const auto alloc_it = image_allocs_table.find(gpu_addr); |
| 1621 | if (alloc_it == image_allocs_table.end()) { | 1656 | if (alloc_it == image_allocs_table.end()) { |
| @@ -1782,7 +1817,11 @@ template <class P> | |||
| 1782 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { | 1817 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { |
| 1783 | Image& image = slot_images[image_id]; | 1818 | Image& image = slot_images[image_id]; |
| 1784 | if (invalidate) { | 1819 | if (invalidate) { |
| 1785 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | 1820 | if (True(image.flags & ImageFlagBits::CachedCpuModified)) { |
| 1821 | cached_cpu_invalidate.erase(image_id); | ||
| 1822 | } | ||
| 1823 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified | | ||
| 1824 | ImageFlagBits::CachedCpuModified); | ||
| 1786 | if (False(image.flags & ImageFlagBits::Tracked)) { | 1825 | if (False(image.flags & ImageFlagBits::Tracked)) { |
| 1787 | TrackImage(image, image_id); | 1826 | TrackImage(image, image_id); |
| 1788 | } | 1827 | } |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index b1324edf3..ad5978a33 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <span> | 8 | #include <span> |
| 9 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | 10 | #include <unordered_map> |
| 11 | #include <unordered_set> | ||
| 11 | #include <vector> | 12 | #include <vector> |
| 12 | #include <queue> | 13 | #include <queue> |
| 13 | 14 | ||
| @@ -50,6 +51,9 @@ class TextureCache { | |||
| 50 | /// Address shift for caching images into a hash table | 51 | /// Address shift for caching images into a hash table |
| 51 | static constexpr u64 PAGE_BITS = 20; | 52 | static constexpr u64 PAGE_BITS = 20; |
| 52 | 53 | ||
| 54 | static constexpr u64 CPU_PAGE_BITS = 12; | ||
| 55 | static constexpr u64 CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS; | ||
| 56 | |||
| 53 | /// Enables debugging features to the texture cache | 57 | /// Enables debugging features to the texture cache |
| 54 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | 58 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; |
| 55 | /// Implement blits as copies between framebuffers | 59 | /// Implement blits as copies between framebuffers |
| @@ -136,6 +140,9 @@ public: | |||
| 136 | /// Mark images in a range as modified from the CPU | 140 | /// Mark images in a range as modified from the CPU |
| 137 | void WriteMemory(VAddr cpu_addr, size_t size); | 141 | void WriteMemory(VAddr cpu_addr, size_t size); |
| 138 | 142 | ||
| 143 | /// Mark images in a range as modified from the CPU | ||
| 144 | void CachedWriteMemory(VAddr cpu_addr, size_t size); | ||
| 145 | |||
| 139 | /// Download contents of host images to guest memory in a region | 146 | /// Download contents of host images to guest memory in a region |
| 140 | void DownloadMemory(VAddr cpu_addr, size_t size); | 147 | void DownloadMemory(VAddr cpu_addr, size_t size); |
| 141 | 148 | ||
| @@ -145,6 +152,8 @@ public: | |||
| 145 | /// Remove images in a region | 152 | /// Remove images in a region |
| 146 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); | 153 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); |
| 147 | 154 | ||
| 155 | void FlushCachedWrites(); | ||
| 156 | |||
| 148 | /// Blit an image with the given parameters | 157 | /// Blit an image with the given parameters |
| 149 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 158 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 150 | const Tegra::Engines::Fermi2D::Surface& src, | 159 | const Tegra::Engines::Fermi2D::Surface& src, |
| @@ -366,6 +375,8 @@ private: | |||
| 366 | 375 | ||
| 367 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 376 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; |
| 368 | 377 | ||
| 378 | std::unordered_set<ImageId> cached_cpu_invalidate; | ||
| 379 | |||
| 369 | VAddr virtual_invalid_space{}; | 380 | VAddr virtual_invalid_space{}; |
| 370 | 381 | ||
| 371 | bool has_deleted_images = false; | 382 | bool has_deleted_images = false; |