summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Morph2022-03-25 12:09:40 -0400
committerGravatar GitHub2022-03-25 12:09:40 -0400
commitda46d924e9953a6934c43e33e068023b405ecbcb (patch)
tree2a5ad33409771c951997186c5665b60a96fa4205 /src
parentMerge pull request #7720 from FernandoS27/yfc-gc (diff)
parentMemory: Don't protect reads on Normal accuracy. (diff)
downloadyuzu-da46d924e9953a6934c43e33e068023b405ecbcb.tar.gz
yuzu-da46d924e9953a6934c43e33e068023b405ecbcb.tar.xz
yuzu-da46d924e9953a6934c43e33e068023b405ecbcb.zip
Merge pull request #8080 from FernandoS27/yo-momma-so-fat-that
Memory GPU <-> CPU: reduce infighting in the texture cache by adding CPU Cached memory.
Diffstat (limited to '')
-rw-r--r--src/core/memory.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/texture_cache/image_base.h3
-rw-r--r--src/video_core/texture_cache/texture_cache.h41
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h11
6 files changed, 65 insertions, 4 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 28d30eee2..3fed51400 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -322,7 +322,7 @@ struct Memory::Impl {
322 } 322 }
323 323
324 if (Settings::IsFastmemEnabled()) { 324 if (Settings::IsFastmemEnabled()) {
325 const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; 325 const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
326 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); 326 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
327 } 327 }
328 328
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4d632d211..7e06d0069 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -352,7 +352,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
352 shader_cache.OnCPUWrite(addr, size); 352 shader_cache.OnCPUWrite(addr, size);
353 { 353 {
354 std::scoped_lock lock{texture_cache.mutex}; 354 std::scoped_lock lock{texture_cache.mutex};
355 texture_cache.WriteMemory(addr, size); 355 texture_cache.CachedWriteMemory(addr, size);
356 } 356 }
357 { 357 {
358 std::scoped_lock lock{buffer_cache.mutex}; 358 std::scoped_lock lock{buffer_cache.mutex};
@@ -364,6 +364,10 @@ void RasterizerOpenGL::SyncGuestHost() {
364 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 364 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
365 shader_cache.SyncGuestHost(); 365 shader_cache.SyncGuestHost();
366 { 366 {
367 std::scoped_lock lock{texture_cache.mutex};
368 texture_cache.FlushCachedWrites();
369 }
370 {
367 std::scoped_lock lock{buffer_cache.mutex}; 371 std::scoped_lock lock{buffer_cache.mutex};
368 buffer_cache.FlushCachedWrites(); 372 buffer_cache.FlushCachedWrites();
369 } 373 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fa87d37f8..dd6e0027e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -408,7 +408,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
408 pipeline_cache.OnCPUWrite(addr, size); 408 pipeline_cache.OnCPUWrite(addr, size);
409 { 409 {
410 std::scoped_lock lock{texture_cache.mutex}; 410 std::scoped_lock lock{texture_cache.mutex};
411 texture_cache.WriteMemory(addr, size); 411 texture_cache.CachedWriteMemory(addr, size);
412 } 412 }
413 { 413 {
414 std::scoped_lock lock{buffer_cache.mutex}; 414 std::scoped_lock lock{buffer_cache.mutex};
@@ -419,6 +419,10 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
419void RasterizerVulkan::SyncGuestHost() { 419void RasterizerVulkan::SyncGuestHost() {
420 pipeline_cache.SyncGuestHost(); 420 pipeline_cache.SyncGuestHost();
421 { 421 {
422 std::scoped_lock lock{texture_cache.mutex};
423 texture_cache.FlushCachedWrites();
424 }
425 {
422 std::scoped_lock lock{buffer_cache.mutex}; 426 std::scoped_lock lock{buffer_cache.mutex};
423 buffer_cache.FlushCachedWrites(); 427 buffer_cache.FlushCachedWrites();
424 } 428 }
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index dd0106432..cc7999027 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -39,6 +39,9 @@ enum class ImageFlagBits : u32 {
39 Rescaled = 1 << 13, 39 Rescaled = 1 << 13,
40 CheckingRescalable = 1 << 14, 40 CheckingRescalable = 1 << 14,
41 IsRescalable = 1 << 15, 41 IsRescalable = 1 << 15,
42
43 // Cached CPU
44 CachedCpuModified = 1 << 16, ///< Contents have been modified from the CPU
42}; 45};
43DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 46DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
44 47
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index efc1c4525..099b2ae1b 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -438,6 +438,23 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
438} 438}
439 439
440template <class P> 440template <class P>
441void TextureCache<P>::CachedWriteMemory(VAddr cpu_addr, size_t size) {
442 const VAddr new_cpu_addr = Common::AlignDown(cpu_addr, CPU_PAGE_SIZE);
443 const size_t new_size = Common::AlignUp(size + cpu_addr - new_cpu_addr, CPU_PAGE_SIZE);
444 ForEachImageInRegion(new_cpu_addr, new_size, [this](ImageId image_id, Image& image) {
445 if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
446 return;
447 }
448 image.flags |= ImageFlagBits::CachedCpuModified;
449 cached_cpu_invalidate.insert(image_id);
450
451 if (True(image.flags & ImageFlagBits::Tracked)) {
452 UntrackImage(image, image_id);
453 }
454 });
455}
456
457template <class P>
441void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 458void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
442 std::vector<ImageId> images; 459 std::vector<ImageId> images;
443 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { 460 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
@@ -495,6 +512,18 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
495} 512}
496 513
497template <class P> 514template <class P>
515void TextureCache<P>::FlushCachedWrites() {
516 for (ImageId image_id : cached_cpu_invalidate) {
517 Image& image = slot_images[image_id];
518 if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
519 image.flags &= ~ImageFlagBits::CachedCpuModified;
520 image.flags |= ImageFlagBits::CpuModified;
521 }
522 }
523 cached_cpu_invalidate.clear();
524}
525
526template <class P>
498void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 527void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
499 const Tegra::Engines::Fermi2D::Surface& src, 528 const Tegra::Engines::Fermi2D::Surface& src,
500 const Tegra::Engines::Fermi2D::Config& copy) { 529 const Tegra::Engines::Fermi2D::Config& copy) {
@@ -1560,6 +1589,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1560template <class P> 1589template <class P>
1561void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { 1590void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1562 ASSERT(False(image.flags & ImageFlagBits::Tracked)); 1591 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1592 if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
1593 return;
1594 }
1563 image.flags |= ImageFlagBits::Tracked; 1595 image.flags |= ImageFlagBits::Tracked;
1564 if (False(image.flags & ImageFlagBits::Sparse)) { 1596 if (False(image.flags & ImageFlagBits::Sparse)) {
1565 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); 1597 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
@@ -1616,6 +1648,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
1616 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1648 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1617 } 1649 }
1618 total_used_memory -= Common::AlignUp(tentative_size, 1024); 1650 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1651 if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
1652 cached_cpu_invalidate.erase(image_id);
1653 }
1619 const GPUVAddr gpu_addr = image.gpu_addr; 1654 const GPUVAddr gpu_addr = image.gpu_addr;
1620 const auto alloc_it = image_allocs_table.find(gpu_addr); 1655 const auto alloc_it = image_allocs_table.find(gpu_addr);
1621 if (alloc_it == image_allocs_table.end()) { 1656 if (alloc_it == image_allocs_table.end()) {
@@ -1782,7 +1817,11 @@ template <class P>
1782void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { 1817void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1783 Image& image = slot_images[image_id]; 1818 Image& image = slot_images[image_id];
1784 if (invalidate) { 1819 if (invalidate) {
1785 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); 1820 if (True(image.flags & ImageFlagBits::CachedCpuModified)) {
1821 cached_cpu_invalidate.erase(image_id);
1822 }
1823 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified |
1824 ImageFlagBits::CachedCpuModified);
1786 if (False(image.flags & ImageFlagBits::Tracked)) { 1825 if (False(image.flags & ImageFlagBits::Tracked)) {
1787 TrackImage(image, image_id); 1826 TrackImage(image, image_id);
1788 } 1827 }
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index b1324edf3..ad5978a33 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -8,6 +8,7 @@
8#include <span> 8#include <span>
9#include <type_traits> 9#include <type_traits>
10#include <unordered_map> 10#include <unordered_map>
11#include <unordered_set>
11#include <vector> 12#include <vector>
12#include <queue> 13#include <queue>
13 14
@@ -50,6 +51,9 @@ class TextureCache {
50 /// Address shift for caching images into a hash table 51 /// Address shift for caching images into a hash table
51 static constexpr u64 PAGE_BITS = 20; 52 static constexpr u64 PAGE_BITS = 20;
52 53
54 static constexpr u64 CPU_PAGE_BITS = 12;
55 static constexpr u64 CPU_PAGE_SIZE = 1ULL << CPU_PAGE_BITS;
56
53 /// Enables debugging features to the texture cache 57 /// Enables debugging features to the texture cache
54 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; 58 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
55 /// Implement blits as copies between framebuffers 59 /// Implement blits as copies between framebuffers
@@ -136,6 +140,9 @@ public:
136 /// Mark images in a range as modified from the CPU 140 /// Mark images in a range as modified from the CPU
137 void WriteMemory(VAddr cpu_addr, size_t size); 141 void WriteMemory(VAddr cpu_addr, size_t size);
138 142
143 /// Mark images in a range as modified from the CPU
144 void CachedWriteMemory(VAddr cpu_addr, size_t size);
145
139 /// Download contents of host images to guest memory in a region 146 /// Download contents of host images to guest memory in a region
140 void DownloadMemory(VAddr cpu_addr, size_t size); 147 void DownloadMemory(VAddr cpu_addr, size_t size);
141 148
@@ -145,6 +152,8 @@ public:
145 /// Remove images in a region 152 /// Remove images in a region
146 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); 153 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
147 154
155 void FlushCachedWrites();
156
148 /// Blit an image with the given parameters 157 /// Blit an image with the given parameters
149 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 158 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
150 const Tegra::Engines::Fermi2D::Surface& src, 159 const Tegra::Engines::Fermi2D::Surface& src,
@@ -366,6 +375,8 @@ private:
366 375
367 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 376 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
368 377
378 std::unordered_set<ImageId> cached_cpu_invalidate;
379
369 VAddr virtual_invalid_space{}; 380 VAddr virtual_invalid_space{};
370 381
371 bool has_deleted_images = false; 382 bool has_deleted_images = false;