diff options
| author | 2022-01-16 04:43:06 +0100 | |
|---|---|---|
| committer | 2022-03-25 01:51:51 +0100 | |
| commit | ecb3342145780d811017a3a3c8f14f3e0725db75 (patch) | |
| tree | 2badf5f2b54a90cc3803d63f9f013c6abe1a6a2d /src | |
| parent | Merge pull request #8074 from liamwhite/cached-words (diff) | |
| download | yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.tar.gz yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.tar.xz yuzu-ecb3342145780d811017a3a3c8f14f3e0725db75.zip | |
Garbage Collection: Redesign the algorithm to make better use of memory.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 15 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 42 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 9 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 52 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 11 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.h | 4 |
13 files changed, 156 insertions, 32 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3c1f79a27..40f52eacb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -484,6 +484,15 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 484 | rescale_read_fbos[i].Create(); | 484 | rescale_read_fbos[i].Create(); |
| 485 | } | 485 | } |
| 486 | } | 486 | } |
| 487 | |||
| 488 | device_access_memory = []() -> u64 { | ||
| 489 | if (GLAD_GL_NVX_gpu_memory_info) { | ||
| 490 | GLint cur_avail_mem_kb = 0; | ||
| 491 | glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, &cur_avail_mem_kb); | ||
| 492 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; | ||
| 493 | } | ||
| 494 | return 2_GiB; // Return minimum requirements | ||
| 495 | }(); | ||
| 487 | } | 496 | } |
| 488 | 497 | ||
| 489 | TextureCacheRuntime::~TextureCacheRuntime() = default; | 498 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| @@ -500,13 +509,13 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | |||
| 500 | return download_buffers.RequestMap(size, false); | 509 | return download_buffers.RequestMap(size, false); |
| 501 | } | 510 | } |
| 502 | 511 | ||
| 503 | u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | 512 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { |
| 504 | if (GLAD_GL_NVX_gpu_memory_info) { | 513 | if (GLAD_GL_NVX_gpu_memory_info) { |
| 505 | GLint cur_avail_mem_kb = 0; | 514 | GLint cur_avail_mem_kb = 0; |
| 506 | glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); | 515 | glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); |
| 507 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; | 516 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; |
| 508 | } | 517 | } |
| 509 | return 2_GiB; // Return minimum requirements | 518 | return 2_GiB; |
| 510 | } | 519 | } |
| 511 | 520 | ||
| 512 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | 521 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, |
| @@ -686,6 +695,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, | |||
| 686 | } | 695 | } |
| 687 | if (IsConverted(runtime->device, info.format, info.type)) { | 696 | if (IsConverted(runtime->device, info.format, info.type)) { |
| 688 | flags |= ImageFlagBits::Converted; | 697 | flags |= ImageFlagBits::Converted; |
| 698 | flags |= ImageFlagBits::GCProtected; | ||
| 689 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 699 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 690 | gl_format = GL_RGBA; | 700 | gl_format = GL_RGBA; |
| 691 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 701 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 7f425631f..feeeb371e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -83,7 +83,15 @@ public: | |||
| 83 | 83 | ||
| 84 | ImageBufferMap DownloadStagingBuffer(size_t size); | 84 | ImageBufferMap DownloadStagingBuffer(size_t size); |
| 85 | 85 | ||
| 86 | u64 GetDeviceLocalMemory() const; | 86 | u64 GetDeviceLocalMemory() const { |
| 87 | return device_access_memory; | ||
| 88 | } | ||
| 89 | |||
| 90 | u64 GetDeviceMemoryUsage() const; | ||
| 91 | |||
| 92 | bool CanReportMemoryUsage() const { | ||
| 93 | return GLAD_GL_NVX_gpu_memory_info; | ||
| 94 | } | ||
| 87 | 95 | ||
| 88 | bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { | 96 | bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { |
| 89 | return true; | 97 | return true; |
| @@ -172,6 +180,7 @@ private: | |||
| 172 | std::array<OGLFramebuffer, 4> rescale_draw_fbos; | 180 | std::array<OGLFramebuffer, 4> rescale_draw_fbos; |
| 173 | std::array<OGLFramebuffer, 4> rescale_read_fbos; | 181 | std::array<OGLFramebuffer, 4> rescale_read_fbos; |
| 174 | const Settings::ResolutionScalingInfo& resolution; | 182 | const Settings::ResolutionScalingInfo& resolution; |
| 183 | u64 device_access_memory; | ||
| 175 | }; | 184 | }; |
| 176 | 185 | ||
| 177 | class Image : public VideoCommon::ImageBase { | 186 | class Image : public VideoCommon::ImageBase { |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 5d5329abf..64a58304b 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -118,7 +118,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 118 | .image = nullptr, | 118 | .image = nullptr, |
| 119 | .buffer = *stream_buffer, | 119 | .buffer = *stream_buffer, |
| 120 | }; | 120 | }; |
| 121 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | 121 | const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; |
| 122 | VkMemoryAllocateInfo stream_memory_info{ | 122 | VkMemoryAllocateInfo stream_memory_info{ |
| 123 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | 123 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| 124 | .pNext = make_dedicated ? &dedicated_info : nullptr, | 124 | .pNext = make_dedicated ? &dedicated_info : nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 83a23b66a..884f501ef 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1189,6 +1189,14 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | |||
| 1189 | return device.GetDeviceLocalMemory(); | 1189 | return device.GetDeviceLocalMemory(); |
| 1190 | } | 1190 | } |
| 1191 | 1191 | ||
| 1192 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { | ||
| 1193 | return device.GetDeviceMemoryUsage(); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | bool TextureCacheRuntime::CanReportMemoryUsage() const { | ||
| 1197 | return device.CanReportMemoryUsage(); | ||
| 1198 | } | ||
| 1199 | |||
| 1192 | void TextureCacheRuntime::TickFrame() {} | 1200 | void TextureCacheRuntime::TickFrame() {} |
| 1193 | 1201 | ||
| 1194 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, | 1202 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| @@ -1203,6 +1211,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu | |||
| 1203 | } else { | 1211 | } else { |
| 1204 | flags |= VideoCommon::ImageFlagBits::Converted; | 1212 | flags |= VideoCommon::ImageFlagBits::Converted; |
| 1205 | } | 1213 | } |
| 1214 | flags |= VideoCommon::ImageFlagBits::GCProtected; | ||
| 1206 | } | 1215 | } |
| 1207 | if (runtime->device.HasDebuggingToolAttached()) { | 1216 | if (runtime->device.HasDebuggingToolAttached()) { |
| 1208 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1217 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index c81130dd2..cb15b4a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -55,6 +55,10 @@ public: | |||
| 55 | 55 | ||
| 56 | u64 GetDeviceLocalMemory() const; | 56 | u64 GetDeviceLocalMemory() const; |
| 57 | 57 | ||
| 58 | u64 GetDeviceMemoryUsage() const; | ||
| 59 | |||
| 60 | bool CanReportMemoryUsage() const; | ||
| 61 | |||
| 58 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 62 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 59 | const Region2D& dst_region, const Region2D& src_region, | 63 | const Region2D& dst_region, const Region2D& src_region, |
| 60 | Tegra::Engines::Fermi2D::Filter filter, | 64 | Tegra::Engines::Fermi2D::Filter filter, |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..279f39269 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -29,15 +29,16 @@ enum class ImageFlagBits : u32 { | |||
| 29 | Sparse = 1 << 9, ///< Image has non-continuous submemory. | 29 | Sparse = 1 << 9, ///< Image has non-continuous submemory. |
| 30 | 30 | ||
| 31 | // Garbage Collection Flags | 31 | // Garbage Collection Flags |
| 32 | BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher | 32 | BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher |
| 33 | ///< garbage collection priority | 33 | ///< garbage collection priority |
| 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage | 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage |
| 35 | ///< collection | 35 | ///< collection |
| 36 | GCProtected = 1 << 12, ///< Protected from low-tier GC as they are costly to load back. | ||
| 36 | 37 | ||
| 37 | // Rescaler | 38 | // Rescaler |
| 38 | Rescaled = 1 << 12, | 39 | Rescaled = 1 << 13, |
| 39 | CheckingRescalable = 1 << 13, | 40 | CheckingRescalable = 1 << 14, |
| 40 | IsRescalable = 1 << 14, | 41 | IsRescalable = 1 << 15, |
| 41 | }; | 42 | }; |
| 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 43 | 44 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..7b6bd8697 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -50,14 +50,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 51 | 51 | ||
| 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; | 54 | const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; |
| 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; | 55 | const s64 min_spacing_critical = device_memory - 1_GiB; |
| 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); | 56 | const s64 mem_tresshold = std::min(device_memory, TARGET_THRESHOLD); |
| 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); | 57 | const s64 min_vacancy_expected = (6 * mem_tresshold) / 10; |
| 58 | minimum_memory = 0; | 58 | const s64 min_vacancy_critical = (3 * mem_tresshold) / 10; |
| 59 | expected_memory = static_cast<u64>( | ||
| 60 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||
| 61 | DEFAULT_EXPECTED_MEMORY)); | ||
| 62 | critical_memory = static_cast<u64>( | ||
| 63 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||
| 64 | DEFAULT_CRITICAL_MEMORY)); | ||
| 65 | minimum_memory = static_cast<u64>((device_memory - mem_tresshold) / 2); | ||
| 66 | LOG_CRITICAL(Debug, "Available Memory: {}", device_memory / 1_MiB); | ||
| 59 | } else { | 67 | } else { |
| 60 | // On OpenGL we can be more conservatives as the driver takes care. | ||
| 61 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 68 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 62 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 69 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 63 | minimum_memory = 0; | 70 | minimum_memory = 0; |
| @@ -76,7 +83,8 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 76 | } | 83 | } |
| 77 | --num_iterations; | 84 | --num_iterations; |
| 78 | auto& image = slot_images[image_id]; | 85 | auto& image = slot_images[image_id]; |
| 79 | const bool must_download = image.IsSafeDownload(); | 86 | const bool must_download = |
| 87 | image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); | ||
| 80 | if (!high_priority_mode && must_download) { | 88 | if (!high_priority_mode && must_download) { |
| 81 | return false; | 89 | return false; |
| 82 | } | 90 | } |
| @@ -99,6 +107,10 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 99 | 107 | ||
| 100 | template <class P> | 108 | template <class P> |
| 101 | void TextureCache<P>::TickFrame() { | 109 | void TextureCache<P>::TickFrame() { |
| 110 | // If we can obtain the memory info, use it instead of the estimate. | ||
| 111 | if (runtime.CanReportMemoryUsage()) { | ||
| 112 | total_used_memory = runtime.GetDeviceMemoryUsage(); | ||
| 113 | } | ||
| 102 | if (total_used_memory > minimum_memory) { | 114 | if (total_used_memory > minimum_memory) { |
| 103 | RunGarbageCollector(); | 115 | RunGarbageCollector(); |
| 104 | } | 116 | } |
| @@ -106,7 +118,9 @@ void TextureCache<P>::TickFrame() { | |||
| 106 | sentenced_framebuffers.Tick(); | 118 | sentenced_framebuffers.Tick(); |
| 107 | sentenced_image_view.Tick(); | 119 | sentenced_image_view.Tick(); |
| 108 | runtime.TickFrame(); | 120 | runtime.TickFrame(); |
| 121 | critical_gc = 0; | ||
| 109 | ++frame_tick; | 122 | ++frame_tick; |
| 123 | LOG_CRITICAL(Debug, "Current memory: {}", total_used_memory / 1_MiB); | ||
| 110 | } | 124 | } |
| 111 | 125 | ||
| 112 | template <class P> | 126 | template <class P> |
| @@ -1052,6 +1066,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1052 | 1066 | ||
| 1053 | for (const ImageId overlap_id : overlap_ids) { | 1067 | for (const ImageId overlap_id : overlap_ids) { |
| 1054 | Image& overlap = slot_images[overlap_id]; | 1068 | Image& overlap = slot_images[overlap_id]; |
| 1069 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 1070 | new_image.flags |= ImageFlagBits::GpuModified; | ||
| 1071 | new_image.modification_tick = | ||
| 1072 | std::max(overlap.modification_tick, new_image.modification_tick); | ||
| 1073 | } | ||
| 1055 | if (overlap.info.num_samples != new_image.info.num_samples) { | 1074 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 1056 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | 1075 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 1057 | } else { | 1076 | } else { |
| @@ -1414,6 +1433,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1414 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1433 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1415 | } | 1434 | } |
| 1416 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1435 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1436 | if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { | ||
| 1437 | RunGarbageCollector(); | ||
| 1438 | critical_gc++; | ||
| 1439 | } | ||
| 1417 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | 1440 | image.lru_index = lru_cache.Insert(image_id, frame_tick); |
| 1418 | 1441 | ||
| 1419 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1442 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| @@ -1704,6 +1727,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1704 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1727 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1705 | aliased_images.push_back(&aliased); | 1728 | aliased_images.push_back(&aliased); |
| 1706 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | 1729 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); |
| 1730 | if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { | ||
| 1731 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1732 | } | ||
| 1707 | } | 1733 | } |
| 1708 | } | 1734 | } |
| 1709 | if (aliased_images.empty()) { | 1735 | if (aliased_images.empty()) { |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 647ca0730..5dabc344b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -59,8 +59,12 @@ class TextureCache { | |||
| 59 | /// True when the API can provide info about the memory of the device. | 59 | /// True when the API can provide info about the memory of the device. |
| 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 61 | 61 | ||
| 62 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 62 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| 63 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 63 | static constexpr s64 MIN_VACANCY_EXPECTED = (6 * TARGET_THRESHOLD) / 10; |
| 64 | static constexpr s64 MIN_VACANCY_CRITICAL = (3 * TARGET_THRESHOLD) / 10; | ||
| 65 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; | ||
| 66 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; | ||
| 67 | static constexpr size_t GC_EMERGENCY_COUNTS = 2; | ||
| 64 | 68 | ||
| 65 | using Runtime = typename P::Runtime; | 69 | using Runtime = typename P::Runtime; |
| 66 | using Image = typename P::Image; | 70 | using Image = typename P::Image; |
| @@ -372,6 +376,7 @@ private: | |||
| 372 | u64 minimum_memory; | 376 | u64 minimum_memory; |
| 373 | u64 expected_memory; | 377 | u64 expected_memory; |
| 374 | u64 critical_memory; | 378 | u64 critical_memory; |
| 379 | size_t critical_gc; | ||
| 375 | 380 | ||
| 376 | SlotVector<Image> slot_images; | 381 | SlotVector<Image> slot_images; |
| 377 | SlotVector<ImageMapView> slot_map_views; | 382 | SlotVector<ImageMapView> slot_map_views; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 32c10d675..dab5b4fe4 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -12,12 +12,14 @@ | |||
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | 13 | ||
| 14 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 15 | #include "common/literals.h" | ||
| 15 | #include "common/settings.h" | 16 | #include "common/settings.h" |
| 16 | #include "video_core/vulkan_common/nsight_aftermath_tracker.h" | 17 | #include "video_core/vulkan_common/nsight_aftermath_tracker.h" |
| 17 | #include "video_core/vulkan_common/vulkan_device.h" | 18 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | using namespace Common::Literals; | ||
| 21 | namespace { | 23 | namespace { |
| 22 | namespace Alternatives { | 24 | namespace Alternatives { |
| 23 | constexpr std::array STENCIL8_UINT{ | 25 | constexpr std::array STENCIL8_UINT{ |
| @@ -596,6 +598,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 596 | } | 598 | } |
| 597 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | 599 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); |
| 598 | 600 | ||
| 601 | is_integrated = (properties.deviceType & VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) != 0; | ||
| 602 | is_virtual = (properties.deviceType & VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU) != 0; | ||
| 603 | is_non_gpu = (properties.deviceType & VK_PHYSICAL_DEVICE_TYPE_OTHER) != 0 || | ||
| 604 | (properties.deviceType & VK_PHYSICAL_DEVICE_TYPE_CPU) != 0; | ||
| 605 | |||
| 599 | CollectPhysicalMemoryInfo(); | 606 | CollectPhysicalMemoryInfo(); |
| 600 | CollectTelemetryParameters(); | 607 | CollectTelemetryParameters(); |
| 601 | CollectToolingInfo(); | 608 | CollectToolingInfo(); |
| @@ -985,6 +992,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 985 | test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, | 992 | test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, |
| 986 | false); | 993 | false); |
| 987 | test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); | 994 | test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); |
| 995 | test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true); | ||
| 988 | if (Settings::values.enable_nsight_aftermath) { | 996 | if (Settings::values.enable_nsight_aftermath) { |
| 989 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 997 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 990 | true); | 998 | true); |
| @@ -997,7 +1005,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 997 | VkPhysicalDeviceFeatures2KHR features{}; | 1005 | VkPhysicalDeviceFeatures2KHR features{}; |
| 998 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | 1006 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; |
| 999 | 1007 | ||
| 1000 | VkPhysicalDeviceProperties2KHR physical_properties; | 1008 | VkPhysicalDeviceProperties2KHR physical_properties{}; |
| 1001 | physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | 1009 | physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; |
| 1002 | 1010 | ||
| 1003 | if (has_khr_shader_float16_int8) { | 1011 | if (has_khr_shader_float16_int8) { |
| @@ -1267,15 +1275,51 @@ void Device::CollectTelemetryParameters() { | |||
| 1267 | vendor_name = driver.driverName; | 1275 | vendor_name = driver.driverName; |
| 1268 | } | 1276 | } |
| 1269 | 1277 | ||
| 1278 | u64 Device::GetDeviceMemoryUsage() const { | ||
| 1279 | VkPhysicalDeviceMemoryBudgetPropertiesEXT budget; | ||
| 1280 | budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; | ||
| 1281 | budget.pNext = nullptr; | ||
| 1282 | physical.GetMemoryProperties(&budget); | ||
| 1283 | u64 result{}; | ||
| 1284 | for (const size_t heap : valid_heap_memory) { | ||
| 1285 | result += budget.heapUsage[heap]; | ||
| 1286 | } | ||
| 1287 | return result; | ||
| 1288 | } | ||
| 1289 | |||
| 1270 | void Device::CollectPhysicalMemoryInfo() { | 1290 | void Device::CollectPhysicalMemoryInfo() { |
| 1271 | const auto mem_properties = physical.GetMemoryProperties(); | 1291 | VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; |
| 1292 | budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; | ||
| 1293 | const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); | ||
| 1294 | const auto& mem_properties = mem_info.memoryProperties; | ||
| 1272 | const size_t num_properties = mem_properties.memoryHeapCount; | 1295 | const size_t num_properties = mem_properties.memoryHeapCount; |
| 1273 | device_access_memory = 0; | 1296 | device_access_memory = 0; |
| 1297 | u64 device_initial_usage = 0; | ||
| 1298 | u64 local_memory = 0; | ||
| 1274 | for (size_t element = 0; element < num_properties; ++element) { | 1299 | for (size_t element = 0; element < num_properties; ++element) { |
| 1275 | if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { | 1300 | const bool is_heap_local = |
| 1276 | device_access_memory += mem_properties.memoryHeaps[element].size; | 1301 | mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT != 0; |
| 1302 | if (!is_integrated && !is_heap_local) { | ||
| 1303 | continue; | ||
| 1277 | } | 1304 | } |
| 1305 | valid_heap_memory.push_back(element); | ||
| 1306 | if (is_heap_local) { | ||
| 1307 | local_memory += mem_properties.memoryHeaps[element].size; | ||
| 1308 | } | ||
| 1309 | if (ext_memory_budget) { | ||
| 1310 | device_initial_usage += budget.heapUsage[element]; | ||
| 1311 | device_access_memory += budget.heapBudget[element]; | ||
| 1312 | continue; | ||
| 1313 | } | ||
| 1314 | device_access_memory += mem_properties.memoryHeaps[element].size; | ||
| 1315 | } | ||
| 1316 | if (!is_integrated) { | ||
| 1317 | return; | ||
| 1278 | } | 1318 | } |
| 1319 | const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); | ||
| 1320 | device_access_memory = static_cast<u64>(std::max<s64>( | ||
| 1321 | std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory))); | ||
| 1322 | device_initial_usage = 0; | ||
| 1279 | } | 1323 | } |
| 1280 | 1324 | ||
| 1281 | void Device::CollectToolingInfo() { | 1325 | void Device::CollectToolingInfo() { |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1c7c18bcf..2d709d069 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -341,6 +341,12 @@ public: | |||
| 341 | return device_access_memory; | 341 | return device_access_memory; |
| 342 | } | 342 | } |
| 343 | 343 | ||
| 344 | bool CanReportMemoryUsage() const { | ||
| 345 | return ext_memory_budget; | ||
| 346 | } | ||
| 347 | |||
| 348 | u64 GetDeviceMemoryUsage() const; | ||
| 349 | |||
| 344 | u32 GetSetsPerPool() const { | 350 | u32 GetSetsPerPool() const { |
| 345 | return sets_per_pool; | 351 | return sets_per_pool; |
| 346 | } | 352 | } |
| @@ -421,6 +427,9 @@ private: | |||
| 421 | bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list | 427 | bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list |
| 422 | ///< topologies. | 428 | ///< topologies. |
| 423 | bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. | 429 | bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. |
| 430 | bool is_integrated{}; ///< Is GPU an iGPU. | ||
| 431 | bool is_virtual{}; ///< Is GPU a virtual GPU. | ||
| 432 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. | ||
| 424 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 433 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 425 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. | 434 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. |
| 426 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. | 435 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. |
| @@ -445,6 +454,7 @@ private: | |||
| 445 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. | 454 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. |
| 446 | bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. | 455 | bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. |
| 447 | bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. | 456 | bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. |
| 457 | bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. | ||
| 448 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 458 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 449 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit | 459 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit |
| 450 | bool has_renderdoc{}; ///< Has RenderDoc attached | 460 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| @@ -456,6 +466,7 @@ private: | |||
| 456 | // Telemetry parameters | 466 | // Telemetry parameters |
| 457 | std::string vendor_name; ///< Device's driver name. | 467 | std::string vendor_name; ///< Device's driver name. |
| 458 | std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. | 468 | std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. |
| 469 | std::vector<size_t> valid_heap_memory; ///< Heaps used. | ||
| 459 | 470 | ||
| 460 | /// Format properties dictionary. | 471 | /// Format properties dictionary. |
| 461 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 472 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 300a61205..e6e97b332 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -227,7 +227,7 @@ void MemoryCommit::Release() { | |||
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) | 229 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) |
| 230 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, | 230 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, |
| 231 | export_allocations{export_allocations_}, | 231 | export_allocations{export_allocations_}, |
| 232 | buffer_image_granularity{ | 232 | buffer_image_granularity{ |
| 233 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} | 233 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index a794f16dd..742cc39da 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -237,8 +237,8 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { | |||
| 237 | return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && | 237 | return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && |
| 238 | X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && | 238 | X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && |
| 239 | X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && | 239 | X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && |
| 240 | X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) && | 240 | X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceMemoryProperties2) && |
| 241 | X(vkGetPhysicalDeviceQueueFamilyProperties); | 241 | X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties); |
| 242 | #undef X | 242 | #undef X |
| 243 | } | 243 | } |
| 244 | 244 | ||
| @@ -926,9 +926,12 @@ std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR( | |||
| 926 | return modes; | 926 | return modes; |
| 927 | } | 927 | } |
| 928 | 928 | ||
| 929 | VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { | 929 | VkPhysicalDeviceMemoryProperties2 PhysicalDevice::GetMemoryProperties( |
| 930 | VkPhysicalDeviceMemoryProperties properties; | 930 | void* next_structures) const noexcept { |
| 931 | dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); | 931 | VkPhysicalDeviceMemoryProperties2 properties{}; |
| 932 | properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; | ||
| 933 | properties.pNext = next_structures; | ||
| 934 | dld->vkGetPhysicalDeviceMemoryProperties2(physical_device, &properties); | ||
| 932 | return properties; | 935 | return properties; |
| 933 | } | 936 | } |
| 934 | 937 | ||
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 53bac627f..0a5f9931c 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -172,6 +172,7 @@ struct InstanceDispatch { | |||
| 172 | PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{}; | 172 | PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{}; |
| 173 | PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{}; | 173 | PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{}; |
| 174 | PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{}; | 174 | PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{}; |
| 175 | PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{}; | ||
| 175 | PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{}; | 176 | PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{}; |
| 176 | PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{}; | 177 | PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{}; |
| 177 | PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{}; | 178 | PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{}; |
| @@ -950,7 +951,8 @@ public: | |||
| 950 | 951 | ||
| 951 | std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; | 952 | std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; |
| 952 | 953 | ||
| 953 | VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; | 954 | VkPhysicalDeviceMemoryProperties2 GetMemoryProperties( |
| 955 | void* next_structures = nullptr) const noexcept; | ||
| 954 | 956 | ||
| 955 | private: | 957 | private: |
| 956 | VkPhysicalDevice physical_device = nullptr; | 958 | VkPhysicalDevice physical_device = nullptr; |