diff options
| author | 2022-10-05 08:54:22 +0200 | |
|---|---|---|
| committer | 2022-10-05 08:54:22 +0200 | |
| commit | 71fe9fd0f26b7780c914a37c5e38e43031b54bb8 (patch) | |
| tree | a920904929ab7c1d070c4b66546e7242571a2d42 /src | |
| parent | Merge pull request #9011 from liamwhite/frog-emoji-moment (diff) | |
| parent | vulkan: automatically use larger staging buffer sizes when possible (diff) | |
| download | yuzu-71fe9fd0f26b7780c914a37c5e38e43031b54bb8.tar.gz yuzu-71fe9fd0f26b7780c914a37c5e38e43031b54bb8.tar.xz yuzu-71fe9fd0f26b7780c914a37c5e38e43031b54bb8.zip | |
Merge pull request #8987 from liamwhite/another-name-for-reinforcement-steel
vulkan: automatically use larger staging buffer sizes when possible
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 3 |
2 files changed, 60 insertions, 27 deletions
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 06f68d09a..7fb256953 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
| @@ -26,20 +26,39 @@ using namespace Common::Literals; | |||
| 26 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | 26 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; |
| 27 | // Maximum size to put elements in the stream buffer | 27 | // Maximum size to put elements in the stream buffer |
| 28 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; | 28 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; |
| 29 | // Stream buffer size in bytes | ||
| 30 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | ||
| 31 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||
| 32 | 29 | ||
| 33 | constexpr VkMemoryPropertyFlags HOST_FLAGS = | 30 | constexpr VkMemoryPropertyFlags HOST_FLAGS = |
| 34 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | 31 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; |
| 35 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | 32 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; |
| 36 | 33 | ||
| 37 | bool IsStreamHeap(VkMemoryHeap heap) noexcept { | 34 | static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept { |
| 38 | return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | 35 | return staging_buffer_size < (heap.size * 2) / 3; |
| 36 | } | ||
| 37 | |||
| 38 | static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) { | ||
| 39 | const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT}; | ||
| 40 | |||
| 41 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||
| 42 | const auto& memory_type{props.memoryTypes[type_index]}; | ||
| 43 | |||
| 44 | if ((memory_type.propertyFlags & flags) != flags) { | ||
| 45 | // Memory must be device local and host visible | ||
| 46 | continue; | ||
| 47 | } | ||
| 48 | |||
| 49 | const auto& heap{props.memoryHeaps[memory_type.heapIndex]}; | ||
| 50 | if (heap.size >= 7168_MiB) { | ||
| 51 | // This is the right type of memory | ||
| 52 | return true; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | return false; | ||
| 39 | } | 57 | } |
| 40 | 58 | ||
| 41 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | 59 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, |
| 42 | VkMemoryPropertyFlags flags) noexcept { | 60 | VkMemoryPropertyFlags flags, |
| 61 | size_t staging_buffer_size) noexcept { | ||
| 43 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | 62 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { |
| 44 | if (((type_mask >> type_index) & 1) == 0) { | 63 | if (((type_mask >> type_index) & 1) == 0) { |
| 45 | // Memory type is incompatible | 64 | // Memory type is incompatible |
| @@ -50,7 +69,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p | |||
| 50 | // Memory type doesn't have the flags we want | 69 | // Memory type doesn't have the flags we want |
| 51 | continue; | 70 | continue; |
| 52 | } | 71 | } |
| 53 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | 72 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) { |
| 54 | // Memory heap is not suitable for streaming | 73 | // Memory heap is not suitable for streaming |
| 55 | continue; | 74 | continue; |
| 56 | } | 75 | } |
| @@ -61,17 +80,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p | |||
| 61 | } | 80 | } |
| 62 | 81 | ||
| 63 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | 82 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, |
| 64 | bool try_device_local) { | 83 | bool try_device_local, size_t staging_buffer_size) { |
| 65 | std::optional<u32> type; | 84 | std::optional<u32> type; |
| 66 | if (try_device_local) { | 85 | if (try_device_local) { |
| 67 | // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this | 86 | // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this |
| 68 | type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | 87 | type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size); |
| 69 | if (type) { | 88 | if (type) { |
| 70 | return *type; | 89 | return *type; |
| 71 | } | 90 | } |
| 72 | } | 91 | } |
| 73 | // Otherwise try without the DEVICE_LOCAL_BIT | 92 | // Otherwise try without the DEVICE_LOCAL_BIT |
| 74 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | 93 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size); |
| 75 | if (type) { | 94 | if (type) { |
| 76 | return *type; | 95 | return *type; |
| 77 | } | 96 | } |
| @@ -79,20 +98,32 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_ | |||
| 79 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | 98 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); |
| 80 | } | 99 | } |
| 81 | 100 | ||
| 82 | size_t Region(size_t iterator) noexcept { | 101 | size_t Region(size_t iterator, size_t region_size) noexcept { |
| 83 | return iterator / REGION_SIZE; | 102 | return iterator / region_size; |
| 84 | } | 103 | } |
| 85 | } // Anonymous namespace | 104 | } // Anonymous namespace |
| 86 | 105 | ||
| 87 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 106 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 88 | Scheduler& scheduler_) | 107 | Scheduler& scheduler_) |
| 89 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | 108 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 109 | |||
| 110 | const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties}; | ||
| 111 | if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) { | ||
| 112 | // Possible on many integrated and newer discrete cards | ||
| 113 | staging_buffer_size = 1_GiB; | ||
| 114 | } else { | ||
| 115 | // Well-supported default size used by most Vulkan PC games | ||
| 116 | staging_buffer_size = 256_MiB; | ||
| 117 | } | ||
| 118 | |||
| 119 | region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS; | ||
| 120 | |||
| 90 | const vk::Device& dev = device.GetLogical(); | 121 | const vk::Device& dev = device.GetLogical(); |
| 91 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | 122 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ |
| 92 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 123 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 93 | .pNext = nullptr, | 124 | .pNext = nullptr, |
| 94 | .flags = 0, | 125 | .flags = 0, |
| 95 | .size = STREAM_BUFFER_SIZE, | 126 | .size = staging_buffer_size, |
| 96 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 127 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 97 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | 128 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 98 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 129 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| @@ -117,19 +148,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 117 | .image = nullptr, | 148 | .image = nullptr, |
| 118 | .buffer = *stream_buffer, | 149 | .buffer = *stream_buffer, |
| 119 | }; | 150 | }; |
| 120 | const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; | ||
| 121 | VkMemoryAllocateInfo stream_memory_info{ | 151 | VkMemoryAllocateInfo stream_memory_info{ |
| 122 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | 152 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| 123 | .pNext = make_dedicated ? &dedicated_info : nullptr, | 153 | .pNext = make_dedicated ? &dedicated_info : nullptr, |
| 124 | .allocationSize = requirements.size, | 154 | .allocationSize = requirements.size, |
| 125 | .memoryTypeIndex = | 155 | .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true, |
| 126 | FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), | 156 | staging_buffer_size), |
| 127 | }; | 157 | }; |
| 128 | stream_memory = dev.TryAllocateMemory(stream_memory_info); | 158 | stream_memory = dev.TryAllocateMemory(stream_memory_info); |
| 129 | if (!stream_memory) { | 159 | if (!stream_memory) { |
| 130 | LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); | 160 | LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); |
| 131 | stream_memory_info.memoryTypeIndex = | 161 | stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex( |
| 132 | FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); | 162 | memory_properties, requirements.memoryTypeBits, false, staging_buffer_size); |
| 133 | stream_memory = dev.AllocateMemory(stream_memory_info); | 163 | stream_memory = dev.AllocateMemory(stream_memory_info); |
| 134 | } | 164 | } |
| 135 | 165 | ||
| @@ -137,7 +167,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 137 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | 167 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); |
| 138 | } | 168 | } |
| 139 | stream_buffer.BindMemory(*stream_memory, 0); | 169 | stream_buffer.BindMemory(*stream_memory, 0); |
| 140 | stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | 170 | stream_pointer = stream_memory.Map(0, staging_buffer_size); |
| 141 | } | 171 | } |
| 142 | 172 | ||
| 143 | StagingBufferPool::~StagingBufferPool() = default; | 173 | StagingBufferPool::~StagingBufferPool() = default; |
| @@ -158,25 +188,25 @@ void StagingBufferPool::TickFrame() { | |||
| 158 | } | 188 | } |
| 159 | 189 | ||
| 160 | StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | 190 | StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { |
| 161 | if (AreRegionsActive(Region(free_iterator) + 1, | 191 | if (AreRegionsActive(Region(free_iterator, region_size) + 1, |
| 162 | std::min(Region(iterator + size) + 1, NUM_SYNCS))) { | 192 | std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) { |
| 163 | // Avoid waiting for the previous usages to be free | 193 | // Avoid waiting for the previous usages to be free |
| 164 | return GetStagingBuffer(size, MemoryUsage::Upload); | 194 | return GetStagingBuffer(size, MemoryUsage::Upload); |
| 165 | } | 195 | } |
| 166 | const u64 current_tick = scheduler.CurrentTick(); | 196 | const u64 current_tick = scheduler.CurrentTick(); |
| 167 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), | 197 | std::fill(sync_ticks.begin() + Region(used_iterator, region_size), |
| 168 | current_tick); | 198 | sync_ticks.begin() + Region(iterator, region_size), current_tick); |
| 169 | used_iterator = iterator; | 199 | used_iterator = iterator; |
| 170 | free_iterator = std::max(free_iterator, iterator + size); | 200 | free_iterator = std::max(free_iterator, iterator + size); |
| 171 | 201 | ||
| 172 | if (iterator + size >= STREAM_BUFFER_SIZE) { | 202 | if (iterator + size >= staging_buffer_size) { |
| 173 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | 203 | std::fill(sync_ticks.begin() + Region(used_iterator, region_size), |
| 174 | current_tick); | 204 | sync_ticks.begin() + NUM_SYNCS, current_tick); |
| 175 | used_iterator = 0; | 205 | used_iterator = 0; |
| 176 | iterator = 0; | 206 | iterator = 0; |
| 177 | free_iterator = size; | 207 | free_iterator = size; |
| 178 | 208 | ||
| 179 | if (AreRegionsActive(0, Region(size) + 1)) { | 209 | if (AreRegionsActive(0, Region(size, region_size) + 1)) { |
| 180 | // Avoid waiting for the previous usages to be free | 210 | // Avoid waiting for the previous usages to be free |
| 181 | return GetStagingBuffer(size, MemoryUsage::Upload); | 211 | return GetStagingBuffer(size, MemoryUsage::Upload); |
| 182 | } | 212 | } |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 91dc84da8..90c67177f 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
| @@ -93,6 +93,9 @@ private: | |||
| 93 | size_t free_iterator = 0; | 93 | size_t free_iterator = 0; |
| 94 | std::array<u64, NUM_SYNCS> sync_ticks{}; | 94 | std::array<u64, NUM_SYNCS> sync_ticks{}; |
| 95 | 95 | ||
| 96 | size_t staging_buffer_size = 0; | ||
| 97 | size_t region_size = 0; | ||
| 98 | |||
| 96 | StagingBuffersCache device_local_cache; | 99 | StagingBuffersCache device_local_cache; |
| 97 | StagingBuffersCache upload_cache; | 100 | StagingBuffersCache upload_cache; |
| 98 | StagingBuffersCache download_cache; | 101 | StagingBuffersCache download_cache; |