diff options
4 files changed, 59 insertions, 14 deletions
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index ce92f66ab..b278614e6 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -24,25 +24,38 @@ using namespace Common::Literals; | |||
| 24 | 24 | ||
| 25 | // Maximum potential alignment of a Vulkan buffer | 25 | // Maximum potential alignment of a Vulkan buffer |
| 26 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | 26 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; |
| 27 | // Maximum size to put elements in the stream buffer | ||
| 28 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; | ||
| 29 | // Stream buffer size in bytes | 27 | // Stream buffer size in bytes |
| 30 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | 28 | constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; |
| 31 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||
| 32 | 29 | ||
| 33 | size_t Region(size_t iterator) noexcept { | 30 | size_t GetStreamBufferSize(const Device& device) { |
| 34 | return iterator / REGION_SIZE; | 31 | VkDeviceSize size{0}; |
| 32 | if (device.HasDebuggingToolAttached()) { | ||
| 33 | ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { | ||
| 34 | size = std::max(size, heap.size); | ||
| 35 | }); | ||
| 36 | // If ReBAR (Resizable BAR) is not supported, cap the stream buffer at 40% of the largest | |
| 37 | // host visible, device local heap. This allows 2 captures to be loaded at the same time in | |
| 38 | // RenderDoc. If ReBAR is supported, this shouldn't be an issue as the heap will be much larger. | |
| 39 | if (size <= 256_MiB) { | ||
| 40 | size = size * 40 / 100; | ||
| 41 | } | ||
| 42 | } else { | ||
| 43 | size = MAX_STREAM_BUFFER_SIZE; | ||
| 44 | } | ||
| 45 | return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); | ||
| 35 | } | 46 | } |
| 36 | } // Anonymous namespace | 47 | } // Anonymous namespace |
| 37 | 48 | ||
| 38 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 49 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 39 | Scheduler& scheduler_) | 50 | Scheduler& scheduler_) |
| 40 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | 51 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, |
| 52 | stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / | ||
| 53 | StagingBufferPool::NUM_SYNCS} { | ||
| 41 | VkBufferCreateInfo stream_ci = { | 54 | VkBufferCreateInfo stream_ci = { |
| 42 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 55 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 43 | .pNext = nullptr, | 56 | .pNext = nullptr, |
| 44 | .flags = 0, | 57 | .flags = 0, |
| 45 | .size = STREAM_BUFFER_SIZE, | 58 | .size = stream_buffer_size, |
| 46 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 59 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 47 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | 60 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 48 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 61 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 63 | StagingBufferPool::~StagingBufferPool() = default; | 76 | StagingBufferPool::~StagingBufferPool() = default; |
| 64 | 77 | ||
| 65 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { | 78 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { |
| 66 | if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { | 79 | if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { |
| 67 | return GetStreamBuffer(size); | 80 | return GetStreamBuffer(size); |
| 68 | } | 81 | } |
| 69 | return GetStagingBuffer(size, usage, deferred); | 82 | return GetStagingBuffer(size, usage, deferred); |
| @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | |||
| 101 | used_iterator = iterator; | 114 | used_iterator = iterator; |
| 102 | free_iterator = std::max(free_iterator, iterator + size); | 115 | free_iterator = std::max(free_iterator, iterator + size); |
| 103 | 116 | ||
| 104 | if (iterator + size >= STREAM_BUFFER_SIZE) { | 117 | if (iterator + size >= stream_buffer_size) { |
| 105 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | 118 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, |
| 106 | current_tick); | 119 | current_tick); |
| 107 | used_iterator = 0; | 120 | used_iterator = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 5f69f08b1..d3deb9072 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -90,6 +90,9 @@ private: | |||
| 90 | void ReleaseCache(MemoryUsage usage); | 90 | void ReleaseCache(MemoryUsage usage); |
| 91 | 91 | ||
| 92 | void ReleaseLevel(StagingBuffersCache& cache, size_t log2); | 92 | void ReleaseLevel(StagingBuffersCache& cache, size_t log2); |
| 93 | size_t Region(size_t iter) const noexcept { | ||
| 94 | return iter / region_size; | ||
| 95 | } | ||
| 93 | 96 | ||
| 94 | const Device& device; | 97 | const Device& device; |
| 95 | MemoryAllocator& memory_allocator; | 98 | MemoryAllocator& memory_allocator; |
| @@ -97,6 +100,8 @@ private: | |||
| 97 | 100 | ||
| 98 | vk::Buffer stream_buffer; | 101 | vk::Buffer stream_buffer; |
| 99 | std::span<u8> stream_pointer; | 102 | std::span<u8> stream_pointer; |
| 103 | VkDeviceSize stream_buffer_size; | ||
| 104 | VkDeviceSize region_size; | ||
| 100 | 105 | ||
| 101 | size_t iterator = 0; | 106 | size_t iterator = 0; |
| 102 | size_t used_iterator = 0; | 107 | size_t used_iterator = 0; |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 3ef381a38..82767fdf0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/literals.h" | ||
| 12 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 13 | #include "common/polyfill_ranges.h" | 14 | #include "common/polyfill_ranges.h" |
| 14 | #include "video_core/vulkan_common/vma.h" | 15 | #include "video_core/vulkan_common/vma.h" |
| @@ -69,8 +70,7 @@ struct Range { | |||
| 69 | case MemoryUsage::Download: | 70 | case MemoryUsage::Download: |
| 70 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; | 71 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; |
| 71 | case MemoryUsage::DeviceLocal: | 72 | case MemoryUsage::DeviceLocal: |
| 72 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | | 73 | return {}; |
| 73 | VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; | ||
| 74 | } | 74 | } |
| 75 | return {}; | 75 | return {}; |
| 76 | } | 76 | } |
| @@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) | |||
| 212 | : device{device_}, allocator{device.GetAllocator()}, | 212 | : device{device_}, allocator{device.GetAllocator()}, |
| 213 | properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, | 213 | properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, |
| 214 | buffer_image_granularity{ | 214 | buffer_image_granularity{ |
| 215 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} | 215 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { |
| 216 | // GPUs without ReBAR (Resizable BAR) may expose only a small heap (256 MiB or less) that is | |
| 217 | // both host visible and device local. In that case, opening 2 RenderDoc captures side-by-side | |
| 218 | // is not possible because that heap runs out of memory. When a debugging tool is attached and | |
| 219 | // such a small heap exists, only allow the stream buffer in this memory heap. | |
| 220 | if (device.HasDebuggingToolAttached()) { | ||
| 221 | using namespace Common::Literals; | ||
| 222 | ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { | ||
| 223 | if (heap.size <= 256_MiB) { | ||
| 224 | valid_memory_types &= ~(1u << index); | ||
| 225 | } | ||
| 226 | }); | ||
| 227 | } | ||
| 228 | } | ||
| 216 | 229 | ||
| 217 | MemoryAllocator::~MemoryAllocator() = default; | 230 | MemoryAllocator::~MemoryAllocator() = default; |
| 218 | 231 | ||
| @@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa | |||
| 244 | .usage = MemoryUsageVma(usage), | 257 | .usage = MemoryUsageVma(usage), |
| 245 | .requiredFlags = 0, | 258 | .requiredFlags = 0, |
| 246 | .preferredFlags = MemoryUsagePreferedVmaFlags(usage), | 259 | .preferredFlags = MemoryUsagePreferedVmaFlags(usage), |
| 247 | .memoryTypeBits = 0, | 260 | .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, |
| 248 | .pool = VK_NULL_HANDLE, | 261 | .pool = VK_NULL_HANDLE, |
| 249 | .pUserData = nullptr, | 262 | .pUserData = nullptr, |
| 250 | .priority = 0.f, | 263 | .priority = 0.f, |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index f449bc8d0..38a182bcb 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 12 | ||
| 12 | VK_DEFINE_HANDLE(VmaAllocator) | 13 | VK_DEFINE_HANDLE(VmaAllocator) |
| @@ -26,6 +27,18 @@ enum class MemoryUsage { | |||
| 26 | Stream, ///< Requests device local host visible buffer, falling back to host memory. | 27 | Stream, ///< Requests device local host visible buffer, falling back to host memory. |
| 27 | }; | 28 | }; |
| 28 | 29 | ||
| 30 | template <typename F> | ||
| 31 | void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { | ||
| 32 | auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; | ||
| 33 | for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { | ||
| 34 | auto& memory_type = memory_props.memoryTypes[i]; | ||
| 35 | if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && | ||
| 36 | (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { | ||
| 37 | f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 29 | /// Ownership handle of a memory commitment. | 42 | /// Ownership handle of a memory commitment. |
| 30 | /// Points to a subregion of a memory allocation. | 43 | /// Points to a subregion of a memory allocation. |
| 31 | class MemoryCommit { | 44 | class MemoryCommit { |
| @@ -124,6 +137,7 @@ private: | |||
| 124 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. | 137 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. |
| 125 | VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers | 138 | VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers |
| 126 | // and optimal images | 139 | // and optimal images |
| 140 | u32 valid_memory_types{~0u}; | ||
| 127 | }; | 141 | }; |
| 128 | 142 | ||
| 129 | } // namespace Vulkan | 143 | } // namespace Vulkan |