summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-04-17 05:13:35 -0300
committerGravatar ReinUsesLisp2020-04-17 18:12:48 -0300
commita7b6bd56d79f462bb1ca39da91b1bb3d4226fe24 (patch)
treeb3af324d3ef4eb262a709b14d89f63362505c653 /src
parentMerge pull request #3703 from yuzu-emu/revert-3656-glsl-full-decompile (diff)
downloadyuzu-a7b6bd56d79f462bb1ca39da91b1bb3d4226fe24.tar.gz
yuzu-a7b6bd56d79f462bb1ca39da91b1bb3d4226fe24.tar.xz
yuzu-a7b6bd56d79f462bb1ca39da91b1bb3d4226fe24.zip
vk_stream_buffer: Fix out of memory on boot on recent Nvidia drivers
Nvidia recently introduced a new memory type for data streaming (awesome!), but yuzu assumed that every heap had enough memory for the fixed stream buffer size (256 MiB). This worked fine on AMD, but Nvidia's new memory heap is smaller than 256 MiB. This commit drops that assumption and allocates slightly less than the size of the preferred heap, capped at 256 MiB (to avoid allocating all system memory on integrated devices).
- Fixes a crash on NVIDIA 450.82.0.0
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp76
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h5
2 files changed, 48 insertions, 33 deletions
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 38a93a01a..868447af2 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <limits>
6#include <optional> 7#include <optional>
7#include <tuple> 8#include <tuple>
8#include <vector> 9#include <vector>
@@ -22,22 +23,38 @@ namespace {
22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 23constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 24constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 25
25constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; 26constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
26 27
27std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, 28/// Find a memory type with the passed requirements
28 VkMemoryPropertyFlags wanted) { 29std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
29 const auto properties = device.GetPhysical().GetMemoryProperties(); 30 VkMemoryPropertyFlags wanted,
30 for (u32 i = 0; i < properties.memoryTypeCount; i++) { 31 u32 filter = std::numeric_limits<u32>::max()) {
31 if (!(filter & (1 << i))) { 32 for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
32 continue; 33 const auto flags = properties.memoryTypes[i].propertyFlags;
33 } 34 if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
34 if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
35 return i; 35 return i;
36 } 36 }
37 } 37 }
38 return std::nullopt; 38 return std::nullopt;
39} 39}
40 40
41/// Get the preferred host visible memory type.
42u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
43 u32 filter = std::numeric_limits<u32>::max()) {
44 // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
45 // Otherwise search for a host visible allocation.
46 static constexpr auto HOST_MEMORY =
47 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
48 static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
49
50 std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
51 if (!preferred_type) {
52 preferred_type = FindMemoryType(properties, HOST_MEMORY);
53 ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
54 }
55 return preferred_type.value_or(0);
56}
57
41} // Anonymous namespace 58} // Anonymous namespace
42 59
43VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, 60VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
@@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
51VKStreamBuffer::~VKStreamBuffer() = default; 68VKStreamBuffer::~VKStreamBuffer() = default;
52 69
53std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { 70std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
54 ASSERT(size <= STREAM_BUFFER_SIZE); 71 ASSERT(size <= stream_buffer_size);
55 mapped_size = size; 72 mapped_size = size;
56 73
57 if (alignment > 0) { 74 if (alignment > 0) {
@@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
61 WaitPendingOperations(offset); 78 WaitPendingOperations(offset);
62 79
63 bool invalidated = false; 80 bool invalidated = false;
64 if (offset + size > STREAM_BUFFER_SIZE) { 81 if (offset + size > stream_buffer_size) {
65 // The buffer would overflow, save the amount of used watches and reset the state. 82 // The buffer would overflow, save the amount of used watches and reset the state.
66 invalidation_mark = current_watch_cursor; 83 invalidation_mark = current_watch_cursor;
67 current_watch_cursor = 0; 84 current_watch_cursor = 0;
@@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) {
98} 115}
99 116
100void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { 117void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
118 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
119 const u32 preferred_type = GetMemoryType(memory_properties);
120 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
121
122 // Subtract some bytes from the preferred heap size to avoid running out of memory.
123 const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
124 const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
125
101 VkBufferCreateInfo buffer_ci; 126 VkBufferCreateInfo buffer_ci;
102 buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 127 buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
103 buffer_ci.pNext = nullptr; 128 buffer_ci.pNext = nullptr;
104 buffer_ci.flags = 0; 129 buffer_ci.flags = 0;
105 buffer_ci.size = STREAM_BUFFER_SIZE; 130 buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
106 buffer_ci.usage = usage; 131 buffer_ci.usage = usage;
107 buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 132 buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
108 buffer_ci.queueFamilyIndexCount = 0; 133 buffer_ci.queueFamilyIndexCount = 0;
109 buffer_ci.pQueueFamilyIndices = nullptr; 134 buffer_ci.pQueueFamilyIndices = nullptr;
110 135
111 const auto& dev = device.GetLogical(); 136 buffer = device.GetLogical().CreateBuffer(buffer_ci);
112 buffer = dev.CreateBuffer(buffer_ci); 137
113 138 const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
114 const auto& dld = device.GetDispatchLoader(); 139 const u32 required_flags = requirements.memoryTypeBits;
115 const auto requirements = dev.GetBufferMemoryRequirements(*buffer); 140 stream_buffer_size = static_cast<u64>(requirements.size);
116 // Prefer device local host visible allocations (this should hit AMD's pinned memory). 141
117 auto type =
118 FindMemoryType(device, requirements.memoryTypeBits,
119 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
120 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
121 if (!type) {
122 // Otherwise search for a host visible allocation.
123 type = FindMemoryType(device, requirements.memoryTypeBits,
124 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
125 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
126 ASSERT_MSG(type, "No host visible and coherent memory type found");
127 }
128 VkMemoryAllocateInfo memory_ai; 142 VkMemoryAllocateInfo memory_ai;
129 memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 143 memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
130 memory_ai.pNext = nullptr; 144 memory_ai.pNext = nullptr;
131 memory_ai.allocationSize = requirements.size; 145 memory_ai.allocationSize = requirements.size;
132 memory_ai.memoryTypeIndex = *type; 146 memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
133 147
134 memory = dev.AllocateMemory(memory_ai); 148 memory = device.GetLogical().AllocateMemory(memory_ai);
135 buffer.BindMemory(*memory, 0); 149 buffer.BindMemory(*memory, 0);
136} 150}
137 151
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 58ce8b973..dfddf7ad6 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -56,8 +56,9 @@ private:
56 const VKDevice& device; ///< Vulkan device manager. 56 const VKDevice& device; ///< Vulkan device manager.
57 VKScheduler& scheduler; ///< Command scheduler. 57 VKScheduler& scheduler; ///< Command scheduler.
58 58
59 vk::Buffer buffer; ///< Mapped buffer. 59 vk::Buffer buffer; ///< Mapped buffer.
60 vk::DeviceMemory memory; ///< Memory allocation. 60 vk::DeviceMemory memory; ///< Memory allocation.
61 u64 stream_buffer_size{}; ///< Stream buffer size.
61 62
62 u64 offset{}; ///< Buffer iterator. 63 u64 offset{}; ///< Buffer iterator.
63 u64 mapped_size{}; ///< Size reserved for the current copy. 64 u64 mapped_size{}; ///< Size reserved for the current copy.