Diffstat (limited to 'src')
 src/video_core/renderer_vulkan/vk_buffer_cache.cpp   | 201
 src/video_core/renderer_vulkan/vk_buffer_cache.h     | 107
 src/video_core/renderer_vulkan/vk_memory_manager.cpp | 158
 src/video_core/renderer_vulkan/vk_memory_manager.h   |  72
 src/video_core/renderer_vulkan/vk_stream_buffer.cpp  | 142
 src/video_core/renderer_vulkan/vk_stream_buffer.h    |  44
 6 files changed, 415 insertions(+), 309 deletions(-)
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 46da81aaa..1ba544943 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -2,124 +2,145 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstring>
 #include <memory>
 #include <optional>
 #include <tuple>
 
-#include "common/alignment.h"
 #include "common/assert.h"
-#include "core/memory.h"
-#include "video_core/memory_manager.h"
+#include "common/bit_util.h"
+#include "core/core.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
+#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
 namespace Vulkan {
 
-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
-                                     std::size_t alignment, u8* host_ptr)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset},
-      alignment{alignment} {}
-
-VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
-                             Memory::Memory& cpu_memory_,
-                             VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                             VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
-    : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{
-                                                                                   cpu_memory_} {
-    const auto usage = vk::BufferUsageFlagBits::eVertexBuffer |
-                       vk::BufferUsageFlagBits::eIndexBuffer |
-                       vk::BufferUsageFlagBits::eUniformBuffer;
-    const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead |
-                        vk::AccessFlagBits::eUniformRead;
-    stream_buffer =
-        std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access,
-                                         vk::PipelineStageFlagBits::eAllCommands);
-    buffer_handle = stream_buffer->GetBuffer();
-}
-
-VKBufferCache::~VKBufferCache() = default;
-
-u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment, bool cache) {
-    const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
-    ASSERT_MSG(cpu_addr, "Invalid GPU address");
-
-    // Cache management is a big overhead, so only cache entries with a given size.
-    // TODO: Figure out which size is the best for given games.
-    cache &= size >= 2048;
-
-    u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)};
-    if (cache) {
-        const auto entry = TryGet(host_ptr);
-        if (entry) {
-            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
-                return entry->GetOffset();
-            }
-            Unregister(entry);
-        }
-    }
-
-    AlignBuffer(alignment);
-    const u64 uploaded_offset = buffer_offset;
-
-    if (host_ptr == nullptr) {
-        return uploaded_offset;
-    }
-
-    std::memcpy(buffer_ptr, host_ptr, size);
-    buffer_ptr += size;
-    buffer_offset += size;
-
-    if (cache) {
-        auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
-                                                         alignment, host_ptr);
-        Register(entry);
-    }
-
-    return uploaded_offset;
-}
-
-u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) {
-    AlignBuffer(alignment);
-    std::memcpy(buffer_ptr, raw_pointer, size);
-    const u64 uploaded_offset = buffer_offset;
-
-    buffer_ptr += size;
-    buffer_offset += size;
-    return uploaded_offset;
-}
-
-std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) {
-    AlignBuffer(alignment);
-    u8* const uploaded_ptr = buffer_ptr;
-    const u64 uploaded_offset = buffer_offset;
-
-    buffer_ptr += size;
-    buffer_offset += size;
-    return {uploaded_ptr, uploaded_offset};
-}
-
-void VKBufferCache::Reserve(std::size_t max_size) {
-    bool invalidate;
-    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size);
-    buffer_offset = buffer_offset_base;
-
-    if (invalidate) {
-        InvalidateAll();
-    }
-}
-
-void VKBufferCache::Send() {
-    stream_buffer->Send(buffer_offset - buffer_offset_base);
-}
-
-void VKBufferCache::AlignBuffer(std::size_t alignment) {
-    // Align the offset, not the mapped pointer
-    const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment);
-    buffer_ptr += offset_aligned - buffer_offset;
-    buffer_offset = offset_aligned;
-}
+namespace {
+
+const auto BufferUsage =
+    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
+    vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer;
+
+const auto UploadPipelineStage =
+    vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput |
+    vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+    vk::PipelineStageFlagBits::eComputeShader;
+
+const auto UploadAccessBarriers =
+    vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead |
+    vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead |
+    vk::AccessFlagBits::eIndexRead;
+
+auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
+    return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage);
+}
+
+} // Anonymous namespace
+
+CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                                     CacheAddr cache_addr, std::size_t size)
+    : VideoCommon::BufferBlock{cache_addr, size} {
+    const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
+                                         BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
+                                             vk::BufferUsageFlagBits::eTransferDst,
+                                         vk::SharingMode::eExclusive, 0, nullptr);
+
+    const auto& dld{device.GetDispatchLoader()};
+    const auto dev{device.GetLogical()};
+    buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    buffer.commit = memory_manager.Commit(*buffer.handle, false);
+}
+
+CachedBufferBlock::~CachedBufferBlock() = default;
+
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                             const VKDevice& device, VKMemoryManager& memory_manager,
+                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+    : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
+                                                                   CreateStreamBuffer(device,
+                                                                                      scheduler)},
+      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+                                                                                staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+}
+
+const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+    return buffer->GetHandle();
+}
+
+const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
+    size = std::max(size, std::size_t(4));
+    const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
+        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
+    });
+    return &*empty.handle;
+}
+
+void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                    const u8* data) {
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    std::memcpy(staging.commit->Map(size), data, size);
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
+                                     offset, size)},
+            {}, dld);
+    });
+}
+
+void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                                      u8* data) {
+    const auto& staging = staging_pool.GetUnusedBuffer(size, true);
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
+                      size](auto cmdbuf, auto& dld) {
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
+                vk::PipelineStageFlagBits::eComputeShader,
+            vk::PipelineStageFlagBits::eTransfer, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
+                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
+            {}, dld);
+        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+    });
+    scheduler.Finish();
+
+    std::memcpy(data, staging.commit->Map(size), size);
+}
+
+void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                              std::size_t dst_offset, std::size_t size) {
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
+                      dst_offset, size](auto cmdbuf, auto& dld) {
+        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
+        cmdbuf.pipelineBarrier(
+            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
+            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
+                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
+                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
+             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
+                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
+                                     dst_offset, size)},
+            {}, dld);
+    });
+}
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index daa8ccf66..3f38eed0c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -5,105 +5,74 @@
 #pragma once
 
 #include <memory>
-#include <tuple>
+#include <unordered_map>
+#include <vector>
 
 #include "common/common_types.h"
-#include "video_core/gpu.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
-namespace Memory {
-class Memory;
-}
-
-namespace Tegra {
-class MemoryManager;
+namespace Core {
+class System;
 }
 
 namespace Vulkan {
 
 class VKDevice;
-class VKFence;
 class VKMemoryManager;
-class VKStreamBuffer;
+class VKScheduler;
 
-class CachedBufferEntry final : public RasterizerCacheObject {
-public:
-    explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
-                               u8* host_ptr);
-
-    VAddr GetCpuAddr() const override {
-        return cpu_addr;
-    }
-
-    std::size_t GetSizeInBytes() const override {
-        return size;
-    }
-
-    std::size_t GetSize() const {
-        return size;
-    }
-
-    u64 GetOffset() const {
-        return offset;
-    }
-
-    std::size_t GetAlignment() const {
-        return alignment;
-    }
-
-private:
-    VAddr cpu_addr{};
-    std::size_t size{};
-    u64 offset{};
-    std::size_t alignment{};
-};
-
-class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
-public:
-    explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_,
-                           VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
-                           VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size);
-    ~VKBufferCache();
-
-    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
-    /// allocated.
-    u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true);
-
-    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
-    u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4);
-
-    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
-    std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4);
-
-    /// Reserves a region of memory to be used in subsequent upload/reserve operations.
-    void Reserve(std::size_t max_size);
-
-    /// Ensures that the set data is sent to the device.
-    void Send();
-
-    /// Returns the buffer cache handle.
-    vk::Buffer GetBuffer() const {
-        return buffer_handle;
-    }
-
-protected:
-    // We do not have to flush this cache as things in it are never modified by us.
-    void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
-
-private:
-    void AlignBuffer(std::size_t alignment);
-
-    Tegra::MemoryManager& tegra_memory_manager;
-    Memory::Memory& cpu_memory;
-
-    std::unique_ptr<VKStreamBuffer> stream_buffer;
-    vk::Buffer buffer_handle;
-
-    u8* buffer_ptr = nullptr;
-    u64 buffer_offset = 0;
-    u64 buffer_offset_base = 0;
+class CachedBufferBlock final : public VideoCommon::BufferBlock {
+public:
+    explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
+                               CacheAddr cache_addr, std::size_t size);
+    ~CachedBufferBlock();
+
+    const vk::Buffer* GetHandle() const {
+        return &*buffer.handle;
+    }
+
+private:
+    VKBuffer buffer;
+};
+
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
+public:
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                           const VKDevice& device, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
+    ~VKBufferCache();
+
+    const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
+
+protected:
+    void WriteBarrier() override {}
+
+    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+
+    const vk::Buffer* ToHandle(const Buffer& buffer) override;
+
+    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                         const u8* data) override;
+
+    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+                           u8* data) override;
+
+    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+                   std::size_t dst_offset, std::size_t size) override;
+
+private:
+    const VKDevice& device;
+    VKMemoryManager& memory_manager;
+    VKScheduler& scheduler;
+    VKStagingBufferPool& staging_pool;
 };
 
 } // namespace Vulkan
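
Note: every upload in the new backend goes through a host-visible staging buffer followed by a vk::BufferMemoryBarrier, as UploadBlockData above shows. The following is a standalone sketch of that copy-then-barrier pattern against plain vulkan.hpp, outside yuzu's scheduler and staging pool; the function name and parameters are illustrative assumptions, not part of this change.

    #include <vulkan/vulkan.hpp>

    // Records a copy from a host-visible staging buffer into a device-local block,
    // then makes the transfer write visible to every stage that may read the block.
    void UploadThroughStaging(vk::CommandBuffer cmdbuf, vk::Buffer staging, vk::Buffer dst,
                              vk::DeviceSize offset, vk::DeviceSize size) {
        cmdbuf.copyBuffer(staging, dst, {vk::BufferCopy(0, offset, size)});
        const vk::BufferMemoryBarrier barrier(
            vk::AccessFlagBits::eTransferWrite,
            vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eUniformRead |
                vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead,
            VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst, offset, size);
        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                               vk::PipelineStageFlagBits::eVertexInput |
                                   vk::PipelineStageFlagBits::eVertexShader |
                                   vk::PipelineStageFlagBits::eFragmentShader |
                                   vk::PipelineStageFlagBits::eComputeShader,
                               {}, {}, {barrier}, {});
    }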
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 0451babbf..9cc9979d0 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -6,6 +6,7 @@
 #include <optional>
 #include <tuple>
 #include <vector>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -16,34 +17,32 @@
 
 namespace Vulkan {
 
-// TODO(Rodrigo): Fine tune this number
-constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024;
+namespace {
+
+u64 GetAllocationChunkSize(u64 required_size) {
+    static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20};
+    auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
+    return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20);
+}
+
+} // Anonymous namespace
 
 class VKMemoryAllocation final {
 public:
     explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
-                                vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type)
-        : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size},
-          shifted_type{ShiftType(type)}, is_mappable{properties &
-                                                     vk::MemoryPropertyFlagBits::eHostVisible} {
-        if (is_mappable) {
-            const auto dev = device.GetLogical();
-            const auto& dld = device.GetDispatchLoader();
-            base_address = static_cast<u8*>(dev.mapMemory(memory, 0, alloc_size, {}, dld));
-        }
-    }
+                                vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type)
+        : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size},
+          shifted_type{ShiftType(type)} {}
 
     ~VKMemoryAllocation() {
         const auto dev = device.GetLogical();
         const auto& dld = device.GetDispatchLoader();
-        if (is_mappable)
-            dev.unmapMemory(memory, dld);
         dev.free(memory, nullptr, dld);
     }
 
     VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) {
-        auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast<u64>(commit_size),
-                                        static_cast<u64>(alignment));
+        auto found = TryFindFreeSection(free_iterator, allocation_size,
+                                        static_cast<u64>(commit_size), static_cast<u64>(alignment));
         if (!found) {
             found = TryFindFreeSection(0, free_iterator, static_cast<u64>(commit_size),
                                        static_cast<u64>(alignment));
@@ -52,8 +51,7 @@ public:
                 return nullptr;
             }
         }
-        u8* address = is_mappable ? base_address + *found : nullptr;
-        auto commit = std::make_unique<VKMemoryCommitImpl>(this, memory, address, *found,
+        auto commit = std::make_unique<VKMemoryCommitImpl>(device, this, memory, *found,
                                                            *found + commit_size);
         commits.push_back(commit.get());
 
@@ -65,12 +63,10 @@ public:
 
     void Free(const VKMemoryCommitImpl* commit) {
         ASSERT(commit);
-        const auto it =
-            std::find_if(commits.begin(), commits.end(),
-                         [&](const auto& stored_commit) { return stored_commit == commit; });
+
+        const auto it = std::find(std::begin(commits), std::end(commits), commit);
         if (it == commits.end()) {
-            LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!");
-            UNREACHABLE();
+            UNREACHABLE_MSG("Freeing unallocated commit!");
             return;
         }
         commits.erase(it);
@@ -88,11 +84,11 @@ private:
     }
 
     /// A memory allocator, it may return a free region between "start" and "end" with the solicited
-    /// requeriments.
+    /// requirements.
     std::optional<u64> TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const {
-        u64 iterator = start;
-        while (iterator + size < end) {
-            const u64 try_left = Common::AlignUp(iterator, alignment);
+        u64 iterator = Common::AlignUp(start, alignment);
+        while (iterator + size <= end) {
+            const u64 try_left = iterator;
             const u64 try_right = try_left + size;
 
             bool overlap = false;
@@ -100,7 +96,7 @@ private:
             const auto [commit_left, commit_right] = commit->interval;
             if (try_left < commit_right && commit_left < try_right) {
                 // There's an overlap, continue the search where the overlapping commit ends.
-                iterator = commit_right;
+                iterator = Common::AlignUp(commit_right, alignment);
                 overlap = true;
                 break;
             }
@@ -110,6 +106,7 @@ private:
                 return try_left;
             }
         }
+
         // No free regions where found, return an empty optional.
         return std::nullopt;
     }
@@ -117,12 +114,8 @@ private:
     const VKDevice& device;                   ///< Vulkan device.
     const vk::DeviceMemory memory;            ///< Vulkan memory allocation handler.
     const vk::MemoryPropertyFlags properties; ///< Vulkan properties.
-    const u64 alloc_size;                     ///< Size of this allocation.
+    const u64 allocation_size;                ///< Size of this allocation.
     const u32 shifted_type;                   ///< Stored Vulkan type of this allocation, shifted.
-    const bool is_mappable;                   ///< Whether the allocation is mappable.
-
-    /// Base address of the mapped pointer.
-    u8* base_address{};
 
     /// Hints where the next free region is likely going to be.
     u64 free_iterator{};
@@ -132,13 +125,15 @@ private:
 };
 
 VKMemoryManager::VKMemoryManager(const VKDevice& device)
-    : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())},
-      is_memory_unified{GetMemoryUnified(props)} {}
+    : device{device}, properties{device.GetPhysical().getMemoryProperties(
+                          device.GetDispatchLoader())},
+      is_memory_unified{GetMemoryUnified(properties)} {}
 
 VKMemoryManager::~VKMemoryManager() = default;
 
-VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) {
-    ASSERT(reqs.size < ALLOC_CHUNK_SIZE);
+VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements,
+                                       bool host_visible) {
+    const u64 chunk_size = GetAllocationChunkSize(requirements.size);
 
     // When a host visible commit is asked, search for host visible and coherent, otherwise search
     // for a fast device local type.
@@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
           ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
          : vk::MemoryPropertyFlagBits::eDeviceLocal;
 
-    const auto TryCommit = [&]() -> VKMemoryCommit {
-        for (auto& alloc : allocs) {
-            if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits))
-                continue;
-
-            if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) {
-                return commit;
-            }
-        }
-        return {};
-    };
-
-    if (auto commit = TryCommit(); commit) {
+    if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
         return commit;
     }
 
     // Commit has failed, allocate more memory.
-    if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) {
-        // TODO(Rodrigo): Try to use host memory.
-        LOG_CRITICAL(Render_Vulkan, "Ran out of memory!");
-        UNREACHABLE();
+    if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) {
+        // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory.
+        // Allocation has failed, panic.
+        UNREACHABLE_MSG("Ran out of VRAM!");
+        return {};
    }
 
     // Commit again, this time it won't fail since there's a fresh allocation above. If it does,
     // there's a bug.
-    auto commit = TryCommit();
+    auto commit = TryAllocCommit(requirements, wanted_properties);
     ASSERT(commit);
     return commit;
 }
@@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool
 VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
     const auto dev = device.GetLogical();
     const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld);
-    auto commit = Commit(requeriments, host_visible);
+    auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
     dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
     return commit;
 }
@@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) {
 VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) {
     const auto dev = device.GetLogical();
     const auto& dld = device.GetDispatchLoader();
-    const auto requeriments = dev.getImageMemoryRequirements(image, dld);
-    auto commit = Commit(requeriments, host_visible);
+    auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
     dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
     return commit;
 }
 
 bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask,
                                   u64 size) {
-    const u32 type = [&]() {
-        for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
-            const auto flags = props.memoryTypes[type_index].propertyFlags;
+    const u32 type = [&] {
+        for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+            const auto flags = properties.memoryTypes[type_index].propertyFlags;
             if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) {
                 // The type matches in type and in the wanted properties.
                 return type_index;
             }
         }
-        LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!");
-        UNREACHABLE();
-        return 0u;
+        UNREACHABLE_MSG("Couldn't find a compatible memory type!");
+        return 0U;
     }();
 
     const auto dev = device.GetLogical();
@@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
     // Try to allocate found type.
     const vk::MemoryAllocateInfo memory_ai(size, type);
     vk::DeviceMemory memory;
-    if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
+    if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld);
         res != vk::Result::eSuccess) {
         LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res));
         return false;
     }
-    allocs.push_back(
+    allocations.push_back(
         std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type));
     return true;
 }
 
-/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) {
-    for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) {
-        if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
+VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements,
+                                               vk::MemoryPropertyFlags wanted_properties) {
+    for (auto& allocation : allocations) {
+        if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
+            continue;
+        }
+        if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
+            return commit;
+        }
+    }
+    return {};
+}
+
+/*static*/ bool VKMemoryManager::GetMemoryUnified(
+    const vk::PhysicalDeviceMemoryProperties& properties) {
+    for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
+        if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) {
             // Memory is considered unified when heaps are device local only.
             return false;
         }
@@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
     return true;
 }
 
-VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
-                                       u8* data, u64 begin, u64 end)
-    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
+VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+                                       vk::DeviceMemory memory, u64 begin, u64 end)
+    : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {}
 
 VKMemoryCommitImpl::~VKMemoryCommitImpl() {
     allocation->Free(this);
 }
 
-u8* VKMemoryCommitImpl::GetData() const {
-    ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit.");
-    return data;
+MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
+    const auto dev = device.GetLogical();
+    const auto address = reinterpret_cast<u8*>(
+        dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
+    return MemoryMap{this, address};
+}
+
+void VKMemoryCommitImpl::Unmap() const {
+    const auto dev = device.GetLogical();
+    dev.unmapMemory(memory, device.GetDispatchLoader());
+}
+
+MemoryMap VKMemoryCommitImpl::Map() const {
+    return Map(interval.second - interval.first);
 }
 
 } // namespace Vulkan
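
Note: GetAllocationChunkSize, new in this commit, rounds each request up to the smallest bucket among 16/32/64/128 MiB and, beyond that, to the next multiple of 256 MiB. A minimal standalone check of that behaviour, with Common::AlignUp replaced by inline power-of-two rounding so the snippet compiles on its own:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    std::uint64_t GetAllocationChunkSize(std::uint64_t required_size) {
        static constexpr std::uint64_t sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20,
                                                  128ULL << 20};
        const auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size);
        constexpr std::uint64_t align = 256ULL << 20; // fallback granularity
        return it != std::end(sizes) ? *it : ((required_size + align - 1) & ~(align - 1));
    }

    int main() {
        assert(GetAllocationChunkSize(1ULL << 20) == 16ULL << 20);    // small request -> 16 MiB
        assert(GetAllocationChunkSize(16ULL << 20) == 16ULL << 20);   // exact bucket is kept
        assert(GetAllocationChunkSize(100ULL << 20) == 128ULL << 20); // next bucket up
        assert(GetAllocationChunkSize(300ULL << 20) == 512ULL << 20); // AlignUp(300 MiB, 256 MiB)
    }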
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 073597b35..cd00bb91b 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -12,6 +12,7 @@
 
 namespace Vulkan {
 
+class MemoryMap;
 class VKDevice;
 class VKMemoryAllocation;
 class VKMemoryCommitImpl;
@@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
 class VKMemoryManager final {
 public:
     explicit VKMemoryManager(const VKDevice& device);
+    VKMemoryManager(const VKMemoryManager&) = delete;
     ~VKMemoryManager();
 
     /**
      * Commits a memory with the specified requeriments.
-     * @param reqs Requeriments returned from a Vulkan call.
+     * @param requirements Requirements returned from a Vulkan call.
      * @param host_visible Signals the allocator that it *must* use host visible and coherent
      * memory. When passing false, it will try to allocate device local memory.
      * @returns A memory commit.
      */
     VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible);
@@ -47,25 +49,35 @@ private:
     /// Allocates a chunk of memory.
     bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
 
+    /// Tries to allocate a memory commit.
+    VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements,
+                                  vk::MemoryPropertyFlags wanted_properties);
+
     /// Returns true if the device uses an unified memory model.
-    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props);
+    static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties);
 
-    const VKDevice& device;                         ///< Device handler.
-    const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties.
-    const bool is_memory_unified;                   ///< True if memory model is unified.
-    std::vector<std::unique_ptr<VKMemoryAllocation>> allocs; ///< Current allocations.
+    const VKDevice& device;                              ///< Device handler.
+    const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties.
+    const bool is_memory_unified;                        ///< True if memory model is unified.
+    std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
 };
 
 class VKMemoryCommitImpl final {
     friend VKMemoryAllocation;
+    friend MemoryMap;
 
 public:
-    explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data,
-                                u64 begin, u64 end);
+    explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
+                                vk::DeviceMemory memory, u64 begin, u64 end);
     ~VKMemoryCommitImpl();
 
-    /// Returns the writeable memory map. The commit has to be mappable.
-    u8* GetData() const;
+    /// Maps a memory region and returns a pointer to it.
+    /// It's illegal to have more than one memory map at the same time.
+    MemoryMap Map(u64 size, u64 offset = 0) const;
+
+    /// Maps the whole commit and returns a pointer to it.
+    /// It's illegal to have more than one memory map at the same time.
+    MemoryMap Map() const;
 
     /// Returns the Vulkan memory handler.
     vk::DeviceMemory GetMemory() const {
@@ -78,10 +90,46 @@ public:
     }
 
 private:
+    /// Unmaps memory.
+    void Unmap() const;
+
+    const VKDevice& device;           ///< Vulkan device.
     std::pair<u64, u64> interval{};   ///< Interval where the commit exists.
     vk::DeviceMemory memory;          ///< Vulkan device memory handler.
     VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-    u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included.
+};
+
+/// Holds ownership of a memory map.
+class MemoryMap final {
+public:
+    explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address)
+        : commit{commit}, address{address} {}
+
+    ~MemoryMap() {
+        if (commit) {
+            commit->Unmap();
+        }
+    }
+
+    /// Prematurely releases the memory map.
+    void Release() {
+        commit->Unmap();
+        commit = nullptr;
+    }
+
+    /// Returns the address of the memory map.
+    u8* GetAddress() const {
+        return address;
+    }
+
+    /// Returns the address of the memory map;
+    operator u8*() const {
+        return address;
+    }
+
+private:
+    const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
+    u8* address{};                      ///< Address to the mapped memory.
 };
 
 } // namespace Vulkan
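
Note: commits are no longer persistently mapped; Map() hands back a scoped MemoryMap whose destructor calls Unmap(). A hypothetical caller (the helper function and its arguments are assumptions for illustration) would look like:

    #include <cstddef>
    #include <cstring>

    #include "video_core/renderer_vulkan/vk_memory_manager.h"

    // Copies `size` bytes into a host-visible commit; the mapping is released when
    // `map` leaves scope, so no explicit cleanup path is needed.
    void WriteToCommit(const Vulkan::VKMemoryCommit& commit, const u8* src, std::size_t size) {
        Vulkan::MemoryMap map = commit->Map(size);
        std::memcpy(map.GetAddress(), src, size); // the implicit operator u8*() also works
    }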
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 62f1427f5..d48d3b44c 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,86 +3,144 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <memory>
 #include <optional>
+#include <tuple>
 #include <vector>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
 namespace Vulkan {
 
+namespace {
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
-                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
-                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
-    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
-                                                                                   pipeline_stage} {
-    CreateBuffers(memory_manager, usage);
-    ReserveWatches(WATCHES_INITIAL_RESERVE);
-}
-
-VKStreamBuffer::~VKStreamBuffer() = default;
-
-std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
-    ASSERT(size <= buffer_size);
-    mapped_size = size;
-
-    if (offset + size > buffer_size) {
-        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
-        // reset the state.
-        invalidation_mark = used_watches;
-        used_watches = 0;
-        offset = 0;
-    }
-
-    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
-}
-
-void VKStreamBuffer::Send(u64 size) {
-    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
-
-    if (invalidation_mark) {
-        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
-        scheduler.Flush();
-        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
-                      [&](auto& resource) { resource->Wait(); });
-        invalidation_mark = std::nullopt;
-    }
-
-    if (used_watches + 1 >= watches.size()) {
-        // Ensure that there are enough watches.
-        ReserveWatches(WATCHES_RESERVE_CHUNK);
-    }
-    // Add a watch for this allocation.
-    watches[used_watches++]->Watch(scheduler.GetFence());
-
-    offset += size;
-}
-
-void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
-    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
-                                         nullptr);
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
-    commit = memory_manager.Commit(*buffer, true);
-    mapped_pointer = commit->GetData();
-}
-
-void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
-    const std::size_t previous_size = watches.size();
-    watches.resize(previous_size + grow_size);
-    std::generate(watches.begin() + previous_size, watches.end(),
-                  []() { return std::make_unique<VKFenceWatch>(); });
-}
+constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
+
+std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
+                                  vk::MemoryPropertyFlags wanted) {
+    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
+    for (u32 i = 0; i < properties.memoryTypeCount; i++) {
+        if (!(filter & (1 << i))) {
+            continue;
+        }
+        if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
+            return i;
+        }
+    }
+    return {};
+}
+
+} // Anonymous namespace
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+                               vk::BufferUsageFlags usage)
+    : device{device}, scheduler{scheduler} {
+    CreateBuffers(usage);
+    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
+    ASSERT(size <= STREAM_BUFFER_SIZE);
+    mapped_size = size;
+
+    if (alignment > 0) {
+        offset = Common::AlignUp(offset, alignment);
+    }
+
+    WaitPendingOperations(offset);
+
+    bool invalidated = false;
+    if (offset + size > STREAM_BUFFER_SIZE) {
+        // The buffer would overflow, save the amount of used watches and reset the state.
+        invalidation_mark = current_watch_cursor;
+        current_watch_cursor = 0;
+        offset = 0;
+
+        // Swap watches and reset waiting cursors.
+        std::swap(previous_watches, current_watches);
+        wait_cursor = 0;
+        wait_bound = 0;
+
+        // Ensure that we don't wait for uncommitted fences.
+        scheduler.Flush();
+
+        invalidated = true;
+    }
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
+    return {pointer, offset, invalidated};
+}
+
+void VKStreamBuffer::Unmap(u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    const auto dev = device.GetLogical();
+    dev.unmapMemory(*memory, device.GetDispatchLoader());
+
+    offset += size;
+
+    if (current_watch_cursor + 1 >= current_watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
+    }
+    auto& watch = current_watches[current_watch_cursor++];
+    watch.upper_bound = offset;
+    watch.fence.Watch(scheduler.GetFence());
+}
+
+void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
+                                         0, nullptr);
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+
+    const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
+    // Prefer device local host visible allocations (this should hit AMD's pinned memory).
+    auto type = FindMemoryType(device, requirements.memoryTypeBits,
+                               vk::MemoryPropertyFlagBits::eHostVisible |
+                                   vk::MemoryPropertyFlagBits::eHostCoherent |
+                                   vk::MemoryPropertyFlagBits::eDeviceLocal);
+    if (!type) {
+        // Otherwise search for a host visible allocation.
+        type = FindMemoryType(device, requirements.memoryTypeBits,
+                              vk::MemoryPropertyFlagBits::eHostVisible |
+                                  vk::MemoryPropertyFlagBits::eHostCoherent);
+        ASSERT_MSG(type, "No host visible and coherent memory type found");
+    }
+    const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
+    memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
+
+    dev.bindBufferMemory(*buffer, *memory, 0, dld);
+}
+
+void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+    watches.resize(watches.size() + grow_size);
+}
+
+void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+    if (!invalidation_mark) {
+        return;
+    }
+    while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
+        auto& watch = previous_watches[wait_cursor];
+        wait_bound = watch.upper_bound;
+        watch.fence.Wait();
+        ++wait_cursor;
+    }
+}
 
 } // namespace Vulkan
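
Note: Reserve()/Send() became Map()/Unmap(), alignment is now handled inside Map, and the invalidation flag is returned directly instead of being deferred. A sketch of the caller side (the helper, `data` and `size` are assumptions, mirroring how the generic buffer cache drives this API):

    #include <cstring>

    #include "video_core/renderer_vulkan/vk_stream_buffer.h"

    // Claims `size` bytes from the stream buffer, copies data in, and closes the map.
    u64 PushToStream(Vulkan::VKStreamBuffer& stream, const u8* data, u64 size) {
        const auto [pointer, buffer_offset, invalidated] = stream.Map(size, 4);
        if (invalidated) {
            // Offsets handed out before this point are stale and must be re-uploaded.
        }
        std::memcpy(pointer, data, size);
        stream.Unmap(size); // advances the cursor and records a fence watch on this range
        return buffer_offset;
    }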
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 842e54162..187c0c612 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -4,28 +4,24 @@
 
 #pragma once
 
-#include <memory>
 #include <optional>
 #include <tuple>
 #include <vector>
 
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/declarations.h"
-#include "video_core/renderer_vulkan/vk_memory_manager.h"
 
 namespace Vulkan {
 
 class VKDevice;
 class VKFence;
 class VKFenceWatch;
-class VKResourceManager;
 class VKScheduler;
 
-class VKStreamBuffer {
+class VKStreamBuffer final {
 public:
-    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
-                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
-                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+                            vk::BufferUsageFlags usage);
     ~VKStreamBuffer();
 
     /**
@@ -34,39 +30,47 @@ public:
      * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
      * offset and a boolean that's true when buffer has been invalidated.
      */
-    std::tuple<u8*, u64, bool> Reserve(u64 size);
+    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
 
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
-    void Send(u64 size);
+    void Unmap(u64 size);
 
-    vk::Buffer GetBuffer() const {
+    vk::Buffer GetHandle() const {
         return *buffer;
     }
 
 private:
+    struct Watch final {
+        VKFenceWatch fence;
+        u64 upper_bound{};
+    };
+
     /// Creates Vulkan buffer handles committing the required the required memory.
-    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+    void CreateBuffers(vk::BufferUsageFlags usage);
 
     /// Increases the amount of watches available.
-    void ReserveWatches(std::size_t grow_size);
+    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+    void WaitPendingOperations(u64 requested_upper_bound);
 
     const VKDevice& device; ///< Vulkan device manager.
     VKScheduler& scheduler; ///< Command scheduler.
-    const u64 buffer_size;  ///< Total size of the stream buffer.
     const vk::AccessFlags access; ///< Access usage of this stream buffer.
     const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
 
-    UniqueBuffer buffer;   ///< Mapped buffer.
-    VKMemoryCommit commit; ///< Memory commit.
-    u8* mapped_pointer{};  ///< Pointer to the host visible commit
+    UniqueBuffer buffer;       ///< Mapped buffer.
+    UniqueDeviceMemory memory; ///< Memory allocation.
 
     u64 offset{};      ///< Buffer iterator.
     u64 mapped_size{}; ///< Size reserved for the current copy.
 
-    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches
-    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
-    std::optional<std::size_t>
-        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+    std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration.
+    std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
+
+    std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
+    std::size_t wait_cursor{};           ///< Last watch being waited for completion.
+    u64 wait_bound{};                    ///< Highest offset being watched for completion.
 };
 
 } // namespace Vulkan