path: root/src/video_core/renderer_vulkan
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp          23
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp          61
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h             9
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp  142
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h     20
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp         14
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h            9
7 files changed, 214 insertions, 64 deletions
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 48fc5d966..4f1e4ec28 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
 void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
                                          u32 base_vertex, u32 num_indices, VkBuffer buffer,
                                          u32 offset, [[maybe_unused]] u32 size) {
-    VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
+    VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
+    VkDeviceSize vk_offset = offset;
     if (topology == PrimitiveTopology::Quads) {
-        index_type = VK_INDEX_TYPE_UINT32;
-        std::tie(buffer, offset) =
+        vk_index_type = VK_INDEX_TYPE_UINT32;
+        std::tie(buffer, vk_offset) =
             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
-    } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
-        index_type = VK_INDEX_TYPE_UINT16;
-        std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+    } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+        vk_index_type = VK_INDEX_TYPE_UINT16;
+        std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
     }
-    scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+    scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type);
     });
 }

@@ -251,10 +252,10 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
         }
     }
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
-                      size_bytes](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
         const VkBufferCopy copy{
-            .srcOffset = 0,
+            .srcOffset = src_offset,
             .dstOffset = 0,
             .size = size_bytes,
         };
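
Note on the vk_buffer_cache.cpp hunks above: the assembled index data now lives at a
nonzero offset inside a shared staging buffer, so the offset reported by the compute
passes must be carried as a VkDeviceSize instead of reusing the 32-bit guest offset.
A minimal sketch of the consuming pattern, using only names visible in the hunks
(not additional code from the commit):

    VkDeviceSize vk_offset = offset;
    // The pass rewrites the indices into a staging slice and reports where it put them.
    std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
    // The reported offset must reach the bind call unmodified; it is no longer 0.
    cmdbuf.BindIndexBuffer(buffer, vk_offset, VK_INDEX_TYPE_UINT16);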
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index a4fdcdf81..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/div_ceil.h"
 #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,

 Uint8Pass::~Uint8Pass() = default;

-std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
-                                             u32 src_offset) {
+std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                                      u32 src_offset) {
     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);

     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
-    update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
                       num_vertices](vk::CommandBuffer cmdbuf) {
-        constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+        };
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
-        cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
+        cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging.buffer, 0};
+    return {staging.buffer, staging.offset};
 }

 QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,

 QuadIndexedPass::~QuadIndexedPass() = default;

-std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
     VkBuffer src_buffer, u32 src_offset) {
     const u32 index_shift = [index_format] {
@@ -217,34 +213,29 @@ std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(

     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
-    update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
-        static constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+        };
         const std::array push_constants = {base_vertex, index_shift};
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
                              &push_constants);
-        cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
+        cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging.buffer, 0};
+    return {staging.buffer, staging.offset};
 }

 } // namespace Vulkan
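
Note on the vk_compute_pass.cpp hunks above: Common::AlignUp(x, n) / n and
Common::DivCeil(x, n) compute the same workgroup count; the latter states the intent
directly. A sketch of the semantics (the real implementation lives in
common/div_ceil.h; this is an illustrative reimplementation, not the commit's code):

    // Round-up integer division: how many DISPATCH_SIZE-wide groups cover num_vertices.
    template <typename N, typename D>
    constexpr N DivCeilSketch(N number, D divisor) {
        return static_cast<N>((number + static_cast<N>(divisor) - 1) / static_cast<N>(divisor));
    }
    static_assert(DivCeilSketch(1024u, 1024u) == 1);
    static_assert(DivCeilSketch(1025u, 1024u) == 2); // tail elements get one extra group

The barrier swap follows from the same staging change: the destination is now a slice
of a shared buffer rather than a whole dedicated buffer, so a global VkMemoryBarrier,
which covers all memory and needs no buffer/offset/size, replaces the per-buffer
VkBufferMemoryBarrier and can be a static constexpr.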
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 4904019f5..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -50,7 +50,8 @@ public:

     /// Assemble uint8 indices into an uint16 index buffer
     /// Returns a pair with the staging buffer, and the offset where the assembled data is
-    std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
+    std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                               u32 src_offset);

 private:
     VKScheduler& scheduler;
@@ -66,9 +67,9 @@ public:
                              VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~QuadIndexedPass();

-    std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
-                                      u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
-                                      u32 src_offset);
+    std::pair<VkBuffer, VkDeviceSize> Assemble(
+        Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
+        u32 base_vertex, VkBuffer src_buffer, u32 src_offset);

 private:
     VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..275d740b8 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@

 #include <fmt/format.h>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
@@ -17,14 +18,117 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"

 namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
+
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+    return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+}
+
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+                                       VkMemoryPropertyFlags flags) noexcept {
+    for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+        if (((type_mask >> type_index) & 1) == 0) {
+            // Memory type is incompatible
+            continue;
+        }
+        const VkMemoryType& memory_type = props.memoryTypes[type_index];
+        if ((memory_type.propertyFlags & flags) != flags) {
+            // Memory type doesn't have the flags we want
+            continue;
+        }
+        if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+            // Memory heap is not suitable for streaming
+            continue;
+        }
+        // Success!
+        return type_index;
+    }
+    return std::nullopt;
+}
+
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+    // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
+    std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+    if (type) {
+        return *type;
+    }
+    // Otherwise try without the DEVICE_LOCAL_BIT
+    type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+    if (type) {
+        return *type;
+    }
+    // This should never happen, and in case it does, signal it as an out of memory situation
+    throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
+
+size_t Region(size_t iterator) noexcept {
+    return iterator / REGION_SIZE;
+}
+} // Anonymous namespace

 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      VKScheduler& scheduler_)
-    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+    const vk::Device& dev = device.GetLogical();
+    stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = STREAM_BUFFER_SIZE,
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_buffer.SetObjectNameEXT("Stream Buffer");
+    }
+    VkMemoryDedicatedRequirements dedicated_reqs{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+        .pNext = nullptr,
+        .prefersDedicatedAllocation = VK_FALSE,
+        .requiresDedicatedAllocation = VK_FALSE,
+    };
+    const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+    const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
+                                dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+    const VkMemoryDedicatedAllocateInfo dedicated_info{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .image = nullptr,
+        .buffer = *stream_buffer,
+    };
+    const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+    stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = make_dedicated ? &dedicated_info : nullptr,
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+    }
+    stream_buffer.BindMemory(*stream_memory, 0);
+    stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}

 StagingBufferPool::~StagingBufferPool() = default;

 StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
+    if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+        return GetStreamBuffer(size);
+    }
     if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
         return *ref;
     }
@@ -39,6 +143,42 @@ void StagingBufferPool::TickFrame() {
     ReleaseCache(MemoryUsage::Download);
 }

+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+         ++region) {
+        sync_ticks[region] = scheduler.CurrentTick();
+    }
+    used_iterator = iterator;
+
+    for (size_t region = Region(free_iterator) + 1,
+                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+         region < region_end; ++region) {
+        scheduler.Wait(sync_ticks[region]);
+    }
+    if (iterator + size > free_iterator) {
+        free_iterator = iterator + size;
+    }
+    if (iterator + size > STREAM_BUFFER_SIZE) {
+        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+            sync_ticks[region] = scheduler.CurrentTick();
+        }
+        used_iterator = 0;
+        iterator = 0;
+        free_iterator = size;
+
+        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+            scheduler.Wait(sync_ticks[region]);
+        }
+    }
+    const size_t offset = iterator;
+    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    return StagingBufferRef{
+        .buffer = *stream_buffer,
+        .offset = static_cast<VkDeviceSize>(offset),
+        .mapped_span = std::span<u8>(stream_pointer + offset, size),
+    };
+}
+
 std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
                                                                         MemoryUsage usage) {
     StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
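
Note on GetStreamBuffer above: the 128 MiB stream buffer is treated as a ring split
into NUM_SYNCS (16) regions of 8 MiB each; a region is reclaimed by waiting on the
scheduler tick recorded when it was last filled, and allocations wrap back to offset
0 when they would run past the end. A self-contained sketch of the region arithmetic
(hypothetical standalone names mirroring the constants in the hunk):

    #include <cstddef>

    constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
    constexpr std::size_t NUM_SYNCS = 16;
    constexpr std::size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; // 8 MiB

    constexpr std::size_t RegionOf(std::size_t offset) {
        return offset / REGION_SIZE; // index of the tick guarding this byte
    }
    static_assert(RegionOf(0) == 0);
    static_assert(RegionOf(REGION_SIZE - 1) == 0);
    static_assert(RegionOf(REGION_SIZE) == 1); // crossing a boundary waits on the next tick

The memory-type selection in the constructor prefers DEVICE_LOCAL + HOST_VISIBLE +
HOST_COHERENT (per the comment in the hunk, NVIDIA and AMD expose a dedicated heap
with these flags) and falls back to plain host-visible memory, so writes through
stream_pointer remain valid on either path.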
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..4ed99c0df 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;

 struct StagingBufferRef {
     VkBuffer buffer;
+    VkDeviceSize offset;
     std::span<u8> mapped_span;
 };

 class StagingBufferPool {
 public:
+    static constexpr size_t NUM_SYNCS = 16;
+
     explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
                                VKScheduler& scheduler);
     ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
     void TickFrame();

 private:
+    struct StreamBufferCommit {
+        size_t upper_bound;
+        u64 tick;
+    };
+
     struct StagingBuffer {
         vk::Buffer buffer;
         MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
     StagingBufferRef Ref() const noexcept {
         return {
             .buffer = *buffer,
+            .offset = 0,
             .mapped_span = mapped_span,
         };
     }
@@ -56,6 +65,8 @@ private:
     static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
     using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;

+    StagingBufferRef GetStreamBuffer(size_t size);
+
     std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);

     StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +81,15 @@ private:
     MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;

+    vk::Buffer stream_buffer;
+    vk::DeviceMemory stream_memory;
+    u8* stream_pointer = nullptr;
+
+    size_t iterator = 0;
+    size_t used_iterator = 0;
+    size_t free_iterator = 0;
+    std::array<u64, NUM_SYNCS> sync_ticks{};
+
     StagingBuffersCache device_local_cache;
     StagingBuffersCache upload_cache;
     StagingBuffersCache download_cache;
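
Note on the vk_staging_buffer_pool.h hunk above: StagingBufferRef now carries the
sub-allocation offset, with pooled buffers reporting 0 and stream slices reporting
their ring position. A hedged sketch of the invariant every consumer must follow
(illustrative helper, not part of the commit; assumes the usual <cstring> and
Vulkan headers):

    // Copy CPU data into a staging ref and describe where the GPU should read it.
    // Works identically for dedicated buffers (offset == 0) and stream slices.
    VkBufferCopy StageUpload(const StagingBufferRef& ref, const void* src, std::size_t size,
                             VkDeviceSize dst_offset) {
        std::memcpy(ref.mapped_span.data(), src, size); // the span already points at the slice
        return VkBufferCopy{
            .srcOffset = ref.offset, // never assume 0 anymore
            .dstOffset = dst_offset,
            .size = size,
        };
    }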
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 1eeb45ca9..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -818,11 +818,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
     }
 }

-void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                         std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     const VkBuffer src_buffer = map.buffer;
     const VkImage vk_image = *image;
     const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -833,11 +832,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
     });
 }

-void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map,
                          std::span<const VideoCommon::BufferCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
+    std::vector vk_copies = TransformBufferCopies(copies, map.offset);
     const VkBuffer src_buffer = map.buffer;
     const VkBuffer dst_buffer = *buffer;
     scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
@@ -846,9 +845,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
     });
 }

-void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                           std::span<const BufferImageCopy> copies) {
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
                        vk_copies](vk::CommandBuffer cmdbuf) {
         const VkImageMemoryBarrier read_barrier{
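
Note on the vk_texture_cache.cpp hunks above: the separate buffer_offset parameter is
gone because the staging offset now rides inside StagingBufferRef, and the copies are
rebased against map.offset instead. A minimal sketch of what that rebasing amounts to
(illustrative helper; the real TransformBufferImageCopies also fills in the aspect
mask and subresource layout):

    // For each guest copy, rebase its buffer offset onto the staging slice.
    VkBufferImageCopy RebaseCopy(VkBufferImageCopy copy, const StagingBufferRef& map) {
        copy.bufferOffset += map.offset; // was: += the buffer_offset passed by the caller
        return copy;
    }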
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4558c3297..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -82,7 +82,7 @@ struct TextureCacheRuntime {
         return false;
     }

-    void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
+    void AccelerateImageUpload(Image&, const StagingBufferRef&,
                                std::span<const VideoCommon::SwizzleParameters>) {
         UNREACHABLE();
     }
@@ -100,13 +100,12 @@ public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);

-    void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
+    void UploadMemory(const StagingBufferRef& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);

-    void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                      std::span<const VideoCommon::BufferCopy> copies);
+    void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);

-    void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
+    void DownloadMemory(const StagingBufferRef& map,
                         std::span<const VideoCommon::BufferImageCopy> copies);

     [[nodiscard]] VkImage Handle() const noexcept {