author     ReinUsesLisp    2021-01-16 16:20:18 -0300
committer  ReinUsesLisp    2021-02-13 02:17:24 -0300
commit     35df1d1864ba721ea7b1cebf9a106dd771cde4f5 (patch)
tree       034a8281294246e2a8eea92d1937607ad00ed428 /src
parent     vulkan_device: Enable robustBufferAccess (diff)
vk_staging_buffer_pool: Add stream buffer for small uploads
This uses a ring buffer similar to OpenGL's stream buffer for small uploads. It stops us from allocating several small buffers, reducing memory fragmentation and improving cache locality. It uses dedicated allocations when possible.
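
The core of the change is the ring-buffer allocator added in vk_staging_buffer_pool.cpp: the 128 MiB stream buffer is split into NUM_SYNCS regions, each region records the scheduler tick of the last command batch that reads from it, and the host waits on that tick before reusing the region. Below is a condensed, self-contained sketch of that allocation path; CurrentTick(), WaitForTick() and the free-standing state are stand-ins for the VKScheduler calls and StagingBufferPool members used in the actual patch (see StagingBufferPool::GetStreamBuffer in the diff).

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>

// Stand-ins for VKScheduler::CurrentTick()/Wait() used by the real code.
uint64_t CurrentTick() { return 0; }
void WaitForTick(uint64_t /*tick*/) {}

constexpr size_t NUM_SYNCS = 16;                          // regions tracked by the ring
constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;  // total stream buffer size
constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
constexpr size_t MAX_ALIGNMENT = 256;                     // worst-case Vulkan buffer alignment

size_t iterator = 0;       // next write position
size_t used_iterator = 0;  // start of the span written since the last tick stamp
size_t free_iterator = 0;  // highest byte handed out so far
std::array<uint64_t, NUM_SYNCS> sync_ticks{};

size_t Region(size_t offset) {
    return offset / REGION_SIZE;
}

size_t AlignUp(size_t value, size_t align) {
    return (value + align - 1) / align * align;
}

// Returns the byte offset inside the stream buffer where 'size' bytes can be written.
size_t AllocateFromStream(size_t size) {
    // Stamp the regions consumed since the last allocation with the tick of the
    // command batch that will read from them.
    for (size_t region = Region(used_iterator); region < Region(iterator); ++region) {
        sync_ticks[region] = CurrentTick();
    }
    used_iterator = iterator;

    // Wait for the GPU to be done with the regions this request grows into.
    for (size_t region = Region(free_iterator) + 1,
                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
         region < region_end; ++region) {
        WaitForTick(sync_ticks[region]);
    }
    if (iterator + size > free_iterator) {
        free_iterator = iterator + size;
    }

    // Wrap around when the request does not fit at the tail of the buffer.
    if (iterator + size > STREAM_BUFFER_SIZE) {
        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
            sync_ticks[region] = CurrentTick();
        }
        used_iterator = 0;
        iterator = 0;
        free_iterator = size;
        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
            WaitForTick(sync_ticks[region]);
        }
    }
    const size_t offset = iterator;
    iterator = AlignUp(iterator + size, MAX_ALIGNMENT);
    return offset;
}

Requests that are not uploads, or that exceed MAX_STREAM_BUFFER_REQUEST_SIZE (8 MiB), keep going through the existing per-size staging buffer caches.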
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h                 28
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp        21
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h          11
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp            18
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.h               6
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp         23
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp         61
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h            9
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp  142
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h    20
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp        14
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h           9
-rw-r--r--  src/video_core/texture_cache/texture_cache.h               38
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp            20
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h               5
15 files changed, 298 insertions, 127 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index e4f3c8e35..d6399bf24 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -239,8 +239,7 @@ private:
239 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, 239 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
240 std::span<const BufferCopy> copies); 240 std::span<const BufferCopy> copies);
241 241
242 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, 242 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
243 std::span<const BufferCopy> copies);
244 243
245 void DeleteBuffer(BufferId buffer_id); 244 void DeleteBuffer(BufferId buffer_id);
246 245
@@ -362,11 +361,17 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
362 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 361 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
363 const u8* const mapped_memory = download_staging.mapped_span.data(); 362 const u8* const mapped_memory = download_staging.mapped_span.data();
364 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); 363 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
364 for (BufferCopy& copy : copies) {
365 // Modify copies to have the staging offset in mind
366 copy.dst_offset += download_staging.offset;
367 }
365 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); 368 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
366 runtime.Finish(); 369 runtime.Finish();
367 for (const BufferCopy& copy : copies) { 370 for (const BufferCopy& copy : copies) {
368 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; 371 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
369 const u8* copy_mapped_memory = mapped_memory + copy.dst_offset; 372 // Undo the modified offset
373 const u64 dst_offset = copy.dst_offset - download_staging.offset;
374 const u8* copy_mapped_memory = mapped_memory + dst_offset;
370 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); 375 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
371 } 376 }
372 } else { 377 } else {
@@ -554,7 +559,9 @@ void BufferCache<P>::PopAsyncFlushes() {
554 } 559 }
555 if constexpr (USE_MEMORY_MAPS) { 560 if constexpr (USE_MEMORY_MAPS) {
556 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 561 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
557 for (const auto [copy, buffer_id] : downloads) { 562 for (auto& [copy, buffer_id] : downloads) {
563 // Have in mind the staging buffer offset for the copy
564 copy.dst_offset += download_staging.offset;
558 const std::array copies{copy}; 565 const std::array copies{copy};
559 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); 566 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
560 } 567 }
@@ -562,7 +569,9 @@ void BufferCache<P>::PopAsyncFlushes() {
562 for (const auto [copy, buffer_id] : downloads) { 569 for (const auto [copy, buffer_id] : downloads) {
563 const Buffer& buffer = slot_buffers[buffer_id]; 570 const Buffer& buffer = slot_buffers[buffer_id];
564 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 571 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
565 const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset; 572 // Undo the modified offset
573 const u64 dst_offset = copy.dst_offset - download_staging.offset;
574 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
566 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 575 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
567 } 576 }
568 } else { 577 } else {
@@ -1117,13 +1126,16 @@ void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
1117 1126
1118template <class P> 1127template <class P>
1119void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, 1128void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
1120 std::span<const BufferCopy> copies) { 1129 std::span<BufferCopy> copies) {
1121 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); 1130 auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
1122 const std::span<u8> staging_pointer = upload_staging.mapped_span; 1131 const std::span<u8> staging_pointer = upload_staging.mapped_span;
1123 for (const BufferCopy& copy : copies) { 1132 for (BufferCopy& copy : copies) {
1124 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1125 u8* const src_pointer = staging_pointer.data() + copy.src_offset; 1133 u8* const src_pointer = staging_pointer.data() + copy.src_offset;
1134 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
1126 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); 1135 cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
1136
1137 // Apply the staging offset
1138 copy.src_offset += upload_staging.offset;
1127 } 1139 }
1128 runtime.CopyBuffer(buffer, upload_staging.buffer, copies); 1140 runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
1129} 1141}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 37572ab28..31eb54123 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -550,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
550} 550}
551 551
552void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, 552void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
553 size_t buffer_offset,
554 std::span<const SwizzleParameters> swizzles) { 553 std::span<const SwizzleParameters> swizzles) {
555 switch (image.info.type) { 554 switch (image.info.type) {
556 case ImageType::e2D: 555 case ImageType::e2D:
557 return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); 556 return util_shaders.BlockLinearUpload2D(image, map, swizzles);
558 case ImageType::e3D: 557 case ImageType::e3D:
559 return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); 558 return util_shaders.BlockLinearUpload3D(image, map, swizzles);
560 case ImageType::Linear: 559 case ImageType::Linear:
561 return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); 560 return util_shaders.PitchUpload(image, map, swizzles);
562 default: 561 default:
563 UNREACHABLE(); 562 UNREACHABLE();
564 break; 563 break;
@@ -710,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
710 } 709 }
711} 710}
712 711
713void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 712void Image::UploadMemory(const ImageBufferMap& map,
714 std::span<const VideoCommon::BufferImageCopy> copies) { 713 std::span<const VideoCommon::BufferImageCopy> copies) {
715 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); 714 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
716 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); 715 glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);
717 716
718 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); 717 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
719 718
@@ -729,19 +728,19 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
729 current_image_height = copy.buffer_image_height; 728 current_image_height = copy.buffer_image_height;
730 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); 729 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
731 } 730 }
732 CopyBufferToImage(copy, buffer_offset); 731 CopyBufferToImage(copy, map.offset);
733 } 732 }
734} 733}
735 734
736void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 735void Image::UploadMemory(const ImageBufferMap& map,
737 std::span<const VideoCommon::BufferCopy> copies) { 736 std::span<const VideoCommon::BufferCopy> copies) {
738 for (const VideoCommon::BufferCopy& copy : copies) { 737 for (const VideoCommon::BufferCopy& copy : copies) {
739 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset, 738 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
740 copy.dst_offset, copy.size); 739 copy.dst_offset, copy.size);
741 } 740 }
742} 741}
743 742
744void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 743void Image::DownloadMemory(ImageBufferMap& map,
745 std::span<const VideoCommon::BufferImageCopy> copies) { 744 std::span<const VideoCommon::BufferImageCopy> copies) {
746 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 745 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
747 746
@@ -760,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
760 current_image_height = copy.buffer_image_height; 759 current_image_height = copy.buffer_image_height;
761 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); 760 glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
762 } 761 }
763 CopyImageToBuffer(copy, buffer_offset); 762 CopyImageToBuffer(copy, map.offset);
764 } 763 }
765} 764}
766 765
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 60d08d6d6..874cf54f4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -35,6 +35,7 @@ struct ImageBufferMap {
35 ~ImageBufferMap(); 35 ~ImageBufferMap();
36 36
37 std::span<u8> mapped_span; 37 std::span<u8> mapped_span;
38 size_t offset = 0;
38 OGLSync* sync; 39 OGLSync* sync;
39 GLuint buffer; 40 GLuint buffer;
40}; 41};
@@ -78,7 +79,7 @@ public:
78 Tegra::Engines::Fermi2D::Filter filter, 79 Tegra::Engines::Fermi2D::Filter filter,
79 Tegra::Engines::Fermi2D::Operation operation); 80 Tegra::Engines::Fermi2D::Operation operation);
80 81
81 void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 82 void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
82 std::span<const VideoCommon::SwizzleParameters> swizzles); 83 std::span<const VideoCommon::SwizzleParameters> swizzles);
83 84
84 void InsertUploadMemoryBarrier(); 85 void InsertUploadMemoryBarrier();
@@ -137,14 +138,12 @@ public:
137 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 138 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
138 VAddr cpu_addr); 139 VAddr cpu_addr);
139 140
140 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 141 void UploadMemory(const ImageBufferMap& map,
141 std::span<const VideoCommon::BufferImageCopy> copies); 142 std::span<const VideoCommon::BufferImageCopy> copies);
142 143
143 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, 144 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
144 std::span<const VideoCommon::BufferCopy> copies);
145 145
146 void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, 146 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
147 std::span<const VideoCommon::BufferImageCopy> copies);
148 147
149 GLuint Handle() const noexcept { 148 GLuint Handle() const noexcept {
150 return texture.handle; 149 return texture.handle;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index aeb36551c..1b58e8617 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
63 63
64UtilShaders::~UtilShaders() = default; 64UtilShaders::~UtilShaders() = default;
65 65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
67 std::span<const SwizzleParameters> swizzles) { 67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; 69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72 72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); 74 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76 76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) { 78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles; 79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 80 const size_t input_offset = swizzle.buffer_offset + map.offset;
81 81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
100 program_manager.RestoreGuestCompute(); 100 program_manager.RestoreGuestCompute();
101} 101}
102 102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
104 std::span<const SwizzleParameters> swizzles) { 104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; 105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106 106
@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110 110
111 glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); 111 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114 114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) { 116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles; 117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 118 const size_t input_offset = swizzle.buffer_offset + map.offset;
119 119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
141 program_manager.RestoreGuestCompute(); 141 program_manager.RestoreGuestCompute();
142} 142}
143 143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
145 std::span<const SwizzleParameters> swizzles) { 145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; 146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
159 "Non-power of two images are not implemented"); 159 "Non-power of two images are not implemented");
160 160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes); 162 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0); 163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0); 164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); 165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
@@ -167,7 +167,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); 167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
168 for (const SwizzleParameters& swizzle : swizzles) { 168 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles; 169 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset; 170 const size_t input_offset = swizzle.buffer_offset + map.offset;
171 171
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); 172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); 173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index bec026bc3..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -24,13 +24,13 @@ public:
24 explicit UtilShaders(ProgramManager& program_manager); 24 explicit UtilShaders(ProgramManager& program_manager);
25 ~UtilShaders(); 25 ~UtilShaders();
26 26
27 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 27 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
28 std::span<const VideoCommon::SwizzleParameters> swizzles); 28 std::span<const VideoCommon::SwizzleParameters> swizzles);
29 29
30 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, 30 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
31 std::span<const VideoCommon::SwizzleParameters> swizzles); 31 std::span<const VideoCommon::SwizzleParameters> swizzles);
32 32
33 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, 33 void PitchUpload(Image& image, const ImageBufferMap& map,
34 std::span<const VideoCommon::SwizzleParameters> swizzles); 34 std::span<const VideoCommon::SwizzleParameters> swizzles);
35 35
36 void CopyBC4(Image& dst_image, Image& src_image, 36 void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 48fc5d966..4f1e4ec28 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, 138void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
139 u32 base_vertex, u32 num_indices, VkBuffer buffer, 139 u32 base_vertex, u32 num_indices, VkBuffer buffer,
140 u32 offset, [[maybe_unused]] u32 size) { 140 u32 offset, [[maybe_unused]] u32 size) {
141 VkIndexType index_type = MaxwellToVK::IndexFormat(index_format); 141 VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
142 VkDeviceSize vk_offset = offset;
142 if (topology == PrimitiveTopology::Quads) { 143 if (topology == PrimitiveTopology::Quads) {
143 index_type = VK_INDEX_TYPE_UINT32; 144 vk_index_type = VK_INDEX_TYPE_UINT32;
144 std::tie(buffer, offset) = 145 std::tie(buffer, vk_offset) =
145 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); 146 quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
146 } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { 147 } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
147 index_type = VK_INDEX_TYPE_UINT16; 148 vk_index_type = VK_INDEX_TYPE_UINT16;
148 std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset); 149 std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
149 } 150 }
150 scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) { 151 scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
151 cmdbuf.BindIndexBuffer(buffer, offset, index_type); 152 cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type);
152 }); 153 });
153} 154}
154 155
@@ -251,10 +252,10 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
251 } 252 }
252 } 253 }
253 scheduler.RequestOutsideRenderPassOperationContext(); 254 scheduler.RequestOutsideRenderPassOperationContext();
254 scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut, 255 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
255 size_bytes](vk::CommandBuffer cmdbuf) { 256 dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
256 const VkBufferCopy copy{ 257 const VkBufferCopy copy{
257 .srcOffset = 0, 258 .srcOffset = src_offset,
258 .dstOffset = 0, 259 .dstOffset = 0,
259 .size = size_bytes, 260 .size = size_bytes,
260 }; 261 };
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index a4fdcdf81..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,6 +10,7 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/div_ceil.h"
13#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
14#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
15#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
148 149
149Uint8Pass::~Uint8Pass() = default; 150Uint8Pass::~Uint8Pass() = default;
150 151
151std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, 152std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
152 u32 src_offset) { 153 u32 src_offset) {
153 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); 154 const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
154 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); 155 const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
155 156
156 update_descriptor_queue.Acquire(); 157 update_descriptor_queue.Acquire();
157 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 158 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
158 update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); 159 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
159 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 160 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
160 161
161 scheduler.RequestOutsideRenderPassOperationContext(); 162 scheduler.RequestOutsideRenderPassOperationContext();
162 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 163 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
163 num_vertices](vk::CommandBuffer cmdbuf) { 164 num_vertices](vk::CommandBuffer cmdbuf) {
164 constexpr u32 dispatch_size = 1024; 165 static constexpr u32 DISPATCH_SIZE = 1024;
166 static constexpr VkMemoryBarrier WRITE_BARRIER{
167 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
168 .pNext = nullptr,
169 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
170 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
171 };
165 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 172 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
166 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 173 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
167 cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); 174 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
168
169 VkBufferMemoryBarrier barrier;
170 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
171 barrier.pNext = nullptr;
172 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
173 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
174 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
175 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
176 barrier.buffer = buffer;
177 barrier.offset = 0;
178 barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
179 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 175 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
180 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 176 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
181 }); 177 });
182 return {staging.buffer, 0}; 178 return {staging.buffer, staging.offset};
183} 179}
184 180
185QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 181QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
194 190
195QuadIndexedPass::~QuadIndexedPass() = default; 191QuadIndexedPass::~QuadIndexedPass() = default;
196 192
197std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( 193std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
198 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, 194 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
199 VkBuffer src_buffer, u32 src_offset) { 195 VkBuffer src_buffer, u32 src_offset) {
200 const u32 index_shift = [index_format] { 196 const u32 index_shift = [index_format] {
@@ -217,34 +213,29 @@ std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
217 213
218 update_descriptor_queue.Acquire(); 214 update_descriptor_queue.Acquire();
219 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 215 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
220 update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); 216 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
221 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 217 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
222 218
223 scheduler.RequestOutsideRenderPassOperationContext(); 219 scheduler.RequestOutsideRenderPassOperationContext();
224 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 220 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
225 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 221 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
226 static constexpr u32 dispatch_size = 1024; 222 static constexpr u32 DISPATCH_SIZE = 1024;
223 static constexpr VkMemoryBarrier WRITE_BARRIER{
224 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
225 .pNext = nullptr,
226 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
227 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
228 };
227 const std::array push_constants = {base_vertex, index_shift}; 229 const std::array push_constants = {base_vertex, index_shift};
228 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 230 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
229 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 231 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
230 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 232 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
231 &push_constants); 233 &push_constants);
232 cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); 234 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
233
234 VkBufferMemoryBarrier barrier;
235 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
236 barrier.pNext = nullptr;
237 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
238 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
239 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
240 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
241 barrier.buffer = buffer;
242 barrier.offset = 0;
243 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
244 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 235 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
245 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); 236 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
246 }); 237 });
247 return {staging.buffer, 0}; 238 return {staging.buffer, staging.offset};
248} 239}
249 240
250} // namespace Vulkan 241} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 4904019f5..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -50,7 +50,8 @@ public:
50 50
51 /// Assemble uint8 indices into an uint16 index buffer 51 /// Assemble uint8 indices into an uint16 index buffer
52 /// Returns a pair with the staging buffer, and the offset where the assembled data is 52 /// Returns a pair with the staging buffer, and the offset where the assembled data is
53 std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset); 53 std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
54 u32 src_offset);
54 55
55private: 56private:
56 VKScheduler& scheduler; 57 VKScheduler& scheduler;
@@ -66,9 +67,9 @@ public:
66 VKUpdateDescriptorQueue& update_descriptor_queue_); 67 VKUpdateDescriptorQueue& update_descriptor_queue_);
67 ~QuadIndexedPass(); 68 ~QuadIndexedPass();
68 69
69 std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, 70 std::pair<VkBuffer, VkDeviceSize> Assemble(
70 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, 71 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
71 u32 src_offset); 72 u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
72 73
73private: 74private:
74 VKScheduler& scheduler; 75 VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..275d740b8 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@
8 8
9#include <fmt/format.h> 9#include <fmt/format.h>
10 10
11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
12#include "common/bit_util.h" 13#include "common/bit_util.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
@@ -17,14 +18,117 @@
17#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
18 19
19namespace Vulkan { 20namespace Vulkan {
21namespace {
22// Maximum potential alignment of a Vulkan buffer
23constexpr VkDeviceSize MAX_ALIGNMENT = 256;
24// Maximum size to put elements in the stream buffer
25constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
26// Stream buffer size in bytes
27constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
28constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
29
30constexpr VkMemoryPropertyFlags HOST_FLAGS =
31 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
32constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
33
34bool IsStreamHeap(VkMemoryHeap heap) noexcept {
35 return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
36}
37
38std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
39 VkMemoryPropertyFlags flags) noexcept {
40 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
41 if (((type_mask >> type_index) & 1) == 0) {
42 // Memory type is incompatible
43 continue;
44 }
45 const VkMemoryType& memory_type = props.memoryTypes[type_index];
46 if ((memory_type.propertyFlags & flags) != flags) {
47 // Memory type doesn't have the flags we want
48 continue;
49 }
50 if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
51 // Memory heap is not suitable for streaming
52 continue;
53 }
54 // Success!
55 return type_index;
56 }
57 return std::nullopt;
58}
59
60u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
61 // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
62 std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
63 if (type) {
64 return *type;
65 }
66 // Otherwise try without the DEVICE_LOCAL_BIT
67 type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
68 if (type) {
69 return *type;
70 }
71 // This should never happen, and in case it does, signal it as an out of memory situation
72 throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
73}
74
75size_t Region(size_t iterator) noexcept {
76 return iterator / REGION_SIZE;
77}
78} // Anonymous namespace
20 79
21StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, 80StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
22 VKScheduler& scheduler_) 81 VKScheduler& scheduler_)
23 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} 82 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
83 const vk::Device& dev = device.GetLogical();
84 stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
85 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
86 .pNext = nullptr,
87 .flags = 0,
88 .size = STREAM_BUFFER_SIZE,
89 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
90 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
91 .queueFamilyIndexCount = 0,
92 .pQueueFamilyIndices = nullptr,
93 });
94 if (device.HasDebuggingToolAttached()) {
95 stream_buffer.SetObjectNameEXT("Stream Buffer");
96 }
97 VkMemoryDedicatedRequirements dedicated_reqs{
98 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
99 .pNext = nullptr,
100 .prefersDedicatedAllocation = VK_FALSE,
101 .requiresDedicatedAllocation = VK_FALSE,
102 };
103 const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
104 const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
105 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
106 const VkMemoryDedicatedAllocateInfo dedicated_info{
107 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
108 .pNext = nullptr,
109 .image = nullptr,
110 .buffer = *stream_buffer,
111 };
112 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
113 stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
114 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
115 .pNext = make_dedicated ? &dedicated_info : nullptr,
116 .allocationSize = requirements.size,
117 .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
118 });
119 if (device.HasDebuggingToolAttached()) {
120 stream_memory.SetObjectNameEXT("Stream Buffer Memory");
121 }
122 stream_buffer.BindMemory(*stream_memory, 0);
123 stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
124}
24 125
25StagingBufferPool::~StagingBufferPool() = default; 126StagingBufferPool::~StagingBufferPool() = default;
26 127
27StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { 128StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
129 if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
130 return GetStreamBuffer(size);
131 }
28 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { 132 if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
29 return *ref; 133 return *ref;
30 } 134 }
@@ -39,6 +143,42 @@ void StagingBufferPool::TickFrame() {
39 ReleaseCache(MemoryUsage::Download); 143 ReleaseCache(MemoryUsage::Download);
40} 144}
41 145
146StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
147 for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
148 ++region) {
149 sync_ticks[region] = scheduler.CurrentTick();
150 }
151 used_iterator = iterator;
152
153 for (size_t region = Region(free_iterator) + 1,
154 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
155 region < region_end; ++region) {
156 scheduler.Wait(sync_ticks[region]);
157 }
158 if (iterator + size > free_iterator) {
159 free_iterator = iterator + size;
160 }
161 if (iterator + size > STREAM_BUFFER_SIZE) {
162 for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
163 sync_ticks[region] = scheduler.CurrentTick();
164 }
165 used_iterator = 0;
166 iterator = 0;
167 free_iterator = size;
168
169 for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
170 scheduler.Wait(sync_ticks[region]);
171 }
172 }
173 const size_t offset = iterator;
174 iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
175 return StagingBufferRef{
176 .buffer = *stream_buffer,
177 .offset = static_cast<VkDeviceSize>(offset),
178 .mapped_span = std::span<u8>(stream_pointer + offset, size),
179 };
180}
181
42std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, 182std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
43 MemoryUsage usage) { 183 MemoryUsage usage) {
44 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; 184 StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..4ed99c0df 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;
19 19
20struct StagingBufferRef { 20struct StagingBufferRef {
21 VkBuffer buffer; 21 VkBuffer buffer;
22 VkDeviceSize offset;
22 std::span<u8> mapped_span; 23 std::span<u8> mapped_span;
23}; 24};
24 25
25class StagingBufferPool { 26class StagingBufferPool {
26public: 27public:
28 static constexpr size_t NUM_SYNCS = 16;
29
27 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, 30 explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
28 VKScheduler& scheduler); 31 VKScheduler& scheduler);
29 ~StagingBufferPool(); 32 ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
33 void TickFrame(); 36 void TickFrame();
34 37
35private: 38private:
39 struct StreamBufferCommit {
40 size_t upper_bound;
41 u64 tick;
42 };
43
36 struct StagingBuffer { 44 struct StagingBuffer {
37 vk::Buffer buffer; 45 vk::Buffer buffer;
38 MemoryCommit commit; 46 MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
42 StagingBufferRef Ref() const noexcept { 50 StagingBufferRef Ref() const noexcept {
43 return { 51 return {
44 .buffer = *buffer, 52 .buffer = *buffer,
53 .offset = 0,
45 .mapped_span = mapped_span, 54 .mapped_span = mapped_span,
46 }; 55 };
47 } 56 }
@@ -56,6 +65,8 @@ private:
56 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; 65 static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
57 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; 66 using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
58 67
68 StagingBufferRef GetStreamBuffer(size_t size);
69
59 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); 70 std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
60 71
61 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); 72 StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +81,15 @@ private:
70 MemoryAllocator& memory_allocator; 81 MemoryAllocator& memory_allocator;
71 VKScheduler& scheduler; 82 VKScheduler& scheduler;
72 83
84 vk::Buffer stream_buffer;
85 vk::DeviceMemory stream_memory;
86 u8* stream_pointer = nullptr;
87
88 size_t iterator = 0;
89 size_t used_iterator = 0;
90 size_t free_iterator = 0;
91 std::array<u64, NUM_SYNCS> sync_ticks{};
92
73 StagingBuffersCache device_local_cache; 93 StagingBuffersCache device_local_cache;
74 StagingBuffersCache upload_cache; 94 StagingBuffersCache upload_cache;
75 StagingBuffersCache download_cache; 95 StagingBuffersCache download_cache;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 1eeb45ca9..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -818,11 +818,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
818 } 818 }
819} 819}
820 820
821void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, 821void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
822 std::span<const BufferImageCopy> copies) {
823 // TODO: Move this to another API 822 // TODO: Move this to another API
824 scheduler->RequestOutsideRenderPassOperationContext(); 823 scheduler->RequestOutsideRenderPassOperationContext();
825 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); 824 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
826 const VkBuffer src_buffer = map.buffer; 825 const VkBuffer src_buffer = map.buffer;
827 const VkImage vk_image = *image; 826 const VkImage vk_image = *image;
828 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 827 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -833,11 +832,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
833 }); 832 });
834} 833}
835 834
836void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, 835void Image::UploadMemory(const StagingBufferRef& map,
837 std::span<const VideoCommon::BufferCopy> copies) { 836 std::span<const VideoCommon::BufferCopy> copies) {
838 // TODO: Move this to another API 837 // TODO: Move this to another API
839 scheduler->RequestOutsideRenderPassOperationContext(); 838 scheduler->RequestOutsideRenderPassOperationContext();
840 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); 839 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
841 const VkBuffer src_buffer = map.buffer; 840 const VkBuffer src_buffer = map.buffer;
842 const VkBuffer dst_buffer = *buffer; 841 const VkBuffer dst_buffer = *buffer;
843 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { 842 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
@@ -846,9 +845,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
846 }); 845 });
847} 846}
848 847
849void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, 848void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
850 std::span<const BufferImageCopy> copies) { 849 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
851 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
852 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, 850 scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
853 vk_copies](vk::CommandBuffer cmdbuf) { 851 vk_copies](vk::CommandBuffer cmdbuf) {
854 const VkImageMemoryBarrier read_barrier{ 852 const VkImageMemoryBarrier read_barrier{
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4558c3297..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -82,7 +82,7 @@ struct TextureCacheRuntime {
82 return false; 82 return false;
83 } 83 }
84 84
85 void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t, 85 void AccelerateImageUpload(Image&, const StagingBufferRef&,
86 std::span<const VideoCommon::SwizzleParameters>) { 86 std::span<const VideoCommon::SwizzleParameters>) {
87 UNREACHABLE(); 87 UNREACHABLE();
88 } 88 }
@@ -100,13 +100,12 @@ public:
100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 100 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
101 VAddr cpu_addr); 101 VAddr cpu_addr);
102 102
103 void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, 103 void UploadMemory(const StagingBufferRef& map,
104 std::span<const VideoCommon::BufferImageCopy> copies); 104 std::span<const VideoCommon::BufferImageCopy> copies);
105 105
106 void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, 106 void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
107 std::span<const VideoCommon::BufferCopy> copies);
108 107
109 void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, 108 void DownloadMemory(const StagingBufferRef& map,
110 std::span<const VideoCommon::BufferImageCopy> copies); 109 std::span<const VideoCommon::BufferImageCopy> copies);
111 110
112 [[nodiscard]] VkImage Handle() const noexcept { 111 [[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f336b705f..b1da69971 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -212,7 +212,7 @@ private:
212 212
213 /// Upload data from guest to an image 213 /// Upload data from guest to an image
214 template <typename StagingBuffer> 214 template <typename StagingBuffer>
215 void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); 215 void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
216 216
217 /// Find or create an image view from a guest descriptor 217 /// Find or create an image view from a guest descriptor
218 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); 218 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -592,7 +592,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
592 Image& image = slot_images[image_id]; 592 Image& image = slot_images[image_id];
593 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); 593 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
594 const auto copies = FullDownloadCopies(image.info); 594 const auto copies = FullDownloadCopies(image.info);
595 image.DownloadMemory(map, 0, copies); 595 image.DownloadMemory(map, copies);
596 runtime.Finish(); 596 runtime.Finish();
597 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 597 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
598 } 598 }
@@ -750,24 +750,24 @@ void TextureCache<P>::PopAsyncFlushes() {
750 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 750 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
751 } 751 }
752 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); 752 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
753 size_t buffer_offset = 0; 753 const size_t original_offset = download_map.offset;
754 for (const ImageId image_id : download_ids) { 754 for (const ImageId image_id : download_ids) {
755 Image& image = slot_images[image_id]; 755 Image& image = slot_images[image_id];
756 const auto copies = FullDownloadCopies(image.info); 756 const auto copies = FullDownloadCopies(image.info);
757 image.DownloadMemory(download_map, buffer_offset, copies); 757 image.DownloadMemory(download_map, copies);
758 buffer_offset += image.unswizzled_size_bytes; 758 download_map.offset += image.unswizzled_size_bytes;
759 } 759 }
760 // Wait for downloads to finish 760 // Wait for downloads to finish
761 runtime.Finish(); 761 runtime.Finish();
762 762
763 buffer_offset = 0; 763 download_map.offset = original_offset;
764 const std::span<u8> download_span = download_map.mapped_span; 764 std::span<u8> download_span = download_map.mapped_span;
765 for (const ImageId image_id : download_ids) { 765 for (const ImageId image_id : download_ids) {
766 const ImageBase& image = slot_images[image_id]; 766 const ImageBase& image = slot_images[image_id];
767 const auto copies = FullDownloadCopies(image.info); 767 const auto copies = FullDownloadCopies(image.info);
768 const std::span<u8> image_download_span = download_span.subspan(buffer_offset); 768 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
769 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); 769 download_map.offset += image.unswizzled_size_bytes;
770 buffer_offset += image.unswizzled_size_bytes; 770 download_span = download_span.subspan(image.unswizzled_size_bytes);
771 } 771 }
772 committed_downloads.pop(); 772 committed_downloads.pop();
773} 773}
@@ -798,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) {
798 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 798 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
799 return; 799 return;
800 } 800 }
801 auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); 801 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
802 UploadImageContents(image, map, 0); 802 UploadImageContents(image, staging);
803 runtime.InsertUploadMemoryBarrier(); 803 runtime.InsertUploadMemoryBarrier();
804} 804}
805 805
806template <class P> 806template <class P>
807template <typename MapBuffer> 807template <typename StagingBuffer>
808void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { 808void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
809 const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset); 809 const std::span<u8> mapped_span = staging.mapped_span;
810 const GPUVAddr gpu_addr = image.gpu_addr; 810 const GPUVAddr gpu_addr = image.gpu_addr;
811 811
812 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 812 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
813 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 813 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
814 const auto uploads = FullUploadSwizzles(image.info); 814 const auto uploads = FullUploadSwizzles(image.info);
815 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); 815 runtime.AccelerateImageUpload(image, staging, uploads);
816 } else if (True(image.flags & ImageFlagBits::Converted)) { 816 } else if (True(image.flags & ImageFlagBits::Converted)) {
817 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 817 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
818 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 818 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
819 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 819 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
820 image.UploadMemory(map, buffer_offset, copies); 820 image.UploadMemory(staging, copies);
821 } else if (image.info.type == ImageType::Buffer) { 821 } else if (image.info.type == ImageType::Buffer) {
822 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; 822 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
823 image.UploadMemory(map, buffer_offset, copies); 823 image.UploadMemory(staging, copies);
824 } else { 824 } else {
825 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 825 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
826 image.UploadMemory(map, buffer_offset, copies); 826 image.UploadMemory(staging, copies);
827 } 827 }
828} 828}
829 829
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index d39bbdc70..2aa0ffbe6 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -168,7 +168,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
168 X(vkFreeCommandBuffers); 168 X(vkFreeCommandBuffers);
169 X(vkFreeDescriptorSets); 169 X(vkFreeDescriptorSets);
170 X(vkFreeMemory); 170 X(vkFreeMemory);
171 X(vkGetBufferMemoryRequirements); 171 X(vkGetBufferMemoryRequirements2);
172 X(vkGetDeviceQueue); 172 X(vkGetDeviceQueue);
173 X(vkGetEventStatus); 173 X(vkGetEventStatus);
174 X(vkGetFenceStatus); 174 X(vkGetFenceStatus);
@@ -786,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const {
786 return DeviceMemory(memory, handle, *dld); 786 return DeviceMemory(memory, handle, *dld);
787} 787}
788 788
789VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { 789VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer,
790 VkMemoryRequirements requirements; 790 void* pnext) const noexcept {
791 dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); 791 const VkBufferMemoryRequirementsInfo2 info{
792 return requirements; 792 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
793 .pNext = nullptr,
794 .buffer = buffer,
795 };
796 VkMemoryRequirements2 requirements{
797 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
798 .pNext = pnext,
799 .memoryRequirements{},
800 };
801 dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements);
802 return requirements.memoryRequirements;
793} 803}
794 804
795VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { 805VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept {
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 7f781b081..3e36d356a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -283,7 +283,7 @@ struct DeviceDispatch : InstanceDispatch {
283 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; 283 PFN_vkFreeCommandBuffers vkFreeCommandBuffers{};
284 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; 284 PFN_vkFreeDescriptorSets vkFreeDescriptorSets{};
285 PFN_vkFreeMemory vkFreeMemory{}; 285 PFN_vkFreeMemory vkFreeMemory{};
286 PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; 286 PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{};
287 PFN_vkGetDeviceQueue vkGetDeviceQueue{}; 287 PFN_vkGetDeviceQueue vkGetDeviceQueue{};
288 PFN_vkGetEventStatus vkGetEventStatus{}; 288 PFN_vkGetEventStatus vkGetEventStatus{};
289 PFN_vkGetFenceStatus vkGetFenceStatus{}; 289 PFN_vkGetFenceStatus vkGetFenceStatus{};
@@ -871,7 +871,8 @@ public:
871 871
872 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; 872 DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const;
873 873
874 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; 874 VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer,
875 void* pnext = nullptr) const noexcept;
875 876
876 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; 877 VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept;
877 878