diff options
Diffstat (limited to 'src/video_core/renderer_vulkan')
7 files changed, 214 insertions, 64 deletions
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 48fc5d966..4f1e4ec28 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | |||
| 138 | void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, | 138 | void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, |
| 139 | u32 base_vertex, u32 num_indices, VkBuffer buffer, | 139 | u32 base_vertex, u32 num_indices, VkBuffer buffer, |
| 140 | u32 offset, [[maybe_unused]] u32 size) { | 140 | u32 offset, [[maybe_unused]] u32 size) { |
| 141 | VkIndexType index_type = MaxwellToVK::IndexFormat(index_format); | 141 | VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); |
| 142 | VkDeviceSize vk_offset = offset; | ||
| 142 | if (topology == PrimitiveTopology::Quads) { | 143 | if (topology == PrimitiveTopology::Quads) { |
| 143 | index_type = VK_INDEX_TYPE_UINT32; | 144 | vk_index_type = VK_INDEX_TYPE_UINT32; |
| 144 | std::tie(buffer, offset) = | 145 | std::tie(buffer, vk_offset) = |
| 145 | quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | 146 | quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); |
| 146 | } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | 147 | } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { |
| 147 | index_type = VK_INDEX_TYPE_UINT16; | 148 | vk_index_type = VK_INDEX_TYPE_UINT16; |
| 148 | std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset); | 149 | std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); |
| 149 | } | 150 | } |
| 150 | scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) { | 151 | scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { |
| 151 | cmdbuf.BindIndexBuffer(buffer, offset, index_type); | 152 | cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type); |
| 152 | }); | 153 | }); |
| 153 | } | 154 | } |
| 154 | 155 | ||
| @@ -251,10 +252,10 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle | |||
| 251 | } | 252 | } |
| 252 | } | 253 | } |
| 253 | scheduler.RequestOutsideRenderPassOperationContext(); | 254 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 254 | scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut, | 255 | scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, |
| 255 | size_bytes](vk::CommandBuffer cmdbuf) { | 256 | dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { |
| 256 | const VkBufferCopy copy{ | 257 | const VkBufferCopy copy{ |
| 257 | .srcOffset = 0, | 258 | .srcOffset = src_offset, |
| 258 | .dstOffset = 0, | 259 | .dstOffset = 0, |
| 259 | .size = size_bytes, | 260 | .size = size_bytes, |
| 260 | }; | 261 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a4fdcdf81..2f9a7b028 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | ||
| 13 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 14 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 15 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | |||
| 148 | 149 | ||
| 149 | Uint8Pass::~Uint8Pass() = default; | 150 | Uint8Pass::~Uint8Pass() = default; |
| 150 | 151 | ||
| 151 | std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | 152 | std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 152 | u32 src_offset) { | 153 | u32 src_offset) { |
| 153 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | 154 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); |
| 154 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 155 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 155 | 156 | ||
| 156 | update_descriptor_queue.Acquire(); | 157 | update_descriptor_queue.Acquire(); |
| 157 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 158 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 158 | update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); | 159 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 159 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 160 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 160 | 161 | ||
| 161 | scheduler.RequestOutsideRenderPassOperationContext(); | 162 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 162 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 163 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 163 | num_vertices](vk::CommandBuffer cmdbuf) { | 164 | num_vertices](vk::CommandBuffer cmdbuf) { |
| 164 | constexpr u32 dispatch_size = 1024; | 165 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 166 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 167 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 168 | .pNext = nullptr, | ||
| 169 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 170 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 171 | }; | ||
| 165 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 172 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 166 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 173 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 167 | cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); | 174 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); |
| 168 | |||
| 169 | VkBufferMemoryBarrier barrier; | ||
| 170 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 171 | barrier.pNext = nullptr; | ||
| 172 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 173 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 174 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 175 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 176 | barrier.buffer = buffer; | ||
| 177 | barrier.offset = 0; | ||
| 178 | barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); | ||
| 179 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 175 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 180 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 176 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 181 | }); | 177 | }); |
| 182 | return {staging.buffer, 0}; | 178 | return {staging.buffer, staging.offset}; |
| 183 | } | 179 | } |
| 184 | 180 | ||
| 185 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 181 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| @@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | |||
| 194 | 190 | ||
| 195 | QuadIndexedPass::~QuadIndexedPass() = default; | 191 | QuadIndexedPass::~QuadIndexedPass() = default; |
| 196 | 192 | ||
| 197 | std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( | 193 | std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( |
| 198 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | 194 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, |
| 199 | VkBuffer src_buffer, u32 src_offset) { | 195 | VkBuffer src_buffer, u32 src_offset) { |
| 200 | const u32 index_shift = [index_format] { | 196 | const u32 index_shift = [index_format] { |
| @@ -217,34 +213,29 @@ std::pair<VkBuffer, u32> QuadIndexedPass::Assemble( | |||
| 217 | 213 | ||
| 218 | update_descriptor_queue.Acquire(); | 214 | update_descriptor_queue.Acquire(); |
| 219 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 215 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 220 | update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size); | 216 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 221 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 217 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 222 | 218 | ||
| 223 | scheduler.RequestOutsideRenderPassOperationContext(); | 219 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 224 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 220 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 225 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 221 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { |
| 226 | static constexpr u32 dispatch_size = 1024; | 222 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 223 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 224 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 225 | .pNext = nullptr, | ||
| 226 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 227 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 228 | }; | ||
| 227 | const std::array push_constants = {base_vertex, index_shift}; | 229 | const std::array push_constants = {base_vertex, index_shift}; |
| 228 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 230 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 229 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 231 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 230 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | 232 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), |
| 231 | &push_constants); | 233 | &push_constants); |
| 232 | cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | 234 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); |
| 233 | |||
| 234 | VkBufferMemoryBarrier barrier; | ||
| 235 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 236 | barrier.pNext = nullptr; | ||
| 237 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 238 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 239 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 240 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 241 | barrier.buffer = buffer; | ||
| 242 | barrier.offset = 0; | ||
| 243 | barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||
| 244 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 235 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 245 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 236 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 246 | }); | 237 | }); |
| 247 | return {staging.buffer, 0}; | 238 | return {staging.buffer, staging.offset}; |
| 248 | } | 239 | } |
| 249 | 240 | ||
| 250 | } // namespace Vulkan | 241 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 4904019f5..17d781d99 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -50,7 +50,8 @@ public: | |||
| 50 | 50 | ||
| 51 | /// Assemble uint8 indices into an uint16 index buffer | 51 | /// Assemble uint8 indices into an uint16 index buffer |
| 52 | /// Returns a pair with the staging buffer, and the offset where the assembled data is | 52 | /// Returns a pair with the staging buffer, and the offset where the assembled data is |
| 53 | std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset); | 53 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 54 | u32 src_offset); | ||
| 54 | 55 | ||
| 55 | private: | 56 | private: |
| 56 | VKScheduler& scheduler; | 57 | VKScheduler& scheduler; |
| @@ -66,9 +67,9 @@ public: | |||
| 66 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 67 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 67 | ~QuadIndexedPass(); | 68 | ~QuadIndexedPass(); |
| 68 | 69 | ||
| 69 | std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | 70 | std::pair<VkBuffer, VkDeviceSize> Assemble( |
| 70 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | 71 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, |
| 71 | u32 src_offset); | 72 | u32 base_vertex, VkBuffer src_buffer, u32 src_offset); |
| 72 | 73 | ||
| 73 | private: | 74 | private: |
| 74 | VKScheduler& scheduler; | 75 | VKScheduler& scheduler; |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc1..275d740b8 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 10 | 10 | ||
| 11 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 12 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 13 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -17,14 +18,117 @@ | |||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | namespace { | ||
| 22 | // Maximum potential alignment of a Vulkan buffer | ||
| 23 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||
| 24 | // Maximum size to put elements in the stream buffer | ||
| 25 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; | ||
| 26 | // Stream buffer size in bytes | ||
| 27 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||
| 28 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||
| 29 | |||
| 30 | constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||
| 31 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 32 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||
| 33 | |||
| 34 | bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||
| 35 | return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||
| 39 | VkMemoryPropertyFlags flags) noexcept { | ||
| 40 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||
| 41 | if (((type_mask >> type_index) & 1) == 0) { | ||
| 42 | // Memory type is incompatible | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | const VkMemoryType& memory_type = props.memoryTypes[type_index]; | ||
| 46 | if ((memory_type.propertyFlags & flags) != flags) { | ||
| 47 | // Memory type doesn't have the flags we want | ||
| 48 | continue; | ||
| 49 | } | ||
| 50 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | ||
| 51 | // Memory heap is not suitable for streaming | ||
| 52 | continue; | ||
| 53 | } | ||
| 54 | // Success! | ||
| 55 | return type_index; | ||
| 56 | } | ||
| 57 | return std::nullopt; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { | ||
| 61 | // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this | ||
| 62 | std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | ||
| 63 | if (type) { | ||
| 64 | return *type; | ||
| 65 | } | ||
| 66 | // Otherwise try without the DEVICE_LOCAL_BIT | ||
| 67 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | ||
| 68 | if (type) { | ||
| 69 | return *type; | ||
| 70 | } | ||
| 71 | // This should never happen, and in case it does, signal it as an out of memory situation | ||
| 72 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||
| 73 | } | ||
| 74 | |||
| 75 | size_t Region(size_t iterator) noexcept { | ||
| 76 | return iterator / REGION_SIZE; | ||
| 77 | } | ||
| 78 | } // Anonymous namespace | ||
| 20 | 79 | ||
| 21 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 80 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 22 | VKScheduler& scheduler_) | 81 | VKScheduler& scheduler_) |
| 23 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} | 82 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 83 | const vk::Device& dev = device.GetLogical(); | ||
| 84 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||
| 85 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 86 | .pNext = nullptr, | ||
| 87 | .flags = 0, | ||
| 88 | .size = STREAM_BUFFER_SIZE, | ||
| 89 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | ||
| 90 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 91 | .queueFamilyIndexCount = 0, | ||
| 92 | .pQueueFamilyIndices = nullptr, | ||
| 93 | }); | ||
| 94 | if (device.HasDebuggingToolAttached()) { | ||
| 95 | stream_buffer.SetObjectNameEXT("Stream Buffer"); | ||
| 96 | } | ||
| 97 | VkMemoryDedicatedRequirements dedicated_reqs{ | ||
| 98 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, | ||
| 99 | .pNext = nullptr, | ||
| 100 | .prefersDedicatedAllocation = VK_FALSE, | ||
| 101 | .requiresDedicatedAllocation = VK_FALSE, | ||
| 102 | }; | ||
| 103 | const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); | ||
| 104 | const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || | ||
| 105 | dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; | ||
| 106 | const VkMemoryDedicatedAllocateInfo dedicated_info{ | ||
| 107 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, | ||
| 108 | .pNext = nullptr, | ||
| 109 | .image = nullptr, | ||
| 110 | .buffer = *stream_buffer, | ||
| 111 | }; | ||
| 112 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | ||
| 113 | stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ | ||
| 114 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||
| 115 | .pNext = make_dedicated ? &dedicated_info : nullptr, | ||
| 116 | .allocationSize = requirements.size, | ||
| 117 | .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), | ||
| 118 | }); | ||
| 119 | if (device.HasDebuggingToolAttached()) { | ||
| 120 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | ||
| 121 | } | ||
| 122 | stream_buffer.BindMemory(*stream_memory, 0); | ||
| 123 | stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | ||
| 124 | } | ||
| 24 | 125 | ||
| 25 | StagingBufferPool::~StagingBufferPool() = default; | 126 | StagingBufferPool::~StagingBufferPool() = default; |
| 26 | 127 | ||
| 27 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { | 128 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { |
| 129 | if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { | ||
| 130 | return GetStreamBuffer(size); | ||
| 131 | } | ||
| 28 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | 132 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { |
| 29 | return *ref; | 133 | return *ref; |
| 30 | } | 134 | } |
| @@ -39,6 +143,42 @@ void StagingBufferPool::TickFrame() { | |||
| 39 | ReleaseCache(MemoryUsage::Download); | 143 | ReleaseCache(MemoryUsage::Download); |
| 40 | } | 144 | } |
| 41 | 145 | ||
| 146 | StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||
| 147 | for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; | ||
| 148 | ++region) { | ||
| 149 | sync_ticks[region] = scheduler.CurrentTick(); | ||
| 150 | } | ||
| 151 | used_iterator = iterator; | ||
| 152 | |||
| 153 | for (size_t region = Region(free_iterator) + 1, | ||
| 154 | region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); | ||
| 155 | region < region_end; ++region) { | ||
| 156 | scheduler.Wait(sync_ticks[region]); | ||
| 157 | } | ||
| 158 | if (iterator + size > free_iterator) { | ||
| 159 | free_iterator = iterator + size; | ||
| 160 | } | ||
| 161 | if (iterator + size > STREAM_BUFFER_SIZE) { | ||
| 162 | for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { | ||
| 163 | sync_ticks[region] = scheduler.CurrentTick(); | ||
| 164 | } | ||
| 165 | used_iterator = 0; | ||
| 166 | iterator = 0; | ||
| 167 | free_iterator = size; | ||
| 168 | |||
| 169 | for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||
| 170 | scheduler.Wait(sync_ticks[region]); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | const size_t offset = iterator; | ||
| 174 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 175 | return StagingBufferRef{ | ||
| 176 | .buffer = *stream_buffer, | ||
| 177 | .offset = static_cast<VkDeviceSize>(offset), | ||
| 178 | .mapped_span = std::span<u8>(stream_pointer + offset, size), | ||
| 179 | }; | ||
| 180 | } | ||
| 181 | |||
| 42 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, | 182 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, |
| 43 | MemoryUsage usage) { | 183 | MemoryUsage usage) { |
| 44 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; | 184 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47..4ed99c0df 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -19,11 +19,14 @@ class VKScheduler; | |||
| 19 | 19 | ||
| 20 | struct StagingBufferRef { | 20 | struct StagingBufferRef { |
| 21 | VkBuffer buffer; | 21 | VkBuffer buffer; |
| 22 | VkDeviceSize offset; | ||
| 22 | std::span<u8> mapped_span; | 23 | std::span<u8> mapped_span; |
| 23 | }; | 24 | }; |
| 24 | 25 | ||
| 25 | class StagingBufferPool { | 26 | class StagingBufferPool { |
| 26 | public: | 27 | public: |
| 28 | static constexpr size_t NUM_SYNCS = 16; | ||
| 29 | |||
| 27 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, | 30 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, |
| 28 | VKScheduler& scheduler); | 31 | VKScheduler& scheduler); |
| 29 | ~StagingBufferPool(); | 32 | ~StagingBufferPool(); |
| @@ -33,6 +36,11 @@ public: | |||
| 33 | void TickFrame(); | 36 | void TickFrame(); |
| 34 | 37 | ||
| 35 | private: | 38 | private: |
| 39 | struct StreamBufferCommit { | ||
| 40 | size_t upper_bound; | ||
| 41 | u64 tick; | ||
| 42 | }; | ||
| 43 | |||
| 36 | struct StagingBuffer { | 44 | struct StagingBuffer { |
| 37 | vk::Buffer buffer; | 45 | vk::Buffer buffer; |
| 38 | MemoryCommit commit; | 46 | MemoryCommit commit; |
| @@ -42,6 +50,7 @@ private: | |||
| 42 | StagingBufferRef Ref() const noexcept { | 50 | StagingBufferRef Ref() const noexcept { |
| 43 | return { | 51 | return { |
| 44 | .buffer = *buffer, | 52 | .buffer = *buffer, |
| 53 | .offset = 0, | ||
| 45 | .mapped_span = mapped_span, | 54 | .mapped_span = mapped_span, |
| 46 | }; | 55 | }; |
| 47 | } | 56 | } |
| @@ -56,6 +65,8 @@ private: | |||
| 56 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; | 65 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; |
| 57 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; | 66 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; |
| 58 | 67 | ||
| 68 | StagingBufferRef GetStreamBuffer(size_t size); | ||
| 69 | |||
| 59 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); | 70 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); |
| 60 | 71 | ||
| 61 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); | 72 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); |
| @@ -70,6 +81,15 @@ private: | |||
| 70 | MemoryAllocator& memory_allocator; | 81 | MemoryAllocator& memory_allocator; |
| 71 | VKScheduler& scheduler; | 82 | VKScheduler& scheduler; |
| 72 | 83 | ||
| 84 | vk::Buffer stream_buffer; | ||
| 85 | vk::DeviceMemory stream_memory; | ||
| 86 | u8* stream_pointer = nullptr; | ||
| 87 | |||
| 88 | size_t iterator = 0; | ||
| 89 | size_t used_iterator = 0; | ||
| 90 | size_t free_iterator = 0; | ||
| 91 | std::array<u64, NUM_SYNCS> sync_ticks{}; | ||
| 92 | |||
| 73 | StagingBuffersCache device_local_cache; | 93 | StagingBuffersCache device_local_cache; |
| 74 | StagingBuffersCache upload_cache; | 94 | StagingBuffersCache upload_cache; |
| 75 | StagingBuffersCache download_cache; | 95 | StagingBuffersCache download_cache; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1eeb45ca9..22a1014a9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -818,11 +818,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 818 | } | 818 | } |
| 819 | } | 819 | } |
| 820 | 820 | ||
| 821 | void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | 821 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 822 | std::span<const BufferImageCopy> copies) { | ||
| 823 | // TODO: Move this to another API | 822 | // TODO: Move this to another API |
| 824 | scheduler->RequestOutsideRenderPassOperationContext(); | 823 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 825 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 824 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 826 | const VkBuffer src_buffer = map.buffer; | 825 | const VkBuffer src_buffer = map.buffer; |
| 827 | const VkImage vk_image = *image; | 826 | const VkImage vk_image = *image; |
| 828 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 827 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| @@ -833,11 +832,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | |||
| 833 | }); | 832 | }); |
| 834 | } | 833 | } |
| 835 | 834 | ||
| 836 | void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | 835 | void Image::UploadMemory(const StagingBufferRef& map, |
| 837 | std::span<const VideoCommon::BufferCopy> copies) { | 836 | std::span<const VideoCommon::BufferCopy> copies) { |
| 838 | // TODO: Move this to another API | 837 | // TODO: Move this to another API |
| 839 | scheduler->RequestOutsideRenderPassOperationContext(); | 838 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 840 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); | 839 | std::vector vk_copies = TransformBufferCopies(copies, map.offset); |
| 841 | const VkBuffer src_buffer = map.buffer; | 840 | const VkBuffer src_buffer = map.buffer; |
| 842 | const VkBuffer dst_buffer = *buffer; | 841 | const VkBuffer dst_buffer = *buffer; |
| 843 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | 842 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| @@ -846,9 +845,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | |||
| 846 | }); | 845 | }); |
| 847 | } | 846 | } |
| 848 | 847 | ||
| 849 | void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, | 848 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 850 | std::span<const BufferImageCopy> copies) { | 849 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 851 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | ||
| 852 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, | 850 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, |
| 853 | vk_copies](vk::CommandBuffer cmdbuf) { | 851 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 854 | const VkImageMemoryBarrier read_barrier{ | 852 | const VkImageMemoryBarrier read_barrier{ |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4558c3297..b08c23459 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -82,7 +82,7 @@ struct TextureCacheRuntime { | |||
| 82 | return false; | 82 | return false; |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t, | 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, |
| 86 | std::span<const VideoCommon::SwizzleParameters>) { | 86 | std::span<const VideoCommon::SwizzleParameters>) { |
| 87 | UNREACHABLE(); | 87 | UNREACHABLE(); |
| 88 | } | 88 | } |
| @@ -100,13 +100,12 @@ public: | |||
| 100 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 100 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 101 | VAddr cpu_addr); | 101 | VAddr cpu_addr); |
| 102 | 102 | ||
| 103 | void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | 103 | void UploadMemory(const StagingBufferRef& map, |
| 104 | std::span<const VideoCommon::BufferImageCopy> copies); | 104 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 105 | 105 | ||
| 106 | void UploadMemory(const StagingBufferRef& map, size_t buffer_offset, | 106 | void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); |
| 107 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 108 | 107 | ||
| 109 | void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset, | 108 | void DownloadMemory(const StagingBufferRef& map, |
| 110 | std::span<const VideoCommon::BufferImageCopy> copies); | 109 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 111 | 110 | ||
| 112 | [[nodiscard]] VkImage Handle() const noexcept { | 111 | [[nodiscard]] VkImage Handle() const noexcept { |