Diffstat (limited to 'src'):

 src/video_core/buffer_cache/buffer_cache.h            |   7
 src/video_core/host_shaders/vulkan_quad_indexed.comp  |   6
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp      |   4
 src/video_core/renderer_vulkan/vk_buffer_cache.cpp    | 299
 src/video_core/renderer_vulkan/vk_buffer_cache.h      |  13
 src/video_core/renderer_vulkan/vk_compute_pass.cpp    |  12
 src/video_core/renderer_vulkan/vk_compute_pass.h      |   2
 src/video_core/renderer_vulkan/vk_rasterizer.cpp      |   8
 8 files changed, 229 insertions(+), 122 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 502b4d90a..158360830 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -666,9 +666,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
         const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
-        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
-            runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first,
-                                             draw_state.vertex_buffer.count);
+        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
+            draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+            runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
+                                        draw_state.vertex_buffer.count);
         }
     }
     BindHostVertexBuffers();
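
Note: the hunk above routes QuadStrip draws through the same runtime entry point as Quads. A
quad strip of N vertices contains (N - 2) / 2 quads, each sharing an edge with its predecessor,
and each quad expands to two triangles. A minimal CPU-side sketch of that expansion (a
hypothetical helper, not part of the patch; the swizzle matches quad_strip_swizzle in the
compute shader below):

    #include <array>
    #include <cstdint>
    #include <vector>

    // Expands quad-strip vertices into triangle-list indices. Strip quad q uses
    // vertices {2q, 2q+1, 2q+2, 2q+3}; the (0, 3, 1, 0, 2, 3) swizzle emits its
    // two triangles with consistent winding.
    std::vector<std::uint32_t> ExpandQuadStrip(std::uint32_t num_vertices, std::uint32_t first) {
        static constexpr std::array<std::uint32_t, 6> swizzle{0, 3, 1, 0, 2, 3};
        std::vector<std::uint32_t> indices;
        const std::uint32_t num_quads = num_vertices >= 4 ? (num_vertices - 2) / 2 : 0;
        for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
            for (const std::uint32_t offset : swizzle) {
                indices.push_back(first + quad * 2 + offset);
            }
        }
        return indices;
    }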
diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index a412f30ff..066fe4a9c 100644
--- a/src/video_core/host_shaders/vulkan_quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
 layout (push_constant) uniform PushConstants {
     uint base_vertex;
     int index_shift; // 0: uint8, 1: uint16, 2: uint32
+    int is_strip; // 0: quads 1: quadstrip
 };
 
 void main() {
@@ -28,9 +29,10 @@ void main() {
     int flipped_shift = 2 - index_shift;
     int mask = (1 << flipped_shift) - 1;
 
-    const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+    const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+    const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3);
     for (uint vertex = 0; vertex < 6; ++vertex) {
-        int offset = primitive * 4 + quad_swizzle[vertex];
+        int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]);
         int int_offset = offset >> flipped_shift;
         int bit_offset = (offset & mask) * index_size;
         uint packed_input = input_indexes[int_offset];
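
Note: a worked example of the indexing math above (annotation, not part of the patch). With
16-bit indices, index_shift = 1, so flipped_shift = 1 and mask = 1, and every 32-bit word of
input_indexes packs two indices. For strip primitive 1, vertex 1, quad_strip_swizzle picks 3,
giving offset = 1 * 2 + 3 = 5: the index lives in word 5 >> 1 = 2 at bit offset
(5 & 1) * 16 = 16. A C++ sketch of the same extraction, with the 16-bit constants hardcoded:

    #include <cstdint>

    // Mirrors the shader's extraction of a 16-bit index packed into 32-bit words
    // (index_shift = 1, hence flipped_shift = 1 and mask = 1).
    std::uint32_t ExtractIndex16(const std::uint32_t* input_indexes, int offset) {
        const int flipped_shift = 1;     // 2 - index_shift
        const int mask = 1;              // (1 << flipped_shift) - 1
        const int index_size = 16;       // index width in bits
        const int int_offset = offset >> flipped_shift;
        const int bit_offset = (offset & mask) * index_size;
        const std::uint32_t packed_input = input_indexes[int_offset];
        return (packed_input >> bit_offset) & 0xFFFFu;
    }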
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 3e03c5cd6..347cfc133 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -310,7 +310,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
     case Maxwell::PrimitiveTopology::TriangleFan:
         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
     case Maxwell::PrimitiveTopology::Quads:
-        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
+    case Maxwell::PrimitiveTopology::QuadStrip:
+        // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
+        // whenever it releases
         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
     case Maxwell::PrimitiveTopology::Patches:
         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 558b8db56..54a12b35f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) {
     }
 }
 
-template <typename T>
-std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
-    std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
-    for (T& index : indices) {
-        index = static_cast<T>(first + index + quad * 4);
-    }
-    return indices;
-}
-
 vk::Buffer CreateBuffer(const Device& device, u64 size) {
     VkBufferUsageFlags flags =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat
     return *views.back().handle;
 }
 
+class QuadIndexBuffer {
+public:
+    QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                    Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+          staging_pool{staging_pool_} {}
+
+    virtual ~QuadIndexBuffer() = default;
+
+    void UpdateBuffer(u32 num_indices_) {
+        if (num_indices_ <= num_indices) {
+            return;
+        }
+
+        scheduler.Finish();
+
+        num_indices = num_indices_;
+        index_type = IndexTypeFromNumElements(device, num_indices);
+
+        const u32 num_quads = GetQuadsNum(num_indices);
+        const u32 num_triangle_indices = num_quads * 6;
+        const u32 num_first_offset_copies = 4;
+        const size_t bytes_per_index = BytesPerIndex(index_type);
+        const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+        buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .size = size_bytes,
+            .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .pQueueFamilyIndices = nullptr,
+        });
+        if (device.HasDebuggingToolAttached()) {
+            buffer.SetObjectNameEXT("Quad LUT");
+        }
+        memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+
+        const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+        u8* staging_data = staging.mapped_span.data();
+        const size_t quad_size = bytes_per_index * 6;
+
+        for (u32 first = 0; first < num_first_offset_copies; ++first) {
+            for (u32 quad = 0; quad < num_quads; ++quad) {
+                MakeAndUpdateIndices(staging_data, quad_size, quad, first);
+                staging_data += quad_size;
+            }
+        }
+
+        scheduler.RequestOutsideRenderPassOperationContext();
+        scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                          dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+            const VkBufferCopy copy{
+                .srcOffset = src_offset,
+                .dstOffset = 0,
+                .size = size_bytes,
+            };
+            const VkBufferMemoryBarrier write_barrier{
+                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = dst_buffer,
+                .offset = 0,
+                .size = size_bytes,
+            };
+            cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+        });
+    }
+
+    void BindBuffer(u32 first) {
+        const VkIndexType index_type_ = index_type;
+        const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices);
+        const size_t offset =
+            (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type);
+        scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindIndexBuffer(buffer, offset, index_type_);
+        });
+    }
+
+protected:
+    virtual u32 GetQuadsNum(u32 num_indices) const = 0;
+
+    virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0;
+
+    const Device& device;
+    MemoryAllocator& memory_allocator;
+    Scheduler& scheduler;
+    StagingBufferPool& staging_pool;
+
+    vk::Buffer buffer{};
+    MemoryCommit memory_commit{};
+    VkIndexType index_type{};
+    u32 num_indices = 0;
+};
+
+class QuadArrayIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadArrayIndexBuffer() = default;
+
+private:
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ / 4;
+    }
+
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+        for (T& index : indices) {
+            index = static_cast<T>(first + index + quad * 4);
+        }
+        return indices;
+    }
+
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
+class QuadStripIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadStripIndexBuffer() = default;
+
+private:
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0;
+    }
+
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 3, 1, 0, 2, 3};
+        for (T& index : indices) {
+            index = static_cast<T>(first + index + quad * 2);
+        }
+        return indices;
+    }
+
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
 BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
                                        Scheduler& scheduler_, StagingBufferPool& staging_pool_,
                                        UpdateDescriptorQueue& update_descriptor_queue_,
@@ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
+      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {
+    quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
+                                                                     scheduler_, staging_pool_);
+    quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
+                                                                     scheduler_, staging_pool_);
+}
 
 StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
     return staging_pool.Request(size, MemoryUsage::Upload);
@@ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
     VkDeviceSize vk_offset = offset;
    VkBuffer vk_buffer = buffer;
-    if (topology == PrimitiveTopology::Quads) {
+    if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) {
         vk_index_type = VK_INDEX_TYPE_UINT32;
         std::tie(vk_buffer, vk_offset) =
-            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset,
+                                     topology == PrimitiveTopology::QuadStrip);
     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
         vk_index_type = VK_INDEX_TYPE_UINT16;
         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
@@ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
     });
 }
 
-void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) {
     if (count == 0) {
         ReserveNullBuffer();
         scheduler.Record([this](vk::CommandBuffer cmdbuf) {
@@ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
         });
         return;
     }
-    ReserveQuadArrayLUT(first + count, true);
 
-    // The LUT has the indices 0, 1, 2, and 3 copied as an array
-    // To apply these 'first' offsets we can apply an offset based on the modulus.
-    const VkIndexType index_type = quad_array_lut_index_type;
-    const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
-    const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
-    scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindIndexBuffer(buffer, offset, index_type);
-    });
+    if (topology == PrimitiveTopology::Quads) {
+        quad_array_index_buffer->UpdateBuffer(first + count);
+        quad_array_index_buffer->BindBuffer(first);
+    } else if (topology == PrimitiveTopology::QuadStrip) {
+        quad_strip_index_buffer->UpdateBuffer(first + count);
+        quad_strip_index_buffer->BindBuffer(first);
+    }
 }
 
 void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
@@ -320,83 +496,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
     });
 }
 
-void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
-    if (num_indices <= current_num_indices) {
-        return;
-    }
-    if (wait_for_idle) {
-        scheduler.Finish();
-    }
-    current_num_indices = num_indices;
-    quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
-
-    const u32 num_quads = num_indices / 4;
-    const u32 num_triangle_indices = num_quads * 6;
-    const u32 num_first_offset_copies = 4;
-    const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
-    const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
-    quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
-        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .size = size_bytes,
-        .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .queueFamilyIndexCount = 0,
-        .pQueueFamilyIndices = nullptr,
-    });
-    if (device.HasDebuggingToolAttached()) {
-        quad_array_lut.SetObjectNameEXT("Quad LUT");
-    }
-    quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
-
-    const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
-    u8* staging_data = staging.mapped_span.data();
-    const size_t quad_size = bytes_per_index * 6;
-    for (u32 first = 0; first < num_first_offset_copies; ++first) {
-        for (u32 quad = 0; quad < num_quads; ++quad) {
-            switch (quad_array_lut_index_type) {
-            case VK_INDEX_TYPE_UINT8_EXT:
-                std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
-                break;
-            case VK_INDEX_TYPE_UINT16:
-                std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
-                break;
-            case VK_INDEX_TYPE_UINT32:
-                std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
-                break;
-            default:
-                ASSERT(false);
-                break;
-            }
-            staging_data += quad_size;
-        }
-    }
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
-                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
-        const VkBufferCopy copy{
-            .srcOffset = src_offset,
-            .dstOffset = 0,
-            .size = size_bytes,
-        };
-        const VkBufferMemoryBarrier write_barrier{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = dst_buffer,
-            .offset = 0,
-            .size = size_bytes,
-        };
-        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                               0, write_barrier);
-    });
-}
-
 void BufferCacheRuntime::ReserveNullBuffer() {
     if (null_buffer) {
         return;
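
Note on BindBuffer's offset math (annotation, not part of the patch): UpdateBuffer writes
num_first_offset_copies = 4 complete copies of the expanded index list, where copy c has every
index biased by a 'first' remainder of c, so binding for an arbitrary first vertex reduces to
pure offset arithmetic. A sketch of that computation for the quad-list case, assuming 32-bit
indices:

    #include <cstddef>
    #include <cstdint>

    // Locates precomputed indices inside the LUT. Copy c (c = first % 4) absorbs
    // the sub-quad remainder of 'first'; the remaining first / 4 whole quads are
    // skipped within that copy. Each quad occupies 6 indices of 4 bytes each.
    std::size_t QuadLutOffset(std::uint32_t first, std::uint32_t lut_num_indices) {
        const std::size_t quads_per_copy = lut_num_indices / 4; // GetQuadsNum for quad lists
        const std::size_t copy_base = static_cast<std::size_t>(first % 4) * quads_per_copy;
        const std::size_t quads_to_skip = first / 4;            // GetQuadsNum(first)
        return (copy_base + quads_to_skip) * 6 * sizeof(std::uint32_t);
    }

For example, first = 6 against a LUT sized for 100 indices (25 quads per copy) selects copy 2
and skips one quad: (2 * 25 + 1) * 6 * 4 = 1224 bytes.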
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a15c8b39b..183b33632 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -50,6 +50,9 @@ private:
     std::vector<BufferView> views;
 };
 
+class QuadArrayIndexBuffer;
+class QuadStripIndexBuffer;
+
 class BufferCacheRuntime {
     friend Buffer;
 
@@ -86,7 +89,7 @@ public:
     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
 
-    void BindQuadArrayIndexBuffer(u32 first, u32 count);
+    void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count);
 
     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
 
@@ -118,8 +121,6 @@ private:
         update_descriptor_queue.AddBuffer(buffer, offset, size);
     }
 
-    void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
-
     void ReserveNullBuffer();
 
     const Device& device;
@@ -128,10 +129,8 @@ private:
     StagingBufferPool& staging_pool;
     UpdateDescriptorQueue& update_descriptor_queue;
 
-    vk::Buffer quad_array_lut;
-    MemoryCommit quad_array_lut_commit;
-    VkIndexType quad_array_lut_index_type{};
-    u32 current_num_indices = 0;
+    std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer;
+    std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
 
     vk::Buffer null_buffer;
     MemoryCommit null_buffer_commit;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 2c00979d7..1a316b6eb 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
                                  UpdateDescriptorQueue& update_descriptor_queue_)
     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
-                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
+                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV),
       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
       update_descriptor_queue{update_descriptor_queue_} {}
 
@@ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default;
 
 std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
-    VkBuffer src_buffer, u32 src_offset) {
+    VkBuffer src_buffer, u32 src_offset, bool is_strip) {
     const u32 index_shift = [index_format] {
         switch (index_format) {
         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
         return 2;
     }();
     const u32 input_size = num_vertices << index_shift;
-    const u32 num_tri_vertices = (num_vertices / 4) * 6;
+    const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6;
 
     const std::size_t staging_size = num_tri_vertices * sizeof(u32);
     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
@@ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     const void* const descriptor_data{update_descriptor_queue.UpdateData()};
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
-                      index_shift](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift,
+                      is_strip](vk::CommandBuffer cmdbuf) {
         static constexpr u32 DISPATCH_SIZE = 1024;
         static constexpr VkMemoryBarrier WRITE_BARRIER{
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
         };
-        const std::array<u32, 2> push_constants{base_vertex, index_shift};
+        const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u};
         const VkDescriptorSet set = descriptor_allocator.Commit();
         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
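
Note: the push-constant range grows from two to three 32-bit words to carry is_strip. A
host-side mirror of the shader's push-constant block, shown only to illustrate the layout that
the std::array<u32, 3> must match (a hypothetical struct, not in the patch):

    #include <cstdint>

    // Member-for-member view of the PushConstants block in vulkan_quad_indexed.comp.
    struct QuadIndexedPushConstants {
        std::uint32_t base_vertex; // uint base_vertex
        std::int32_t index_shift;  // int index_shift; 0: uint8, 1: uint16, 2: uint32
        std::int32_t is_strip;     // int is_strip; 0: quads, 1: quad strip
    };
    static_assert(sizeof(QuadIndexedPushConstants) == sizeof(std::uint32_t) * 3);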
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5d32e3caf..c4c8fa081 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -74,7 +74,7 @@ public:
 
     std::pair<VkBuffer, VkDeviceSize> Assemble(
         Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
-        u32 base_vertex, VkBuffer src_buffer, u32 src_offset);
+        u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip);
 
 private:
     Scheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 823b9ca2a..3774f303a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
         .first_index = is_indexed ? draw_state.index_buffer.first : 0,
         .is_indexed = is_indexed,
     };
+    // 6 triangle vertices per quad, base vertex is part of the index
+    // See BindQuadIndexBuffer for more details
     if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
-        // 6 triangle vertices per quad, base vertex is part of the index
-        // See BindQuadArrayIndexBuffer for more details
         params.num_vertices = (params.num_vertices / 4) * 6;
         params.base_vertex = 0;
         params.is_indexed = true;
+    } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+        params.num_vertices = (params.num_vertices - 2) / 2 * 6;
+        params.base_vertex = 0;
+        params.is_indexed = true;
     }
     return params;
 }
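
Note: a sanity check of the vertex-count arithmetic in MakeDrawParams (annotation, not part of
the patch). Both branches emit six triangle-list vertices per quad; the strip branch assumes a
draw of at least four vertices:

    #include <cstdint>

    // Triangle-list vertex counts produced from quad primitives; mirrors the two
    // branches above.
    constexpr std::uint32_t QuadsToTris(std::uint32_t n) {
        return (n / 4) * 6; // independent quads: n / 4 quads, 2 triangles each
    }
    constexpr std::uint32_t QuadStripToTris(std::uint32_t n) {
        return (n - 2) / 2 * 6; // strip of n >= 4 vertices: (n - 2) / 2 quads
    }

    static_assert(QuadsToTris(8) == 12);     // 2 independent quads -> 4 triangles
    static_assert(QuadStripToTris(8) == 18); // 3 strip quads -> 6 triangles
    static_assert(QuadStripToTris(4) == 6);  // minimal strip: 1 quad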