author    FengChen 2022-12-12 22:17:33 +0800
committer FengChen 2022-12-26 11:37:34 +0800
commit    86d5b4e556072e86b9af3ac8a4ef6842a8d9df67
tree      238ea02d1aba2663e5a94ae5664812290b2a6326 /src
parent    Merge pull request #9420 from liamwhite/aniso
video_core: Implement vulkan QuadStrip topology
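Maxwell exposes quad-list and quad-strip primitives that core Vulkan cannot draw directly, so both are lowered to VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: non-indexed draws bind a prebuilt lookup index buffer (QuadArrayIndexBuffer / QuadStripIndexBuffer), while indexed draws rewrite the guest indices on the GPU through the vulkan_quad_indexed.comp compute pass. As a minimal CPU sketch (not code from this commit) of the strip expansion those paths implement, using the swizzle and stride from the diff below:

```cpp
#include <array>
#include <cstdint>
#include <vector>

// Strip vertices v0..v(n-1) form (n - 2) / 2 quads; quad q starts at vertex
// 2 * q and becomes two triangles via the swizzle {0, 3, 1, 0, 2, 3}.
std::vector<std::uint32_t> ExpandQuadStrip(std::uint32_t num_vertices) {
    static constexpr std::array<std::uint32_t, 6> swizzle{0, 3, 1, 0, 2, 3};
    const std::uint32_t num_quads = num_vertices >= 4 ? (num_vertices - 2) / 2 : 0;
    std::vector<std::uint32_t> indices;
    indices.reserve(num_quads * 6);
    for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
        for (const std::uint32_t v : swizzle) {
            indices.push_back(quad * 2 + v); // stride 2: adjacent quads share an edge
        }
    }
    return indices;
}
```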
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h            |   7
-rw-r--r--  src/video_core/host_shaders/vulkan_quad_indexed.comp  |   6
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp      |   4
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp    | 299
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h      |  13
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp    |  12
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h      |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp      |   8
8 files changed, 229 insertions(+), 122 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 502b4d90a..158360830 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -666,9 +666,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
         const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
-        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
-            runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first,
-                                             draw_state.vertex_buffer.count);
+        if (draw_state.topology == Maxwell::PrimitiveTopology::Quads ||
+            draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+            runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first,
+                                        draw_state.vertex_buffer.count);
         }
     }
     BindHostVertexBuffers();
diff --git a/src/video_core/host_shaders/vulkan_quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index a412f30ff..066fe4a9c 100644
--- a/src/video_core/host_shaders/vulkan_quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
 layout (push_constant) uniform PushConstants {
     uint base_vertex;
     int index_shift; // 0: uint8, 1: uint16, 2: uint32
+    int is_strip; // 0: quads 1: quadstrip
 };
 
 void main() {
@@ -28,9 +29,10 @@ void main() {
     int flipped_shift = 2 - index_shift;
     int mask = (1 << flipped_shift) - 1;
 
-    const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+    const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
+    const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3);
     for (uint vertex = 0; vertex < 6; ++vertex) {
-        int offset = primitive * 4 + quad_swizzle[vertex];
+        int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]);
         int int_offset = offset >> flipped_shift;
         int bit_offset = (offset & mask) * index_size;
         uint packed_input = input_indexes[int_offset];
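The shader addresses source indices per output primitive: quad lists read `primitive * 4 + quads_swizzle[vertex]`, while quad strips read `primitive * 2 + quad_strip_swizzle[vertex]`, since consecutive strip quads share an edge. A hedged CPU sketch of the packed fetch that follows those offsets (the helper below is illustrative, not shader code; the input is the raw index buffer viewed as u32 words):

```cpp
#include <cstdint>

// index_shift matches the push constant: 0 = uint8, 1 = uint16, 2 = uint32.
std::uint32_t FetchIndex(const std::uint32_t* input_indexes, int index_shift, int offset) {
    const int index_size = 8 << index_shift;   // bits per source index
    const int flipped_shift = 2 - index_shift; // log2(indices packed per u32 word)
    const int mask = (1 << flipped_shift) - 1;
    const int int_offset = offset >> flipped_shift;      // which u32 word
    const int bit_offset = (offset & mask) * index_size; // bit position inside it
    const std::uint32_t packed_input = input_indexes[int_offset];
    const std::uint32_t value_mask =
        index_size == 32 ? 0xFFFFFFFFu : (1u << index_size) - 1u;
    return (packed_input >> bit_offset) & value_mask;
}
```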
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 3e03c5cd6..347cfc133 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -310,7 +310,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
     case Maxwell::PrimitiveTopology::TriangleFan:
         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
     case Maxwell::PrimitiveTopology::Quads:
-        // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
+    case Maxwell::PrimitiveTopology::QuadStrip:
+        // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT
+        // whenever it releases
         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
     case Maxwell::PrimitiveTopology::Patches:
         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 558b8db56..54a12b35f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) {
     }
 }
 
-template <typename T>
-std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
-    std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
-    for (T& index : indices) {
-        index = static_cast<T>(first + index + quad * 4);
-    }
-    return indices;
-}
-
 vk::Buffer CreateBuffer(const Device& device, u64 size) {
     VkBufferUsageFlags flags =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
@@ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat
     return *views.back().handle;
 }
 
+class QuadIndexBuffer {
+public:
+    QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                    Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+          staging_pool{staging_pool_} {}
+
+    virtual ~QuadIndexBuffer() = default;
+
+    void UpdateBuffer(u32 num_indices_) {
+        if (num_indices_ <= num_indices) {
+            return;
+        }
+
+        scheduler.Finish();
+
+        num_indices = num_indices_;
+        index_type = IndexTypeFromNumElements(device, num_indices);
+
+        const u32 num_quads = GetQuadsNum(num_indices);
+        const u32 num_triangle_indices = num_quads * 6;
+        const u32 num_first_offset_copies = 4;
+        const size_t bytes_per_index = BytesPerIndex(index_type);
+        const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
+        buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .size = size_bytes,
+            .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .pQueueFamilyIndices = nullptr,
+        });
+        if (device.HasDebuggingToolAttached()) {
+            buffer.SetObjectNameEXT("Quad LUT");
+        }
+        memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+
+        const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
+        u8* staging_data = staging.mapped_span.data();
+        const size_t quad_size = bytes_per_index * 6;
+
+        for (u32 first = 0; first < num_first_offset_copies; ++first) {
+            for (u32 quad = 0; quad < num_quads; ++quad) {
+                MakeAndUpdateIndices(staging_data, quad_size, quad, first);
+                staging_data += quad_size;
+            }
+        }
+
+        scheduler.RequestOutsideRenderPassOperationContext();
+        scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                          dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
+            const VkBufferCopy copy{
+                .srcOffset = src_offset,
+                .dstOffset = 0,
+                .size = size_bytes,
+            };
+            const VkBufferMemoryBarrier write_barrier{
+                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = dst_buffer,
+                .offset = 0,
+                .size = size_bytes,
+            };
+            cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
+        });
+    }
+
+    void BindBuffer(u32 first) {
+        const VkIndexType index_type_ = index_type;
+        const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices);
+        const size_t offset =
+            (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type);
+        scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) {
+            cmdbuf.BindIndexBuffer(buffer, offset, index_type_);
+        });
+    }
+
+protected:
+    virtual u32 GetQuadsNum(u32 num_indices) const = 0;
+
+    virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0;
+
+    const Device& device;
+    MemoryAllocator& memory_allocator;
+    Scheduler& scheduler;
+    StagingBufferPool& staging_pool;
+
+    vk::Buffer buffer{};
+    MemoryCommit memory_commit{};
+    VkIndexType index_type{};
+    u32 num_indices = 0;
+};
+
+class QuadArrayIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadArrayIndexBuffer() = default;
+
+private:
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ / 4;
+    }
+
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
+        for (T& index : indices) {
+            index = static_cast<T>(first + index + quad * 4);
+        }
+        return indices;
+    }
+
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
+class QuadStripIndexBuffer : public QuadIndexBuffer {
+public:
+    QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_,
+                         Scheduler& scheduler_, StagingBufferPool& staging_pool_)
+        : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {}
+
+    ~QuadStripIndexBuffer() = default;
+
+private:
+    u32 GetQuadsNum(u32 num_indices_) const override {
+        return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0;
+    }
+
+    template <typename T>
+    static std::array<T, 6> MakeIndices(u32 quad, u32 first) {
+        std::array<T, 6> indices{0, 3, 1, 0, 2, 3};
+        for (T& index : indices) {
+            index = static_cast<T>(first + index + quad * 2);
+        }
+        return indices;
+    }
+
+    void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) {
+        switch (index_type) {
+        case VK_INDEX_TYPE_UINT8_EXT:
+            std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT16:
+            std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size);
+            break;
+        case VK_INDEX_TYPE_UINT32:
+            std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size);
+            break;
+        default:
+            ASSERT(false);
+            break;
+        }
+    }
+};
+
 BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
                                        Scheduler& scheduler_, StagingBufferPool& staging_pool_,
                                        UpdateDescriptorQueue& update_descriptor_queue_,
@@ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
-      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {}
+      quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {
+    quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
+                                                                     scheduler_, staging_pool_);
+    quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
+                                                                     scheduler_, staging_pool_);
+}
 
 StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
     return staging_pool.Request(size, MemoryUsage::Upload);
@@ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
     VkDeviceSize vk_offset = offset;
     VkBuffer vk_buffer = buffer;
-    if (topology == PrimitiveTopology::Quads) {
+    if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) {
         vk_index_type = VK_INDEX_TYPE_UINT32;
         std::tie(vk_buffer, vk_offset) =
-            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
+            quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset,
+                                     topology == PrimitiveTopology::QuadStrip);
     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
         vk_index_type = VK_INDEX_TYPE_UINT16;
         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
@@ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
     });
 }
 
-void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
+void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) {
     if (count == 0) {
         ReserveNullBuffer();
         scheduler.Record([this](vk::CommandBuffer cmdbuf) {
@@ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) {
         });
         return;
     }
-    ReserveQuadArrayLUT(first + count, true);
-
-    // The LUT has the indices 0, 1, 2, and 3 copied as an array
-    // To apply these 'first' offsets we can apply an offset based on the modulus.
-    const VkIndexType index_type = quad_array_lut_index_type;
-    const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4);
-    const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type);
-    scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindIndexBuffer(buffer, offset, index_type);
-    });
+
+    if (topology == PrimitiveTopology::Quads) {
+        quad_array_index_buffer->UpdateBuffer(first + count);
+        quad_array_index_buffer->BindBuffer(first);
+    } else if (topology == PrimitiveTopology::QuadStrip) {
+        quad_strip_index_buffer->UpdateBuffer(first + count);
+        quad_strip_index_buffer->BindBuffer(first);
+    }
 }
 
 void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size,
@@ -320,83 +496,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
     });
 }
 
-void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
-    if (num_indices <= current_num_indices) {
-        return;
-    }
-    if (wait_for_idle) {
-        scheduler.Finish();
-    }
-    current_num_indices = num_indices;
-    quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices);
-
-    const u32 num_quads = num_indices / 4;
-    const u32 num_triangle_indices = num_quads * 6;
-    const u32 num_first_offset_copies = 4;
-    const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type);
-    const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
-    quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
-        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .size = size_bytes,
-        .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .queueFamilyIndexCount = 0,
-        .pQueueFamilyIndices = nullptr,
-    });
-    if (device.HasDebuggingToolAttached()) {
-        quad_array_lut.SetObjectNameEXT("Quad LUT");
-    }
-    quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal);
-
-    const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
-    u8* staging_data = staging.mapped_span.data();
-    const size_t quad_size = bytes_per_index * 6;
-    for (u32 first = 0; first < num_first_offset_copies; ++first) {
-        for (u32 quad = 0; quad < num_quads; ++quad) {
-            switch (quad_array_lut_index_type) {
-            case VK_INDEX_TYPE_UINT8_EXT:
-                std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size);
-                break;
-            case VK_INDEX_TYPE_UINT16:
-                std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size);
-                break;
-            case VK_INDEX_TYPE_UINT32:
-                std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size);
-                break;
-            default:
-                ASSERT(false);
-                break;
-            }
-            staging_data += quad_size;
-        }
-    }
-    scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
-                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
-        const VkBufferCopy copy{
-            .srcOffset = src_offset,
-            .dstOffset = 0,
-            .size = size_bytes,
-        };
-        const VkBufferMemoryBarrier write_barrier{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = dst_buffer,
-            .offset = 0,
-            .size = size_bytes,
-        };
-        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                               0, write_barrier);
-    });
-}
-
 void BufferCacheRuntime::ReserveNullBuffer() {
     if (null_buffer) {
         return;
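The replacement classes keep the old LUT trick: the device-local buffer holds num_first_offset_copies = 4 complete expansions, where copy k has every index biased by +k (see MakeIndices: first + index + quad * stride). BindBuffer can then honor any first vertex without regenerating the LUT by selecting copy first % 4 and skipping GetQuadsNum(first) quads into it. A hedged sketch of that offset arithmetic (the helper name is illustrative):

```cpp
#include <cstddef>
#include <cstdint>

// Byte offset into the LUT for a draw starting at 'first', mirroring
// QuadIndexBuffer::BindBuffer. Each expanded quad occupies 6 indices.
std::size_t LutOffsetBytes(std::uint32_t first, std::uint32_t num_indices,
                           std::size_t bytes_per_index, bool is_strip) {
    const auto quads_in = [is_strip](std::uint32_t n) -> std::uint32_t {
        return is_strip ? (n >= 4 ? (n - 2) / 2 : 0) : n / 4;
    };
    // Select the copy whose indices are pre-biased by (first % 4)...
    const std::size_t sub_first_offset =
        static_cast<std::size_t>(first % 4) * quads_in(num_indices);
    // ...then skip the whole quads covered by 'first' inside that copy.
    return (sub_first_offset + quads_in(first)) * 6ULL * bytes_per_index;
}
```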
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a15c8b39b..183b33632 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -50,6 +50,9 @@ private:
     std::vector<BufferView> views;
 };
 
+class QuadArrayIndexBuffer;
+class QuadStripIndexBuffer;
+
 class BufferCacheRuntime {
     friend Buffer;
 
@@ -86,7 +89,7 @@ public:
     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices,
                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size);
 
-    void BindQuadArrayIndexBuffer(u32 first, u32 count);
+    void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count);
 
     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride);
 
@@ -118,8 +121,6 @@ private:
         update_descriptor_queue.AddBuffer(buffer, offset, size);
     }
 
-    void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
-
     void ReserveNullBuffer();
 
     const Device& device;
@@ -128,10 +129,8 @@ private:
     StagingBufferPool& staging_pool;
    UpdateDescriptorQueue& update_descriptor_queue;
 
-    vk::Buffer quad_array_lut;
-    MemoryCommit quad_array_lut_commit;
-    VkIndexType quad_array_lut_index_type{};
-    u32 current_num_indices = 0;
+    std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer;
+    std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
 
     vk::Buffer null_buffer;
     MemoryCommit null_buffer_commit;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 2c00979d7..1a316b6eb 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_,
                                  UpdateDescriptorQueue& update_descriptor_queue_)
     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
-                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
+                  COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV),
       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
       update_descriptor_queue{update_descriptor_queue_} {}
 
@@ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default;
 
 std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
-    VkBuffer src_buffer, u32 src_offset) {
+    VkBuffer src_buffer, u32 src_offset, bool is_strip) {
     const u32 index_shift = [index_format] {
         switch (index_format) {
         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
@@ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
         return 2;
     }();
     const u32 input_size = num_vertices << index_shift;
-    const u32 num_tri_vertices = (num_vertices / 4) * 6;
+    const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6;
 
     const std::size_t staging_size = num_tri_vertices * sizeof(u32);
     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
@@ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     const void* const descriptor_data{update_descriptor_queue.UpdateData()};
 
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex,
-                      index_shift](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift,
+                      is_strip](vk::CommandBuffer cmdbuf) {
         static constexpr u32 DISPATCH_SIZE = 1024;
         static constexpr VkMemoryBarrier WRITE_BARRIER{
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
         };
-        const std::array<u32, 2> push_constants{base_vertex, index_shift};
+        const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u};
         const VkDescriptorSet set = descriptor_allocator.Commit();
         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
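The push-constant range grows from two to three u32 words so the is_strip flag reaches the shader. As a sketch (this struct does not exist in the codebase), the host-side layout mirrored by the std::array<u32, 3> would be:

```cpp
#include <cstdint>

// Mirrors the GLSL push_constant block in vulkan_quad_indexed.comp:
// three 4-byte members, 12 bytes total, matching
// COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>.
struct QuadIndexedPushConstants {
    std::uint32_t base_vertex;
    std::int32_t index_shift; // 0: uint8, 1: uint16, 2: uint32
    std::int32_t is_strip;    // 0: quads, 1: quadstrip
};
static_assert(sizeof(QuadIndexedPushConstants) == sizeof(std::uint32_t) * 3);
```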
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5d32e3caf..c4c8fa081 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -74,7 +74,7 @@ public:
74 74
75 std::pair<VkBuffer, VkDeviceSize> Assemble( 75 std::pair<VkBuffer, VkDeviceSize> Assemble(
76 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, 76 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
77 u32 base_vertex, VkBuffer src_buffer, u32 src_offset); 77 u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip);
78 78
79private: 79private:
80 Scheduler& scheduler; 80 Scheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 823b9ca2a..3774f303a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances,
         .first_index = is_indexed ? draw_state.index_buffer.first : 0,
         .is_indexed = is_indexed,
     };
+    // 6 triangle vertices per quad, base vertex is part of the index
+    // See BindQuadIndexBuffer for more details
     if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
-        // 6 triangle vertices per quad, base vertex is part of the index
-        // See BindQuadArrayIndexBuffer for more details
         params.num_vertices = (params.num_vertices / 4) * 6;
         params.base_vertex = 0;
         params.is_indexed = true;
+    } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
+        params.num_vertices = (params.num_vertices - 2) / 2 * 6;
+        params.base_vertex = 0;
+        params.is_indexed = true;
     }
     return params;
 }
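As a concrete check of the new arithmetic: a 10-vertex quad strip contains (10 - 2) / 2 = 4 quads, so the indexed triangle-list draw emits 4 * 6 = 24 vertices, while a 12-vertex quad list contains 12 / 4 = 3 quads and emits 18. A one-function sketch (illustrative, not code from this commit) of the conversion MakeDrawParams applies:

```cpp
#include <cstdint>

// Triangle-list vertex count after lowering a quad topology, as in
// MakeDrawParams above.
std::uint32_t TriangleListVertices(std::uint32_t num_vertices, bool is_strip) {
    return is_strip ? (num_vertices - 2) / 2 * 6  // strip quads share an edge
                    : num_vertices / 4 * 6;       // independent quads
}
```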