summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h160
-rw-r--r--src/video_core/dma_pusher.cpp3
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/draw_manager.cpp2
-rw-r--r--src/video_core/engines/draw_manager.h5
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.h12
-rw-r--r--src/video_core/macro/macro_hle.cpp45
-rw-r--r--src/video_core/rasterizer_interface.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp48
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp5
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h1
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp6
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h24
16 files changed, 252 insertions, 72 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 99abe0edf..557227b37 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -171,7 +171,9 @@ public:
171 bool is_written, bool is_image); 171 bool is_written, bool is_image);
172 172
173 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, 173 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
174 bool synchronize, bool mark_as_written); 174 bool synchronize = true,
175 bool mark_as_written = false,
176 bool discard_downloads = false);
175 177
176 void FlushCachedWrites(); 178 void FlushCachedWrites();
177 179
@@ -203,6 +205,14 @@ public:
203 /// Return true when a CPU region is modified from the CPU 205 /// Return true when a CPU region is modified from the CPU
204 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 206 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
205 207
208 void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
209 current_draw_indirect = current_draw_indirect_;
210 }
211
212 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
213
214 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
215
206 std::mutex mutex; 216 std::mutex mutex;
207 Runtime& runtime; 217 Runtime& runtime;
208 218
@@ -275,6 +285,8 @@ private:
275 285
276 void BindHostVertexBuffers(); 286 void BindHostVertexBuffers();
277 287
288 void BindHostDrawIndirectBuffers();
289
278 void BindHostGraphicsUniformBuffers(size_t stage); 290 void BindHostGraphicsUniformBuffers(size_t stage);
279 291
280 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); 292 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
@@ -301,6 +313,8 @@ private:
301 313
302 void UpdateVertexBuffer(u32 index); 314 void UpdateVertexBuffer(u32 index);
303 315
316 void UpdateDrawIndirect();
317
304 void UpdateUniformBuffers(size_t stage); 318 void UpdateUniformBuffers(size_t stage);
305 319
306 void UpdateStorageBuffers(size_t stage); 320 void UpdateStorageBuffers(size_t stage);
@@ -340,6 +354,8 @@ private:
340 354
341 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); 355 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
342 356
357 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
358
343 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 359 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
344 std::span<BufferCopy> copies); 360 std::span<BufferCopy> copies);
345 361
@@ -375,6 +391,8 @@ private:
375 SlotVector<Buffer> slot_buffers; 391 SlotVector<Buffer> slot_buffers;
376 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 392 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
377 393
394 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
395
378 u32 last_index_count = 0; 396 u32 last_index_count = 0;
379 397
380 Binding index_buffer; 398 Binding index_buffer;
@@ -383,6 +401,8 @@ private:
383 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; 401 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
384 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; 402 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
385 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; 403 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
404 Binding count_buffer_binding;
405 Binding indirect_buffer_binding;
386 406
387 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; 407 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
388 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; 408 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
@@ -422,6 +442,7 @@ private:
422 442
423 std::vector<BufferId> cached_write_buffer_ids; 443 std::vector<BufferId> cached_write_buffer_ids;
424 444
445 IntervalSet discarded_ranges;
425 IntervalSet uncommitted_ranges; 446 IntervalSet uncommitted_ranges;
426 IntervalSet common_ranges; 447 IntervalSet common_ranges;
427 std::deque<IntervalSet> committed_ranges; 448 std::deque<IntervalSet> committed_ranges;
@@ -579,13 +600,17 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
579 }}; 600 }};
580 601
581 boost::container::small_vector<IntervalType, 4> tmp_intervals; 602 boost::container::small_vector<IntervalType, 4> tmp_intervals;
603 const bool is_high_accuracy =
604 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
582 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 605 auto mirror = [&](VAddr base_address, VAddr base_address_end) {
583 const u64 size = base_address_end - base_address; 606 const u64 size = base_address_end - base_address;
584 const VAddr diff = base_address - *cpu_src_address; 607 const VAddr diff = base_address - *cpu_src_address;
585 const VAddr new_base_address = *cpu_dest_address + diff; 608 const VAddr new_base_address = *cpu_dest_address + diff;
586 const IntervalType add_interval{new_base_address, new_base_address + size}; 609 const IntervalType add_interval{new_base_address, new_base_address + size};
587 uncommitted_ranges.add(add_interval);
588 tmp_intervals.push_back(add_interval); 610 tmp_intervals.push_back(add_interval);
611 if (is_high_accuracy) {
612 uncommitted_ranges.add(add_interval);
613 }
589 }; 614 };
590 ForEachWrittenRange(*cpu_src_address, amount, mirror); 615 ForEachWrittenRange(*cpu_src_address, amount, mirror);
591 // This subtraction in this order is important for overlapping copies. 616 // This subtraction in this order is important for overlapping copies.
@@ -677,6 +702,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
677 } 702 }
678 BindHostVertexBuffers(); 703 BindHostVertexBuffers();
679 BindHostTransformFeedbackBuffers(); 704 BindHostTransformFeedbackBuffers();
705 if (current_draw_indirect) {
706 BindHostDrawIndirectBuffers();
707 }
680} 708}
681 709
682template <class P> 710template <class P>
@@ -796,7 +824,8 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
796template <class P> 824template <class P>
797std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, 825std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
798 bool synchronize, 826 bool synchronize,
799 bool mark_as_written) { 827 bool mark_as_written,
828 bool discard_downloads) {
800 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 829 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
801 if (!cpu_addr) { 830 if (!cpu_addr) {
802 return {&slot_buffers[NULL_BUFFER_ID], 0}; 831 return {&slot_buffers[NULL_BUFFER_ID], 0};
@@ -804,11 +833,17 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
804 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 833 const BufferId buffer_id = FindBuffer(*cpu_addr, size);
805 Buffer& buffer = slot_buffers[buffer_id]; 834 Buffer& buffer = slot_buffers[buffer_id];
806 if (synchronize) { 835 if (synchronize) {
807 SynchronizeBuffer(buffer, *cpu_addr, size); 836 // SynchronizeBuffer(buffer, *cpu_addr, size);
837 SynchronizeBufferNoModified(buffer, *cpu_addr, size);
808 } 838 }
809 if (mark_as_written) { 839 if (mark_as_written) {
810 MarkWrittenBuffer(buffer_id, *cpu_addr, size); 840 MarkWrittenBuffer(buffer_id, *cpu_addr, size);
811 } 841 }
842 if (discard_downloads) {
843 IntervalType interval{*cpu_addr, size};
844 ClearDownload(interval);
845 discarded_ranges.subtract(interval);
846 }
812 return {&buffer, buffer.Offset(*cpu_addr)}; 847 return {&buffer, buffer.Offset(*cpu_addr)};
813} 848}
814 849
@@ -827,10 +862,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
827 862
828template <class P> 863template <class P>
829void BufferCache<P>::AccumulateFlushes() { 864void BufferCache<P>::AccumulateFlushes() {
830 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
831 uncommitted_ranges.clear();
832 return;
833 }
834 if (uncommitted_ranges.empty()) { 865 if (uncommitted_ranges.empty()) {
835 return; 866 return;
836 } 867 }
@@ -845,12 +876,15 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
845template <class P> 876template <class P>
846void BufferCache<P>::CommitAsyncFlushesHigh() { 877void BufferCache<P>::CommitAsyncFlushesHigh() {
847 AccumulateFlushes(); 878 AccumulateFlushes();
879
880 for (const auto& interval : discarded_ranges) {
881 common_ranges.subtract(interval);
882 }
883
848 if (committed_ranges.empty()) { 884 if (committed_ranges.empty()) {
849 return; 885 return;
850 } 886 }
851 MICROPROFILE_SCOPE(GPU_DownloadMemory); 887 MICROPROFILE_SCOPE(GPU_DownloadMemory);
852 const bool is_accuracy_normal =
853 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
854 888
855 auto it = committed_ranges.begin(); 889 auto it = committed_ranges.begin();
856 while (it != committed_ranges.end()) { 890 while (it != committed_ranges.end()) {
@@ -875,9 +909,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
875 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 909 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
876 buffer.ForEachDownloadRangeAndClear( 910 buffer.ForEachDownloadRangeAndClear(
877 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 911 cpu_addr, size, [&](u64 range_offset, u64 range_size) {
878 if (is_accuracy_normal) {
879 return;
880 }
881 const VAddr buffer_addr = buffer.CpuAddr(); 912 const VAddr buffer_addr = buffer.CpuAddr();
882 const auto add_download = [&](VAddr start, VAddr end) { 913 const auto add_download = [&](VAddr start, VAddr end) {
883 const u64 new_offset = start - buffer_addr; 914 const u64 new_offset = start - buffer_addr;
@@ -891,7 +922,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
891 buffer_id, 922 buffer_id,
892 }); 923 });
893 // Align up to avoid cache conflicts 924 // Align up to avoid cache conflicts
894 constexpr u64 align = 256ULL; 925 constexpr u64 align = 8ULL;
895 constexpr u64 mask = ~(align - 1ULL); 926 constexpr u64 mask = ~(align - 1ULL);
896 total_size_bytes += (new_size + align - 1) & mask; 927 total_size_bytes += (new_size + align - 1) & mask;
897 largest_copy = std::max(largest_copy, new_size); 928 largest_copy = std::max(largest_copy, new_size);
@@ -942,12 +973,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
942 973
943template <class P> 974template <class P>
944void BufferCache<P>::CommitAsyncFlushes() { 975void BufferCache<P>::CommitAsyncFlushes() {
945 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { 976 CommitAsyncFlushesHigh();
946 CommitAsyncFlushesHigh();
947 } else {
948 uncommitted_ranges.clear();
949 committed_ranges.clear();
950 }
951} 977}
952 978
953template <class P> 979template <class P>
@@ -1064,6 +1090,19 @@ void BufferCache<P>::BindHostVertexBuffers() {
1064} 1090}
1065 1091
1066template <class P> 1092template <class P>
1093void BufferCache<P>::BindHostDrawIndirectBuffers() {
1094 const auto bind_buffer = [this](const Binding& binding) {
1095 Buffer& buffer = slot_buffers[binding.buffer_id];
1096 TouchBuffer(buffer, binding.buffer_id);
1097 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
1098 };
1099 if (current_draw_indirect->include_count) {
1100 bind_buffer(count_buffer_binding);
1101 }
1102 bind_buffer(indirect_buffer_binding);
1103}
1104
1105template <class P>
1067void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { 1106void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
1068 u32 dirty = ~0U; 1107 u32 dirty = ~0U;
1069 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1108 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
@@ -1294,6 +1333,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
1294 UpdateStorageBuffers(stage); 1333 UpdateStorageBuffers(stage);
1295 UpdateTextureBuffers(stage); 1334 UpdateTextureBuffers(stage);
1296 } 1335 }
1336 if (current_draw_indirect) {
1337 UpdateDrawIndirect();
1338 }
1297 } while (has_deleted_buffers); 1339 } while (has_deleted_buffers);
1298} 1340}
1299 1341
@@ -1384,6 +1426,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1384} 1426}
1385 1427
1386template <class P> 1428template <class P>
1429void BufferCache<P>::UpdateDrawIndirect() {
1430 const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) {
1431 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1432 if (!cpu_addr) {
1433 binding = NULL_BINDING;
1434 return;
1435 }
1436 binding = Binding{
1437 .cpu_addr = *cpu_addr,
1438 .size = static_cast<u32>(size),
1439 .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
1440 };
1441 };
1442 if (current_draw_indirect->include_count) {
1443 update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
1444 }
1445 update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
1446 indirect_buffer_binding);
1447}
1448
1449template <class P>
1387void BufferCache<P>::UpdateUniformBuffers(size_t stage) { 1450void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1388 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { 1451 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
1389 Binding& binding = uniform_buffers[stage][index]; 1452 Binding& binding = uniform_buffers[stage][index];
@@ -1705,6 +1768,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1705} 1768}
1706 1769
1707template <class P> 1770template <class P>
1771bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1772 boost::container::small_vector<BufferCopy, 4> copies;
1773 u64 total_size_bytes = 0;
1774 u64 largest_copy = 0;
1775 IntervalSet found_sets{};
1776 auto make_copies = [&] {
1777 for (auto& interval : found_sets) {
1778 const std::size_t sub_size = interval.upper() - interval.lower();
1779 const VAddr cpu_addr = interval.lower();
1780 copies.push_back(BufferCopy{
1781 .src_offset = total_size_bytes,
1782 .dst_offset = cpu_addr - buffer.CpuAddr(),
1783 .size = sub_size,
1784 });
1785 total_size_bytes += sub_size;
1786 largest_copy = std::max(largest_copy, sub_size);
1787 }
1788 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1789 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1790 };
1791 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1792 const VAddr base_adr = buffer.CpuAddr() + range_offset;
1793 const VAddr end_adr = base_adr + range_size;
1794 const IntervalType add_interval{base_adr, end_adr};
1795 found_sets.add(add_interval);
1796 });
1797 if (found_sets.empty()) {
1798 return true;
1799 }
1800 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1801 auto it = common_ranges.lower_bound(search_interval);
1802 auto it_end = common_ranges.upper_bound(search_interval);
1803 if (it == common_ranges.end()) {
1804 make_copies();
1805 return false;
1806 }
1807 while (it != it_end) {
1808 found_sets.subtract(*it);
1809 it++;
1810 }
1811 make_copies();
1812 return false;
1813}
1814
1815template <class P>
1708void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1816void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1709 std::span<BufferCopy> copies) { 1817 std::span<BufferCopy> copies) {
1710 if constexpr (USE_MEMORY_MAPS) { 1818 if constexpr (USE_MEMORY_MAPS) {
@@ -1963,4 +2071,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
1963 } 2071 }
1964} 2072}
1965 2073
2074template <class P>
2075std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
2076 auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
2077 return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
2078}
2079
2080template <class P>
2081std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
2082 auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
2083 return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
2084}
2085
1966} // namespace VideoCommon 2086} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index eb1371612..13ff64fa3 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -97,6 +97,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
97 if (dma_state.non_incrementing) { 97 if (dma_state.non_incrementing) {
98 const u32 max_write = static_cast<u32>( 98 const u32 max_write = static_cast<u32>(
99 std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); 99 std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
100 dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
100 CallMultiMethod(&command_header.argument, max_write); 101 CallMultiMethod(&command_header.argument, max_write);
101 dma_state.method_count -= max_write; 102 dma_state.method_count -= max_write;
102 dma_state.is_last_call = true; 103 dma_state.is_last_call = true;
@@ -175,7 +176,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
175 dma_state.method_count); 176 dma_state.method_count);
176 } else { 177 } else {
177 auto subchannel = subchannels[dma_state.subchannel]; 178 auto subchannel = subchannels[dma_state.subchannel];
178 subchannel->current_dma_segment = dma_state.dma_get; 179 subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
179 subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, 180 subchannel->CallMultiMethod(dma_state.method, base_start, num_methods,
180 dma_state.method_count); 181 dma_state.method_count);
181 } 182 }
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index ca0899ba7..da7728ded 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -157,6 +157,7 @@ private:
157 u32 method_count; ///< Current method count 157 u32 method_count; ///< Current method count
158 u32 length_pending; ///< Large NI command length pending 158 u32 length_pending; ///< Large NI command length pending
159 GPUVAddr dma_get; ///< Currently read segment 159 GPUVAddr dma_get; ///< Currently read segment
160 u32 dma_word_offset; ///< Current word offset from address
160 bool non_incrementing; ///< Current command's NI flag 161 bool non_incrementing; ///< Current command's NI flag
161 bool is_last_call; 162 bool is_last_call;
162 }; 163 };
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 4fa77b684..c60f32aad 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -216,7 +216,7 @@ void DrawManager::ProcessDrawIndirect(bool draw_indexed) {
216 UpdateTopology(); 216 UpdateTopology();
217 217
218 if (maxwell3d->ShouldExecute()) { 218 if (maxwell3d->ShouldExecute()) {
219 maxwell3d->rasterizer->DrawIndirect(draw_indexed); 219 maxwell3d->rasterizer->DrawIndirect();
220 } 220 }
221} 221}
222} // namespace Tegra::Engines 222} // namespace Tegra::Engines
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 0cdb37f83..437990162 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -33,7 +33,10 @@ public:
33 }; 33 };
34 34
35 struct IndirectParams { 35 struct IndirectParams {
36 GPUVAddr start_address; 36 bool is_indexed;
37 bool include_count;
38 GPUVAddr count_start_address;
39 GPUVAddr indirect_start_address;
37 size_t buffer_size; 40 size_t buffer_size;
38 size_t max_draw_counts; 41 size_t max_draw_counts;
39 size_t stride; 42 size_t stride;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9b182b653..cd6274a9b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -130,11 +130,15 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
130 } 130 }
131 131
132 macro_params.insert(macro_params.end(), base_start, base_start + amount); 132 macro_params.insert(macro_params.end(), base_start, base_start + amount);
133 for (size_t i = 0; i < amount; i++) {
134 macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
135 }
133 136
134 // Call the macro when there are no more parameters in the command buffer 137 // Call the macro when there are no more parameters in the command buffer
135 if (is_last_call) { 138 if (is_last_call) {
136 CallMacroMethod(executing_macro, macro_params); 139 CallMacroMethod(executing_macro, macro_params);
137 macro_params.clear(); 140 macro_params.clear();
141 macro_addresses.clear();
138 } 142 }
139} 143}
140 144
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 22b904319..ac5e87563 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3066,6 +3066,15 @@ public:
3066 3066
3067 std::unique_ptr<DrawManager> draw_manager; 3067 std::unique_ptr<DrawManager> draw_manager;
3068 friend class DrawManager; 3068 friend class DrawManager;
3069
3070 std::vector<u8> inline_index_draw_indexes;
3071 std::vector<GPUVAddr> macro_addresses;
3072
3073 Core::System& system;
3074 MemoryManager& memory_manager;
3075
3076 /// Handles a write to the CLEAR_BUFFERS register.
3077 void ProcessClearBuffers(u32 layer_count);
3069 3078
3070private: 3079private:
3071 void InitializeRegisterDefaults(); 3080 void InitializeRegisterDefaults();
@@ -3126,9 +3135,6 @@ private:
3126 /// Returns a query's value or an empty object if the value will be deferred through a cache. 3135 /// Returns a query's value or an empty object if the value will be deferred through a cache.
3127 std::optional<u64> GetQueryResult(); 3136 std::optional<u64> GetQueryResult();
3128 3137
3129 Core::System& system;
3130 MemoryManager& memory_manager;
3131
3132 VideoCore::RasterizerInterface* rasterizer = nullptr; 3138 VideoCore::RasterizerInterface* rasterizer = nullptr;
3133 3139
3134 /// Start offsets of each macro in macro_memory 3140 /// Start offsets of each macro in macro_memory
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 1cc202cc7..da988cc0d 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -9,6 +9,7 @@
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/macro/macro.h" 10#include "video_core/macro/macro.h"
11#include "video_core/macro/macro_hle.h" 11#include "video_core/macro/macro_hle.h"
12#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
13 14
14namespace Tegra { 15namespace Tegra {
@@ -24,15 +25,14 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
24 parameters[4], parameters[1], parameters[3], parameters[5], instance_count); 25 parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
25} 26}
26 27
27void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 28void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
28 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); 29 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
29 maxwell3d.draw_manager->DrawArray( 30 maxwell3d.draw_manager->DrawArray(
30 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 31 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
31 parameters[3], parameters[1], parameters[4], instance_count); 32 parameters[3], parameters[1], parameters[4], instance_count);
32} 33}
33 34
34void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 35void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
35 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
36 const u32 element_base = parameters[4]; 36 const u32 element_base = parameters[4];
37 const u32 base_instance = parameters[5]; 37 const u32 base_instance = parameters[5];
38 maxwell3d.regs.vertex_id_base = element_base; 38 maxwell3d.regs.vertex_id_base = element_base;
@@ -41,9 +41,18 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
41 maxwell3d.CallMethod(0x8e4, element_base, true); 41 maxwell3d.CallMethod(0x8e4, element_base, true);
42 maxwell3d.CallMethod(0x8e5, base_instance, true); 42 maxwell3d.CallMethod(0x8e5, base_instance, true);
43 43
44 maxwell3d.draw_manager->DrawIndex( 44 auto& params = maxwell3d.draw_manager->GetIndirectParams();
45 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 45 params.is_indexed = true;
46 parameters[3], parameters[1], element_base, base_instance, instance_count); 46 params.include_count = false;
47 params.count_start_address = 0;
48 params.indirect_start_address = maxwell3d.macro_addresses[1];
49 params.buffer_size = 5 * sizeof(u32);
50 params.max_draw_counts = 1;
51 params.stride = 0;
52
53 maxwell3d.draw_manager->DrawIndexedIndirect(
54 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 0,
55 1U << 18);
47 56
48 maxwell3d.regs.vertex_id_base = 0x0; 57 maxwell3d.regs.vertex_id_base = 0x0;
49 maxwell3d.CallMethod(0x8e3, 0x640, true); 58 maxwell3d.CallMethod(0x8e3, 0x640, true);
@@ -51,8 +60,9 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
51 maxwell3d.CallMethod(0x8e5, 0x0, true); 60 maxwell3d.CallMethod(0x8e5, 0x0, true);
52} 61}
53 62
54// Multidraw Indirect 63// Multidraw Indexed Indirect
55void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 64void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d,
65 const std::vector<u32>& parameters) {
56 const u32 start_indirect = parameters[0]; 66 const u32 start_indirect = parameters[0];
57 const u32 end_indirect = parameters[1]; 67 const u32 end_indirect = parameters[1];
58 if (start_indirect >= end_indirect) { 68 if (start_indirect >= end_indirect) {
@@ -66,7 +76,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
66 // size of each indirect segment 76 // size of each indirect segment
67 const u32 indirect_words = 5 + padding; 77 const u32 indirect_words = 5 + padding;
68 const u32 stride = indirect_words * sizeof(u32); 78 const u32 stride = indirect_words * sizeof(u32);
69 const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32);
70 const std::size_t draw_count = end_indirect - start_indirect; 79 const std::size_t draw_count = end_indirect - start_indirect;
71 u32 lowest_first = std::numeric_limits<u32>::max(); 80 u32 lowest_first = std::numeric_limits<u32>::max();
72 u32 highest_limit = std::numeric_limits<u32>::min(); 81 u32 highest_limit = std::numeric_limits<u32>::min();
@@ -80,12 +89,16 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
80 89
81 const u32 base_vertex = parameters[8]; 90 const u32 base_vertex = parameters[8];
82 const u32 base_instance = parameters[9]; 91 const u32 base_instance = parameters[9];
92 maxwell3d.regs.vertex_id_base = base_vertex;
83 maxwell3d.CallMethod(0x8e3, 0x640, true); 93 maxwell3d.CallMethod(0x8e3, 0x640, true);
84 maxwell3d.CallMethod(0x8e4, base_vertex, true); 94 maxwell3d.CallMethod(0x8e4, base_vertex, true);
85 maxwell3d.CallMethod(0x8e5, base_instance, true); 95 maxwell3d.CallMethod(0x8e5, base_instance, true);
86 auto& params = maxwell3d.draw_manager->GetIndirectParams(); 96 auto& params = maxwell3d.draw_manager->GetIndirectParams();
87 params.start_address = start_address; 97 params.is_indexed = true;
88 params.buffer_size = sizeof(u32) + stride * draw_count; 98 params.include_count = true;
99 params.count_start_address = maxwell3d.macro_addresses[4];
100 params.indirect_start_address = maxwell3d.macro_addresses[5];
101 params.buffer_size = stride * draw_count;
89 params.max_draw_counts = draw_count; 102 params.max_draw_counts = draw_count;
90 params.stride = stride; 103 params.stride = stride;
91 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 104 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
@@ -93,7 +106,7 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
93} 106}
94 107
95// Multi-layer Clear 108// Multi-layer Clear
96void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 109void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
97 ASSERT(parameters.size() == 1); 110 ASSERT(parameters.size() == 1);
98 111
99 const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; 112 const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
@@ -107,10 +120,10 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
107 120
108constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{ 121constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
109 {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, 122 {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
110 {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, 123 {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect},
111 {0x0217920100488FF7, &HLE_0217920100488FF7}, 124 {0x0217920100488FF7, &HLE_DrawIndexedIndirect},
112 {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164}, 125 {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect},
113 {0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B}, 126 {0xEAD26C3E2109B06B, &HLE_MultiLayerClear},
114}}; 127}};
115 128
116class HLEMacroImpl final : public CachedMacro { 129class HLEMacroImpl final : public CachedMacro {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a2a651f34..641b95c7c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -43,7 +43,7 @@ public:
43 virtual void Draw(bool is_indexed, u32 instance_count) = 0; 43 virtual void Draw(bool is_indexed, u32 instance_count) = 0;
44 44
45 /// Dispatches an indirect draw invocation 45 /// Dispatches an indirect draw invocation
46 virtual void DrawIndirect(bool is_indexed) {} 46 virtual void DrawIndirect() {}
47 47
48 /// Clear the current framebuffer 48 /// Clear the current framebuffer
49 virtual void Clear(u32 layer_count) = 0; 49 virtual void Clear(u32 layer_count) = 0;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 6b54d7111..487d8b416 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
56 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 56 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
57 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | 57 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
58 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 58 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
59 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; 59 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
60 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
60 if (device.IsExtTransformFeedbackSupported()) { 61 if (device.IsExtTransformFeedbackSupported()) {
61 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; 62 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
62 } 63 }
@@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() {
516 if (device.IsExtTransformFeedbackSupported()) { 517 if (device.IsExtTransformFeedbackSupported()) {
517 create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; 518 create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
518 } 519 }
520 create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
519 null_buffer = device.GetLogical().CreateBuffer(create_info); 521 null_buffer = device.GetLogical().CreateBuffer(create_info);
520 if (device.HasDebuggingToolAttached()) { 522 if (device.HasDebuggingToolAttached()) {
521 null_buffer.SetObjectNameEXT("Null buffer"); 523 null_buffer.SetObjectNameEXT("Null buffer");
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 9b75f33dd..6f1adc97f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -225,25 +225,40 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
225 }); 225 });
226} 226}
227 227
228void RasterizerVulkan::DrawIndirect(bool is_indexed) { 228void RasterizerVulkan::DrawIndirect() {
229 PrepareDraw(is_indexed, [this, is_indexed] { 229 const auto& params = maxwell3d->draw_manager->GetIndirectParams();
230 const auto params = maxwell3d->draw_manager->GetIndirectParams(); 230 buffer_cache.SetDrawIndirect(&params);
231 const auto [buffer, offset] = buffer_cache.ObtainBuffer( 231 PrepareDraw(params.is_indexed, [this, &params] {
232 params.start_address, static_cast<u32>(params.buffer_size), true, false); 232 const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
233 scheduler.Record([buffer_obj = buffer->Handle(), offset, 233 if (params.include_count) {
234 max_draw_counts = params.max_draw_counts, stride = params.stride, 234 const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
235 is_indexed](vk::CommandBuffer cmdbuf) { 235 scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
236 if (is_indexed) { 236 buffer_obj = buffer->Handle(), offset_base, offset,
237 cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, 237 params](vk::CommandBuffer cmdbuf) {
238 static_cast<u32>(max_draw_counts), 238 if (params.is_indexed) {
239 static_cast<u32>(stride)); 239 cmdbuf.DrawIndexedIndirectCount(
240 buffer_obj, offset, draw_buffer_obj, offset_base,
241 static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride));
242 } else {
243 cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
244 static_cast<u32>(params.max_draw_counts),
245 static_cast<u32>(params.stride));
246 }
247 });
248 return;
249 }
250 scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
251 if (params.is_indexed) {
252 cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
253 static_cast<u32>(params.max_draw_counts),
254 static_cast<u32>(params.stride));
240 } else { 255 } else {
241 cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, 256 cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
242 static_cast<u32>(max_draw_counts), 257 static_cast<u32>(params.stride));
243 static_cast<u32>(stride));
244 } 258 }
245 }); 259 });
246 }); 260 });
261 buffer_cache.SetDrawIndirect(nullptr);
247} 262}
248 263
249void RasterizerVulkan::Clear(u32 layer_count) { 264void RasterizerVulkan::Clear(u32 layer_count) {
@@ -425,9 +440,6 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
425 440
426bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { 441bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
427 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; 442 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
428 if (!Settings::IsGPULevelHigh()) {
429 return buffer_cache.IsRegionGpuModified(addr, size);
430 }
431 return texture_cache.IsRegionGpuModified(addr, size) || 443 return texture_cache.IsRegionGpuModified(addr, size) ||
432 buffer_cache.IsRegionGpuModified(addr, size); 444 buffer_cache.IsRegionGpuModified(addr, size);
433} 445}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index bc43a8a1f..43a210da0 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -65,7 +65,7 @@ public:
65 ~RasterizerVulkan() override; 65 ~RasterizerVulkan() override;
66 66
67 void Draw(bool is_indexed, u32 instance_count) override; 67 void Draw(bool is_indexed, u32 instance_count) override;
68 void DrawIndirect(bool is_indexed) override; 68 void DrawIndirect() override;
69 void Clear(u32 layer_count) override; 69 void Clear(u32 layer_count) override;
70 void DispatchCompute() override; 70 void DispatchCompute() override;
71 void ResetCounter(VideoCore::QueryType type) override; 71 void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 477fc428b..207fae8c9 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -351,7 +351,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
351 .dualSrcBlend = true, 351 .dualSrcBlend = true,
352 .logicOp = true, 352 .logicOp = true,
353 .multiDrawIndirect = true, 353 .multiDrawIndirect = true,
354 .drawIndirectFirstInstance = false, 354 .drawIndirectFirstInstance = true,
355 .depthClamp = true, 355 .depthClamp = true,
356 .depthBiasClamp = true, 356 .depthBiasClamp = true,
357 .fillModeNonSolid = true, 357 .fillModeNonSolid = true,
@@ -1024,6 +1024,8 @@ void Device::CheckSuitability(bool requires_swapchain) const {
1024 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 1024 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
1025 std::make_pair(features.imageCubeArray, "imageCubeArray"), 1025 std::make_pair(features.imageCubeArray, "imageCubeArray"),
1026 std::make_pair(features.independentBlend, "independentBlend"), 1026 std::make_pair(features.independentBlend, "independentBlend"),
1027 std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"),
1028 std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"),
1027 std::make_pair(features.depthClamp, "depthClamp"), 1029 std::make_pair(features.depthClamp, "depthClamp"),
1028 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), 1030 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
1029 std::make_pair(features.largePoints, "largePoints"), 1031 std::make_pair(features.largePoints, "largePoints"),
@@ -1117,6 +1119,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
1117 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); 1119 test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
1118 test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); 1120 test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
1119 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 1121 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
1122 test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true);
1120 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 1123 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
1121 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 1124 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
1122 test(has_ext_primitive_topology_list_restart, 1125 test(has_ext_primitive_topology_list_restart,
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 6a26c4e6e..d0d7c2299 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -451,6 +451,7 @@ private:
451 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 451 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
452 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. 452 bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2.
453 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. 453 bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough.
454 bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count.
454 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. 455 bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
455 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. 456 bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
456 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. 457 bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index c58c4c1c4..f8f8ed9f8 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -94,8 +94,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
94 X(vkCmdDispatch); 94 X(vkCmdDispatch);
95 X(vkCmdDraw); 95 X(vkCmdDraw);
96 X(vkCmdDrawIndexed); 96 X(vkCmdDrawIndexed);
97 X(vkCmdDrawIndirectCount); 97 X(vkCmdDrawIndirect);
98 X(vkCmdDrawIndexedIndirectCount); 98 X(vkCmdDrawIndexedIndirect);
99 X(vkCmdDrawIndirectCountKHR);
100 X(vkCmdDrawIndexedIndirectCountKHR);
99 X(vkCmdEndQuery); 101 X(vkCmdEndQuery);
100 X(vkCmdEndRenderPass); 102 X(vkCmdEndRenderPass);
101 X(vkCmdEndTransformFeedbackEXT); 103 X(vkCmdEndTransformFeedbackEXT);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 9bd158dce..493a48573 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -213,8 +213,10 @@ struct DeviceDispatch : InstanceDispatch {
213 PFN_vkCmdDispatch vkCmdDispatch{}; 213 PFN_vkCmdDispatch vkCmdDispatch{};
214 PFN_vkCmdDraw vkCmdDraw{}; 214 PFN_vkCmdDraw vkCmdDraw{};
215 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; 215 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
216 PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; 216 PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
217 PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; 217 PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
218 PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{};
219 PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{};
218 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; 220 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
219 PFN_vkCmdEndQuery vkCmdEndQuery{}; 221 PFN_vkCmdEndQuery vkCmdEndQuery{};
220 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; 222 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
@@ -1021,17 +1023,27 @@ public:
1021 first_instance); 1023 first_instance);
1022 } 1024 }
1023 1025
1026 void DrawIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count,
1027 u32 stride) const noexcept {
1028 dld->vkCmdDrawIndirect(handle, src_buffer, src_offset, draw_count, stride);
1029 }
1030
1031 void DrawIndexedIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count,
1032 u32 stride) const noexcept {
1033 dld->vkCmdDrawIndexedIndirect(handle, src_buffer, src_offset, draw_count, stride);
1034 }
1035
1024 void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, 1036 void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer,
1025 VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { 1037 VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept {
1026 dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, 1038 dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset,
1027 draw_count, stride); 1039 draw_count, stride);
1028 } 1040 }
1029 1041
1030 void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, 1042 void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset,
1031 VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, 1043 VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count,
1032 u32 stride) const noexcept { 1044 u32 stride) const noexcept {
1033 dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, 1045 dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer,
1034 count_offset, draw_count, stride); 1046 count_offset, draw_count, stride);
1035 } 1047 }
1036 1048
1037 void ClearAttachments(Span<VkClearAttachment> attachments, 1049 void ClearAttachments(Span<VkClearAttachment> attachments,