diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 160 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/draw_manager.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/draw_manager.h | 5 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 12 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 45 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 1 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_wrapper.h | 24 |
16 files changed, 252 insertions, 72 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 99abe0edf..557227b37 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -171,7 +171,9 @@ public: | |||
| 171 | bool is_written, bool is_image); | 171 | bool is_written, bool is_image); |
| 172 | 172 | ||
| 173 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | 173 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
| 174 | bool synchronize, bool mark_as_written); | 174 | bool synchronize = true, |
| 175 | bool mark_as_written = false, | ||
| 176 | bool discard_downloads = false); | ||
| 175 | 177 | ||
| 176 | void FlushCachedWrites(); | 178 | void FlushCachedWrites(); |
| 177 | 179 | ||
| @@ -203,6 +205,14 @@ public: | |||
| 203 | /// Return true when a CPU region is modified from the CPU | 205 | /// Return true when a CPU region is modified from the CPU |
| 204 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 206 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |
| 205 | 207 | ||
| 208 | void SetDrawIndirect(const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 209 | current_draw_indirect = current_draw_indirect_; | ||
| 210 | } | ||
| 211 | |||
| 212 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 213 | |||
| 214 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 215 | |||
| 206 | std::mutex mutex; | 216 | std::mutex mutex; |
| 207 | Runtime& runtime; | 217 | Runtime& runtime; |
| 208 | 218 | ||
| @@ -275,6 +285,8 @@ private: | |||
| 275 | 285 | ||
| 276 | void BindHostVertexBuffers(); | 286 | void BindHostVertexBuffers(); |
| 277 | 287 | ||
| 288 | void BindHostDrawIndirectBuffers(); | ||
| 289 | |||
| 278 | void BindHostGraphicsUniformBuffers(size_t stage); | 290 | void BindHostGraphicsUniformBuffers(size_t stage); |
| 279 | 291 | ||
| 280 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | 292 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); |
| @@ -301,6 +313,8 @@ private: | |||
| 301 | 313 | ||
| 302 | void UpdateVertexBuffer(u32 index); | 314 | void UpdateVertexBuffer(u32 index); |
| 303 | 315 | ||
| 316 | void UpdateDrawIndirect(); | ||
| 317 | |||
| 304 | void UpdateUniformBuffers(size_t stage); | 318 | void UpdateUniformBuffers(size_t stage); |
| 305 | 319 | ||
| 306 | void UpdateStorageBuffers(size_t stage); | 320 | void UpdateStorageBuffers(size_t stage); |
| @@ -340,6 +354,8 @@ private: | |||
| 340 | 354 | ||
| 341 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 355 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 342 | 356 | ||
| 357 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 358 | |||
| 343 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 359 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 344 | std::span<BufferCopy> copies); | 360 | std::span<BufferCopy> copies); |
| 345 | 361 | ||
| @@ -375,6 +391,8 @@ private: | |||
| 375 | SlotVector<Buffer> slot_buffers; | 391 | SlotVector<Buffer> slot_buffers; |
| 376 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 392 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| 377 | 393 | ||
| 394 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 395 | |||
| 378 | u32 last_index_count = 0; | 396 | u32 last_index_count = 0; |
| 379 | 397 | ||
| 380 | Binding index_buffer; | 398 | Binding index_buffer; |
| @@ -383,6 +401,8 @@ private: | |||
| 383 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | 401 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; |
| 384 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | 402 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; |
| 385 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | 403 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; |
| 404 | Binding count_buffer_binding; | ||
| 405 | Binding indirect_buffer_binding; | ||
| 386 | 406 | ||
| 387 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | 407 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; |
| 388 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | 408 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; |
| @@ -422,6 +442,7 @@ private: | |||
| 422 | 442 | ||
| 423 | std::vector<BufferId> cached_write_buffer_ids; | 443 | std::vector<BufferId> cached_write_buffer_ids; |
| 424 | 444 | ||
| 445 | IntervalSet discarded_ranges; | ||
| 425 | IntervalSet uncommitted_ranges; | 446 | IntervalSet uncommitted_ranges; |
| 426 | IntervalSet common_ranges; | 447 | IntervalSet common_ranges; |
| 427 | std::deque<IntervalSet> committed_ranges; | 448 | std::deque<IntervalSet> committed_ranges; |
| @@ -579,13 +600,17 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 579 | }}; | 600 | }}; |
| 580 | 601 | ||
| 581 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | 602 | boost::container::small_vector<IntervalType, 4> tmp_intervals; |
| 603 | const bool is_high_accuracy = | ||
| 604 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 582 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | 605 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { |
| 583 | const u64 size = base_address_end - base_address; | 606 | const u64 size = base_address_end - base_address; |
| 584 | const VAddr diff = base_address - *cpu_src_address; | 607 | const VAddr diff = base_address - *cpu_src_address; |
| 585 | const VAddr new_base_address = *cpu_dest_address + diff; | 608 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 586 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 609 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 587 | uncommitted_ranges.add(add_interval); | ||
| 588 | tmp_intervals.push_back(add_interval); | 610 | tmp_intervals.push_back(add_interval); |
| 611 | if (is_high_accuracy) { | ||
| 612 | uncommitted_ranges.add(add_interval); | ||
| 613 | } | ||
| 589 | }; | 614 | }; |
| 590 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 615 | ForEachWrittenRange(*cpu_src_address, amount, mirror); |
| 591 | // This subtraction in this order is important for overlapping copies. | 616 | // This subtraction in this order is important for overlapping copies. |
| @@ -677,6 +702,9 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { | |||
| 677 | } | 702 | } |
| 678 | BindHostVertexBuffers(); | 703 | BindHostVertexBuffers(); |
| 679 | BindHostTransformFeedbackBuffers(); | 704 | BindHostTransformFeedbackBuffers(); |
| 705 | if (current_draw_indirect) { | ||
| 706 | BindHostDrawIndirectBuffers(); | ||
| 707 | } | ||
| 680 | } | 708 | } |
| 681 | 709 | ||
| 682 | template <class P> | 710 | template <class P> |
| @@ -796,7 +824,8 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 796 | template <class P> | 824 | template <class P> |
| 797 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | 825 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
| 798 | bool synchronize, | 826 | bool synchronize, |
| 799 | bool mark_as_written) { | 827 | bool mark_as_written, |
| 828 | bool discard_downloads) { | ||
| 800 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 829 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 801 | if (!cpu_addr) { | 830 | if (!cpu_addr) { |
| 802 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | 831 | return {&slot_buffers[NULL_BUFFER_ID], 0}; |
| @@ -804,11 +833,17 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 804 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 833 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); |
| 805 | Buffer& buffer = slot_buffers[buffer_id]; | 834 | Buffer& buffer = slot_buffers[buffer_id]; |
| 806 | if (synchronize) { | 835 | if (synchronize) { |
| 807 | SynchronizeBuffer(buffer, *cpu_addr, size); | 836 | // SynchronizeBuffer(buffer, *cpu_addr, size); |
| 837 | SynchronizeBufferNoModified(buffer, *cpu_addr, size); | ||
| 808 | } | 838 | } |
| 809 | if (mark_as_written) { | 839 | if (mark_as_written) { |
| 810 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | 840 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); |
| 811 | } | 841 | } |
| 842 | if (discard_downloads) { | ||
| 843 | IntervalType interval{*cpu_addr, size}; | ||
| 844 | ClearDownload(interval); | ||
| 845 | discarded_ranges.subtract(interval); | ||
| 846 | } | ||
| 812 | return {&buffer, buffer.Offset(*cpu_addr)}; | 847 | return {&buffer, buffer.Offset(*cpu_addr)}; |
| 813 | } | 848 | } |
| 814 | 849 | ||
| @@ -827,10 +862,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 827 | 862 | ||
| 828 | template <class P> | 863 | template <class P> |
| 829 | void BufferCache<P>::AccumulateFlushes() { | 864 | void BufferCache<P>::AccumulateFlushes() { |
| 830 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 831 | uncommitted_ranges.clear(); | ||
| 832 | return; | ||
| 833 | } | ||
| 834 | if (uncommitted_ranges.empty()) { | 865 | if (uncommitted_ranges.empty()) { |
| 835 | return; | 866 | return; |
| 836 | } | 867 | } |
| @@ -845,12 +876,15 @@ bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | |||
| 845 | template <class P> | 876 | template <class P> |
| 846 | void BufferCache<P>::CommitAsyncFlushesHigh() { | 877 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
| 847 | AccumulateFlushes(); | 878 | AccumulateFlushes(); |
| 879 | |||
| 880 | for (const auto& interval : discarded_ranges) { | ||
| 881 | common_ranges.subtract(interval); | ||
| 882 | } | ||
| 883 | |||
| 848 | if (committed_ranges.empty()) { | 884 | if (committed_ranges.empty()) { |
| 849 | return; | 885 | return; |
| 850 | } | 886 | } |
| 851 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 887 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 852 | const bool is_accuracy_normal = | ||
| 853 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 854 | 888 | ||
| 855 | auto it = committed_ranges.begin(); | 889 | auto it = committed_ranges.begin(); |
| 856 | while (it != committed_ranges.end()) { | 890 | while (it != committed_ranges.end()) { |
| @@ -875,9 +909,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 875 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 909 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 876 | buffer.ForEachDownloadRangeAndClear( | 910 | buffer.ForEachDownloadRangeAndClear( |
| 877 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 911 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
| 878 | if (is_accuracy_normal) { | ||
| 879 | return; | ||
| 880 | } | ||
| 881 | const VAddr buffer_addr = buffer.CpuAddr(); | 912 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 882 | const auto add_download = [&](VAddr start, VAddr end) { | 913 | const auto add_download = [&](VAddr start, VAddr end) { |
| 883 | const u64 new_offset = start - buffer_addr; | 914 | const u64 new_offset = start - buffer_addr; |
| @@ -891,7 +922,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 891 | buffer_id, | 922 | buffer_id, |
| 892 | }); | 923 | }); |
| 893 | // Align up to avoid cache conflicts | 924 | // Align up to avoid cache conflicts |
| 894 | constexpr u64 align = 256ULL; | 925 | constexpr u64 align = 8ULL; |
| 895 | constexpr u64 mask = ~(align - 1ULL); | 926 | constexpr u64 mask = ~(align - 1ULL); |
| 896 | total_size_bytes += (new_size + align - 1) & mask; | 927 | total_size_bytes += (new_size + align - 1) & mask; |
| 897 | largest_copy = std::max(largest_copy, new_size); | 928 | largest_copy = std::max(largest_copy, new_size); |
| @@ -942,12 +973,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 942 | 973 | ||
| 943 | template <class P> | 974 | template <class P> |
| 944 | void BufferCache<P>::CommitAsyncFlushes() { | 975 | void BufferCache<P>::CommitAsyncFlushes() { |
| 945 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | 976 | CommitAsyncFlushesHigh(); |
| 946 | CommitAsyncFlushesHigh(); | ||
| 947 | } else { | ||
| 948 | uncommitted_ranges.clear(); | ||
| 949 | committed_ranges.clear(); | ||
| 950 | } | ||
| 951 | } | 977 | } |
| 952 | 978 | ||
| 953 | template <class P> | 979 | template <class P> |
| @@ -1064,6 +1090,19 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 1064 | } | 1090 | } |
| 1065 | 1091 | ||
| 1066 | template <class P> | 1092 | template <class P> |
| 1093 | void BufferCache<P>::BindHostDrawIndirectBuffers() { | ||
| 1094 | const auto bind_buffer = [this](const Binding& binding) { | ||
| 1095 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 1096 | TouchBuffer(buffer, binding.buffer_id); | ||
| 1097 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | ||
| 1098 | }; | ||
| 1099 | if (current_draw_indirect->include_count) { | ||
| 1100 | bind_buffer(count_buffer_binding); | ||
| 1101 | } | ||
| 1102 | bind_buffer(indirect_buffer_binding); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | template <class P> | ||
| 1067 | void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { | 1106 | void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { |
| 1068 | u32 dirty = ~0U; | 1107 | u32 dirty = ~0U; |
| 1069 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 1108 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| @@ -1294,6 +1333,9 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { | |||
| 1294 | UpdateStorageBuffers(stage); | 1333 | UpdateStorageBuffers(stage); |
| 1295 | UpdateTextureBuffers(stage); | 1334 | UpdateTextureBuffers(stage); |
| 1296 | } | 1335 | } |
| 1336 | if (current_draw_indirect) { | ||
| 1337 | UpdateDrawIndirect(); | ||
| 1338 | } | ||
| 1297 | } while (has_deleted_buffers); | 1339 | } while (has_deleted_buffers); |
| 1298 | } | 1340 | } |
| 1299 | 1341 | ||
| @@ -1384,6 +1426,27 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1384 | } | 1426 | } |
| 1385 | 1427 | ||
| 1386 | template <class P> | 1428 | template <class P> |
| 1429 | void BufferCache<P>::UpdateDrawIndirect() { | ||
| 1430 | const auto update = [this](GPUVAddr gpu_addr, size_t size, Binding& binding) { | ||
| 1431 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||
| 1432 | if (!cpu_addr) { | ||
| 1433 | binding = NULL_BINDING; | ||
| 1434 | return; | ||
| 1435 | } | ||
| 1436 | binding = Binding{ | ||
| 1437 | .cpu_addr = *cpu_addr, | ||
| 1438 | .size = static_cast<u32>(size), | ||
| 1439 | .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), | ||
| 1440 | }; | ||
| 1441 | }; | ||
| 1442 | if (current_draw_indirect->include_count) { | ||
| 1443 | update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); | ||
| 1444 | } | ||
| 1445 | update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, | ||
| 1446 | indirect_buffer_binding); | ||
| 1447 | } | ||
| 1448 | |||
| 1449 | template <class P> | ||
| 1387 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | 1450 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { |
| 1388 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { | 1451 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { |
| 1389 | Binding& binding = uniform_buffers[stage][index]; | 1452 | Binding& binding = uniform_buffers[stage][index]; |
| @@ -1705,6 +1768,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1705 | } | 1768 | } |
| 1706 | 1769 | ||
| 1707 | template <class P> | 1770 | template <class P> |
| 1771 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1772 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1773 | u64 total_size_bytes = 0; | ||
| 1774 | u64 largest_copy = 0; | ||
| 1775 | IntervalSet found_sets{}; | ||
| 1776 | auto make_copies = [&] { | ||
| 1777 | for (auto& interval : found_sets) { | ||
| 1778 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1779 | const VAddr cpu_addr = interval.lower(); | ||
| 1780 | copies.push_back(BufferCopy{ | ||
| 1781 | .src_offset = total_size_bytes, | ||
| 1782 | .dst_offset = cpu_addr - buffer.CpuAddr(), | ||
| 1783 | .size = sub_size, | ||
| 1784 | }); | ||
| 1785 | total_size_bytes += sub_size; | ||
| 1786 | largest_copy = std::max(largest_copy, sub_size); | ||
| 1787 | } | ||
| 1788 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1789 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1790 | }; | ||
| 1791 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1792 | const VAddr base_adr = buffer.CpuAddr() + range_offset; | ||
| 1793 | const VAddr end_adr = base_adr + range_size; | ||
| 1794 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1795 | found_sets.add(add_interval); | ||
| 1796 | }); | ||
| 1797 | if (found_sets.empty()) { | ||
| 1798 | return true; | ||
| 1799 | } | ||
| 1800 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1801 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1802 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1803 | if (it == common_ranges.end()) { | ||
| 1804 | make_copies(); | ||
| 1805 | return false; | ||
| 1806 | } | ||
| 1807 | while (it != it_end) { | ||
| 1808 | found_sets.subtract(*it); | ||
| 1809 | it++; | ||
| 1810 | } | ||
| 1811 | make_copies(); | ||
| 1812 | return false; | ||
| 1813 | } | ||
| 1814 | |||
| 1815 | template <class P> | ||
| 1708 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1816 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1709 | std::span<BufferCopy> copies) { | 1817 | std::span<BufferCopy> copies) { |
| 1710 | if constexpr (USE_MEMORY_MAPS) { | 1818 | if constexpr (USE_MEMORY_MAPS) { |
| @@ -1963,4 +2071,16 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) | |||
| 1963 | } | 2071 | } |
| 1964 | } | 2072 | } |
| 1965 | 2073 | ||
| 2074 | template <class P> | ||
| 2075 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { | ||
| 2076 | auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; | ||
| 2077 | return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | template <class P> | ||
| 2081 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { | ||
| 2082 | auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; | ||
| 2083 | return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); | ||
| 2084 | } | ||
| 2085 | |||
| 1966 | } // namespace VideoCommon | 2086 | } // namespace VideoCommon |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb1371612..13ff64fa3 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -97,6 +97,7 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) { | |||
| 97 | if (dma_state.non_incrementing) { | 97 | if (dma_state.non_incrementing) { |
| 98 | const u32 max_write = static_cast<u32>( | 98 | const u32 max_write = static_cast<u32>( |
| 99 | std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); | 99 | std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); |
| 100 | dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32)); | ||
| 100 | CallMultiMethod(&command_header.argument, max_write); | 101 | CallMultiMethod(&command_header.argument, max_write); |
| 101 | dma_state.method_count -= max_write; | 102 | dma_state.method_count -= max_write; |
| 102 | dma_state.is_last_call = true; | 103 | dma_state.is_last_call = true; |
| @@ -175,7 +176,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | |||
| 175 | dma_state.method_count); | 176 | dma_state.method_count); |
| 176 | } else { | 177 | } else { |
| 177 | auto subchannel = subchannels[dma_state.subchannel]; | 178 | auto subchannel = subchannels[dma_state.subchannel]; |
| 178 | subchannel->current_dma_segment = dma_state.dma_get; | 179 | subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; |
| 179 | subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, | 180 | subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, |
| 180 | dma_state.method_count); | 181 | dma_state.method_count); |
| 181 | } | 182 | } |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index ca0899ba7..da7728ded 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -157,6 +157,7 @@ private: | |||
| 157 | u32 method_count; ///< Current method count | 157 | u32 method_count; ///< Current method count |
| 158 | u32 length_pending; ///< Large NI command length pending | 158 | u32 length_pending; ///< Large NI command length pending |
| 159 | GPUVAddr dma_get; ///< Currently read segment | 159 | GPUVAddr dma_get; ///< Currently read segment |
| 160 | u32 dma_word_offset; ///< Current word offset from address | ||
| 160 | bool non_incrementing; ///< Current command's NI flag | 161 | bool non_incrementing; ///< Current command's NI flag |
| 161 | bool is_last_call; | 162 | bool is_last_call; |
| 162 | }; | 163 | }; |
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 4fa77b684..c60f32aad 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp | |||
| @@ -216,7 +216,7 @@ void DrawManager::ProcessDrawIndirect(bool draw_indexed) { | |||
| 216 | UpdateTopology(); | 216 | UpdateTopology(); |
| 217 | 217 | ||
| 218 | if (maxwell3d->ShouldExecute()) { | 218 | if (maxwell3d->ShouldExecute()) { |
| 219 | maxwell3d->rasterizer->DrawIndirect(draw_indexed); | 219 | maxwell3d->rasterizer->DrawIndirect(); |
| 220 | } | 220 | } |
| 221 | } | 221 | } |
| 222 | } // namespace Tegra::Engines | 222 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 0cdb37f83..437990162 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h | |||
| @@ -33,7 +33,10 @@ public: | |||
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | struct IndirectParams { | 35 | struct IndirectParams { |
| 36 | GPUVAddr start_address; | 36 | bool is_indexed; |
| 37 | bool include_count; | ||
| 38 | GPUVAddr count_start_address; | ||
| 39 | GPUVAddr indirect_start_address; | ||
| 37 | size_t buffer_size; | 40 | size_t buffer_size; |
| 38 | size_t max_draw_counts; | 41 | size_t max_draw_counts; |
| 39 | size_t stride; | 42 | size_t stride; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9b182b653..cd6274a9b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -130,11 +130,15 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | macro_params.insert(macro_params.end(), base_start, base_start + amount); | 132 | macro_params.insert(macro_params.end(), base_start, base_start + amount); |
| 133 | for (size_t i = 0; i < amount; i++) { | ||
| 134 | macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); | ||
| 135 | } | ||
| 133 | 136 | ||
| 134 | // Call the macro when there are no more parameters in the command buffer | 137 | // Call the macro when there are no more parameters in the command buffer |
| 135 | if (is_last_call) { | 138 | if (is_last_call) { |
| 136 | CallMacroMethod(executing_macro, macro_params); | 139 | CallMacroMethod(executing_macro, macro_params); |
| 137 | macro_params.clear(); | 140 | macro_params.clear(); |
| 141 | macro_addresses.clear(); | ||
| 138 | } | 142 | } |
| 139 | } | 143 | } |
| 140 | 144 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 22b904319..ac5e87563 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -3066,6 +3066,15 @@ public: | |||
| 3066 | 3066 | ||
| 3067 | std::unique_ptr<DrawManager> draw_manager; | 3067 | std::unique_ptr<DrawManager> draw_manager; |
| 3068 | friend class DrawManager; | 3068 | friend class DrawManager; |
| 3069 | |||
| 3070 | std::vector<u8> inline_index_draw_indexes; | ||
| 3071 | std::vector<GPUVAddr> macro_addresses; | ||
| 3072 | |||
| 3073 | Core::System& system; | ||
| 3074 | MemoryManager& memory_manager; | ||
| 3075 | |||
| 3076 | /// Handles a write to the CLEAR_BUFFERS register. | ||
| 3077 | void ProcessClearBuffers(u32 layer_count); | ||
| 3069 | 3078 | ||
| 3070 | private: | 3079 | private: |
| 3071 | void InitializeRegisterDefaults(); | 3080 | void InitializeRegisterDefaults(); |
| @@ -3126,9 +3135,6 @@ private: | |||
| 3126 | /// Returns a query's value or an empty object if the value will be deferred through a cache. | 3135 | /// Returns a query's value or an empty object if the value will be deferred through a cache. |
| 3127 | std::optional<u64> GetQueryResult(); | 3136 | std::optional<u64> GetQueryResult(); |
| 3128 | 3137 | ||
| 3129 | Core::System& system; | ||
| 3130 | MemoryManager& memory_manager; | ||
| 3131 | |||
| 3132 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 3138 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 3133 | 3139 | ||
| 3134 | /// Start offsets of each macro in macro_memory | 3140 | /// Start offsets of each macro in macro_memory |
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 1cc202cc7..da988cc0d 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/macro/macro.h" | 10 | #include "video_core/macro/macro.h" |
| 11 | #include "video_core/macro/macro_hle.h" | 11 | #include "video_core/macro/macro_hle.h" |
| 12 | #include "video_core/memory_manager.h" | ||
| 12 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 13 | 14 | ||
| 14 | namespace Tegra { | 15 | namespace Tegra { |
| @@ -24,15 +25,14 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 24 | parameters[4], parameters[1], parameters[3], parameters[5], instance_count); | 25 | parameters[4], parameters[1], parameters[3], parameters[5], instance_count); |
| 25 | } | 26 | } |
| 26 | 27 | ||
| 27 | void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | 28 | void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { |
| 28 | const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); | 29 | const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); |
| 29 | maxwell3d.draw_manager->DrawArray( | 30 | maxwell3d.draw_manager->DrawArray( |
| 30 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | 31 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), |
| 31 | parameters[3], parameters[1], parameters[4], instance_count); | 32 | parameters[3], parameters[1], parameters[4], instance_count); |
| 32 | } | 33 | } |
| 33 | 34 | ||
| 34 | void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | 35 | void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { |
| 35 | const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); | ||
| 36 | const u32 element_base = parameters[4]; | 36 | const u32 element_base = parameters[4]; |
| 37 | const u32 base_instance = parameters[5]; | 37 | const u32 base_instance = parameters[5]; |
| 38 | maxwell3d.regs.vertex_id_base = element_base; | 38 | maxwell3d.regs.vertex_id_base = element_base; |
| @@ -41,9 +41,18 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 41 | maxwell3d.CallMethod(0x8e4, element_base, true); | 41 | maxwell3d.CallMethod(0x8e4, element_base, true); |
| 42 | maxwell3d.CallMethod(0x8e5, base_instance, true); | 42 | maxwell3d.CallMethod(0x8e5, base_instance, true); |
| 43 | 43 | ||
| 44 | maxwell3d.draw_manager->DrawIndex( | 44 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
| 45 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | 45 | params.is_indexed = true; |
| 46 | parameters[3], parameters[1], element_base, base_instance, instance_count); | 46 | params.include_count = false; |
| 47 | params.count_start_address = 0; | ||
| 48 | params.indirect_start_address = maxwell3d.macro_addresses[1]; | ||
| 49 | params.buffer_size = 5 * sizeof(u32); | ||
| 50 | params.max_draw_counts = 1; | ||
| 51 | params.stride = 0; | ||
| 52 | |||
| 53 | maxwell3d.draw_manager->DrawIndexedIndirect( | ||
| 54 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 0, | ||
| 55 | 1U << 18); | ||
| 47 | 56 | ||
| 48 | maxwell3d.regs.vertex_id_base = 0x0; | 57 | maxwell3d.regs.vertex_id_base = 0x0; |
| 49 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 58 | maxwell3d.CallMethod(0x8e3, 0x640, true); |
| @@ -51,8 +60,9 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 51 | maxwell3d.CallMethod(0x8e5, 0x0, true); | 60 | maxwell3d.CallMethod(0x8e5, 0x0, true); |
| 52 | } | 61 | } |
| 53 | 62 | ||
| 54 | // Multidraw Indirect | 63 | // Multidraw Indexed Indirect |
| 55 | void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | 64 | void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, |
| 65 | const std::vector<u32>& parameters) { | ||
| 56 | const u32 start_indirect = parameters[0]; | 66 | const u32 start_indirect = parameters[0]; |
| 57 | const u32 end_indirect = parameters[1]; | 67 | const u32 end_indirect = parameters[1]; |
| 58 | if (start_indirect >= end_indirect) { | 68 | if (start_indirect >= end_indirect) { |
| @@ -66,7 +76,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 66 | // size of each indirect segment | 76 | // size of each indirect segment |
| 67 | const u32 indirect_words = 5 + padding; | 77 | const u32 indirect_words = 5 + padding; |
| 68 | const u32 stride = indirect_words * sizeof(u32); | 78 | const u32 stride = indirect_words * sizeof(u32); |
| 69 | const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32); | ||
| 70 | const std::size_t draw_count = end_indirect - start_indirect; | 79 | const std::size_t draw_count = end_indirect - start_indirect; |
| 71 | u32 lowest_first = std::numeric_limits<u32>::max(); | 80 | u32 lowest_first = std::numeric_limits<u32>::max(); |
| 72 | u32 highest_limit = std::numeric_limits<u32>::min(); | 81 | u32 highest_limit = std::numeric_limits<u32>::min(); |
| @@ -80,12 +89,16 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 80 | 89 | ||
| 81 | const u32 base_vertex = parameters[8]; | 90 | const u32 base_vertex = parameters[8]; |
| 82 | const u32 base_instance = parameters[9]; | 91 | const u32 base_instance = parameters[9]; |
| 92 | maxwell3d.regs.vertex_id_base = base_vertex; | ||
| 83 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 93 | maxwell3d.CallMethod(0x8e3, 0x640, true); |
| 84 | maxwell3d.CallMethod(0x8e4, base_vertex, true); | 94 | maxwell3d.CallMethod(0x8e4, base_vertex, true); |
| 85 | maxwell3d.CallMethod(0x8e5, base_instance, true); | 95 | maxwell3d.CallMethod(0x8e5, base_instance, true); |
| 86 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); | 96 | auto& params = maxwell3d.draw_manager->GetIndirectParams(); |
| 87 | params.start_address = start_address; | 97 | params.is_indexed = true; |
| 88 | params.buffer_size = sizeof(u32) + stride * draw_count; | 98 | params.include_count = true; |
| 99 | params.count_start_address = maxwell3d.macro_addresses[4]; | ||
| 100 | params.indirect_start_address = maxwell3d.macro_addresses[5]; | ||
| 101 | params.buffer_size = stride * draw_count; | ||
| 89 | params.max_draw_counts = draw_count; | 102 | params.max_draw_counts = draw_count; |
| 90 | params.stride = stride; | 103 | params.stride = stride; |
| 91 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 104 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| @@ -93,7 +106,7 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 93 | } | 106 | } |
| 94 | 107 | ||
| 95 | // Multi-layer Clear | 108 | // Multi-layer Clear |
| 96 | void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | 109 | void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { |
| 97 | ASSERT(parameters.size() == 1); | 110 | ASSERT(parameters.size() == 1); |
| 98 | 111 | ||
| 99 | const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; | 112 | const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; |
| @@ -107,10 +120,10 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 107 | 120 | ||
| 108 | constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{ | 121 | constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{ |
| 109 | {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, | 122 | {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, |
| 110 | {0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD}, | 123 | {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect}, |
| 111 | {0x0217920100488FF7, &HLE_0217920100488FF7}, | 124 | {0x0217920100488FF7, &HLE_DrawIndexedIndirect}, |
| 112 | {0x3F5E74B9C9A50164, &HLE_3F5E74B9C9A50164}, | 125 | {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect}, |
| 113 | {0xEAD26C3E2109B06B, &HLE_EAD26C3E2109B06B}, | 126 | {0xEAD26C3E2109B06B, &HLE_MultiLayerClear}, |
| 114 | }}; | 127 | }}; |
| 115 | 128 | ||
| 116 | class HLEMacroImpl final : public CachedMacro { | 129 | class HLEMacroImpl final : public CachedMacro { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a2a651f34..641b95c7c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -43,7 +43,7 @@ public: | |||
| 43 | virtual void Draw(bool is_indexed, u32 instance_count) = 0; | 43 | virtual void Draw(bool is_indexed, u32 instance_count) = 0; |
| 44 | 44 | ||
| 45 | /// Dispatches an indirect draw invocation | 45 | /// Dispatches an indirect draw invocation |
| 46 | virtual void DrawIndirect(bool is_indexed) {} | 46 | virtual void DrawIndirect() {} |
| 47 | 47 | ||
| 48 | /// Clear the current framebuffer | 48 | /// Clear the current framebuffer |
| 49 | virtual void Clear(u32 layer_count) = 0; | 49 | virtual void Clear(u32 layer_count) = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 6b54d7111..487d8b416 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -56,7 +56,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { | |||
| 56 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 56 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 57 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | | 57 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| 58 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | 58 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | |
| 59 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; | 59 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | |
| 60 | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; | ||
| 60 | if (device.IsExtTransformFeedbackSupported()) { | 61 | if (device.IsExtTransformFeedbackSupported()) { |
| 61 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 62 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 62 | } | 63 | } |
| @@ -516,6 +517,7 @@ void BufferCacheRuntime::ReserveNullBuffer() { | |||
| 516 | if (device.IsExtTransformFeedbackSupported()) { | 517 | if (device.IsExtTransformFeedbackSupported()) { |
| 517 | create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 518 | create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 518 | } | 519 | } |
| 520 | create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; | ||
| 519 | null_buffer = device.GetLogical().CreateBuffer(create_info); | 521 | null_buffer = device.GetLogical().CreateBuffer(create_info); |
| 520 | if (device.HasDebuggingToolAttached()) { | 522 | if (device.HasDebuggingToolAttached()) { |
| 521 | null_buffer.SetObjectNameEXT("Null buffer"); | 523 | null_buffer.SetObjectNameEXT("Null buffer"); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9b75f33dd..6f1adc97f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -225,25 +225,40 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | |||
| 225 | }); | 225 | }); |
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | void RasterizerVulkan::DrawIndirect(bool is_indexed) { | 228 | void RasterizerVulkan::DrawIndirect() { |
| 229 | PrepareDraw(is_indexed, [this, is_indexed] { | 229 | const auto& params = maxwell3d->draw_manager->GetIndirectParams(); |
| 230 | const auto params = maxwell3d->draw_manager->GetIndirectParams(); | 230 | buffer_cache.SetDrawIndirect(¶ms); |
| 231 | const auto [buffer, offset] = buffer_cache.ObtainBuffer( | 231 | PrepareDraw(params.is_indexed, [this, ¶ms] { |
| 232 | params.start_address, static_cast<u32>(params.buffer_size), true, false); | 232 | const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer(); |
| 233 | scheduler.Record([buffer_obj = buffer->Handle(), offset, | 233 | if (params.include_count) { |
| 234 | max_draw_counts = params.max_draw_counts, stride = params.stride, | 234 | const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount(); |
| 235 | is_indexed](vk::CommandBuffer cmdbuf) { | 235 | scheduler.Record([draw_buffer_obj = draw_buffer->Handle(), |
| 236 | if (is_indexed) { | 236 | buffer_obj = buffer->Handle(), offset_base, offset, |
| 237 | cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, | 237 | params](vk::CommandBuffer cmdbuf) { |
| 238 | static_cast<u32>(max_draw_counts), | 238 | if (params.is_indexed) { |
| 239 | static_cast<u32>(stride)); | 239 | cmdbuf.DrawIndexedIndirectCount( |
| 240 | buffer_obj, offset, draw_buffer_obj, offset_base, | ||
| 241 | static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride)); | ||
| 242 | } else { | ||
| 243 | cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base, | ||
| 244 | static_cast<u32>(params.max_draw_counts), | ||
| 245 | static_cast<u32>(params.stride)); | ||
| 246 | } | ||
| 247 | }); | ||
| 248 | return; | ||
| 249 | } | ||
| 250 | scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) { | ||
| 251 | if (params.is_indexed) { | ||
| 252 | cmdbuf.DrawIndexedIndirect(buffer_obj, offset, | ||
| 253 | static_cast<u32>(params.max_draw_counts), | ||
| 254 | static_cast<u32>(params.stride)); | ||
| 240 | } else { | 255 | } else { |
| 241 | cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, | 256 | cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts), |
| 242 | static_cast<u32>(max_draw_counts), | 257 | static_cast<u32>(params.stride)); |
| 243 | static_cast<u32>(stride)); | ||
| 244 | } | 258 | } |
| 245 | }); | 259 | }); |
| 246 | }); | 260 | }); |
| 261 | buffer_cache.SetDrawIndirect(nullptr); | ||
| 247 | } | 262 | } |
| 248 | 263 | ||
| 249 | void RasterizerVulkan::Clear(u32 layer_count) { | 264 | void RasterizerVulkan::Clear(u32 layer_count) { |
| @@ -425,9 +440,6 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 425 | 440 | ||
| 426 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | 441 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { |
| 427 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | 442 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; |
| 428 | if (!Settings::IsGPULevelHigh()) { | ||
| 429 | return buffer_cache.IsRegionGpuModified(addr, size); | ||
| 430 | } | ||
| 431 | return texture_cache.IsRegionGpuModified(addr, size) || | 443 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 432 | buffer_cache.IsRegionGpuModified(addr, size); | 444 | buffer_cache.IsRegionGpuModified(addr, size); |
| 433 | } | 445 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index bc43a8a1f..43a210da0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -65,7 +65,7 @@ public: | |||
| 65 | ~RasterizerVulkan() override; | 65 | ~RasterizerVulkan() override; |
| 66 | 66 | ||
| 67 | void Draw(bool is_indexed, u32 instance_count) override; | 67 | void Draw(bool is_indexed, u32 instance_count) override; |
| 68 | void DrawIndirect(bool is_indexed) override; | 68 | void DrawIndirect() override; |
| 69 | void Clear(u32 layer_count) override; | 69 | void Clear(u32 layer_count) override; |
| 70 | void DispatchCompute() override; | 70 | void DispatchCompute() override; |
| 71 | void ResetCounter(VideoCore::QueryType type) override; | 71 | void ResetCounter(VideoCore::QueryType type) override; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 477fc428b..207fae8c9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -351,7 +351,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 351 | .dualSrcBlend = true, | 351 | .dualSrcBlend = true, |
| 352 | .logicOp = true, | 352 | .logicOp = true, |
| 353 | .multiDrawIndirect = true, | 353 | .multiDrawIndirect = true, |
| 354 | .drawIndirectFirstInstance = false, | 354 | .drawIndirectFirstInstance = true, |
| 355 | .depthClamp = true, | 355 | .depthClamp = true, |
| 356 | .depthBiasClamp = true, | 356 | .depthBiasClamp = true, |
| 357 | .fillModeNonSolid = true, | 357 | .fillModeNonSolid = true, |
| @@ -1024,6 +1024,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { | |||
| 1024 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 1024 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 1025 | std::make_pair(features.imageCubeArray, "imageCubeArray"), | 1025 | std::make_pair(features.imageCubeArray, "imageCubeArray"), |
| 1026 | std::make_pair(features.independentBlend, "independentBlend"), | 1026 | std::make_pair(features.independentBlend, "independentBlend"), |
| 1027 | std::make_pair(features.multiDrawIndirect, "multiDrawIndirect"), | ||
| 1028 | std::make_pair(features.drawIndirectFirstInstance, "drawIndirectFirstInstance"), | ||
| 1027 | std::make_pair(features.depthClamp, "depthClamp"), | 1029 | std::make_pair(features.depthClamp, "depthClamp"), |
| 1028 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), | 1030 | std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), |
| 1029 | std::make_pair(features.largePoints, "largePoints"), | 1031 | std::make_pair(features.largePoints, "largePoints"), |
| @@ -1117,6 +1119,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 1117 | test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); | 1119 | test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); |
| 1118 | test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); | 1120 | test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); |
| 1119 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 1121 | test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); |
| 1122 | test(khr_draw_indirect_count, VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, true); | ||
| 1120 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 1123 | test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 1121 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 1124 | test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 1122 | test(has_ext_primitive_topology_list_restart, | 1125 | test(has_ext_primitive_topology_list_restart, |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 6a26c4e6e..d0d7c2299 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -451,6 +451,7 @@ private: | |||
| 451 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 451 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 452 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. | 452 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. |
| 453 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. | 453 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. |
| 454 | bool khr_draw_indirect_count{}; ///< Support for VK_KHR_draw_indirect_count. | ||
| 454 | bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. | 455 | bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. |
| 455 | bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. | 456 | bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. |
| 456 | bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. | 457 | bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index c58c4c1c4..f8f8ed9f8 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -94,8 +94,10 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 94 | X(vkCmdDispatch); | 94 | X(vkCmdDispatch); |
| 95 | X(vkCmdDraw); | 95 | X(vkCmdDraw); |
| 96 | X(vkCmdDrawIndexed); | 96 | X(vkCmdDrawIndexed); |
| 97 | X(vkCmdDrawIndirectCount); | 97 | X(vkCmdDrawIndirect); |
| 98 | X(vkCmdDrawIndexedIndirectCount); | 98 | X(vkCmdDrawIndexedIndirect); |
| 99 | X(vkCmdDrawIndirectCountKHR); | ||
| 100 | X(vkCmdDrawIndexedIndirectCountKHR); | ||
| 99 | X(vkCmdEndQuery); | 101 | X(vkCmdEndQuery); |
| 100 | X(vkCmdEndRenderPass); | 102 | X(vkCmdEndRenderPass); |
| 101 | X(vkCmdEndTransformFeedbackEXT); | 103 | X(vkCmdEndTransformFeedbackEXT); |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 9bd158dce..493a48573 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -213,8 +213,10 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 213 | PFN_vkCmdDispatch vkCmdDispatch{}; | 213 | PFN_vkCmdDispatch vkCmdDispatch{}; |
| 214 | PFN_vkCmdDraw vkCmdDraw{}; | 214 | PFN_vkCmdDraw vkCmdDraw{}; |
| 215 | PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; | 215 | PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; |
| 216 | PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; | 216 | PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; |
| 217 | PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; | 217 | PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; |
| 218 | PFN_vkCmdDrawIndirectCountKHR vkCmdDrawIndirectCountKHR{}; | ||
| 219 | PFN_vkCmdDrawIndexedIndirectCountKHR vkCmdDrawIndexedIndirectCountKHR{}; | ||
| 218 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | 220 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; |
| 219 | PFN_vkCmdEndQuery vkCmdEndQuery{}; | 221 | PFN_vkCmdEndQuery vkCmdEndQuery{}; |
| 220 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | 222 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; |
| @@ -1021,17 +1023,27 @@ public: | |||
| 1021 | first_instance); | 1023 | first_instance); |
| 1022 | } | 1024 | } |
| 1023 | 1025 | ||
| 1026 | void DrawIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count, | ||
| 1027 | u32 stride) const noexcept { | ||
| 1028 | dld->vkCmdDrawIndirect(handle, src_buffer, src_offset, draw_count, stride); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | void DrawIndexedIndirect(VkBuffer src_buffer, VkDeviceSize src_offset, u32 draw_count, | ||
| 1032 | u32 stride) const noexcept { | ||
| 1033 | dld->vkCmdDrawIndexedIndirect(handle, src_buffer, src_offset, draw_count, stride); | ||
| 1034 | } | ||
| 1035 | |||
| 1024 | void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, | 1036 | void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, |
| 1025 | VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { | 1037 | VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { |
| 1026 | dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, | 1038 | dld->vkCmdDrawIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, count_offset, |
| 1027 | draw_count, stride); | 1039 | draw_count, stride); |
| 1028 | } | 1040 | } |
| 1029 | 1041 | ||
| 1030 | void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, | 1042 | void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, |
| 1031 | VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, | 1043 | VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, |
| 1032 | u32 stride) const noexcept { | 1044 | u32 stride) const noexcept { |
| 1033 | dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, | 1045 | dld->vkCmdDrawIndexedIndirectCountKHR(handle, src_buffer, src_offset, count_buffer, |
| 1034 | count_offset, draw_count, stride); | 1046 | count_offset, draw_count, stride); |
| 1035 | } | 1047 | } |
| 1036 | 1048 | ||
| 1037 | void ClearAttachments(Span<VkClearAttachment> attachments, | 1049 | void ClearAttachments(Span<VkClearAttachment> attachments, |