diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/query_cache/query_base.h | 1 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache.h | 18 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_stream.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 160 |
4 files changed, 180 insertions, 5 deletions
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 485ed669c..0ae23af9f 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 { | |||
| 18 | IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. | 18 | IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. |
| 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. | 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. |
| 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. | 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. |
| 21 | IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment | ||
| 21 | }; | 22 | }; |
| 22 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | 23 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) |
| 23 | 24 | ||
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f6af48d14..f1393d5c7 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -489,8 +489,22 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() { | |||
| 489 | if (mask == 0) { | 489 | if (mask == 0) { |
| 490 | return; | 490 | return; |
| 491 | } | 491 | } |
| 492 | impl->ForEachStreamerIn(mask, | 492 | u64 ran_mask = 0; |
| 493 | [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); | 493 | u64 next_phase = 0; |
| 494 | while (mask) { | ||
| 495 | impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { | ||
| 496 | u64 dep_mask = streamer->GetDependenceMask(); | ||
| 497 | if ((dep_mask & ~ran_mask) != 0) { | ||
| 498 | next_phase |= dep_mask; | ||
| 499 | return; | ||
| 500 | } | ||
| 501 | u64 index = streamer->GetId(); | ||
| 502 | ran_mask |= (1ULL << index); | ||
| 503 | mask &= ~(1ULL << index); | ||
| 504 | streamer->PopUnsyncedQueries(); | ||
| 505 | }); | ||
| 506 | ran_mask |= next_phase; | ||
| 507 | } | ||
| 494 | } | 508 | } |
| 495 | 509 | ||
| 496 | // Invalidation | 510 | // Invalidation |
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index dd5f95b3c..0e9275565 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h | |||
| @@ -70,6 +70,10 @@ public: | |||
| 70 | return id; | 70 | return id; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | u64 GetDependenceMask() const { | ||
| 74 | return dependance_mask; | ||
| 75 | } | ||
| 76 | |||
| 73 | protected: | 77 | protected: |
| 74 | const size_t id; | 78 | const size_t id; |
| 75 | const u64 dependance_mask; | 79 | const u64 dependance_mask; |
| @@ -78,7 +82,7 @@ protected: | |||
| 78 | template <typename QueryType> | 82 | template <typename QueryType> |
| 79 | class SimpleStreamer : public StreamerInterface { | 83 | class SimpleStreamer : public StreamerInterface { |
| 80 | public: | 84 | public: |
| 81 | SimpleStreamer(size_t id_) : StreamerInterface{id_} {} | 85 | SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} |
| 82 | virtual ~SimpleStreamer() = default; | 86 | virtual ~SimpleStreamer() = default; |
| 83 | 87 | ||
| 84 | protected: | 88 | protected: |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 42f571007..ef891e26b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -526,6 +526,9 @@ private: | |||
| 526 | }; | 526 | }; |
| 527 | 527 | ||
| 528 | template <typename Traits> | 528 | template <typename Traits> |
| 529 | class PrimitivesSucceededStreamer; | ||
| 530 | |||
| 531 | template <typename Traits> | ||
| 529 | class TFBCounterStreamer : public BaseStreamer { | 532 | class TFBCounterStreamer : public BaseStreamer { |
| 530 | public: | 533 | public: |
| 531 | TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | 534 | TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, |
| @@ -537,6 +540,7 @@ public: | |||
| 537 | current_bank = nullptr; | 540 | current_bank = nullptr; |
| 538 | counter_buffers.fill(VK_NULL_HANDLE); | 541 | counter_buffers.fill(VK_NULL_HANDLE); |
| 539 | offsets.fill(0); | 542 | offsets.fill(0); |
| 543 | last_queries.fill(0); | ||
| 540 | const VkBufferCreateInfo buffer_ci = { | 544 | const VkBufferCreateInfo buffer_ci = { |
| 541 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 545 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 542 | .pNext = nullptr, | 546 | .pNext = nullptr, |
| @@ -630,7 +634,7 @@ public: | |||
| 630 | return index; | 634 | return index; |
| 631 | } | 635 | } |
| 632 | const size_t subreport = static_cast<size_t>(*subreport_); | 636 | const size_t subreport = static_cast<size_t>(*subreport_); |
| 633 | UpdateBuffers(); | 637 | last_queries[subreport] = address; |
| 634 | if ((streams_mask & (1ULL << subreport)) == 0) { | 638 | if ((streams_mask & (1ULL << subreport)) == 0) { |
| 635 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | 639 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; |
| 636 | return index; | 640 | return index; |
| @@ -646,6 +650,13 @@ public: | |||
| 646 | return index; | 650 | return index; |
| 647 | } | 651 | } |
| 648 | 652 | ||
| 653 | std::optional<VAddr> GetLastQueryStream(size_t stream) { | ||
| 654 | if (last_queries[stream] != 0) { | ||
| 655 | return {last_queries[stream]}; | ||
| 656 | } | ||
| 657 | return std::nullopt; | ||
| 658 | } | ||
| 659 | |||
| 649 | bool HasUnsyncedQueries() override { | 660 | bool HasUnsyncedQueries() override { |
| 650 | return !pending_flush_queries.empty(); | 661 | return !pending_flush_queries.empty(); |
| 651 | } | 662 | } |
| @@ -657,6 +668,7 @@ public: | |||
| 657 | size_t offset_base = staging_ref.offset; | 668 | size_t offset_base = staging_ref.offset; |
| 658 | for (auto q : pending_flush_queries) { | 669 | for (auto q : pending_flush_queries) { |
| 659 | auto* query = GetQuery(q); | 670 | auto* query = GetQuery(q); |
| 671 | query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; | ||
| 660 | auto& bank = bank_pool.GetBank(query->start_bank_id); | 672 | auto& bank = bank_pool.GetBank(query->start_bank_id); |
| 661 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); | 673 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); |
| 662 | offset_base += TFBQueryBank::QUERY_SIZE; | 674 | offset_base += TFBQueryBank::QUERY_SIZE; |
| @@ -741,13 +753,15 @@ private: | |||
| 741 | cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); | 753 | cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); |
| 742 | }); | 754 | }); |
| 743 | } else { | 755 | } else { |
| 744 | scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | 756 | scheduler.Record([this, |
| 757 | total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | ||
| 745 | cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); | 758 | cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); |
| 746 | }); | 759 | }); |
| 747 | } | 760 | } |
| 748 | } | 761 | } |
| 749 | 762 | ||
| 750 | void UpdateBuffers() { | 763 | void UpdateBuffers() { |
| 764 | last_queries.fill(0); | ||
| 751 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | 765 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { |
| 752 | buffers_count = 0; | 766 | buffers_count = 0; |
| 753 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | 767 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; |
| @@ -804,6 +818,9 @@ private: | |||
| 804 | return {current_bank_id, slot}; | 818 | return {current_bank_id, slot}; |
| 805 | } | 819 | } |
| 806 | 820 | ||
| 821 | template <typename Traits> | ||
| 822 | friend class PrimitivesSucceededStreamer; | ||
| 823 | |||
| 807 | static constexpr size_t NUM_STREAMS = 4; | 824 | static constexpr size_t NUM_STREAMS = 4; |
| 808 | static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; | 825 | static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; |
| 809 | 826 | ||
| @@ -833,9 +850,143 @@ private: | |||
| 833 | size_t buffers_count{}; | 850 | size_t buffers_count{}; |
| 834 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | 851 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; |
| 835 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | 852 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; |
| 853 | std::array<VAddr, NUM_STREAMS> last_queries; | ||
| 836 | u64 streams_mask; | 854 | u64 streams_mask; |
| 837 | }; | 855 | }; |
| 838 | 856 | ||
| 857 | class PrimitivesQueryBase : public VideoCommon::QueryBase { | ||
| 858 | public: | ||
| 859 | // Default constructor | ||
| 860 | PrimitivesQueryBase() | ||
| 861 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, | ||
| 862 | dependant_index{}, dependant_manage{} {} | ||
| 863 | |||
| 864 | // Parameterized constructor | ||
| 865 | PrimitivesQueryBase(bool is_long, VAddr address) | ||
| 866 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, | ||
| 867 | dependant_index{}, dependant_manage{} { | ||
| 868 | if (is_long) { | ||
| 869 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 870 | } | ||
| 871 | } | ||
| 872 | |||
| 873 | u64 stride; | ||
| 874 | VAddr dependant_address; | ||
| 875 | size_t dependant_index; | ||
| 876 | bool dependant_manage; | ||
| 877 | }; | ||
| 878 | |||
| 879 | template <typename Traits> | ||
| 880 | class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { | ||
| 881 | public: | ||
| 882 | PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, | ||
| 883 | TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_) | ||
| 884 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>( | ||
| 885 | id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)), | ||
| 886 | runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} | ||
| 887 | |||
| 888 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 889 | std::optional<u32> subreport_) override { | ||
| 890 | auto index = BuildQuery(); | ||
| 891 | auto* new_query = GetQuery(index); | ||
| 892 | new_query->guest_address = address; | ||
| 893 | new_query->value = 0; | ||
| 894 | if (has_timestamp) { | ||
| 895 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 896 | } | ||
| 897 | if (!subreport_) { | ||
| 898 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 899 | return index; | ||
| 900 | } | ||
| 901 | const size_t subreport = static_cast<size_t>(*subreport_); | ||
| 902 | auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); | ||
| 903 | bool must_manage_dependance = false; | ||
| 904 | if (dependant_address_opt) { | ||
| 905 | new_query->dependant_address = *dependant_address_opt; | ||
| 906 | } else { | ||
| 907 | new_query->dependant_index = | ||
| 908 | tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); | ||
| 909 | auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index); | ||
| 910 | dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated; | ||
| 911 | must_manage_dependance = true; | ||
| 912 | if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { | ||
| 913 | new_query->value = 0; | ||
| 914 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 915 | if (must_manage_dependance) { | ||
| 916 | tfb_streamer.Free(new_query->dependant_index); | ||
| 917 | } | ||
| 918 | return index; | ||
| 919 | } | ||
| 920 | } | ||
| 921 | |||
| 922 | new_query->dependant_manage = must_manage_dependance; | ||
| 923 | runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 924 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | ||
| 925 | i++) { | ||
| 926 | const auto& tf = regs.transform_feedback; | ||
| 927 | if (tf.controls[i].stream != subreport) { | ||
| 928 | continue; | ||
| 929 | } | ||
| 930 | new_query->stride = tf.controls[i].stride; | ||
| 931 | break; | ||
| 932 | } | ||
| 933 | }); | ||
| 934 | pending_flush_queries.push_back(index); | ||
| 935 | return index; | ||
| 936 | } | ||
| 937 | |||
| 938 | bool HasUnsyncedQueries() override { | ||
| 939 | return !pending_flush_queries.empty(); | ||
| 940 | } | ||
| 941 | |||
| 942 | void PushUnsyncedQueries() override { | ||
| 943 | std::scoped_lock lk(flush_guard); | ||
| 944 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | ||
| 945 | pending_flush_queries.clear(); | ||
| 946 | } | ||
| 947 | |||
| 948 | void PopUnsyncedQueries() override { | ||
| 949 | std::vector<size_t> flushed_queries; | ||
| 950 | { | ||
| 951 | std::scoped_lock lk(flush_guard); | ||
| 952 | flushed_queries = std::move(pending_flush_sets.front()); | ||
| 953 | pending_flush_sets.pop_front(); | ||
| 954 | } | ||
| 955 | |||
| 956 | for (auto q : flushed_queries) { | ||
| 957 | auto* query = GetQuery(q); | ||
| 958 | if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) { | ||
| 959 | continue; | ||
| 960 | } | ||
| 961 | |||
| 962 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 963 | if (query->dependant_manage) { | ||
| 964 | auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); | ||
| 965 | query->value = dependant_query->value / query->stride; | ||
| 966 | tfb_streamer.Free(query->dependant_index); | ||
| 967 | } else { | ||
| 968 | u8* pointer = cpu_memory.GetPointer(query->dependant_address); | ||
| 969 | u32 result; | ||
| 970 | std::memcpy(&result, pointer, sizeof(u32)); | ||
| 971 | query->value = static_cast<u64>(result) / query->stride; | ||
| 972 | } | ||
| 973 | } | ||
| 974 | } | ||
| 975 | |||
| 976 | private: | ||
| 977 | QueryCacheRuntime& runtime; | ||
| 978 | TFBCounterStreamer<QueryCacheParams>& tfb_streamer; | ||
| 979 | Core::Memory::Memory& cpu_memory; | ||
| 980 | |||
| 981 | // syncing queue | ||
| 982 | std::vector<size_t> pending_sync; | ||
| 983 | |||
| 984 | // flush levels | ||
| 985 | std::vector<size_t> pending_flush_queries; | ||
| 986 | std::deque<std::vector<size_t>> pending_flush_sets; | ||
| 987 | std::mutex flush_guard; | ||
| 988 | }; | ||
| 989 | |||
| 839 | } // namespace | 990 | } // namespace |
| 840 | 991 | ||
| 841 | struct QueryCacheRuntimeImpl { | 992 | struct QueryCacheRuntimeImpl { |
| @@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl { | |||
| 853 | scheduler, memory_allocator), | 1004 | scheduler, memory_allocator), |
| 854 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | 1005 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, |
| 855 | scheduler, memory_allocator, staging_pool), | 1006 | scheduler, memory_allocator, staging_pool), |
| 1007 | primitives_succeeded_streamer( | ||
| 1008 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), | ||
| 856 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { | 1009 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { |
| 857 | 1010 | ||
| 858 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; | 1011 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; |
| @@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl { | |||
| 889 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; | 1042 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; |
| 890 | SamplesStreamer<QueryCacheParams> sample_streamer; | 1043 | SamplesStreamer<QueryCacheParams> sample_streamer; |
| 891 | TFBCounterStreamer<QueryCacheParams> tfb_streamer; | 1044 | TFBCounterStreamer<QueryCacheParams> tfb_streamer; |
| 1045 | PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer; | ||
| 892 | 1046 | ||
| 893 | std::vector<std::pair<VAddr, VAddr>> little_cache; | 1047 | std::vector<std::pair<VAddr, VAddr>> little_cache; |
| 894 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | 1048 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| @@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp | |||
| 1086 | return &impl->sample_streamer; | 1240 | return &impl->sample_streamer; |
| 1087 | case QueryType::StreamingByteCount: | 1241 | case QueryType::StreamingByteCount: |
| 1088 | return &impl->tfb_streamer; | 1242 | return &impl->tfb_streamer; |
| 1243 | case QueryType::StreamingPrimitivesSucceeded: | ||
| 1244 | return &impl->primitives_succeeded_streamer; | ||
| 1089 | default: | 1245 | default: |
| 1090 | return nullptr; | 1246 | return nullptr; |
| 1091 | } | 1247 | } |