summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/query_cache/query_base.h1
-rw-r--r--src/video_core/query_cache/query_cache.h18
-rw-r--r--src/video_core/query_cache/query_stream.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp160
4 files changed, 180 insertions, 5 deletions
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 485ed669c..0ae23af9f 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 {
18 IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. 18 IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. 19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
20 IsFence = 1 << 8, ///< Indicates the query is a fence. 20 IsFence = 1 << 8, ///< Indicates the query is a fence.
21 IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment
21}; 22};
22DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) 23DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23 24
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index f6af48d14..f1393d5c7 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -489,8 +489,22 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
489 if (mask == 0) { 489 if (mask == 0) {
490 return; 490 return;
491 } 491 }
492 impl->ForEachStreamerIn(mask, 492 u64 ran_mask = 0;
493 [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); 493 u64 next_phase = 0;
494 while (mask) {
495 impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) {
496 u64 dep_mask = streamer->GetDependenceMask();
497 if ((dep_mask & ~ran_mask) != 0) {
498 next_phase |= dep_mask;
499 return;
500 }
501 u64 index = streamer->GetId();
502 ran_mask |= (1ULL << index);
503 mask &= ~(1ULL << index);
504 streamer->PopUnsyncedQueries();
505 });
506 ran_mask |= next_phase;
507 }
494} 508}
495 509
496// Invalidation 510// Invalidation
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
index dd5f95b3c..0e9275565 100644
--- a/src/video_core/query_cache/query_stream.h
+++ b/src/video_core/query_cache/query_stream.h
@@ -70,6 +70,10 @@ public:
70 return id; 70 return id;
71 } 71 }
72 72
73 u64 GetDependenceMask() const {
74 return dependance_mask;
75 }
76
73protected: 77protected:
74 const size_t id; 78 const size_t id;
75 const u64 dependance_mask; 79 const u64 dependance_mask;
@@ -78,7 +82,7 @@ protected:
78template <typename QueryType> 82template <typename QueryType>
79class SimpleStreamer : public StreamerInterface { 83class SimpleStreamer : public StreamerInterface {
80public: 84public:
81 SimpleStreamer(size_t id_) : StreamerInterface{id_} {} 85 SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {}
82 virtual ~SimpleStreamer() = default; 86 virtual ~SimpleStreamer() = default;
83 87
84protected: 88protected:
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 42f571007..ef891e26b 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -526,6 +526,9 @@ private:
526}; 526};
527 527
528template <typename Traits> 528template <typename Traits>
529class PrimitivesSucceededStreamer;
530
531template <typename Traits>
529class TFBCounterStreamer : public BaseStreamer { 532class TFBCounterStreamer : public BaseStreamer {
530public: 533public:
531 TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, 534 TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
@@ -537,6 +540,7 @@ public:
537 current_bank = nullptr; 540 current_bank = nullptr;
538 counter_buffers.fill(VK_NULL_HANDLE); 541 counter_buffers.fill(VK_NULL_HANDLE);
539 offsets.fill(0); 542 offsets.fill(0);
543 last_queries.fill(0);
540 const VkBufferCreateInfo buffer_ci = { 544 const VkBufferCreateInfo buffer_ci = {
541 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 545 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
542 .pNext = nullptr, 546 .pNext = nullptr,
@@ -630,7 +634,7 @@ public:
630 return index; 634 return index;
631 } 635 }
632 const size_t subreport = static_cast<size_t>(*subreport_); 636 const size_t subreport = static_cast<size_t>(*subreport_);
633 UpdateBuffers(); 637 last_queries[subreport] = address;
634 if ((streams_mask & (1ULL << subreport)) == 0) { 638 if ((streams_mask & (1ULL << subreport)) == 0) {
635 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; 639 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
636 return index; 640 return index;
@@ -646,6 +650,13 @@ public:
646 return index; 650 return index;
647 } 651 }
648 652
653 std::optional<VAddr> GetLastQueryStream(size_t stream) {
654 if (last_queries[stream] != 0) {
655 return {last_queries[stream]};
656 }
657 return std::nullopt;
658 }
659
649 bool HasUnsyncedQueries() override { 660 bool HasUnsyncedQueries() override {
650 return !pending_flush_queries.empty(); 661 return !pending_flush_queries.empty();
651 } 662 }
@@ -657,6 +668,7 @@ public:
657 size_t offset_base = staging_ref.offset; 668 size_t offset_base = staging_ref.offset;
658 for (auto q : pending_flush_queries) { 669 for (auto q : pending_flush_queries) {
659 auto* query = GetQuery(q); 670 auto* query = GetQuery(q);
671 query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
660 auto& bank = bank_pool.GetBank(query->start_bank_id); 672 auto& bank = bank_pool.GetBank(query->start_bank_id);
661 bank.Sync(staging_ref, offset_base, query->start_slot, 1); 673 bank.Sync(staging_ref, offset_base, query->start_slot, 1);
662 offset_base += TFBQueryBank::QUERY_SIZE; 674 offset_base += TFBQueryBank::QUERY_SIZE;
@@ -741,13 +753,15 @@ private:
741 cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); 753 cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
742 }); 754 });
743 } else { 755 } else {
744 scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { 756 scheduler.Record([this,
757 total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
745 cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); 758 cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
746 }); 759 });
747 } 760 }
748 } 761 }
749 762
750 void UpdateBuffers() { 763 void UpdateBuffers() {
764 last_queries.fill(0);
751 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { 765 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
752 buffers_count = 0; 766 buffers_count = 0;
753 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; 767 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
@@ -804,6 +818,9 @@ private:
804 return {current_bank_id, slot}; 818 return {current_bank_id, slot};
805 } 819 }
806 820
821 template <typename Traits>
822 friend class PrimitivesSucceededStreamer;
823
807 static constexpr size_t NUM_STREAMS = 4; 824 static constexpr size_t NUM_STREAMS = 4;
808 static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; 825 static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
809 826
@@ -833,9 +850,143 @@ private:
833 size_t buffers_count{}; 850 size_t buffers_count{};
834 std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; 851 std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
835 std::array<VkDeviceSize, NUM_STREAMS> offsets{}; 852 std::array<VkDeviceSize, NUM_STREAMS> offsets{};
853 std::array<VAddr, NUM_STREAMS> last_queries;
836 u64 streams_mask; 854 u64 streams_mask;
837}; 855};
838 856
857class PrimitivesQueryBase : public VideoCommon::QueryBase {
858public:
859 // Default constructor
860 PrimitivesQueryBase()
861 : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
862 dependant_index{}, dependant_manage{} {}
863
864 // Parameterized constructor
865 PrimitivesQueryBase(bool is_long, VAddr address)
866 : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
867 dependant_index{}, dependant_manage{} {
868 if (is_long) {
869 flags |= VideoCommon::QueryFlagBits::HasTimestamp;
870 }
871 }
872
873 u64 stride;
874 VAddr dependant_address;
875 size_t dependant_index;
876 bool dependant_manage;
877};
878
879template <typename Traits>
880class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
881public:
882 PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_,
883 TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_)
884 : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(
885 id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)),
886 runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {}
887
888 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
889 std::optional<u32> subreport_) override {
890 auto index = BuildQuery();
891 auto* new_query = GetQuery(index);
892 new_query->guest_address = address;
893 new_query->value = 0;
894 if (has_timestamp) {
895 new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
896 }
897 if (!subreport_) {
898 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
899 return index;
900 }
901 const size_t subreport = static_cast<size_t>(*subreport_);
902 auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
903 bool must_manage_dependance = false;
904 if (dependant_address_opt) {
905 new_query->dependant_address = *dependant_address_opt;
906 } else {
907 new_query->dependant_index =
908 tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
909 auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index);
910 dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated;
911 must_manage_dependance = true;
912 if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
913 new_query->value = 0;
914 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
915 if (must_manage_dependance) {
916 tfb_streamer.Free(new_query->dependant_index);
917 }
918 return index;
919 }
920 }
921
922 new_query->dependant_manage = must_manage_dependance;
923 runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
924 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
925 i++) {
926 const auto& tf = regs.transform_feedback;
927 if (tf.controls[i].stream != subreport) {
928 continue;
929 }
930 new_query->stride = tf.controls[i].stride;
931 break;
932 }
933 });
934 pending_flush_queries.push_back(index);
935 return index;
936 }
937
938 bool HasUnsyncedQueries() override {
939 return !pending_flush_queries.empty();
940 }
941
942 void PushUnsyncedQueries() override {
943 std::scoped_lock lk(flush_guard);
944 pending_flush_sets.emplace_back(std::move(pending_flush_queries));
945 pending_flush_queries.clear();
946 }
947
948 void PopUnsyncedQueries() override {
949 std::vector<size_t> flushed_queries;
950 {
951 std::scoped_lock lk(flush_guard);
952 flushed_queries = std::move(pending_flush_sets.front());
953 pending_flush_sets.pop_front();
954 }
955
956 for (auto q : flushed_queries) {
957 auto* query = GetQuery(q);
958 if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
959 continue;
960 }
961
962 query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
963 if (query->dependant_manage) {
964 auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
965 query->value = dependant_query->value / query->stride;
966 tfb_streamer.Free(query->dependant_index);
967 } else {
968 u8* pointer = cpu_memory.GetPointer(query->dependant_address);
969 u32 result;
970 std::memcpy(&result, pointer, sizeof(u32));
971 query->value = static_cast<u64>(result) / query->stride;
972 }
973 }
974 }
975
976private:
977 QueryCacheRuntime& runtime;
978 TFBCounterStreamer<QueryCacheParams>& tfb_streamer;
979 Core::Memory::Memory& cpu_memory;
980
981 // syncing queue
982 std::vector<size_t> pending_sync;
983
984 // flush levels
985 std::vector<size_t> pending_flush_queries;
986 std::deque<std::vector<size_t>> pending_flush_sets;
987 std::mutex flush_guard;
988};
989
839} // namespace 990} // namespace
840 991
841struct QueryCacheRuntimeImpl { 992struct QueryCacheRuntimeImpl {
@@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl {
853 scheduler, memory_allocator), 1004 scheduler, memory_allocator),
854 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, 1005 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
855 scheduler, memory_allocator, staging_pool), 1006 scheduler, memory_allocator, staging_pool),
1007 primitives_succeeded_streamer(
1008 static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_),
856 hcr_setup{}, hcr_is_set{}, is_hcr_running{} { 1009 hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
857 1010
858 hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; 1011 hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl {
889 VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; 1042 VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
890 SamplesStreamer<QueryCacheParams> sample_streamer; 1043 SamplesStreamer<QueryCacheParams> sample_streamer;
891 TFBCounterStreamer<QueryCacheParams> tfb_streamer; 1044 TFBCounterStreamer<QueryCacheParams> tfb_streamer;
1045 PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer;
892 1046
893 std::vector<std::pair<VAddr, VAddr>> little_cache; 1047 std::vector<std::pair<VAddr, VAddr>> little_cache;
894 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; 1048 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
@@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
1086 return &impl->sample_streamer; 1240 return &impl->sample_streamer;
1087 case QueryType::StreamingByteCount: 1241 case QueryType::StreamingByteCount:
1088 return &impl->tfb_streamer; 1242 return &impl->tfb_streamer;
1243 case QueryType::StreamingPrimitivesSucceeded:
1244 return &impl->primitives_succeeded_streamer;
1089 default: 1245 default:
1090 return nullptr; 1246 return nullptr;
1091 } 1247 }