diff options
| author | 2023-08-06 09:38:16 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:30 +0200 | |
| commit | 282ae8fa51e060e6d4ef026b734aa871b1b9331e (patch) | |
| tree | 3bc4603b6add0582315dc65544f1986427e4182d | |
| parent | QueryCache: Implement dependant queries. (diff) | |
| download | yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.gz yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.xz yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.zip | |
Query Cache: address issues
21 files changed, 270 insertions, 214 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f91b7d1e4..9e90c587c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 276 | } | 276 | } |
| 277 | 277 | ||
| 278 | template <class P> | 278 | template <class P> |
| 279 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, | 279 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( |
| 280 | ObtainBufferSynchronize sync_info, | 280 | VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { |
| 281 | ObtainBufferOperation post_op) { | ||
| 282 | const BufferId buffer_id = FindBuffer(cpu_addr, size); | 281 | const BufferId buffer_id = FindBuffer(cpu_addr, size); |
| 283 | Buffer& buffer = slot_buffers[buffer_id]; | 282 | Buffer& buffer = slot_buffers[buffer_id]; |
| 284 | 283 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 9507071e5..c4f6e8d12 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -297,8 +297,8 @@ public: | |||
| 297 | ObtainBufferOperation post_op); | 297 | ObtainBufferOperation post_op); |
| 298 | 298 | ||
| 299 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | 299 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, |
| 300 | ObtainBufferSynchronize sync_info, | 300 | ObtainBufferSynchronize sync_info, |
| 301 | ObtainBufferOperation post_op); | 301 | ObtainBufferOperation post_op); |
| 302 | void FlushCachedWrites(); | 302 | void FlushCachedWrites(); |
| 303 | 303 | ||
| 304 | /// Return true when there are uncommitted buffers to be downloaded | 304 | /// Return true when there are uncommitted buffers to be downloaded |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 922c399e6..46b9c548a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 596 | case Regs::ClearReport::ZPassPixelCount: | 596 | case Regs::ClearReport::ZPassPixelCount: |
| 597 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | 597 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); |
| 598 | break; | 598 | break; |
| 599 | case Regs::ClearReport::PrimitivesGenerated: | ||
| 600 | rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||
| 601 | break; | ||
| 602 | case Regs::ClearReport::VtgPrimitivesOut: | ||
| 603 | rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||
| 604 | break; | ||
| 605 | default: | 599 | default: |
| 606 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | 600 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); |
| 607 | break; | 601 | break; |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 582738234..8dd34c04a 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 82 | if (op == GpuSemaphoreOperation::WriteLong) { | 82 | if (op == GpuSemaphoreOperation::WriteLong) { |
| 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 84 | const u32 payload = regs.semaphore_sequence; | 84 | const u32 payload = regs.semaphore_sequence; |
| 85 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | 85 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, |
| 86 | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | ||
| 86 | } else { | 87 | } else { |
| 87 | do { | 88 | do { |
| 88 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | 89 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |
| @@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 117 | void Puller::ProcessSemaphoreRelease() { | 118 | void Puller::ProcessSemaphoreRelease() { |
| 118 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 119 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 119 | const u32 payload = regs.semaphore_release; | 120 | const u32 payload = regs.semaphore_release; |
| 120 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); | 121 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, |
| 122 | VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); | ||
| 121 | } | 123 | } |
| 122 | 124 | ||
| 123 | void Puller::ProcessSemaphoreAcquire() { | 125 | void Puller::ProcessSemaphoreAcquire() { |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8459a3092..805a89900 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -55,6 +55,9 @@ public: | |||
| 55 | 55 | ||
| 56 | // Unlike other fences, this one doesn't | 56 | // Unlike other fences, this one doesn't |
| 57 | void SignalOrdering() { | 57 | void SignalOrdering() { |
| 58 | if constexpr (!can_async_check) { | ||
| 59 | TryReleasePendingFences<false>(); | ||
| 60 | } | ||
| 58 | std::scoped_lock lock{buffer_cache.mutex}; | 61 | std::scoped_lock lock{buffer_cache.mutex}; |
| 59 | buffer_cache.AccumulateFlushes(); | 62 | buffer_cache.AccumulateFlushes(); |
| 60 | } | 63 | } |
| @@ -104,13 +107,9 @@ public: | |||
| 104 | SignalFence(std::move(func)); | 107 | SignalFence(std::move(func)); |
| 105 | } | 108 | } |
| 106 | 109 | ||
| 107 | void WaitPendingFences(bool force) { | 110 | void WaitPendingFences([[maybe_unused]] bool force) { |
| 108 | if constexpr (!can_async_check) { | 111 | if constexpr (!can_async_check) { |
| 109 | if (force) { | 112 | TryReleasePendingFences<true>(); |
| 110 | TryReleasePendingFences<true>(); | ||
| 111 | } else { | ||
| 112 | TryReleasePendingFences<false>(); | ||
| 113 | } | ||
| 114 | } else { | 113 | } else { |
| 115 | if (!force) { | 114 | if (!force) { |
| 116 | return; | 115 | return; |
| @@ -125,7 +124,8 @@ public: | |||
| 125 | }); | 124 | }); |
| 126 | SignalFence(std::move(func)); | 125 | SignalFence(std::move(func)); |
| 127 | std::unique_lock lk(wait_mutex); | 126 | std::unique_lock lk(wait_mutex); |
| 128 | wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); | 127 | wait_cv.wait( |
| 128 | lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); | ||
| 129 | } | 129 | } |
| 130 | } | 130 | } |
| 131 | 131 | ||
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h index 4246a609d..420927091 100644 --- a/src/video_core/query_cache/bank_base.h +++ b/src/video_core/query_cache/bank_base.h | |||
| @@ -7,21 +7,19 @@ | |||
| 7 | #include <deque> | 7 | #include <deque> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | 9 | ||
| 10 | |||
| 11 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 12 | 11 | ||
| 13 | namespace VideoCommon { | 12 | namespace VideoCommon { |
| 14 | 13 | ||
| 15 | class BankBase { | 14 | class BankBase { |
| 16 | protected: | 15 | protected: |
| 17 | const size_t base_bank_size; | 16 | const size_t base_bank_size{}; |
| 18 | size_t bank_size; | 17 | size_t bank_size{}; |
| 19 | std::atomic<size_t> references; | 18 | std::atomic<size_t> references{}; |
| 20 | size_t current_slot; | 19 | size_t current_slot{}; |
| 21 | 20 | ||
| 22 | public: | 21 | public: |
| 23 | BankBase(size_t bank_size_) | 22 | explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {} |
| 24 | : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} | ||
| 25 | 23 | ||
| 26 | virtual ~BankBase() = default; | 24 | virtual ~BankBase() = default; |
| 27 | 25 | ||
| @@ -58,11 +56,11 @@ public: | |||
| 58 | bank_size = current_slot; | 56 | bank_size = current_slot; |
| 59 | } | 57 | } |
| 60 | 58 | ||
| 61 | constexpr bool IsClosed() { | 59 | bool IsClosed() const { |
| 62 | return current_slot >= bank_size; | 60 | return current_slot >= bank_size; |
| 63 | } | 61 | } |
| 64 | 62 | ||
| 65 | bool IsDead() { | 63 | bool IsDead() const { |
| 66 | return IsClosed() && references == 0; | 64 | return IsClosed() && references == 0; |
| 67 | } | 65 | } |
| 68 | }; | 66 | }; |
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 0ae23af9f..993a13eac 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -9,28 +9,28 @@ | |||
| 9 | namespace VideoCommon { | 9 | namespace VideoCommon { |
| 10 | 10 | ||
| 11 | enum class QueryFlagBits : u32 { | 11 | enum class QueryFlagBits : u32 { |
| 12 | HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. | 12 | HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. |
| 13 | IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host | 13 | IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host |
| 14 | IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host | 14 | IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host |
| 15 | IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. | 15 | IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. |
| 16 | IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query | 16 | IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query |
| 17 | IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query | 17 | IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query |
| 18 | IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. | 18 | IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. |
| 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. | 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. |
| 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. | 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. |
| 21 | IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment | 21 | IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment |
| 22 | }; | 22 | }; |
| 23 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | 23 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) |
| 24 | 24 | ||
| 25 | class QueryBase { | 25 | class QueryBase { |
| 26 | public: | 26 | public: |
| 27 | VAddr guest_address; | 27 | VAddr guest_address{}; |
| 28 | QueryFlagBits flags; | 28 | QueryFlagBits flags{}; |
| 29 | u64 value; | 29 | u64 value{}; |
| 30 | 30 | ||
| 31 | protected: | 31 | protected: |
| 32 | // Default constructor | 32 | // Default constructor |
| 33 | QueryBase() : guest_address(0), flags{}, value{} {} | 33 | QueryBase() = default; |
| 34 | 34 | ||
| 35 | // Parameterized constructor | 35 | // Parameterized constructor |
| 36 | QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | 36 | QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) |
| @@ -51,23 +51,21 @@ public: | |||
| 51 | class HostQueryBase : public QueryBase { | 51 | class HostQueryBase : public QueryBase { |
| 52 | public: | 52 | public: |
| 53 | // Default constructor | 53 | // Default constructor |
| 54 | HostQueryBase() | 54 | HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {} |
| 55 | : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, | ||
| 56 | size_banks{}, start_slot{}, size_slots{} {} | ||
| 57 | 55 | ||
| 58 | // Parameterized constructor | 56 | // Parameterized constructor |
| 59 | HostQueryBase(bool isLong, VAddr address) | 57 | HostQueryBase(bool has_timestamp, VAddr address) |
| 60 | : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, | 58 | : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, |
| 61 | start_slot{}, size_slots{} { | 59 | start_slot{}, size_slots{} { |
| 62 | if (isLong) { | 60 | if (has_timestamp) { |
| 63 | flags |= QueryFlagBits::HasTimestamp; | 61 | flags |= QueryFlagBits::HasTimestamp; |
| 64 | } | 62 | } |
| 65 | } | 63 | } |
| 66 | 64 | ||
| 67 | u32 start_bank_id; | 65 | u32 start_bank_id{}; |
| 68 | u32 size_banks; | 66 | u32 size_banks{}; |
| 69 | size_t start_slot; | 67 | size_t start_slot{}; |
| 70 | size_t size_slots; | 68 | size_t size_slots{}; |
| 71 | }; | 69 | }; |
| 72 | 70 | ||
| 73 | } // namespace VideoCommon \ No newline at end of file | 71 | } // namespace VideoCommon \ No newline at end of file |
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f1393d5c7..042af053c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -54,7 +54,7 @@ public: | |||
| 54 | return new_id; | 54 | return new_id; |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | bool HasPendingSync() override { | 57 | bool HasPendingSync() const override { |
| 58 | return !pending_sync.empty(); | 58 | return !pending_sync.empty(); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| @@ -71,8 +71,10 @@ public: | |||
| 71 | continue; | 71 | continue; |
| 72 | } | 72 | } |
| 73 | query.flags |= QueryFlagBits::IsHostSynced; | 73 | query.flags |= QueryFlagBits::IsHostSynced; |
| 74 | sync_values.emplace_back(query.guest_address, query.value, | 74 | sync_values.emplace_back(SyncValuesStruct{ |
| 75 | True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); | 75 | .address = query.guest_address, |
| 76 | .value = query.value, | ||
| 77 | .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)}); | ||
| 76 | } | 78 | } |
| 77 | pending_sync.clear(); | 79 | pending_sync.clear(); |
| 78 | if (sync_values.size() > 0) { | 80 | if (sync_values.size() > 0) { |
| @@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> { | |||
| 90 | public: | 92 | public: |
| 91 | using RuntimeType = typename Traits::RuntimeType; | 93 | using RuntimeType = typename Traits::RuntimeType; |
| 92 | 94 | ||
| 93 | StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} | 95 | StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) |
| 96 | : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {} | ||
| 94 | 97 | ||
| 95 | ~StubStreamer() override = default; | 98 | ~StubStreamer() override = default; |
| 96 | 99 | ||
| 97 | size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, | 100 | size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, |
| 98 | std::optional<u32> subreport = std::nullopt) override { | 101 | std::optional<u32> subreport = std::nullopt) override { |
| 99 | size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); | 102 | size_t new_id = |
| 103 | GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport); | ||
| 100 | return new_id; | 104 | return new_id; |
| 101 | } | 105 | } |
| 106 | |||
| 107 | private: | ||
| 108 | u32 stub_value; | ||
| 102 | }; | 109 | }; |
| 103 | 110 | ||
| 104 | template <typename Traits> | 111 | template <typename Traits> |
| @@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 113 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | 120 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |
| 114 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | 121 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |
| 115 | if (streamers[i]) { | 122 | if (streamers[i]) { |
| 116 | streamer_mask |= 1ULL << i; | 123 | streamer_mask |= 1ULL << streamers[i]->GetId(); |
| 117 | } | 124 | } |
| 118 | } | 125 | } |
| 119 | } | 126 | } |
| @@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 152 | QueryCacheBase<Traits>* owner; | 159 | QueryCacheBase<Traits>* owner; |
| 153 | VideoCore::RasterizerInterface& rasterizer; | 160 | VideoCore::RasterizerInterface& rasterizer; |
| 154 | Core::Memory::Memory& cpu_memory; | 161 | Core::Memory::Memory& cpu_memory; |
| 155 | Traits::RuntimeType& runtime; | 162 | RuntimeType& runtime; |
| 156 | Tegra::GPU& gpu; | 163 | Tegra::GPU& gpu; |
| 157 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | 164 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |
| 158 | u64 streamer_mask; | 165 | u64 streamer_mask; |
| @@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 223 | const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); | 230 | const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); |
| 224 | size_t streamer_id = static_cast<size_t>(counter_type); | 231 | size_t streamer_id = static_cast<size_t>(counter_type); |
| 225 | auto* streamer = impl->streamers[streamer_id]; | 232 | auto* streamer = impl->streamers[streamer_id]; |
| 226 | if (!streamer) [[unlikely]] { | 233 | if (streamer == nullptr) [[unlikely]] { |
| 227 | if (has_timestamp) { | 234 | counter_type = QueryType::Payload; |
| 228 | u64 timestamp = impl->gpu.GetTicks(); | 235 | payload = 1U; |
| 229 | gpu_memory->Write<u64>(addr + 8, timestamp); | 236 | streamer_id = static_cast<size_t>(counter_type); |
| 230 | gpu_memory->Write<u64>(addr, 1ULL); | 237 | streamer = impl->streamers[streamer_id]; |
| 231 | } else { | ||
| 232 | gpu_memory->Write<u32>(addr, 1U); | ||
| 233 | } | ||
| 234 | return; | ||
| 235 | } | 238 | } |
| 236 | auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); | 239 | auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); |
| 237 | if (!cpu_addr_opt) [[unlikely]] { | 240 | if (!cpu_addr_opt) [[unlikely]] { |
| @@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | |||
| 403 | impl->runtime.EndHostConditionalRendering(); | 406 | impl->runtime.EndHostConditionalRendering(); |
| 404 | return false; | 407 | return false; |
| 405 | } | 408 | } |
| 406 | /*if (!Settings::IsGPULevelHigh()) { | ||
| 407 | impl->runtime.EndHostConditionalRendering(); | ||
| 408 | return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, | ||
| 409 | VideoCommon::CacheType::BufferCache | | ||
| 410 | VideoCommon::CacheType::QueryCache); | ||
| 411 | }*/ | ||
| 412 | const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); | 409 | const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); |
| 413 | const GPUVAddr address = regs.render_enable.Address(); | 410 | const GPUVAddr address = regs.render_enable.Address(); |
| 414 | switch (mode) { | 411 | switch (mode) { |
| @@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | |||
| 442 | // Async downloads | 439 | // Async downloads |
| 443 | template <typename Traits> | 440 | template <typename Traits> |
| 444 | void QueryCacheBase<Traits>::CommitAsyncFlushes() { | 441 | void QueryCacheBase<Traits>::CommitAsyncFlushes() { |
| 442 | // Make sure to have the results synced in Host. | ||
| 443 | NotifyWFI(); | ||
| 444 | |||
| 445 | u64 mask{}; | 445 | u64 mask{}; |
| 446 | { | 446 | { |
| 447 | std::scoped_lock lk(impl->flush_guard); | 447 | std::scoped_lock lk(impl->flush_guard); |
| @@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() { | |||
| 458 | if (mask == 0) { | 458 | if (mask == 0) { |
| 459 | return; | 459 | return; |
| 460 | } | 460 | } |
| 461 | impl->ForEachStreamerIn(mask, | 461 | u64 ran_mask = ~mask; |
| 462 | [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); | 462 | while (mask) { |
| 463 | impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { | ||
| 464 | u64 dep_mask = streamer->GetDependentMask(); | ||
| 465 | if ((dep_mask & ~ran_mask) != 0) { | ||
| 466 | return; | ||
| 467 | } | ||
| 468 | u64 index = streamer->GetId(); | ||
| 469 | ran_mask |= (1ULL << index); | ||
| 470 | mask &= ~(1ULL << index); | ||
| 471 | streamer->PushUnsyncedQueries(); | ||
| 472 | }); | ||
| 473 | } | ||
| 463 | } | 474 | } |
| 464 | 475 | ||
| 465 | template <typename Traits> | 476 | template <typename Traits> |
| @@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() { | |||
| 489 | if (mask == 0) { | 500 | if (mask == 0) { |
| 490 | return; | 501 | return; |
| 491 | } | 502 | } |
| 492 | u64 ran_mask = 0; | 503 | u64 ran_mask = ~mask; |
| 493 | u64 next_phase = 0; | ||
| 494 | while (mask) { | 504 | while (mask) { |
| 495 | impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { | 505 | impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { |
| 496 | u64 dep_mask = streamer->GetDependenceMask(); | 506 | u64 dep_mask = streamer->GetDependenceMask(); |
| 497 | if ((dep_mask & ~ran_mask) != 0) { | 507 | if ((dep_mask & ~ran_mask) != 0) { |
| 498 | next_phase |= dep_mask; | ||
| 499 | return; | 508 | return; |
| 500 | } | 509 | } |
| 501 | u64 index = streamer->GetId(); | 510 | u64 index = streamer->GetId(); |
| @@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() { | |||
| 503 | mask &= ~(1ULL << index); | 512 | mask &= ~(1ULL << index); |
| 504 | streamer->PopUnsyncedQueries(); | 513 | streamer->PopUnsyncedQueries(); |
| 505 | }); | 514 | }); |
| 506 | ran_mask |= next_phase; | ||
| 507 | } | 515 | } |
| 508 | } | 516 | } |
| 509 | 517 | ||
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h index 55f508dd1..07be421c6 100644 --- a/src/video_core/query_cache/query_cache_base.h +++ b/src/video_core/query_cache/query_cache_base.h | |||
| @@ -47,7 +47,7 @@ public: | |||
| 47 | BitField<0, 27, u32> query_id; | 47 | BitField<0, 27, u32> query_id; |
| 48 | u32 raw; | 48 | u32 raw; |
| 49 | 49 | ||
| 50 | std::pair<size_t, size_t> unpack() { | 50 | std::pair<size_t, size_t> unpack() const { |
| 51 | return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; | 51 | return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; |
| 52 | } | 52 | } |
| 53 | }; | 53 | }; |
| @@ -73,7 +73,7 @@ public: | |||
| 73 | } | 73 | } |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | static u64 BuildMask(std::span<QueryType> types) { | 76 | static u64 BuildMask(std::span<const QueryType> types) { |
| 77 | u64 mask = 0; | 77 | u64 mask = 0; |
| 78 | for (auto query_type : types) { | 78 | for (auto query_type : types) { |
| 79 | mask |= 1ULL << (static_cast<u64>(query_type)); | 79 | mask |= 1ULL << (static_cast<u64>(query_type)); |
| @@ -160,7 +160,7 @@ protected: | |||
| 160 | } | 160 | } |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; | 163 | using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; |
| 164 | 164 | ||
| 165 | void InvalidateQuery(QueryLocation location); | 165 | void InvalidateQuery(QueryLocation location); |
| 166 | bool IsQueryDirty(QueryLocation location); | 166 | bool IsQueryDirty(QueryLocation location); |
| @@ -175,7 +175,7 @@ protected: | |||
| 175 | friend struct QueryCacheBaseImpl; | 175 | friend struct QueryCacheBaseImpl; |
| 176 | friend RuntimeType; | 176 | friend RuntimeType; |
| 177 | 177 | ||
| 178 | std::unique_ptr<struct QueryCacheBaseImpl> impl; | 178 | std::unique_ptr<QueryCacheBaseImpl> impl; |
| 179 | }; | 179 | }; |
| 180 | 180 | ||
| 181 | } // namespace VideoCommon \ No newline at end of file | 181 | } // namespace VideoCommon \ No newline at end of file |
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index 0e9275565..e7aac955b 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h | |||
| @@ -16,7 +16,7 @@ namespace VideoCommon { | |||
| 16 | 16 | ||
| 17 | class StreamerInterface { | 17 | class StreamerInterface { |
| 18 | public: | 18 | public: |
| 19 | StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} | 19 | explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {} |
| 20 | virtual ~StreamerInterface() = default; | 20 | virtual ~StreamerInterface() = default; |
| 21 | 21 | ||
| 22 | virtual QueryBase* GetQuery(size_t id) = 0; | 22 | virtual QueryBase* GetQuery(size_t id) = 0; |
| @@ -37,7 +37,7 @@ public: | |||
| 37 | /* Do Nothing */ | 37 | /* Do Nothing */ |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | virtual bool HasPendingSync() { | 40 | virtual bool HasPendingSync() const { |
| 41 | return false; | 41 | return false; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| @@ -52,7 +52,7 @@ public: | |||
| 52 | virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 52 | virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
| 53 | std::optional<u32> subreport = std::nullopt) = 0; | 53 | std::optional<u32> subreport = std::nullopt) = 0; |
| 54 | 54 | ||
| 55 | virtual bool HasUnsyncedQueries() { | 55 | virtual bool HasUnsyncedQueries() const { |
| 56 | return false; | 56 | return false; |
| 57 | } | 57 | } |
| 58 | 58 | ||
| @@ -71,18 +71,28 @@ public: | |||
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | u64 GetDependenceMask() const { | 73 | u64 GetDependenceMask() const { |
| 74 | return dependance_mask; | 74 | return dependence_mask; |
| 75 | } | ||
| 76 | |||
| 77 | u64 GetDependentMask() const { | ||
| 78 | return dependence_mask; | ||
| 75 | } | 79 | } |
| 76 | 80 | ||
| 77 | protected: | 81 | protected: |
| 82 | void MakeDependent(StreamerInterface* depend_on) { | ||
| 83 | dependence_mask |= 1ULL << depend_on->id; | ||
| 84 | depend_on->dependent_mask |= 1ULL << id; | ||
| 85 | } | ||
| 86 | |||
| 78 | const size_t id; | 87 | const size_t id; |
| 79 | const u64 dependance_mask; | 88 | u64 dependence_mask; |
| 89 | u64 dependent_mask; | ||
| 80 | }; | 90 | }; |
| 81 | 91 | ||
| 82 | template <typename QueryType> | 92 | template <typename QueryType> |
| 83 | class SimpleStreamer : public StreamerInterface { | 93 | class SimpleStreamer : public StreamerInterface { |
| 84 | public: | 94 | public: |
| 85 | SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} | 95 | explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {} |
| 86 | virtual ~SimpleStreamer() = default; | 96 | virtual ~SimpleStreamer() = default; |
| 87 | 97 | ||
| 88 | protected: | 98 | protected: |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2ba7cbb0d..af1469147 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -9,10 +9,10 @@ | |||
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/polyfill_thread.h" | 11 | #include "common/polyfill_thread.h" |
| 12 | #include "video_core/query_cache/types.h" | ||
| 13 | #include "video_core/cache_types.h" | 12 | #include "video_core/cache_types.h" |
| 14 | #include "video_core/engines/fermi_2d.h" | 13 | #include "video_core/engines/fermi_2d.h" |
| 15 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/query_cache/types.h" | ||
| 16 | #include "video_core/rasterizer_download_area.h" | 16 | #include "video_core/rasterizer_download_area.h" |
| 17 | 17 | ||
| 18 | namespace Tegra { | 18 | namespace Tegra { |
| @@ -57,7 +57,8 @@ public: | |||
| 57 | virtual void ResetCounter(VideoCommon::QueryType type) = 0; | 57 | virtual void ResetCounter(VideoCommon::QueryType type) = 0; |
| 58 | 58 | ||
| 59 | /// Records a GPU query and caches it | 59 | /// Records a GPU query and caches it |
| 60 | virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; | 60 | virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 61 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; | ||
| 61 | 62 | ||
| 62 | /// Signal an uniform buffer binding | 63 | /// Signal an uniform buffer binding |
| 63 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 64 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 57a8c4c85..23001eeb8 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -43,7 +43,8 @@ public: | |||
| 43 | void Clear(u32 layer_count) override; | 43 | void Clear(u32 layer_count) override; |
| 44 | void DispatchCompute() override; | 44 | void DispatchCompute() override; |
| 45 | void ResetCounter(VideoCommon::QueryType type) override; | 45 | void ResetCounter(VideoCommon::QueryType type) override; |
| 46 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | 46 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 47 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||
| 47 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 48 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 48 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 49 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 49 | void FlushAll() override; | 50 | void FlushAll() override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a975bbe75..27e2de1bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | |||
| 405 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | 405 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 406 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | 406 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 407 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { | 407 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { |
| 408 | std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout) | ||
| 409 | ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt }; | ||
| 410 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | 408 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { |
| 411 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | 409 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); |
| 412 | } else { | 410 | } else { |
| @@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | |||
| 414 | } | 412 | } |
| 415 | return; | 413 | return; |
| 416 | } | 414 | } |
| 417 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | 415 | if (type != VideoCommon::QueryType::Payload) { |
| 418 | u64 ticks = gpu.GetTicks(); | 416 | payload = 1u; |
| 419 | gpu_memory->Write<u64>(gpu_addr + 8, ticks); | 417 | } |
| 420 | gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); | 418 | std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() { |
| 421 | } else { | 419 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { |
| 422 | gpu_memory->Write<u32>(gpu_addr, payload); | 420 | u64 ticks = gpu.GetTicks(); |
| 421 | memory_manager->Write<u64>(gpu_addr + 8, ticks); | ||
| 422 | memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||
| 423 | } else { | ||
| 424 | memory_manager->Write<u32>(gpu_addr, payload); | ||
| 425 | } | ||
| 426 | }); | ||
| 427 | if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) { | ||
| 428 | SignalFence(std::move(func)); | ||
| 429 | return; | ||
| 423 | } | 430 | } |
| 431 | func(); | ||
| 424 | } | 432 | } |
| 425 | 433 | ||
| 426 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 434 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 05e048e15..ceffe1f1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -87,7 +87,8 @@ public: | |||
| 87 | void Clear(u32 layer_count) override; | 87 | void Clear(u32 layer_count) override; |
| 88 | void DispatchCompute() override; | 88 | void DispatchCompute() override; |
| 89 | void ResetCounter(VideoCommon::QueryType type) override; | 89 | void ResetCounter(VideoCommon::QueryType type) override; |
| 90 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | 90 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 91 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||
| 91 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 92 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 92 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 93 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 93 | void FlushAll() override; | 94 | void FlushAll() override; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 97cd4521d..039dc95e1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 303 | return {staging.buffer, staging.offset}; | 303 | return {staging.buffer, staging.offset}; |
| 304 | } | 304 | } |
| 305 | 305 | ||
| 306 | ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, | 306 | ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( |
| 307 | Scheduler& scheduler_, | 307 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 308 | DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | 308 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_) |
| 309 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | 309 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |
| 310 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, | 310 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, |
| 311 | RESOLVE_CONDITIONAL_RENDER_COMP_SPV), | 311 | RESOLVE_CONDITIONAL_RENDER_COMP_SPV), |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 14fc5ad71..336573574 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -7,8 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | #include "video_core/fence_manager.h" | 8 | #include "video_core/fence_manager.h" |
| 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 12 | 12 | ||
| 13 | namespace Core { | 13 | namespace Core { |
| 14 | class System; | 14 | class System; |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ef891e26b..add0c6fb3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -11,11 +11,9 @@ | |||
| 11 | #include <utility> | 11 | #include <utility> |
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | 13 | ||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 18 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 16 | #include "video_core/engines/draw_manager.h" | ||
| 19 | #include "video_core/query_cache/query_cache.h" | 17 | #include "video_core/query_cache/query_cache.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 18 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 19 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -30,6 +28,7 @@ | |||
| 30 | 28 | ||
| 31 | namespace Vulkan { | 29 | namespace Vulkan { |
| 32 | 30 | ||
| 31 | using Tegra::Engines::Maxwell3D; | ||
| 33 | using VideoCommon::QueryType; | 32 | using VideoCommon::QueryType; |
| 34 | 33 | ||
| 35 | namespace { | 34 | namespace { |
| @@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase { | |||
| 37 | public: | 36 | public: |
| 38 | static constexpr size_t BANK_SIZE = 256; | 37 | static constexpr size_t BANK_SIZE = 256; |
| 39 | static constexpr size_t QUERY_SIZE = 8; | 38 | static constexpr size_t QUERY_SIZE = 8; |
| 40 | SamplesQueryBank(const Device& device_, size_t index_) | 39 | explicit SamplesQueryBank(const Device& device_, size_t index_) |
| 41 | : BankBase(BANK_SIZE), device{device_}, index{index_} { | 40 | : BankBase(BANK_SIZE), device{device_}, index{index_} { |
| 42 | const auto& dev = device.GetLogical(); | 41 | const auto& dev = device.GetLogical(); |
| 43 | query_pool = dev.CreateQueryPool({ | 42 | query_pool = dev.CreateQueryPool({ |
| @@ -109,18 +108,19 @@ struct HostSyncValues { | |||
| 109 | static constexpr bool GeneratesBaseBuffer = false; | 108 | static constexpr bool GeneratesBaseBuffer = false; |
| 110 | }; | 109 | }; |
| 111 | 110 | ||
| 112 | template <typename Traits> | ||
| 113 | class SamplesStreamer : public BaseStreamer { | 111 | class SamplesStreamer : public BaseStreamer { |
| 114 | public: | 112 | public: |
| 115 | SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | 113 | explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, |
| 116 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) | 114 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) |
| 117 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | 115 | : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, |
| 118 | memory_allocator{memory_allocator_} { | 116 | memory_allocator{memory_allocator_} { |
| 119 | BuildResolveBuffer(); | 117 | BuildResolveBuffer(); |
| 120 | current_bank = nullptr; | 118 | current_bank = nullptr; |
| 121 | current_query = nullptr; | 119 | current_query = nullptr; |
| 122 | } | 120 | } |
| 123 | 121 | ||
| 122 | ~SamplesStreamer() = default; | ||
| 123 | |||
| 124 | void StartCounter() override { | 124 | void StartCounter() override { |
| 125 | if (has_started) { | 125 | if (has_started) { |
| 126 | return; | 126 | return; |
| @@ -157,7 +157,7 @@ public: | |||
| 157 | PauseCounter(); | 157 | PauseCounter(); |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | bool HasPendingSync() override { | 160 | bool HasPendingSync() const override { |
| 161 | return !pending_sync.empty(); | 161 | return !pending_sync.empty(); |
| 162 | } | 162 | } |
| 163 | 163 | ||
| @@ -198,7 +198,7 @@ public: | |||
| 198 | } | 198 | } |
| 199 | resolve_slots_remaining = resolve_slots; | 199 | resolve_slots_remaining = resolve_slots; |
| 200 | sync_values_stash.emplace_back(); | 200 | sync_values_stash.emplace_back(); |
| 201 | sync_values = sync_values = &sync_values_stash.back(); | 201 | sync_values = &sync_values_stash.back(); |
| 202 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | 202 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); |
| 203 | } | 203 | } |
| 204 | resolve_slots_remaining--; | 204 | resolve_slots_remaining--; |
| @@ -207,6 +207,7 @@ public: | |||
| 207 | const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * | 207 | const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * |
| 208 | (resolve_slots - resolve_slots_remaining - 1); | 208 | (resolve_slots - resolve_slots_remaining - 1); |
| 209 | VkQueryPool query_pool = bank->GetInnerPool(); | 209 | VkQueryPool query_pool = bank->GetInnerPool(); |
| 210 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 210 | scheduler.Record([start, amount, base_offset, query_pool, | 211 | scheduler.Record([start, amount, base_offset, query_pool, |
| 211 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { | 212 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { |
| 212 | size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; | 213 | size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; |
| @@ -284,7 +285,7 @@ public: | |||
| 284 | return index; | 285 | return index; |
| 285 | } | 286 | } |
| 286 | 287 | ||
| 287 | bool HasUnsyncedQueries() override { | 288 | bool HasUnsyncedQueries() const override { |
| 288 | return !pending_flush_queries.empty(); | 289 | return !pending_flush_queries.empty(); |
| 289 | } | 290 | } |
| 290 | 291 | ||
| @@ -348,8 +349,8 @@ private: | |||
| 348 | for (auto q : queries) { | 349 | for (auto q : queries) { |
| 349 | auto* query = GetQuery(q); | 350 | auto* query = GetQuery(q); |
| 350 | ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { | 351 | ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { |
| 351 | auto id = bank->GetIndex(); | 352 | auto id_ = bank->GetIndex(); |
| 352 | auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), | 353 | auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(), |
| 353 | std::numeric_limits<size_t>::min()); | 354 | std::numeric_limits<size_t>::min()); |
| 354 | auto& current_pair = pair.first->second; | 355 | auto& current_pair = pair.first->second; |
| 355 | current_pair.first = std::min(current_pair.first, start); | 356 | current_pair.first = std::min(current_pair.first, start); |
| @@ -434,13 +435,14 @@ private: | |||
| 434 | .pNext = nullptr, | 435 | .pNext = nullptr, |
| 435 | .flags = 0, | 436 | .flags = 0, |
| 436 | .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, | 437 | .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, |
| 437 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | 438 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | |
| 439 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||
| 438 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 440 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 439 | .queueFamilyIndexCount = 0, | 441 | .queueFamilyIndexCount = 0, |
| 440 | .pQueueFamilyIndices = nullptr, | 442 | .pQueueFamilyIndices = nullptr, |
| 441 | }; | 443 | }; |
| 442 | resolve_buffers.emplace_back( | 444 | resolve_buffers.emplace_back( |
| 443 | std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); | 445 | memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)); |
| 444 | } | 446 | } |
| 445 | 447 | ||
| 446 | static constexpr size_t resolve_slots = 8; | 448 | static constexpr size_t resolve_slots = 8; |
| @@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase { | |||
| 476 | public: | 478 | public: |
| 477 | static constexpr size_t BANK_SIZE = 1024; | 479 | static constexpr size_t BANK_SIZE = 1024; |
| 478 | static constexpr size_t QUERY_SIZE = 4; | 480 | static constexpr size_t QUERY_SIZE = 4; |
| 479 | TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) | 481 | explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, |
| 482 | size_t index_) | ||
| 480 | : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { | 483 | : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { |
| 481 | const VkBufferCreateInfo buffer_ci = { | 484 | const VkBufferCreateInfo buffer_ci = { |
| 482 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 485 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| @@ -525,22 +528,21 @@ private: | |||
| 525 | vk::Buffer buffer; | 528 | vk::Buffer buffer; |
| 526 | }; | 529 | }; |
| 527 | 530 | ||
| 528 | template <typename Traits> | ||
| 529 | class PrimitivesSucceededStreamer; | 531 | class PrimitivesSucceededStreamer; |
| 530 | 532 | ||
| 531 | template <typename Traits> | ||
| 532 | class TFBCounterStreamer : public BaseStreamer { | 533 | class TFBCounterStreamer : public BaseStreamer { |
| 533 | public: | 534 | public: |
| 534 | TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | 535 | explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, |
| 535 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, | 536 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, |
| 536 | StagingBufferPool& staging_pool_) | 537 | StagingBufferPool& staging_pool_) |
| 537 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | 538 | : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, |
| 538 | memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { | 539 | memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { |
| 539 | buffers_count = 0; | 540 | buffers_count = 0; |
| 540 | current_bank = nullptr; | 541 | current_bank = nullptr; |
| 541 | counter_buffers.fill(VK_NULL_HANDLE); | 542 | counter_buffers.fill(VK_NULL_HANDLE); |
| 542 | offsets.fill(0); | 543 | offsets.fill(0); |
| 543 | last_queries.fill(0); | 544 | last_queries.fill(0); |
| 545 | last_queries_stride.fill(1); | ||
| 544 | const VkBufferCreateInfo buffer_ci = { | 546 | const VkBufferCreateInfo buffer_ci = { |
| 545 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 547 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 546 | .pNext = nullptr, | 548 | .pNext = nullptr, |
| @@ -564,6 +566,8 @@ public: | |||
| 564 | } | 566 | } |
| 565 | } | 567 | } |
| 566 | 568 | ||
| 569 | ~TFBCounterStreamer() = default; | ||
| 570 | |||
| 567 | void StartCounter() override { | 571 | void StartCounter() override { |
| 568 | FlushBeginTFB(); | 572 | FlushBeginTFB(); |
| 569 | has_started = true; | 573 | has_started = true; |
| @@ -581,15 +585,15 @@ public: | |||
| 581 | if (has_flushed_end_pending) { | 585 | if (has_flushed_end_pending) { |
| 582 | FlushEndTFB(); | 586 | FlushEndTFB(); |
| 583 | } | 587 | } |
| 584 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | 588 | runtime.View3DRegs([this](Maxwell3D& maxwell3d) { |
| 585 | if (regs.transform_feedback_enabled == 0) { | 589 | if (maxwell3d.regs.transform_feedback_enabled == 0) { |
| 586 | streams_mask = 0; | 590 | streams_mask = 0; |
| 587 | has_started = false; | 591 | has_started = false; |
| 588 | } | 592 | } |
| 589 | }); | 593 | }); |
| 590 | } | 594 | } |
| 591 | 595 | ||
| 592 | bool HasPendingSync() override { | 596 | bool HasPendingSync() const override { |
| 593 | return !pending_sync.empty(); | 597 | return !pending_sync.empty(); |
| 594 | } | 598 | } |
| 595 | 599 | ||
| @@ -650,14 +654,19 @@ public: | |||
| 650 | return index; | 654 | return index; |
| 651 | } | 655 | } |
| 652 | 656 | ||
| 653 | std::optional<VAddr> GetLastQueryStream(size_t stream) { | 657 | std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { |
| 654 | if (last_queries[stream] != 0) { | 658 | if (last_queries[stream] != 0) { |
| 655 | return {last_queries[stream]}; | 659 | std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); |
| 660 | return result; | ||
| 656 | } | 661 | } |
| 657 | return std::nullopt; | 662 | return std::nullopt; |
| 658 | } | 663 | } |
| 659 | 664 | ||
| 660 | bool HasUnsyncedQueries() override { | 665 | Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const { |
| 666 | return out_topology; | ||
| 667 | } | ||
| 668 | |||
| 669 | bool HasUnsyncedQueries() const override { | ||
| 661 | return !pending_flush_queries.empty(); | 670 | return !pending_flush_queries.empty(); |
| 662 | } | 671 | } |
| 663 | 672 | ||
| @@ -762,15 +771,17 @@ private: | |||
| 762 | 771 | ||
| 763 | void UpdateBuffers() { | 772 | void UpdateBuffers() { |
| 764 | last_queries.fill(0); | 773 | last_queries.fill(0); |
| 765 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | 774 | last_queries_stride.fill(1); |
| 775 | runtime.View3DRegs([this](Maxwell3D& maxwell3d) { | ||
| 766 | buffers_count = 0; | 776 | buffers_count = 0; |
| 767 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | 777 | out_topology = maxwell3d.draw_manager->GetDrawState().topology; |
| 768 | i++) { | 778 | for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { |
| 769 | const auto& tf = regs.transform_feedback; | 779 | const auto& tf = maxwell3d.regs.transform_feedback; |
| 770 | if (tf.buffers[i].enable == 0) { | 780 | if (tf.buffers[i].enable == 0) { |
| 771 | continue; | 781 | continue; |
| 772 | } | 782 | } |
| 773 | const size_t stream = tf.controls[i].stream; | 783 | const size_t stream = tf.controls[i].stream; |
| 784 | last_queries_stride[stream] = tf.controls[i].stride; | ||
| 774 | streams_mask |= 1ULL << stream; | 785 | streams_mask |= 1ULL << stream; |
| 775 | buffers_count = std::max<size_t>(buffers_count, stream + 1); | 786 | buffers_count = std::max<size_t>(buffers_count, stream + 1); |
| 776 | } | 787 | } |
| @@ -785,7 +796,8 @@ private: | |||
| 785 | }); | 796 | }); |
| 786 | current_bank = &bank_pool.GetBank(current_bank_id); | 797 | current_bank = &bank_pool.GetBank(current_bank_id); |
| 787 | } | 798 | } |
| 788 | auto [dont_care, slot] = current_bank->Reserve(); | 799 | auto [dont_care, other] = current_bank->Reserve(); |
| 800 | const size_t slot = other; // workaround to compile bug. | ||
| 789 | current_bank->AddReference(); | 801 | current_bank->AddReference(); |
| 790 | 802 | ||
| 791 | static constexpr VkMemoryBarrier READ_BARRIER{ | 803 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| @@ -818,11 +830,9 @@ private: | |||
| 818 | return {current_bank_id, slot}; | 830 | return {current_bank_id, slot}; |
| 819 | } | 831 | } |
| 820 | 832 | ||
| 821 | template <typename Traits> | ||
| 822 | friend class PrimitivesSucceededStreamer; | 833 | friend class PrimitivesSucceededStreamer; |
| 823 | 834 | ||
| 824 | static constexpr size_t NUM_STREAMS = 4; | 835 | static constexpr size_t NUM_STREAMS = 4; |
| 825 | static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; | ||
| 826 | 836 | ||
| 827 | QueryCacheRuntime& runtime; | 837 | QueryCacheRuntime& runtime; |
| 828 | const Device& device; | 838 | const Device& device; |
| @@ -851,6 +861,8 @@ private: | |||
| 851 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | 861 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; |
| 852 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | 862 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; |
| 853 | std::array<VAddr, NUM_STREAMS> last_queries; | 863 | std::array<VAddr, NUM_STREAMS> last_queries; |
| 864 | std::array<size_t, NUM_STREAMS> last_queries_stride; | ||
| 865 | Maxwell3D::Regs::PrimitiveTopology out_topology; | ||
| 854 | u64 streams_mask; | 866 | u64 streams_mask; |
| 855 | }; | 867 | }; |
| 856 | 868 | ||
| @@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase { | |||
| 858 | public: | 870 | public: |
| 859 | // Default constructor | 871 | // Default constructor |
| 860 | PrimitivesQueryBase() | 872 | PrimitivesQueryBase() |
| 861 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, | 873 | : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} |
| 862 | dependant_index{}, dependant_manage{} {} | ||
| 863 | 874 | ||
| 864 | // Parameterized constructor | 875 | // Parameterized constructor |
| 865 | PrimitivesQueryBase(bool is_long, VAddr address) | 876 | PrimitivesQueryBase(bool has_timestamp, VAddr address) |
| 866 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, | 877 | : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { |
| 867 | dependant_index{}, dependant_manage{} { | 878 | if (has_timestamp) { |
| 868 | if (is_long) { | ||
| 869 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; | 879 | flags |= VideoCommon::QueryFlagBits::HasTimestamp; |
| 870 | } | 880 | } |
| 871 | } | 881 | } |
| 872 | 882 | ||
| 873 | u64 stride; | 883 | u64 stride{}; |
| 874 | VAddr dependant_address; | 884 | VAddr dependant_address{}; |
| 875 | size_t dependant_index; | 885 | Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; |
| 876 | bool dependant_manage; | 886 | size_t dependant_index{}; |
| 887 | bool dependant_manage{}; | ||
| 877 | }; | 888 | }; |
| 878 | 889 | ||
| 879 | template <typename Traits> | ||
| 880 | class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { | 890 | class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { |
| 881 | public: | 891 | public: |
| 882 | PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, | 892 | explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, |
| 883 | TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_) | 893 | TFBCounterStreamer& tfb_streamer_, |
| 884 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>( | 894 | Core::Memory::Memory& cpu_memory_) |
| 885 | id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)), | 895 | : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, |
| 886 | runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} | 896 | tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { |
| 897 | MakeDependent(&tfb_streamer); | ||
| 898 | } | ||
| 899 | |||
| 900 | ~PrimitivesSucceededStreamer() = default; | ||
| 887 | 901 | ||
| 888 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 902 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
| 889 | std::optional<u32> subreport_) override { | 903 | std::optional<u32> subreport_) override { |
| @@ -901,8 +915,11 @@ public: | |||
| 901 | const size_t subreport = static_cast<size_t>(*subreport_); | 915 | const size_t subreport = static_cast<size_t>(*subreport_); |
| 902 | auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); | 916 | auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); |
| 903 | bool must_manage_dependance = false; | 917 | bool must_manage_dependance = false; |
| 918 | new_query->topology = tfb_streamer.GetOutputTopology(); | ||
| 904 | if (dependant_address_opt) { | 919 | if (dependant_address_opt) { |
| 905 | new_query->dependant_address = *dependant_address_opt; | 920 | auto [dep_address, stride] = *dependant_address_opt; |
| 921 | new_query->dependant_address = dep_address; | ||
| 922 | new_query->stride = stride; | ||
| 906 | } else { | 923 | } else { |
| 907 | new_query->dependant_index = | 924 | new_query->dependant_index = |
| 908 | tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); | 925 | tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); |
| @@ -917,25 +934,28 @@ public: | |||
| 917 | } | 934 | } |
| 918 | return index; | 935 | return index; |
| 919 | } | 936 | } |
| 937 | new_query->stride = 1; | ||
| 938 | runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) { | ||
| 939 | for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { | ||
| 940 | const auto& tf = maxwell3d.regs.transform_feedback; | ||
| 941 | if (tf.buffers[i].enable == 0) { | ||
| 942 | continue; | ||
| 943 | } | ||
| 944 | if (tf.controls[i].stream != subreport) { | ||
| 945 | continue; | ||
| 946 | } | ||
| 947 | new_query->stride = tf.controls[i].stride; | ||
| 948 | break; | ||
| 949 | } | ||
| 950 | }); | ||
| 920 | } | 951 | } |
| 921 | 952 | ||
| 922 | new_query->dependant_manage = must_manage_dependance; | 953 | new_query->dependant_manage = must_manage_dependance; |
| 923 | runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 924 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | ||
| 925 | i++) { | ||
| 926 | const auto& tf = regs.transform_feedback; | ||
| 927 | if (tf.controls[i].stream != subreport) { | ||
| 928 | continue; | ||
| 929 | } | ||
| 930 | new_query->stride = tf.controls[i].stride; | ||
| 931 | break; | ||
| 932 | } | ||
| 933 | }); | ||
| 934 | pending_flush_queries.push_back(index); | 954 | pending_flush_queries.push_back(index); |
| 935 | return index; | 955 | return index; |
| 936 | } | 956 | } |
| 937 | 957 | ||
| 938 | bool HasUnsyncedQueries() override { | 958 | bool HasUnsyncedQueries() const override { |
| 939 | return !pending_flush_queries.empty(); | 959 | return !pending_flush_queries.empty(); |
| 940 | } | 960 | } |
| 941 | 961 | ||
| @@ -960,22 +980,49 @@ public: | |||
| 960 | } | 980 | } |
| 961 | 981 | ||
| 962 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | 982 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; |
| 983 | u64 num_vertices = 0; | ||
| 963 | if (query->dependant_manage) { | 984 | if (query->dependant_manage) { |
| 964 | auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); | 985 | auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); |
| 965 | query->value = dependant_query->value / query->stride; | 986 | num_vertices = dependant_query->value / query->stride; |
| 966 | tfb_streamer.Free(query->dependant_index); | 987 | tfb_streamer.Free(query->dependant_index); |
| 967 | } else { | 988 | } else { |
| 968 | u8* pointer = cpu_memory.GetPointer(query->dependant_address); | 989 | u8* pointer = cpu_memory.GetPointer(query->dependant_address); |
| 969 | u32 result; | 990 | u32 result; |
| 970 | std::memcpy(&result, pointer, sizeof(u32)); | 991 | std::memcpy(&result, pointer, sizeof(u32)); |
| 971 | query->value = static_cast<u64>(result) / query->stride; | 992 | num_vertices = static_cast<u64>(result) / query->stride; |
| 972 | } | 993 | } |
| 994 | query->value = [&]() -> u64 { | ||
| 995 | switch (query->topology) { | ||
| 996 | case Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 997 | return num_vertices; | ||
| 998 | case Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 999 | return num_vertices / 2; | ||
| 1000 | case Maxwell3D::Regs::PrimitiveTopology::LineLoop: | ||
| 1001 | return (num_vertices / 2) + 1; | ||
| 1002 | case Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 1003 | return num_vertices - 1; | ||
| 1004 | case Maxwell3D::Regs::PrimitiveTopology::Patches: | ||
| 1005 | case Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 1006 | case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 1007 | return num_vertices / 3; | ||
| 1008 | case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 1009 | case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 1010 | case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 1011 | return num_vertices - 2; | ||
| 1012 | case Maxwell3D::Regs::PrimitiveTopology::Quads: | ||
| 1013 | return num_vertices / 4; | ||
| 1014 | case Maxwell3D::Regs::PrimitiveTopology::Polygon: | ||
| 1015 | return 1U; | ||
| 1016 | default: | ||
| 1017 | return num_vertices; | ||
| 1018 | } | ||
| 1019 | }(); | ||
| 973 | } | 1020 | } |
| 974 | } | 1021 | } |
| 975 | 1022 | ||
| 976 | private: | 1023 | private: |
| 977 | QueryCacheRuntime& runtime; | 1024 | QueryCacheRuntime& runtime; |
| 978 | TFBCounterStreamer<QueryCacheParams>& tfb_streamer; | 1025 | TFBCounterStreamer& tfb_streamer; |
| 979 | Core::Memory::Memory& cpu_memory; | 1026 | Core::Memory::Memory& cpu_memory; |
| 980 | 1027 | ||
| 981 | // syncing queue | 1028 | // syncing queue |
| @@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl { | |||
| 1005 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | 1052 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, |
| 1006 | scheduler, memory_allocator, staging_pool), | 1053 | scheduler, memory_allocator, staging_pool), |
| 1007 | primitives_succeeded_streamer( | 1054 | primitives_succeeded_streamer( |
| 1008 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), | 1055 | static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, |
| 1056 | cpu_memory_), | ||
| 1057 | primitives_needed_minus_suceeded_streamer( | ||
| 1058 | static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), | ||
| 1009 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { | 1059 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { |
| 1010 | 1060 | ||
| 1011 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; | 1061 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; |
| @@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl { | |||
| 1040 | 1090 | ||
| 1041 | // Streamers | 1091 | // Streamers |
| 1042 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; | 1092 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; |
| 1043 | SamplesStreamer<QueryCacheParams> sample_streamer; | 1093 | SamplesStreamer sample_streamer; |
| 1044 | TFBCounterStreamer<QueryCacheParams> tfb_streamer; | 1094 | TFBCounterStreamer tfb_streamer; |
| 1045 | PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer; | 1095 | PrimitivesSucceededStreamer primitives_succeeded_streamer; |
| 1096 | VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer; | ||
| 1046 | 1097 | ||
| 1047 | std::vector<std::pair<VAddr, VAddr>> little_cache; | 1098 | std::vector<std::pair<VAddr, VAddr>> little_cache; |
| 1048 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | 1099 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| @@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl { | |||
| 1059 | bool is_hcr_running; | 1110 | bool is_hcr_running; |
| 1060 | 1111 | ||
| 1061 | // maxwell3d | 1112 | // maxwell3d |
| 1062 | Tegra::Engines::Maxwell3D* maxwell3d; | 1113 | Maxwell3D* maxwell3d; |
| 1063 | }; | 1114 | }; |
| 1064 | 1115 | ||
| 1065 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | 1116 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| @@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | |||
| 1074 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | 1125 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); |
| 1075 | } | 1126 | } |
| 1076 | 1127 | ||
| 1077 | void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { | 1128 | void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) { |
| 1078 | impl->maxwell3d = maxwell3d; | 1129 | impl->maxwell3d = maxwell3d; |
| 1079 | } | 1130 | } |
| 1080 | 1131 | ||
| 1081 | template <typename Func> | 1132 | template <typename Func> |
| 1082 | void QueryCacheRuntime::View3DRegs(Func&& func) { | 1133 | void QueryCacheRuntime::View3DRegs(Func&& func) { |
| 1083 | func(impl->maxwell3d->regs); | 1134 | func(*impl->maxwell3d); |
| 1084 | } | 1135 | } |
| 1085 | 1136 | ||
| 1086 | void QueryCacheRuntime::EndHostConditionalRendering() { | 1137 | void QueryCacheRuntime::EndHostConditionalRendering() { |
| @@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp | |||
| 1240 | return &impl->sample_streamer; | 1291 | return &impl->sample_streamer; |
| 1241 | case QueryType::StreamingByteCount: | 1292 | case QueryType::StreamingByteCount: |
| 1242 | return &impl->tfb_streamer; | 1293 | return &impl->tfb_streamer; |
| 1294 | case QueryType::StreamingPrimitivesNeeded: | ||
| 1295 | case QueryType::VtgPrimitivesOut: | ||
| 1243 | case QueryType::StreamingPrimitivesSucceeded: | 1296 | case QueryType::StreamingPrimitivesSucceeded: |
| 1244 | return &impl->primitives_succeeded_streamer; | 1297 | return &impl->primitives_succeeded_streamer; |
| 1298 | case QueryType::StreamingPrimitivesNeededMinusSucceeded: | ||
| 1299 | return &impl->primitives_needed_minus_suceeded_streamer; | ||
| 1245 | default: | 1300 | default: |
| 1246 | return nullptr; | 1301 | return nullptr; |
| 1247 | } | 1302 | } |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 9ad2929d7..e9a1ea169 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -49,7 +49,8 @@ public: | |||
| 49 | bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); | 49 | bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); |
| 50 | 50 | ||
| 51 | bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, | 51 | bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
| 52 | VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); | 52 | VideoCommon::LookupData object_2, bool qc_dirty, |
| 53 | bool equal_check); | ||
| 53 | 54 | ||
| 54 | VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); | 55 | VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); |
| 55 | 56 | ||
| @@ -66,7 +67,7 @@ private: | |||
| 66 | }; | 67 | }; |
| 67 | 68 | ||
| 68 | struct QueryCacheParams { | 69 | struct QueryCacheParams { |
| 69 | using RuntimeType = Vulkan::QueryCacheRuntime; | 70 | using RuntimeType = typename Vulkan::QueryCacheRuntime; |
| 70 | }; | 71 | }; |
| 71 | 72 | ||
| 72 | using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; | 73 | using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e8862ba04..c7ce7c312 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 194 | 194 | ||
| 195 | query_cache.NotifySegment(true); | 195 | query_cache.NotifySegment(true); |
| 196 | 196 | ||
| 197 | #if ANDROID | ||
| 198 | if (Settings::IsGPULevelHigh()) { | ||
| 199 | // This is problematic on Android, disable on GPU Normal. | ||
| 200 | // query_cache.UpdateCounters(); | ||
| 201 | } | ||
| 202 | #else | ||
| 203 | // query_cache.UpdateCounters(); | ||
| 204 | #endif | ||
| 205 | |||
| 206 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | 197 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |
| 207 | if (!pipeline) { | 198 | if (!pipeline) { |
| 208 | return; | 199 | return; |
| @@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() { | |||
| 294 | 285 | ||
| 295 | query_cache.NotifySegment(true); | 286 | query_cache.NotifySegment(true); |
| 296 | 287 | ||
| 297 | #if ANDROID | ||
| 298 | if (Settings::IsGPULevelHigh()) { | ||
| 299 | // This is problematic on Android, disable on GPU Normal. | ||
| 300 | // query_cache.UpdateCounters(); | ||
| 301 | } | ||
| 302 | #else | ||
| 303 | // query_cache.UpdateCounters(); | ||
| 304 | #endif | ||
| 305 | |||
| 306 | texture_cache.SynchronizeGraphicsDescriptors(); | 288 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 307 | texture_cache.UpdateRenderTargets(false); | 289 | texture_cache.UpdateRenderTargets(false); |
| 308 | 290 | ||
| @@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 332 | FlushWork(); | 314 | FlushWork(); |
| 333 | gpu_memory->FlushCaching(); | 315 | gpu_memory->FlushCaching(); |
| 334 | 316 | ||
| 335 | #if ANDROID | ||
| 336 | if (Settings::IsGPULevelHigh()) { | ||
| 337 | // This is problematic on Android, disable on GPU Normal. | ||
| 338 | // query_cache.UpdateCounters(); | ||
| 339 | } | ||
| 340 | #else | ||
| 341 | // query_cache.UpdateCounters(); | ||
| 342 | #endif | ||
| 343 | |||
| 344 | query_cache.NotifySegment(true); | 317 | query_cache.NotifySegment(true); |
| 345 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | 318 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, |
| 346 | maxwell3d->regs.zpass_pixel_count_enable); | 319 | maxwell3d->regs.zpass_pixel_count_enable); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ffd44c68d..ad069556c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -85,7 +85,8 @@ public: | |||
| 85 | void Clear(u32 layer_count) override; | 85 | void Clear(u32 layer_count) override; |
| 86 | void DispatchCompute() override; | 86 | void DispatchCompute() override; |
| 87 | void ResetCounter(VideoCommon::QueryType type) override; | 87 | void ResetCounter(VideoCommon::QueryType type) override; |
| 88 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | 88 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 89 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||
| 89 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 90 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 90 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 91 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 91 | void FlushAll() override; | 92 | void FlushAll() override; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c87e5fb07..da03803aa 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -15,9 +15,13 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/polyfill_thread.h" | 16 | #include "common/polyfill_thread.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 18 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 20 | 19 | ||
| 20 | namespace VideoCommon { | ||
| 21 | template <typename Trait> | ||
| 22 | class QueryCacheBase; | ||
| 23 | } | ||
| 24 | |||
| 21 | namespace Vulkan { | 25 | namespace Vulkan { |
| 22 | 26 | ||
| 23 | class CommandPool; | 27 | class CommandPool; |
| @@ -26,6 +30,8 @@ class Framebuffer; | |||
| 26 | class GraphicsPipeline; | 30 | class GraphicsPipeline; |
| 27 | class StateTracker; | 31 | class StateTracker; |
| 28 | 32 | ||
| 33 | struct QueryCacheParams; | ||
| 34 | |||
| 29 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 35 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 30 | /// OpenGL-like operations on Vulkan command buffers. | 36 | /// OpenGL-like operations on Vulkan command buffers. |
| 31 | class Scheduler { | 37 | class Scheduler { |
| @@ -63,7 +69,7 @@ public: | |||
| 63 | void InvalidateState(); | 69 | void InvalidateState(); |
| 64 | 70 | ||
| 65 | /// Assigns the query cache. | 71 | /// Assigns the query cache. |
| 66 | void SetQueryCache(QueryCache& query_cache_) { | 72 | void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) { |
| 67 | query_cache = &query_cache_; | 73 | query_cache = &query_cache_; |
| 68 | } | 74 | } |
| 69 | 75 | ||
| @@ -219,7 +225,7 @@ private: | |||
| 219 | std::unique_ptr<MasterSemaphore> master_semaphore; | 225 | std::unique_ptr<MasterSemaphore> master_semaphore; |
| 220 | std::unique_ptr<CommandPool> command_pool; | 226 | std::unique_ptr<CommandPool> command_pool; |
| 221 | 227 | ||
| 222 | QueryCache* query_cache = nullptr; | 228 | VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; |
| 223 | 229 | ||
| 224 | vk::CommandBuffer current_cmdbuf; | 230 | vk::CommandBuffer current_cmdbuf; |
| 225 | 231 | ||