Diffstat (limited to 'src')

 src/core/memory.cpp | 2
 src/video_core/fence_manager.h | 143
 src/video_core/memory_manager.cpp | 10
 src/video_core/memory_manager.h | 4
 src/video_core/query_cache.h | 137
 src/video_core/renderer_opengl/gl_fence_manager.h | 12
 src/video_core/renderer_opengl/gl_query_cache.cpp | 12
 src/video_core/renderer_opengl/gl_query_cache.h | 6
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 2
 src/video_core/renderer_vulkan/vk_fence_manager.cpp | 1
 src/video_core/renderer_vulkan/vk_fence_manager.h | 11
 src/video_core/renderer_vulkan/vk_query_cache.cpp | 15
 src/video_core/renderer_vulkan/vk_query_cache.h | 5
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6

 14 files changed, 286 insertions(+), 80 deletions(-)
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 432310632..a9667463f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -462,7 +462,7 @@ struct Memory::Impl {
         }
 
         if (Settings::IsFastmemEnabled()) {
-            const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
+            const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
             system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
         }
 
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c390ac91b..3b2f6aab6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,13 +4,20 @@
 #pragma once
 
 #include <algorithm>
+#include <condition_variable>
 #include <cstring>
 #include <deque>
 #include <functional>
 #include <memory>
+#include <mutex>
+#include <thread>
 #include <queue>
 
 #include "common/common_types.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "common/thread.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
 #include "video_core/host1x/host1x.h"
@@ -23,15 +30,26 @@ class FenceBase {
 public:
     explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
 
+    bool IsStubbed() const {
+        return is_stubbed;
+    }
+
 protected:
     bool is_stubbed;
 };
 
-template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
+template <typename Traits>
 class FenceManager {
+    using TFence = typename Traits::FenceType;
+    using TTextureCache = typename Traits::TextureCacheType;
+    using TBufferCache = typename Traits::BufferCacheType;
+    using TQueryCache = typename Traits::QueryCacheType;
+    static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
+
 public:
     /// Notify the fence manager about a new frame
     void TickFrame() {
+        std::unique_lock lock(ring_guard);
         delayed_destruction_ring.Tick();
     }
 
@@ -46,17 +64,33 @@ public:
     }
 
     void SignalFence(std::function<void()>&& func) {
-        TryReleasePendingFences();
+        rasterizer.InvalidateGPUCache();
+        bool delay_fence = Settings::IsGPULevelHigh();
+        if constexpr (!can_async_check) {
+            TryReleasePendingFences<false>();
+        }
         const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
-        uncommitted_operations.emplace_back(std::move(func));
-        CommitOperations();
         TFence new_fence = CreateFence(!should_flush);
-        fences.push(new_fence);
+        if constexpr (can_async_check) {
+            guard.lock();
+        }
+        if (delay_fence) {
+            uncommitted_operations.emplace_back(std::move(func));
+        }
+        pending_operations.emplace_back(std::move(uncommitted_operations));
         QueueFence(new_fence);
+        if (!delay_fence) {
+            func();
+        }
+        fences.push(std::move(new_fence));
         if (should_flush) {
             rasterizer.FlushCommands();
         }
+        if constexpr (can_async_check) {
+            guard.unlock();
+            cv.notify_all();
+        }
     }
 
     void SignalSyncPoint(u32 value) {
@@ -66,29 +100,30 @@ public:
     }
 
     void WaitPendingFences() {
-        while (!fences.empty()) {
-            TFence& current_fence = fences.front();
-            if (ShouldWait()) {
-                WaitFence(current_fence);
-            }
-            PopAsyncFlushes();
-            auto operations = std::move(pending_operations.front());
-            pending_operations.pop_front();
-            for (auto& operation : operations) {
-                operation();
-            }
-            PopFence();
+        if constexpr (!can_async_check) {
+            TryReleasePendingFences<true>();
         }
     }
 
 protected:
     explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                          TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+                          TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
                           TQueryCache& query_cache_)
         : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
-          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
+        if constexpr (can_async_check) {
+            fence_thread =
+                std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
+        }
+    }
 
-    virtual ~FenceManager() = default;
+    virtual ~FenceManager() {
+        if constexpr (can_async_check) {
+            fence_thread.request_stop();
+            cv.notify_all();
+            fence_thread.join();
+        }
+    }
 
     /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
     /// true
@@ -104,15 +139,20 @@ protected:
     Tegra::GPU& gpu;
     Tegra::Host1x::SyncpointManager& syncpoint_manager;
     TTextureCache& texture_cache;
-    TTBufferCache& buffer_cache;
+    TBufferCache& buffer_cache;
     TQueryCache& query_cache;
 
 private:
+    template <bool force_wait>
    void TryReleasePendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait() && !IsFenceSignaled(current_fence)) {
-                return;
+                if constexpr (force_wait) {
+                    WaitFence(current_fence);
+                } else {
+                    return;
+                }
             }
             PopAsyncFlushes();
             auto operations = std::move(pending_operations.front());
@@ -120,7 +160,49 @@
             for (auto& operation : operations) {
                 operation();
             }
-            PopFence();
+            {
+                std::unique_lock lock(ring_guard);
+                delayed_destruction_ring.Push(std::move(current_fence));
+            }
+            fences.pop();
+        }
+    }
+
+    void ReleaseThreadFunc(std::stop_token stop_token) {
+        std::string name = "GPUFencingThread";
+        MicroProfileOnThreadCreate(name.c_str());
+
+        // Cleanup
+        SCOPE_EXIT({ MicroProfileOnThreadExit(); });
+
+        Common::SetCurrentThreadName(name.c_str());
+        Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+
+        TFence current_fence;
+        std::deque<std::function<void()>> current_operations;
+        while (!stop_token.stop_requested()) {
+            {
+                std::unique_lock lock(guard);
+                cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
+                if (stop_token.stop_requested()) [[unlikely]] {
+                    return;
+                }
+                current_fence = std::move(fences.front());
+                current_operations = std::move(pending_operations.front());
+                fences.pop();
+                pending_operations.pop_front();
+            }
+            if (!current_fence->IsStubbed()) {
+                WaitFence(current_fence);
+            }
+            PopAsyncFlushes();
+            for (auto& operation : current_operations) {
+                operation();
+            }
+            {
+                std::unique_lock lock(ring_guard);
+                delayed_destruction_ring.Push(std::move(current_fence));
+            }
         }
     }
 
@@ -154,19 +236,16 @@
         query_cache.CommitAsyncFlushes();
     }
 
-    void PopFence() {
-        delayed_destruction_ring.Push(std::move(fences.front()));
-        fences.pop();
-    }
-
-    void CommitOperations() {
-        pending_operations.emplace_back(std::move(uncommitted_operations));
-    }
-
     std::queue<TFence> fences;
     std::deque<std::function<void()>> uncommitted_operations;
     std::deque<std::deque<std::function<void()>>> pending_operations;
 
+    std::mutex guard;
+    std::mutex ring_guard;
+    std::condition_variable cv;
+
+    std::jthread fence_thread;
+
     DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
 };
 
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 01fb5b546..e06ce5d14 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -170,6 +170,7 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
 
 GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
                             bool is_big_pages) {
+    std::unique_lock<std::mutex> lock(guard);
     if (is_big_pages) [[likely]] {
         return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
     }
@@ -177,6 +178,7 @@ GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
 }
 
 GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+    std::unique_lock<std::mutex> lock(guard);
     if (is_big_pages) [[likely]] {
         return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
     }
@@ -187,6 +189,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
     }
+    std::unique_lock<std::mutex> lock(guard);
     GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
 
     for (const auto& [map_addr, map_size] : page_stash) {
@@ -553,6 +556,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
 }
 
 size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
+    std::unique_lock<std::mutex> lock(guard);
     return kind_map.GetContinuousSizeFrom(gpu_addr);
 }
 
@@ -745,10 +749,10 @@ void MemoryManager::FlushCaching() {
         return;
     }
     accumulator->Callback([this](GPUVAddr addr, size_t size) {
-        GetSubmappedRangeImpl<false>(addr, size, page_stash);
+        GetSubmappedRangeImpl<false>(addr, size, page_stash2);
     });
-    rasterizer->InnerInvalidation(page_stash);
-    page_stash.clear();
+    rasterizer->InnerInvalidation(page_stash2);
+    page_stash2.clear();
     accumulator->Clear();
 }
 
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fbbe856c4..794535122 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -5,6 +5,7 @@
 
 #include <atomic>
 #include <map>
+#include <mutex>
 #include <optional>
 #include <vector>
 
@@ -215,6 +216,9 @@ private:
 
     std::vector<u64> big_page_continuous;
     std::vector<std::pair<VAddr, std::size_t>> page_stash{};
+    std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
+
+    mutable std::mutex guard;
 
     static constexpr size_t continuous_bits = 64;
 
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 8906ba6d8..941de95c1 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -6,6 +6,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <functional>
 #include <iterator>
 #include <list>
 #include <memory>
@@ -17,13 +18,19 @@
 
 #include "common/assert.h"
 #include "common/settings.h"
+#include "core/memory.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
 
 namespace VideoCommon {
 
+using AsyncJobId = SlotId;
+
+static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
+
 template <class QueryCache, class HostCounter>
 class CounterStreamBase {
 public:
@@ -93,9 +100,13 @@
 template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
 class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
-    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
-        : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
-                                                          VideoCore::QueryType::SamplesPassed}}} {}
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+                            Core::Memory::Memory& cpu_memory_)
+        : rasterizer{rasterizer_},
+          cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+                                                          VideoCore::QueryType::SamplesPassed}}} {
+        (void)slot_async_jobs.insert(); // Null value
+    }
 
     void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@
             query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
         }
 
-        query->BindCounter(Stream(type).Current(), timestamp);
-        if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-            AsyncFlushQuery(*cpu_addr);
+        auto result = query->BindCounter(Stream(type).Current(), timestamp);
+        if (result) {
+            auto async_job_id = query->GetAsyncJob();
+            auto& async_job = slot_async_jobs[async_job_id];
+            async_job.collected = true;
+            async_job.value = *result;
+            query->SetAsyncJob(NULL_ASYNC_JOB_ID);
         }
+        AsyncFlushQuery(query, timestamp, lock);
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@
     }
 
     void CommitAsyncFlushes() {
+        std::unique_lock lock{mutex};
         committed_flushes.push_back(uncommitted_flushes);
         uncommitted_flushes.reset();
     }
 
     bool HasUncommittedFlushes() const {
+        std::unique_lock lock{mutex};
         return uncommitted_flushes != nullptr;
     }
 
     bool ShouldWaitAsyncFlushes() const {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return false;
         }
@@ -189,6 +208,7 @@
     }
 
     void PopAsyncFlushes() {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return;
         }
@@ -197,15 +217,25 @@
             committed_flushes.pop_front();
             return;
         }
-        for (VAddr query_address : *flush_list) {
-            FlushAndRemoveRegion(query_address, 4);
+        for (AsyncJobId async_job_id : *flush_list) {
+            AsyncJob& async_job = slot_async_jobs[async_job_id];
+            if (!async_job.collected) {
+                FlushAndRemoveRegion(async_job.query_location, 2, true);
+            }
         }
         committed_flushes.pop_front();
     }
 
 private:
+    struct AsyncJob {
+        bool collected = false;
+        u64 value = 0;
+        VAddr query_location = 0;
+        std::optional<u64> timestamp{};
+    };
+
     /// Flushes a memory range to guest memory and removes it from the cache.
-    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
+    void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
         const u64 addr_begin = addr;
         const u64 addr_end = addr_begin + size;
         const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@
                 continue;
             }
             rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
-            query.Flush();
+            AsyncJobId async_job_id = query.GetAsyncJob();
+            auto flush_result = query.Flush(async);
+            if (async_job_id == NULL_ASYNC_JOB_ID) {
+                ASSERT_MSG(false, "This should not be reachable at all");
+                continue;
+            }
+            AsyncJob& async_job = slot_async_jobs[async_job_id];
+            async_job.collected = true;
+            async_job.value = flush_result;
+            query.SetAsyncJob(NULL_ASYNC_JOB_ID);
         }
         std::erase_if(contents, in_range);
     }
@@ -253,26 +292,60 @@
         return found != std::end(contents) ? &*found : nullptr;
     }
 
-    void AsyncFlushQuery(VAddr addr) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
+    void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
+                         std::unique_lock<std::recursive_mutex>& lock) {
+        const AsyncJobId new_async_job_id = slot_async_jobs.insert();
+        {
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            query->SetAsyncJob(new_async_job_id);
+            async_job.query_location = query->GetCpuAddr();
+            async_job.collected = false;
+
+            if (!uncommitted_flushes) {
+                uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
+            }
+            uncommitted_flushes->push_back(new_async_job_id);
         }
-        uncommitted_flushes->push_back(addr);
+        lock.unlock();
+        std::function<void()> operation([this, new_async_job_id, timestamp] {
+            std::unique_lock local_lock{mutex};
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            u64 value = async_job.value;
+            VAddr address = async_job.query_location;
+            slot_async_jobs.erase(new_async_job_id);
+            local_lock.unlock();
+            if (timestamp) {
+                u64 timestamp_value = *timestamp;
+                cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
+                cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
+                rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
+                                            VideoCommon::CacheType::NoQueryCache);
+            } else {
+                u32 small_value = static_cast<u32>(value);
+                cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
+                rasterizer.InvalidateRegion(address, sizeof(u32),
+                                            VideoCommon::CacheType::NoQueryCache);
+            }
+        });
+        rasterizer.SyncOperation(std::move(operation));
     }
 
     static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
     static constexpr unsigned YUZU_PAGEBITS = 12;
 
+    SlotVector<AsyncJob> slot_async_jobs;
+
     VideoCore::RasterizerInterface& rasterizer;
+    Core::Memory::Memory& cpu_memory;
 
-    std::recursive_mutex mutex;
+    mutable std::recursive_mutex mutex;
 
     std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 
-    std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
-    std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
+    std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
 };
 
 template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@
     virtual ~HostCounterBase() = default;
 
     /// Returns the current value of the query.
-    u64 Query() {
+    u64 Query(bool async = false) {
         if (result) {
             return *result;
         }
 
-        u64 value = BlockingQuery() + base_result;
+        u64 value = BlockingQuery(async) + base_result;
         if (dependency) {
             value += dependency->Query();
             dependency = nullptr;
@@ -317,7 +390,7 @@
 
 protected:
     /// Returns the value of query from the backend API blocking as needed.
-    virtual u64 BlockingQuery() const = 0;
+    virtual u64 BlockingQuery(bool async = false) const = 0;
 
 private:
     std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@
     CachedQueryBase& operator=(const CachedQueryBase&) = delete;
 
     /// Flushes the query to guest memory.
-    virtual void Flush() {
+    virtual u64 Flush(bool async = false) {
         // When counter is nullptr it means that it's just been reset. We are supposed to write a
         // zero in these cases.
-        const u64 value = counter ? counter->Query() : 0;
+        const u64 value = counter ? counter->Query(async) : 0;
+        if (async) {
+            return value;
+        }
         std::memcpy(host_ptr, &value, sizeof(u64));
 
         if (timestamp) {
             std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
         }
+        return value;
     }
 
     /// Binds a counter to this query.
-    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+    std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
+                                   std::optional<u64> timestamp_) {
+        std::optional<u64> result{};
         if (counter) {
             // If there's an old counter set it means the query is being rewritten by the game.
             // To avoid losing the data forever, flush here.
-            Flush();
+            result = std::make_optional(Flush());
         }
         counter = std::move(counter_);
         timestamp = timestamp_;
+        return result;
     }
 
     VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@
         return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
    }
 
+    void SetAsyncJob(AsyncJobId assigned_async_job_) {
+        assigned_async_job = assigned_async_job_;
+    }
+
+    AsyncJobId GetAsyncJob() const {
+        return assigned_async_job;
+    }
+
 protected:
     /// Returns true when querying the counter may potentially block.
     bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@
     u8* host_ptr;                         ///< Writable host pointer.
     std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
     std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+    AsyncJobId assigned_async_job;
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index f1446e732..e21b19dcc 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -30,7 +30,17 @@ private:
 };
 
 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+
+struct FenceManagerParams {
+    using FenceType = Fence;
+    using BufferCacheType = BufferCache;
+    using TextureCacheType = TextureCache;
+    using QueryCacheType = QueryCache;
+
+    static constexpr bool HAS_ASYNC_CHECK = false;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 5070db441..99d7347f5 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
 
 } // Anonymous namespace
 
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
-    : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
+    : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
 
 QueryCache::~QueryCache() = default;
 
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
     glEndQuery(GetTarget(type));
 }
 
-u64 HostCounter::BlockingQuery() const {
+u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
     GLint64 value;
     glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
     return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
     return *this;
 }
 
-void CachedQuery::Flush() {
+u64 CachedQuery::Flush([[maybe_unused]] bool async) {
     // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
     // To avoid this disable and re-enable keeping the dependency stream.
     // But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
         stream.Update(false);
     }
 
-    VideoCommon::CachedQueryBase<HostCounter>::Flush();
+    auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
 
     if (slice_counter) {
         stream.Update(true);
     }
+
+    return result;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 14ce59990..872513f22 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(RasterizerOpenGL& rasterizer_);
+    explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
     ~QueryCache();
 
     OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
     void EndQuery();
 
 private:
-    u64 BlockingQuery() const override;
+    u64 BlockingQuery(bool async = false) const override;
 
     QueryCache& cache;
     const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
     CachedQuery(const CachedQuery&) = delete;
     CachedQuery& operator=(const CachedQuery&) = delete;
 
-    void Flush() override;
+    u64 Flush(bool async = false) override;
 
 private:
     QueryCache* cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4993d4709..0089b4b27 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
                    state_tracker, gpu.ShaderNotify()),
-      query_cache(*this), accelerate_dma(buffer_cache, texture_cache),
+      query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
       blit_image(program_manager_) {}
 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0214b103a..fad9e3832 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -5,6 +5,7 @@
 
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7fe2afcd9..145359d4e 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -40,7 +40,16 @@ private:
 };
 using Fence = std::shared_ptr<InnerFence>;
 
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+struct FenceManagerParams {
+    using FenceType = Fence;
+    using BufferCacheType = BufferCache;
+    using TextureCacheType = TextureCache;
+    using QueryCacheType = QueryCache;
+
+    static constexpr bool HAS_ASYNC_CHECK = true;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
 
 class FenceManager final : public GenericFenceManager {
 public:
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 929c8ece6..d67490449 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
     }
 }
 
-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+                       Core::Memory::Memory& cpu_memory_, const Device& device_,
                        Scheduler& scheduler_)
-    : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
+    : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
      query_pools{
          QueryPool{device_, scheduler_, QueryType::SamplesPassed},
      } {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
       query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
     const vk::Device* logical = &cache.GetDevice().GetLogical();
     cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
+        const bool use_precise = Settings::IsGPULevelHigh();
         logical->ResetQueryPool(query.first, query.second, 1);
-        cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
+        cmdbuf.BeginQuery(query.first, query.second,
+                          use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
     });
 }
 
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
         [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
 }
 
-u64 HostCounter::BlockingQuery() const {
-    cache.GetScheduler().Wait(tick);
+u64 HostCounter::BlockingQuery(bool async) const {
+    if (!async) {
+        cache.GetScheduler().Wait(tick);
+    }
     u64 data;
     const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
         query.first, query.second, 1, sizeof(data), &data, sizeof(data),
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 26762ee09..c1b9552eb 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,7 +52,8 @@ private:
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+                        Core::Memory::Memory& cpu_memory_, const Device& device_,
                         Scheduler& scheduler_);
     ~QueryCache();
 
@@ -83,7 +84,7 @@ public:
     void EndQuery();
 
 private:
-    u64 BlockingQuery() const override;
+    u64 BlockingQuery(bool async = false) const override;
 
     QueryCache& cache;
     const VideoCore::QueryType type;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2559a3aa7..d1489fc95 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
       pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
-      query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler),
+      query_cache{*this, cpu_memory_, device, scheduler},
+      accelerate_dma(buffer_cache, texture_cache, scheduler),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
      wfi_event(device.GetLogical().CreateEvent()) {
     scheduler.SetQueryCache(query_cache);
@@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
     const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
     Maxwell::ReportSemaphore::Compare cmp;
     if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
-                                  VideoCommon::CacheType::BufferCache)) {
+                                  VideoCommon::CacheType::BufferCache |
+                                      VideoCommon::CacheType::QueryCache)) {
         return true;
     }
     return false;