diff options
| author | 2023-08-04 03:32:30 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:29 +0200 | |
| commit | f1a2e367113518b277f34ffbb04499882c3b6051 (patch) | |
| tree | 0920a98bd359b9207130d01f6df4ae5135ec805c /src/video_core/renderer_vulkan | |
| parent | Query Cache: Setup Base rework (diff) | |
| download | yuzu-f1a2e367113518b277f34ffbb04499882c3b6051.tar.gz yuzu-f1a2e367113518b277f34ffbb04499882c3b6051.tar.xz yuzu-f1a2e367113518b277f34ffbb04499882c3b6051.zip | |
Query Cache: Fully rework Vulkan's query cache
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_fence_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 1264 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.h | 105 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 98 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_scheduler.h | 2 |
10 files changed, 1337 insertions, 219 deletions
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e15865d16..d8148e89a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| 61 | if (device.IsExtTransformFeedbackSupported()) { | 61 | if (device.IsExtTransformFeedbackSupported()) { |
| 62 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 62 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 63 | } | 63 | } |
| 64 | if (device.IsExtConditionalRendering()) { | ||
| 65 | flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; | ||
| 66 | } | ||
| 64 | const VkBufferCreateInfo buffer_ci = { | 67 | const VkBufferCreateInfo buffer_ci = { |
| 65 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 68 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 66 | .pNext = nullptr, | 69 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 54ee030ce..97cd4521d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" |
| 15 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | ||
| 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 16 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 17 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 18 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -302,6 +303,52 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 302 | return {staging.buffer, staging.offset}; | 303 | return {staging.buffer, staging.offset}; |
| 303 | } | 304 | } |
| 304 | 305 | ||
| 306 | ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, | ||
| 307 | Scheduler& scheduler_, | ||
| 308 | DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||
| 309 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||
| 310 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, | ||
| 311 | RESOLVE_CONDITIONAL_RENDER_COMP_SPV), | ||
| 312 | scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||
| 313 | |||
| 314 | void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, | ||
| 315 | u32 src_offset, bool compare_to_zero) { | ||
| 316 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 317 | |||
| 318 | const size_t compare_size = compare_to_zero ? 8 : 24; | ||
| 319 | |||
| 320 | compute_pass_descriptor_queue.Acquire(); | ||
| 321 | compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); | ||
| 322 | compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); | ||
| 323 | const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||
| 324 | |||
| 325 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 326 | scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||
| 327 | static constexpr VkMemoryBarrier read_barrier{ | ||
| 328 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 329 | .pNext = nullptr, | ||
| 330 | .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 331 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 332 | }; | ||
| 333 | static constexpr VkMemoryBarrier write_barrier{ | ||
| 334 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 335 | .pNext = nullptr, | ||
| 336 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 337 | .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||
| 338 | }; | ||
| 339 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 340 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||
| 341 | |||
| 342 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 343 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||
| 344 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 345 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 346 | cmdbuf.Dispatch(1, 1, 1); | ||
| 347 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 348 | VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier); | ||
| 349 | }); | ||
| 350 | } | ||
| 351 | |||
| 305 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 352 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
| 306 | DescriptorPool& descriptor_pool_, | 353 | DescriptorPool& descriptor_pool_, |
| 307 | StagingBufferPool& staging_buffer_pool_, | 354 | StagingBufferPool& staging_buffer_pool_, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index dd3927376..c62f30d30 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -82,6 +82,19 @@ private: | |||
| 82 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | 82 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; |
| 83 | }; | 83 | }; |
| 84 | 84 | ||
| 85 | class ConditionalRenderingResolvePass final : public ComputePass { | ||
| 86 | public: | ||
| 87 | explicit ConditionalRenderingResolvePass( | ||
| 88 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||
| 89 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||
| 90 | |||
| 91 | void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero); | ||
| 92 | |||
| 93 | private: | ||
| 94 | Scheduler& scheduler; | ||
| 95 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||
| 96 | }; | ||
| 97 | |||
| 85 | class ASTCDecoderPass final : public ComputePass { | 98 | class ASTCDecoderPass final : public ComputePass { |
| 86 | public: | 99 | public: |
| 87 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 100 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 145359d4e..14fc5ad71 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "video_core/fence_manager.h" | 8 | #include "video_core/fence_manager.h" |
| 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
| 13 | class System; | 14 | class System; |
| @@ -20,7 +21,6 @@ class RasterizerInterface; | |||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| 21 | 22 | ||
| 22 | class Device; | 23 | class Device; |
| 23 | class QueryCache; | ||
| 24 | class Scheduler; | 24 | class Scheduler; |
| 25 | 25 | ||
| 26 | class InnerFence : public VideoCommon::FenceBase { | 26 | class InnerFence : public VideoCommon::FenceBase { |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 29e0b797b..42f571007 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -1,139 +1,1223 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | ||
| 5 | #include <cstddef> | 4 | #include <cstddef> |
| 5 | #include <limits> | ||
| 6 | #include <map> | ||
| 7 | #include <memory> | ||
| 8 | #include <span> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 6 | #include <utility> | 11 | #include <utility> |
| 7 | #include <vector> | 12 | #include <vector> |
| 8 | 13 | ||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "video_core/query_cache/query_cache.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 23 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 25 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_device.h" | 27 | #include "video_core/vulkan_common/vulkan_device.h" |
| 28 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 29 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 30 | ||
| 15 | namespace Vulkan { | 31 | namespace Vulkan { |
| 16 | 32 | ||
| 17 | using VideoCore::QueryType; | 33 | using VideoCommon::QueryType; |
| 18 | 34 | ||
| 19 | namespace { | 35 | namespace { |
| 36 | class SamplesQueryBank : public VideoCommon::BankBase { | ||
| 37 | public: | ||
| 38 | static constexpr size_t BANK_SIZE = 256; | ||
| 39 | static constexpr size_t QUERY_SIZE = 8; | ||
| 40 | SamplesQueryBank(const Device& device_, size_t index_) | ||
| 41 | : BankBase(BANK_SIZE), device{device_}, index{index_} { | ||
| 42 | const auto& dev = device.GetLogical(); | ||
| 43 | query_pool = dev.CreateQueryPool({ | ||
| 44 | .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, | ||
| 45 | .pNext = nullptr, | ||
| 46 | .flags = 0, | ||
| 47 | .queryType = VK_QUERY_TYPE_OCCLUSION, | ||
| 48 | .queryCount = BANK_SIZE, | ||
| 49 | .pipelineStatistics = 0, | ||
| 50 | }); | ||
| 51 | Reset(); | ||
| 52 | } | ||
| 20 | 53 | ||
| 21 | constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; | 54 | ~SamplesQueryBank() = default; |
| 22 | 55 | ||
| 23 | constexpr VkQueryType GetTarget(QueryType type) { | 56 | void Reset() override { |
| 24 | return QUERY_TARGETS[static_cast<std::size_t>(type)]; | 57 | ASSERT(references == 0); |
| 25 | } | 58 | VideoCommon::BankBase::Reset(); |
| 59 | const auto& dev = device.GetLogical(); | ||
| 60 | dev.ResetQueryPool(*query_pool, 0, BANK_SIZE); | ||
| 61 | host_results.fill(0ULL); | ||
| 62 | next_bank = 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | void Sync(size_t start, size_t size) { | ||
| 66 | const auto& dev = device.GetLogical(); | ||
| 67 | const VkResult query_result = dev.GetQueryResults( | ||
| 68 | *query_pool, static_cast<u32>(start), static_cast<u32>(size), sizeof(u64) * size, | ||
| 69 | &host_results[start], sizeof(u64), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | ||
| 70 | switch (query_result) { | ||
| 71 | case VK_SUCCESS: | ||
| 72 | return; | ||
| 73 | case VK_ERROR_DEVICE_LOST: | ||
| 74 | device.ReportLoss(); | ||
| 75 | [[fallthrough]]; | ||
| 76 | default: | ||
| 77 | throw vk::Exception(query_result); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | VkQueryPool GetInnerPool() { | ||
| 82 | return *query_pool; | ||
| 83 | } | ||
| 84 | |||
| 85 | size_t GetIndex() const { | ||
| 86 | return index; | ||
| 87 | } | ||
| 88 | |||
| 89 | const std::array<u64, BANK_SIZE>& GetResults() const { | ||
| 90 | return host_results; | ||
| 91 | } | ||
| 92 | |||
| 93 | size_t next_bank; | ||
| 94 | |||
| 95 | private: | ||
| 96 | const Device& device; | ||
| 97 | const size_t index; | ||
| 98 | vk::QueryPool query_pool; | ||
| 99 | std::array<u64, BANK_SIZE> host_results; | ||
| 100 | }; | ||
| 101 | |||
| 102 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | ||
| 103 | |||
| 104 | struct HostSyncValues { | ||
| 105 | VAddr address; | ||
| 106 | size_t size; | ||
| 107 | size_t offset; | ||
| 108 | |||
| 109 | static constexpr bool GeneratesBaseBuffer = false; | ||
| 110 | }; | ||
| 111 | |||
| 112 | template <typename Traits> | ||
| 113 | class SamplesStreamer : public BaseStreamer { | ||
| 114 | public: | ||
| 115 | SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | ||
| 116 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) | ||
| 117 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | ||
| 118 | memory_allocator{memory_allocator_} { | ||
| 119 | BuildResolveBuffer(); | ||
| 120 | current_bank = nullptr; | ||
| 121 | current_query = nullptr; | ||
| 122 | } | ||
| 123 | |||
| 124 | void StartCounter() override { | ||
| 125 | if (has_started) { | ||
| 126 | return; | ||
| 127 | } | ||
| 128 | ReserveHostQuery(); | ||
| 129 | scheduler.Record([query_pool = current_query_pool, | ||
| 130 | query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { | ||
| 131 | const bool use_precise = Settings::IsGPULevelHigh(); | ||
| 132 | cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index), | ||
| 133 | use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); | ||
| 134 | }); | ||
| 135 | has_started = true; | ||
| 136 | } | ||
| 137 | |||
| 138 | void PauseCounter() override { | ||
| 139 | if (!has_started) { | ||
| 140 | return; | ||
| 141 | } | ||
| 142 | scheduler.Record([query_pool = current_query_pool, | ||
| 143 | query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { | ||
| 144 | cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index)); | ||
| 145 | }); | ||
| 146 | has_started = false; | ||
| 147 | } | ||
| 148 | |||
| 149 | void ResetCounter() override { | ||
| 150 | if (has_started) { | ||
| 151 | PauseCounter(); | ||
| 152 | } | ||
| 153 | AbandonCurrentQuery(); | ||
| 154 | } | ||
| 155 | |||
| 156 | void CloseCounter() override { | ||
| 157 | PauseCounter(); | ||
| 158 | } | ||
| 159 | |||
| 160 | bool HasPendingSync() override { | ||
| 161 | return !pending_sync.empty(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void SyncWrites() override { | ||
| 165 | if (sync_values_stash.empty()) { | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | |||
| 169 | for (size_t i = 0; i < sync_values_stash.size(); i++) { | ||
| 170 | runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], *resolve_buffers[i]); | ||
| 171 | } | ||
| 172 | |||
| 173 | sync_values_stash.clear(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void PresyncWrites() override { | ||
| 177 | if (pending_sync.empty()) { | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | PauseCounter(); | ||
| 181 | sync_values_stash.clear(); | ||
| 182 | sync_values_stash.emplace_back(); | ||
| 183 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); | ||
| 184 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | ||
| 185 | std::unordered_map<size_t, std::pair<size_t, size_t>> offsets; | ||
| 186 | size_t this_bank_slot = std::numeric_limits<size_t>::max(); | ||
| 187 | size_t resolve_slots_remaining = resolve_slots; | ||
| 188 | size_t resolve_buffer_index = 0; | ||
| 189 | ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start, | ||
| 190 | size_t amount) { | ||
| 191 | size_t bank_id = bank->GetIndex(); | ||
| 192 | if (this_bank_slot != bank_id) { | ||
| 193 | this_bank_slot = bank_id; | ||
| 194 | if (resolve_slots_remaining == 0) { | ||
| 195 | resolve_buffer_index++; | ||
| 196 | if (resolve_buffer_index >= resolve_buffers.size()) { | ||
| 197 | BuildResolveBuffer(); | ||
| 198 | } | ||
| 199 | resolve_slots_remaining = resolve_slots; | ||
| 200 | sync_values_stash.emplace_back(); | ||
| 201 | sync_values = sync_values = &sync_values_stash.back(); | ||
| 202 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | ||
| 203 | } | ||
| 204 | resolve_slots_remaining--; | ||
| 205 | } | ||
| 206 | auto& resolve_buffer = resolve_buffers[resolve_buffer_index]; | ||
| 207 | const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * | ||
| 208 | (resolve_slots - resolve_slots_remaining - 1); | ||
| 209 | VkQueryPool query_pool = bank->GetInnerPool(); | ||
| 210 | scheduler.Record([start, amount, base_offset, query_pool, | ||
| 211 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { | ||
| 212 | size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; | ||
| 213 | const VkBufferMemoryBarrier copy_query_pool_barrier{ | ||
| 214 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 215 | .pNext = nullptr, | ||
| 216 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 217 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 218 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 219 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 220 | .buffer = buffer, | ||
| 221 | .offset = final_offset, | ||
| 222 | .size = amount * SamplesQueryBank::QUERY_SIZE, | ||
| 223 | }; | ||
| 224 | |||
| 225 | cmdbuf.CopyQueryPoolResults( | ||
| 226 | query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer, | ||
| 227 | static_cast<u32>(final_offset), SamplesQueryBank::QUERY_SIZE, | ||
| 228 | VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); | ||
| 229 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 230 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier); | ||
| 231 | }); | ||
| 232 | offsets[bank_id] = {sync_values_stash.size() - 1, base_offset}; | ||
| 233 | }); | ||
| 234 | |||
| 235 | // Convert queries | ||
| 236 | for (auto q : pending_sync) { | ||
| 237 | auto* query = GetQuery(q); | ||
| 238 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { | ||
| 239 | continue; | ||
| 240 | } | ||
| 241 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { | ||
| 242 | continue; | ||
| 243 | } | ||
| 244 | if (query->size_slots > 1) { | ||
| 245 | // This is problematic. | ||
| 246 | UNIMPLEMENTED(); | ||
| 247 | } | ||
| 248 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | ||
| 249 | auto loc_data = offsets[query->start_bank_id]; | ||
| 250 | sync_values_stash[loc_data.first].emplace_back(HostSyncValues{ | ||
| 251 | .address = query->guest_address, | ||
| 252 | .size = SamplesQueryBank::QUERY_SIZE, | ||
| 253 | .offset = loc_data.second + query->start_slot * SamplesQueryBank::QUERY_SIZE, | ||
| 254 | }); | ||
| 255 | } | ||
| 256 | |||
| 257 | AbandonCurrentQuery(); | ||
| 258 | pending_sync.clear(); | ||
| 259 | } | ||
| 260 | |||
| 261 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 262 | [[maybe_unused]] std::optional<u32> subreport) override { | ||
| 263 | auto index = BuildQuery(); | ||
| 264 | auto* new_query = GetQuery(index); | ||
| 265 | new_query->guest_address = address; | ||
| 266 | new_query->value = 100; | ||
| 267 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | ||
| 268 | if (has_timestamp) { | ||
| 269 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 270 | } | ||
| 271 | if (!current_query) { | ||
| 272 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 273 | return index; | ||
| 274 | } | ||
| 275 | new_query->start_bank_id = current_query->start_bank_id; | ||
| 276 | new_query->size_banks = current_query->size_banks; | ||
| 277 | new_query->start_slot = current_query->start_slot; | ||
| 278 | new_query->size_slots = current_query->size_slots; | ||
| 279 | ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 280 | bank->AddReference(amount); | ||
| 281 | }); | ||
| 282 | pending_sync.push_back(index); | ||
| 283 | pending_flush_queries.push_back(index); | ||
| 284 | return index; | ||
| 285 | } | ||
| 286 | |||
| 287 | bool HasUnsyncedQueries() override { | ||
| 288 | return !pending_flush_queries.empty(); | ||
| 289 | } | ||
| 290 | |||
| 291 | void PushUnsyncedQueries() override { | ||
| 292 | PauseCounter(); | ||
| 293 | { | ||
| 294 | std::scoped_lock lk(flush_guard); | ||
| 295 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | ||
| 296 | } | ||
| 297 | } | ||
| 298 | |||
| 299 | void PopUnsyncedQueries() override { | ||
| 300 | std::vector<size_t> current_flush_queries; | ||
| 301 | { | ||
| 302 | std::scoped_lock lk(flush_guard); | ||
| 303 | current_flush_queries = std::move(pending_flush_sets.front()); | ||
| 304 | pending_flush_sets.pop_front(); | ||
| 305 | } | ||
| 306 | ApplyBanksWideOp<false>( | ||
| 307 | current_flush_queries, | ||
| 308 | [](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); }); | ||
| 309 | for (auto q : current_flush_queries) { | ||
| 310 | auto* query = GetQuery(q); | ||
| 311 | u64 total = 0; | ||
| 312 | ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 313 | const auto& results = bank->GetResults(); | ||
| 314 | for (size_t i = 0; i < amount; i++) { | ||
| 315 | total += results[start + i]; | ||
| 316 | } | ||
| 317 | }); | ||
| 318 | query->value = total; | ||
| 319 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | private: | ||
| 324 | template <typename Func> | ||
| 325 | void ApplyBankOp(VideoCommon::HostQueryBase* query, Func&& func) { | ||
| 326 | size_t size_slots = query->size_slots; | ||
| 327 | if (size_slots == 0) { | ||
| 328 | return; | ||
| 329 | } | ||
| 330 | size_t bank_id = query->start_bank_id; | ||
| 331 | size_t banks_set = query->size_banks; | ||
| 332 | size_t start_slot = query->start_slot; | ||
| 333 | for (size_t i = 0; i < banks_set; i++) { | ||
| 334 | auto& the_bank = bank_pool.GetBank(bank_id); | ||
| 335 | size_t amount = std::min(the_bank.Size() - start_slot, size_slots); | ||
| 336 | func(&the_bank, start_slot, amount); | ||
| 337 | bank_id = the_bank.next_bank - 1; | ||
| 338 | start_slot = 0; | ||
| 339 | size_slots -= amount; | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
| 343 | template <bool is_ordered, typename Func> | ||
| 344 | void ApplyBanksWideOp(std::vector<size_t>& queries, Func&& func) { | ||
| 345 | std::conditional_t<is_ordered, std::map<size_t, std::pair<size_t, size_t>>, | ||
| 346 | std::unordered_map<size_t, std::pair<size_t, size_t>>> | ||
| 347 | indexer; | ||
| 348 | for (auto q : queries) { | ||
| 349 | auto* query = GetQuery(q); | ||
| 350 | ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 351 | auto id = bank->GetIndex(); | ||
| 352 | auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), | ||
| 353 | std::numeric_limits<size_t>::min()); | ||
| 354 | auto& current_pair = pair.first->second; | ||
| 355 | current_pair.first = std::min(current_pair.first, start); | ||
| 356 | current_pair.second = std::max(current_pair.second, amount + start); | ||
| 357 | }); | ||
| 358 | } | ||
| 359 | for (auto& cont : indexer) { | ||
| 360 | func(&bank_pool.GetBank(cont.first), cont.second.first, | ||
| 361 | cont.second.second - cont.second.first); | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | void ReserveBank() { | ||
| 366 | current_bank_id = | ||
| 367 | bank_pool.ReserveBank([this](std::deque<SamplesQueryBank>& queue, size_t index) { | ||
| 368 | queue.emplace_back(device, index); | ||
| 369 | }); | ||
| 370 | if (current_bank) { | ||
| 371 | current_bank->next_bank = current_bank_id + 1; | ||
| 372 | } | ||
| 373 | current_bank = &bank_pool.GetBank(current_bank_id); | ||
| 374 | current_query_pool = current_bank->GetInnerPool(); | ||
| 375 | } | ||
| 376 | |||
| 377 | size_t ReserveBankSlot() { | ||
| 378 | if (!current_bank || current_bank->IsClosed()) { | ||
| 379 | ReserveBank(); | ||
| 380 | } | ||
| 381 | auto [built, index] = current_bank->Reserve(); | ||
| 382 | current_bank_slot = index; | ||
| 383 | return index; | ||
| 384 | } | ||
| 385 | |||
| 386 | void ReserveHostQuery() { | ||
| 387 | size_t new_slot = ReserveBankSlot(); | ||
| 388 | current_bank->AddReference(1); | ||
| 389 | if (current_query) { | ||
| 390 | size_t bank_id = current_query->start_bank_id; | ||
| 391 | size_t banks_set = current_query->size_banks - 1; | ||
| 392 | bool found = bank_id == current_bank_id; | ||
| 393 | while (!found && banks_set > 0) { | ||
| 394 | SamplesQueryBank& some_bank = bank_pool.GetBank(bank_id); | ||
| 395 | bank_id = some_bank.next_bank - 1; | ||
| 396 | found = bank_id == current_bank_id; | ||
| 397 | banks_set--; | ||
| 398 | } | ||
| 399 | if (!found) { | ||
| 400 | current_query->size_banks++; | ||
| 401 | } | ||
| 402 | current_query->size_slots++; | ||
| 403 | } else { | ||
| 404 | current_query_id = BuildQuery(); | ||
| 405 | current_query = GetQuery(current_query_id); | ||
| 406 | current_query->start_bank_id = static_cast<u32>(current_bank_id); | ||
| 407 | current_query->size_banks = 1; | ||
| 408 | current_query->start_slot = new_slot; | ||
| 409 | current_query->size_slots = 1; | ||
| 410 | } | ||
| 411 | } | ||
| 412 | |||
| 413 | void Free(size_t query_id) override { | ||
| 414 | std::scoped_lock lk(guard); | ||
| 415 | auto* query = GetQuery(query_id); | ||
| 416 | ApplyBankOp(query, [](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 417 | bank->CloseReference(amount); | ||
| 418 | }); | ||
| 419 | ReleaseQuery(query_id); | ||
| 420 | } | ||
| 421 | |||
| 422 | void AbandonCurrentQuery() { | ||
| 423 | if (!current_query) { | ||
| 424 | return; | ||
| 425 | } | ||
| 426 | Free(current_query_id); | ||
| 427 | current_query = nullptr; | ||
| 428 | current_query_id = 0; | ||
| 429 | } | ||
| 430 | |||
| 431 | void BuildResolveBuffer() { | ||
| 432 | const VkBufferCreateInfo buffer_ci = { | ||
| 433 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 434 | .pNext = nullptr, | ||
| 435 | .flags = 0, | ||
| 436 | .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, | ||
| 437 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||
| 438 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 439 | .queueFamilyIndexCount = 0, | ||
| 440 | .pQueueFamilyIndices = nullptr, | ||
| 441 | }; | ||
| 442 | resolve_buffers.emplace_back( | ||
| 443 | std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); | ||
| 444 | } | ||
| 445 | |||
| 446 | static constexpr size_t resolve_slots = 8; | ||
| 447 | |||
| 448 | QueryCacheRuntime& runtime; | ||
| 449 | const Device& device; | ||
| 450 | Scheduler& scheduler; | ||
| 451 | const MemoryAllocator& memory_allocator; | ||
| 452 | VideoCommon::BankPool<SamplesQueryBank> bank_pool; | ||
| 453 | std::deque<vk::Buffer> resolve_buffers; | ||
| 454 | std::deque<std::vector<HostSyncValues>> sync_values_stash; | ||
| 455 | |||
| 456 | // syncing queue | ||
| 457 | std::vector<size_t> pending_sync; | ||
| 458 | |||
| 459 | // flush levels | ||
| 460 | std::vector<size_t> pending_flush_queries; | ||
| 461 | std::deque<std::vector<size_t>> pending_flush_sets; | ||
| 462 | |||
| 463 | // State Machine | ||
| 464 | size_t current_bank_slot; | ||
| 465 | size_t current_bank_id; | ||
| 466 | SamplesQueryBank* current_bank; | ||
| 467 | VkQueryPool current_query_pool; | ||
| 468 | size_t current_query_id; | ||
| 469 | VideoCommon::HostQueryBase* current_query; | ||
| 470 | bool has_started{}; | ||
| 471 | std::mutex flush_guard; | ||
| 472 | }; | ||
| 473 | |||
| 474 | // Transform feedback queries | ||
| 475 | class TFBQueryBank : public VideoCommon::BankBase { | ||
| 476 | public: | ||
| 477 | static constexpr size_t BANK_SIZE = 1024; | ||
| 478 | static constexpr size_t QUERY_SIZE = 4; | ||
| 479 | TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) | ||
| 480 | : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { | ||
| 481 | const VkBufferCreateInfo buffer_ci = { | ||
| 482 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 483 | .pNext = nullptr, | ||
| 484 | .flags = 0, | ||
| 485 | .size = QUERY_SIZE * BANK_SIZE, | ||
| 486 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 487 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 488 | .queueFamilyIndexCount = 0, | ||
| 489 | .pQueueFamilyIndices = nullptr, | ||
| 490 | }; | ||
| 491 | buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 492 | } | ||
| 493 | |||
| 494 | ~TFBQueryBank() = default; | ||
| 495 | |||
| 496 | void Reset() override { | ||
| 497 | ASSERT(references == 0); | ||
| 498 | VideoCommon::BankBase::Reset(); | ||
| 499 | } | ||
| 500 | |||
| 501 | void Sync(StagingBufferRef& stagging_buffer, size_t extra_offset, size_t start, size_t size) { | ||
| 502 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 503 | scheduler.Record([this, dst_buffer = stagging_buffer.buffer, extra_offset, start, | ||
| 504 | size](vk::CommandBuffer cmdbuf) { | ||
| 505 | std::array<VkBufferCopy, 1> copy{VkBufferCopy{ | ||
| 506 | .srcOffset = start * QUERY_SIZE, | ||
| 507 | .dstOffset = extra_offset, | ||
| 508 | .size = size * QUERY_SIZE, | ||
| 509 | }}; | ||
| 510 | cmdbuf.CopyBuffer(*buffer, dst_buffer, copy); | ||
| 511 | }); | ||
| 512 | } | ||
| 513 | |||
| 514 | size_t GetIndex() const { | ||
| 515 | return index; | ||
| 516 | } | ||
| 517 | |||
| 518 | VkBuffer GetBuffer() const { | ||
| 519 | return *buffer; | ||
| 520 | } | ||
| 521 | |||
| 522 | private: | ||
| 523 | Scheduler& scheduler; | ||
| 524 | const size_t index; | ||
| 525 | vk::Buffer buffer; | ||
| 526 | }; | ||
| 527 | |||
| 528 | template <typename Traits> | ||
| 529 | class TFBCounterStreamer : public BaseStreamer { | ||
| 530 | public: | ||
| 531 | TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | ||
| 532 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, | ||
| 533 | StagingBufferPool& staging_pool_) | ||
| 534 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | ||
| 535 | memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { | ||
| 536 | buffers_count = 0; | ||
| 537 | current_bank = nullptr; | ||
| 538 | counter_buffers.fill(VK_NULL_HANDLE); | ||
| 539 | offsets.fill(0); | ||
| 540 | const VkBufferCreateInfo buffer_ci = { | ||
| 541 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 542 | .pNext = nullptr, | ||
| 543 | .flags = 0, | ||
| 544 | .size = TFBQueryBank::QUERY_SIZE * NUM_STREAMS, | ||
| 545 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 546 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT, | ||
| 547 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 548 | .queueFamilyIndexCount = 0, | ||
| 549 | .pQueueFamilyIndices = nullptr, | ||
| 550 | }; | ||
| 551 | |||
| 552 | counters_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 553 | for (auto& c : counter_buffers) { | ||
| 554 | c = *counters_buffer; | ||
| 555 | } | ||
| 556 | size_t base_offset = 0; | ||
| 557 | for (auto& o : offsets) { | ||
| 558 | o = base_offset; | ||
| 559 | base_offset += TFBQueryBank::QUERY_SIZE; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | void StartCounter() override { | ||
| 564 | FlushBeginTFB(); | ||
| 565 | has_started = true; | ||
| 566 | } | ||
| 567 | |||
| 568 | void PauseCounter() override { | ||
| 569 | CloseCounter(); | ||
| 570 | } | ||
| 571 | |||
| 572 | void ResetCounter() override { | ||
| 573 | CloseCounter(); | ||
| 574 | } | ||
| 575 | |||
| 576 | void CloseCounter() override { | ||
| 577 | if (has_flushed_end_pending) { | ||
| 578 | FlushEndTFB(); | ||
| 579 | } | ||
| 580 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 581 | if (regs.transform_feedback_enabled == 0) { | ||
| 582 | streams_mask = 0; | ||
| 583 | has_started = false; | ||
| 584 | } | ||
| 585 | }); | ||
| 586 | } | ||
| 587 | |||
| 588 | bool HasPendingSync() override { | ||
| 589 | return !pending_sync.empty(); | ||
| 590 | } | ||
| 591 | |||
| 592 | void SyncWrites() override { | ||
| 593 | CloseCounter(); | ||
| 594 | std::unordered_map<size_t, std::vector<HostSyncValues>> sync_values_stash; | ||
| 595 | for (auto q : pending_sync) { | ||
| 596 | auto* query = GetQuery(q); | ||
| 597 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { | ||
| 598 | continue; | ||
| 599 | } | ||
| 600 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { | ||
| 601 | continue; | ||
| 602 | } | ||
| 603 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | ||
| 604 | sync_values_stash.try_emplace(query->start_bank_id); | ||
| 605 | sync_values_stash[query->start_bank_id].emplace_back(HostSyncValues{ | ||
| 606 | .address = query->guest_address, | ||
| 607 | .size = TFBQueryBank::QUERY_SIZE, | ||
| 608 | .offset = query->start_slot * TFBQueryBank::QUERY_SIZE, | ||
| 609 | }); | ||
| 610 | } | ||
| 611 | for (auto& p : sync_values_stash) { | ||
| 612 | auto& bank = bank_pool.GetBank(p.first); | ||
| 613 | runtime.template SyncValues<HostSyncValues>(p.second, bank.GetBuffer()); | ||
| 614 | } | ||
| 615 | pending_sync.clear(); | ||
| 616 | } | ||
| 617 | |||
| 618 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 619 | std::optional<u32> subreport_) override { | ||
| 620 | auto index = BuildQuery(); | ||
| 621 | auto* new_query = GetQuery(index); | ||
| 622 | new_query->guest_address = address; | ||
| 623 | new_query->value = 0; | ||
| 624 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | ||
| 625 | if (has_timestamp) { | ||
| 626 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 627 | } | ||
| 628 | if (!subreport_) { | ||
| 629 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 630 | return index; | ||
| 631 | } | ||
| 632 | const size_t subreport = static_cast<size_t>(*subreport_); | ||
| 633 | UpdateBuffers(); | ||
| 634 | if ((streams_mask & (1ULL << subreport)) == 0) { | ||
| 635 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 636 | return index; | ||
| 637 | } | ||
| 638 | CloseCounter(); | ||
| 639 | auto [bank_slot, data_slot] = ProduceCounterBuffer(subreport); | ||
| 640 | new_query->start_bank_id = static_cast<u32>(bank_slot); | ||
| 641 | new_query->size_banks = 1; | ||
| 642 | new_query->start_slot = static_cast<u32>(data_slot); | ||
| 643 | new_query->size_slots = 1; | ||
| 644 | pending_sync.push_back(index); | ||
| 645 | pending_flush_queries.push_back(index); | ||
| 646 | return index; | ||
| 647 | } | ||
| 648 | |||
| 649 | bool HasUnsyncedQueries() override { | ||
| 650 | return !pending_flush_queries.empty(); | ||
| 651 | } | ||
| 652 | |||
| 653 | void PushUnsyncedQueries() override { | ||
| 654 | CloseCounter(); | ||
| 655 | auto staging_ref = staging_pool.Request( | ||
| 656 | pending_flush_queries.size() * TFBQueryBank::QUERY_SIZE, MemoryUsage::Download, true); | ||
| 657 | size_t offset_base = staging_ref.offset; | ||
| 658 | for (auto q : pending_flush_queries) { | ||
| 659 | auto* query = GetQuery(q); | ||
| 660 | auto& bank = bank_pool.GetBank(query->start_bank_id); | ||
| 661 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); | ||
| 662 | offset_base += TFBQueryBank::QUERY_SIZE; | ||
| 663 | bank.CloseReference(); | ||
| 664 | } | ||
| 665 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 666 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 667 | .pNext = nullptr, | ||
| 668 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 669 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 670 | }; | ||
| 671 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 672 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 673 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 674 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 675 | }); | ||
| 676 | |||
| 677 | std::scoped_lock lk(flush_guard); | ||
| 678 | for (auto& str : free_queue) { | ||
| 679 | staging_pool.FreeDeferred(str); | ||
| 680 | } | ||
| 681 | free_queue.clear(); | ||
| 682 | download_buffers.emplace_back(staging_ref); | ||
| 683 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | ||
| 684 | } | ||
| 685 | |||
| 686 | void PopUnsyncedQueries() override { | ||
| 687 | StagingBufferRef staging_ref; | ||
| 688 | std::vector<size_t> flushed_queries; | ||
| 689 | { | ||
| 690 | std::scoped_lock lk(flush_guard); | ||
| 691 | staging_ref = download_buffers.front(); | ||
| 692 | flushed_queries = std::move(pending_flush_sets.front()); | ||
| 693 | download_buffers.pop_front(); | ||
| 694 | pending_flush_sets.pop_front(); | ||
| 695 | } | ||
| 696 | |||
| 697 | size_t offset_base = staging_ref.offset; | ||
| 698 | for (auto q : flushed_queries) { | ||
| 699 | auto* query = GetQuery(q); | ||
| 700 | u32 result = 0; | ||
| 701 | std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32)); | ||
| 702 | query->value = static_cast<u64>(result); | ||
| 703 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 704 | offset_base += TFBQueryBank::QUERY_SIZE; | ||
| 705 | } | ||
| 706 | |||
| 707 | { | ||
| 708 | std::scoped_lock lk(flush_guard); | ||
| 709 | free_queue.emplace_back(staging_ref); | ||
| 710 | } | ||
| 711 | } | ||
| 712 | |||
| 713 | private: | ||
| 714 | void FlushBeginTFB() { | ||
| 715 | if (has_flushed_end_pending) [[unlikely]] { | ||
| 716 | return; | ||
| 717 | } | ||
| 718 | has_flushed_end_pending = true; | ||
| 719 | if (!has_started || buffers_count == 0) { | ||
| 720 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 721 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 722 | }); | ||
| 723 | UpdateBuffers(); | ||
| 724 | return; | ||
| 725 | } | ||
| 726 | scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | ||
| 727 | cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); | ||
| 728 | }); | ||
| 729 | UpdateBuffers(); | ||
| 730 | } | ||
| 731 | |||
| 732 | void FlushEndTFB() { | ||
| 733 | if (!has_flushed_end_pending) [[unlikely]] { | ||
| 734 | UNREACHABLE(); | ||
| 735 | return; | ||
| 736 | } | ||
| 737 | has_flushed_end_pending = false; | ||
| 738 | |||
| 739 | if (buffers_count == 0) { | ||
| 740 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 741 | cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 742 | }); | ||
| 743 | } else { | ||
| 744 | scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | ||
| 745 | cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); | ||
| 746 | }); | ||
| 747 | } | ||
| 748 | } | ||
| 749 | |||
| 750 | void UpdateBuffers() { | ||
| 751 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 752 | buffers_count = 0; | ||
| 753 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | ||
| 754 | i++) { | ||
| 755 | const auto& tf = regs.transform_feedback; | ||
| 756 | if (tf.buffers[i].enable == 0) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | const size_t stream = tf.controls[i].stream; | ||
| 760 | streams_mask |= 1ULL << stream; | ||
| 761 | buffers_count = std::max<size_t>(buffers_count, stream + 1); | ||
| 762 | } | ||
| 763 | }); | ||
| 764 | } | ||
| 765 | |||
| 766 | std::pair<size_t, size_t> ProduceCounterBuffer(size_t stream) { | ||
| 767 | if (current_bank == nullptr || current_bank->IsClosed()) { | ||
| 768 | current_bank_id = | ||
| 769 | bank_pool.ReserveBank([this](std::deque<TFBQueryBank>& queue, size_t index) { | ||
| 770 | queue.emplace_back(scheduler, memory_allocator, index); | ||
| 771 | }); | ||
| 772 | current_bank = &bank_pool.GetBank(current_bank_id); | ||
| 773 | } | ||
| 774 | auto [dont_care, slot] = current_bank->Reserve(); | ||
| 775 | current_bank->AddReference(); | ||
| 776 | |||
| 777 | static constexpr VkMemoryBarrier READ_BARRIER{ | ||
| 778 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 779 | .pNext = nullptr, | ||
| 780 | .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, | ||
| 781 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 782 | }; | ||
| 783 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 784 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 785 | .pNext = nullptr, | ||
| 786 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 787 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, | ||
| 788 | }; | ||
| 789 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 790 | scheduler.Record([dst_buffer = current_bank->GetBuffer(), | ||
| 791 | src_buffer = counter_buffers[stream], src_offset = offsets[stream], | ||
| 792 | slot](vk::CommandBuffer cmdbuf) { | ||
| 793 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, | ||
| 794 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); | ||
| 795 | std::array<VkBufferCopy, 1> copy{VkBufferCopy{ | ||
| 796 | .srcOffset = src_offset, | ||
| 797 | .dstOffset = slot * TFBQueryBank::QUERY_SIZE, | ||
| 798 | .size = TFBQueryBank::QUERY_SIZE, | ||
| 799 | }}; | ||
| 800 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||
| 801 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 802 | 0, WRITE_BARRIER); | ||
| 803 | }); | ||
| 804 | return {current_bank_id, slot}; | ||
| 805 | } | ||
| 806 | |||
| 807 | static constexpr size_t NUM_STREAMS = 4; | ||
| 808 | static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; | ||
| 809 | |||
| 810 | QueryCacheRuntime& runtime; | ||
| 811 | const Device& device; | ||
| 812 | Scheduler& scheduler; | ||
| 813 | const MemoryAllocator& memory_allocator; | ||
| 814 | StagingBufferPool& staging_pool; | ||
| 815 | VideoCommon::BankPool<TFBQueryBank> bank_pool; | ||
| 816 | size_t current_bank_id; | ||
| 817 | TFBQueryBank* current_bank; | ||
| 818 | vk::Buffer counters_buffer; | ||
| 819 | |||
| 820 | // syncing queue | ||
| 821 | std::vector<size_t> pending_sync; | ||
| 822 | |||
| 823 | // flush levels | ||
| 824 | std::vector<size_t> pending_flush_queries; | ||
| 825 | std::deque<StagingBufferRef> download_buffers; | ||
| 826 | std::deque<std::vector<size_t>> pending_flush_sets; | ||
| 827 | std::vector<StagingBufferRef> free_queue; | ||
| 828 | std::mutex flush_guard; | ||
| 829 | |||
| 830 | // state machine | ||
| 831 | bool has_started{}; | ||
| 832 | bool has_flushed_end_pending{}; | ||
| 833 | size_t buffers_count{}; | ||
| 834 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | ||
| 835 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | ||
| 836 | u64 streams_mask; | ||
| 837 | }; | ||
| 838 | |||
| 839 | } // namespace | ||
| 840 | |||
| 841 | struct QueryCacheRuntimeImpl { | ||
| 842 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | ||
| 843 | Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, | ||
| 844 | const Device& device_, const MemoryAllocator& memory_allocator_, | ||
| 845 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 846 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 847 | DescriptorPool& descriptor_pool) | ||
| 848 | : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, | ||
| 849 | buffer_cache{buffer_cache_}, device{device_}, | ||
| 850 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | ||
| 851 | guest_streamer(0, runtime), | ||
| 852 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device, | ||
| 853 | scheduler, memory_allocator), | ||
| 854 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | ||
| 855 | scheduler, memory_allocator, staging_pool), | ||
| 856 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { | ||
| 857 | |||
| 858 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; | ||
| 859 | hcr_setup.pNext = nullptr; | ||
| 860 | hcr_setup.flags = 0; | ||
| 861 | |||
| 862 | conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>( | ||
| 863 | device, scheduler, descriptor_pool, compute_pass_descriptor_queue); | ||
| 864 | |||
| 865 | const VkBufferCreateInfo buffer_ci = { | ||
| 866 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 867 | .pNext = nullptr, | ||
| 868 | .flags = 0, | ||
| 869 | .size = sizeof(u32), | ||
| 870 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | ||
| 871 | VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT, | ||
| 872 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 873 | .queueFamilyIndexCount = 0, | ||
| 874 | .pQueueFamilyIndices = nullptr, | ||
| 875 | }; | ||
| 876 | hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 877 | } | ||
| 26 | 878 | ||
| 27 | } // Anonymous namespace | 879 | VideoCore::RasterizerInterface* rasterizer; |
| 880 | Core::Memory::Memory& cpu_memory; | ||
| 881 | Vulkan::BufferCache& buffer_cache; | ||
| 28 | 882 | ||
| 29 | QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_) | 883 | const Device& device; |
| 30 | : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} | 884 | const MemoryAllocator& memory_allocator; |
| 885 | Scheduler& scheduler; | ||
| 886 | StagingBufferPool& staging_pool; | ||
| 31 | 887 | ||
| 32 | QueryPool::~QueryPool() = default; | 888 | // Streamers |
| 889 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; | ||
| 890 | SamplesStreamer<QueryCacheParams> sample_streamer; | ||
| 891 | TFBCounterStreamer<QueryCacheParams> tfb_streamer; | ||
| 33 | 892 | ||
| 34 | std::pair<VkQueryPool, u32> QueryPool::Commit() { | 893 | std::vector<std::pair<VAddr, VAddr>> little_cache; |
| 35 | std::size_t index; | 894 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| 36 | do { | 895 | std::vector<size_t> redirect_cache; |
| 37 | index = CommitResource(); | 896 | std::vector<std::vector<VkBufferCopy>> copies_setup; |
| 38 | } while (usage[index]); | ||
| 39 | usage[index] = true; | ||
| 40 | 897 | ||
| 41 | return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; | 898 | // Host conditional rendering data |
| 899 | std::unique_ptr<ConditionalRenderingResolvePass> conditional_resolve_pass; | ||
| 900 | vk::Buffer hcr_resolve_buffer; | ||
| 901 | VkConditionalRenderingBeginInfoEXT hcr_setup; | ||
| 902 | VkBuffer hcr_buffer; | ||
| 903 | size_t hcr_offset; | ||
| 904 | bool hcr_is_set; | ||
| 905 | bool is_hcr_running; | ||
| 906 | |||
| 907 | // maxwell3d | ||
| 908 | Tegra::Engines::Maxwell3D* maxwell3d; | ||
| 909 | }; | ||
| 910 | |||
| 911 | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||
| 912 | Core::Memory::Memory& cpu_memory_, | ||
| 913 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||
| 914 | const MemoryAllocator& memory_allocator_, | ||
| 915 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 916 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 917 | DescriptorPool& descriptor_pool) { | ||
| 918 | impl = std::make_unique<QueryCacheRuntimeImpl>( | ||
| 919 | *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | ||
| 920 | staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | ||
| 921 | } | ||
| 922 | |||
| 923 | void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { | ||
| 924 | impl->maxwell3d = maxwell3d; | ||
| 42 | } | 925 | } |
| 43 | 926 | ||
| 44 | void QueryPool::Allocate(std::size_t begin, std::size_t end) { | 927 | template <typename Func> |
| 45 | usage.resize(end); | 928 | void QueryCacheRuntime::View3DRegs(Func&& func) { |
| 929 | func(impl->maxwell3d->regs); | ||
| 930 | } | ||
| 46 | 931 | ||
| 47 | pools.push_back(device.GetLogical().CreateQueryPool({ | 932 | void QueryCacheRuntime::EndHostConditionalRendering() { |
| 48 | .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, | 933 | PauseHostConditionalRendering(); |
| 49 | .pNext = nullptr, | 934 | impl->hcr_is_set = false; |
| 50 | .flags = 0, | 935 | impl->is_hcr_running = false; |
| 51 | .queryType = GetTarget(type), | 936 | impl->hcr_buffer = nullptr; |
| 52 | .queryCount = static_cast<u32>(end - begin), | 937 | impl->hcr_offset = 0; |
| 53 | .pipelineStatistics = 0, | ||
| 54 | })); | ||
| 55 | } | 938 | } |
| 56 | 939 | ||
| 57 | void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { | 940 | void QueryCacheRuntime::PauseHostConditionalRendering() { |
| 58 | const auto it = | 941 | if (!impl->hcr_is_set) { |
| 59 | std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { | 942 | return; |
| 60 | return query_pool == *pool; | 943 | } |
| 944 | if (impl->is_hcr_running) { | ||
| 945 | impl->scheduler.Record( | ||
| 946 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndConditionalRenderingEXT(); }); | ||
| 947 | } | ||
| 948 | impl->is_hcr_running = false; | ||
| 949 | } | ||
| 950 | |||
| 951 | void QueryCacheRuntime::ResumeHostConditionalRendering() { | ||
| 952 | if (!impl->hcr_is_set) { | ||
| 953 | return; | ||
| 954 | } | ||
| 955 | if (!impl->is_hcr_running) { | ||
| 956 | impl->scheduler.Record([hcr_setup = impl->hcr_setup](vk::CommandBuffer cmdbuf) { | ||
| 957 | cmdbuf.BeginConditionalRenderingEXT(hcr_setup); | ||
| 61 | }); | 958 | }); |
| 959 | } | ||
| 960 | impl->is_hcr_running = true; | ||
| 961 | } | ||
| 62 | 962 | ||
| 63 | if (it != std::end(pools)) { | 963 | void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, |
| 64 | const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); | 964 | bool is_equal) { |
| 65 | usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; | 965 | { |
| 966 | std::scoped_lock lk(impl->buffer_cache.mutex); | ||
| 967 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | ||
| 968 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; | ||
| 969 | const auto [buffer, offset] = | ||
| 970 | impl->buffer_cache.ObtainCPUBuffer(object.address, 8, sync_info, post_op); | ||
| 971 | impl->hcr_buffer = buffer->Handle(); | ||
| 972 | impl->hcr_offset = offset; | ||
| 973 | } | ||
| 974 | if (impl->hcr_is_set) { | ||
| 975 | if (impl->hcr_setup.buffer == impl->hcr_buffer && | ||
| 976 | impl->hcr_setup.offset == impl->hcr_offset) { | ||
| 977 | ResumeHostConditionalRendering(); | ||
| 978 | return; | ||
| 979 | } | ||
| 980 | PauseHostConditionalRendering(); | ||
| 66 | } | 981 | } |
| 982 | impl->hcr_setup.buffer = impl->hcr_buffer; | ||
| 983 | impl->hcr_setup.offset = impl->hcr_offset; | ||
| 984 | impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0; | ||
| 985 | impl->hcr_is_set = true; | ||
| 986 | impl->is_hcr_running = false; | ||
| 987 | ResumeHostConditionalRendering(); | ||
| 67 | } | 988 | } |
| 68 | 989 | ||
| 69 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, | 990 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { |
| 70 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 991 | VkBuffer to_resolve; |
| 71 | Scheduler& scheduler_) | 992 | u32 to_resolve_offset; |
| 72 | : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_}, | 993 | { |
| 73 | query_pools{ | 994 | std::scoped_lock lk(impl->buffer_cache.mutex); |
| 74 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, | 995 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::NoSynchronize; |
| 75 | } {} | 996 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 76 | 997 | const auto [buffer, offset] = | |
| 77 | QueryCache::~QueryCache() { | 998 | impl->buffer_cache.ObtainCPUBuffer(address, 24, sync_info, post_op); |
| 78 | // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class | 999 | to_resolve = buffer->Handle(); |
| 79 | // destructor is called. The query cache should be redesigned to have a proper ownership model | 1000 | to_resolve_offset = static_cast<u32>(offset); |
| 80 | // instead of using shared pointers. | 1001 | } |
| 81 | for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) { | 1002 | if (impl->is_hcr_running) { |
| 82 | auto& stream = Stream(static_cast<QueryType>(query_type)); | 1003 | PauseHostConditionalRendering(); |
| 83 | stream.Update(false); | ||
| 84 | stream.Reset(); | ||
| 85 | } | 1004 | } |
| 1005 | impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve, | ||
| 1006 | to_resolve_offset, false); | ||
| 1007 | impl->hcr_setup.buffer = *impl->hcr_resolve_buffer; | ||
| 1008 | impl->hcr_setup.offset = 0; | ||
| 1009 | impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; | ||
| 1010 | impl->hcr_is_set = true; | ||
| 1011 | impl->is_hcr_running = false; | ||
| 1012 | ResumeHostConditionalRendering(); | ||
| 86 | } | 1013 | } |
| 87 | 1014 | ||
| 88 | std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) { | 1015 | bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, |
| 89 | return query_pools[static_cast<std::size_t>(type)].Commit(); | 1016 | [[maybe_unused]] bool qc_dirty) { |
| 1017 | if (!impl->device.IsExtConditionalRendering()) { | ||
| 1018 | return false; | ||
| 1019 | } | ||
| 1020 | HostConditionalRenderingCompareValueImpl(object_1, false); | ||
| 1021 | return true; | ||
| 90 | } | 1022 | } |
| 91 | 1023 | ||
| 92 | void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { | 1024 | bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
| 93 | query_pools[static_cast<std::size_t>(type)].Reserve(query); | 1025 | VideoCommon::LookupData object_2, |
| 1026 | bool qc_dirty, bool equal_check) { | ||
| 1027 | if (!impl->device.IsExtConditionalRendering()) { | ||
| 1028 | return false; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | const auto check_in_bc = [&](VAddr address) { | ||
| 1032 | return impl->buffer_cache.IsRegionGpuModified(address, 8); | ||
| 1033 | }; | ||
| 1034 | const auto check_value = [&](VAddr address) { | ||
| 1035 | u8* ptr = impl->cpu_memory.GetPointer(address); | ||
| 1036 | u64 value{}; | ||
| 1037 | std::memcpy(&value, ptr, sizeof(value)); | ||
| 1038 | return value == 0; | ||
| 1039 | }; | ||
| 1040 | std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2}; | ||
| 1041 | std::array<bool, 2> is_in_bc{}; | ||
| 1042 | std::array<bool, 2> is_in_qc{}; | ||
| 1043 | std::array<bool, 2> is_in_ac{}; | ||
| 1044 | std::array<bool, 2> is_null{}; | ||
| 1045 | { | ||
| 1046 | std::scoped_lock lk(impl->buffer_cache.mutex); | ||
| 1047 | for (size_t i = 0; i < 2; i++) { | ||
| 1048 | is_in_qc[i] = objects[i]->found_query != nullptr; | ||
| 1049 | is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address); | ||
| 1050 | is_in_ac[i] = is_in_qc[i] || is_in_bc[i]; | ||
| 1051 | } | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | if (!is_in_ac[0] && !is_in_ac[1]) { | ||
| 1055 | EndHostConditionalRendering(); | ||
| 1056 | return false; | ||
| 1057 | } | ||
| 1058 | |||
| 1059 | if (!qc_dirty && !is_in_bc[0] && !is_in_bc[1]) { | ||
| 1060 | EndHostConditionalRendering(); | ||
| 1061 | return false; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | for (size_t i = 0; i < 2; i++) { | ||
| 1065 | is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | for (size_t i = 0; i < 2; i++) { | ||
| 1069 | if (is_null[i]) { | ||
| 1070 | size_t j = (i + 1) % 2; | ||
| 1071 | HostConditionalRenderingCompareValueImpl(*objects[j], equal_check); | ||
| 1072 | return true; | ||
| 1073 | } | ||
| 1074 | } | ||
| 1075 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); | ||
| 1076 | return true; | ||
| 94 | } | 1077 | } |
| 95 | 1078 | ||
| 96 | HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | 1079 | QueryCacheRuntime::~QueryCacheRuntime() = default; |
| 97 | QueryType type_) | 1080 | |
| 98 | : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, | 1081 | VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryType query_type) { |
| 99 | query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { | 1082 | switch (query_type) { |
| 100 | const vk::Device* logical = &cache.GetDevice().GetLogical(); | 1083 | case QueryType::Payload: |
| 101 | cache.GetScheduler().Record([logical, query_ = query](vk::CommandBuffer cmdbuf) { | 1084 | return &impl->guest_streamer; |
| 102 | const bool use_precise = Settings::IsGPULevelHigh(); | 1085 | case QueryType::ZPassPixelCount64: |
| 103 | logical->ResetQueryPool(query_.first, query_.second, 1); | 1086 | return &impl->sample_streamer; |
| 104 | cmdbuf.BeginQuery(query_.first, query_.second, | 1087 | case QueryType::StreamingByteCount: |
| 105 | use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); | 1088 | return &impl->tfb_streamer; |
| 106 | }); | 1089 | default: |
| 1090 | return nullptr; | ||
| 1091 | } | ||
| 107 | } | 1092 | } |
| 108 | 1093 | ||
| 109 | HostCounter::~HostCounter() { | 1094 | void QueryCacheRuntime::Barriers(bool is_prebarrier) { |
| 110 | cache.Reserve(type, query); | 1095 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 1096 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1097 | .pNext = nullptr, | ||
| 1098 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1099 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 1100 | }; | ||
| 1101 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 1102 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1103 | .pNext = nullptr, | ||
| 1104 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 1105 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1106 | }; | ||
| 1107 | if (is_prebarrier) { | ||
| 1108 | impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 1109 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 1110 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); | ||
| 1111 | }); | ||
| 1112 | } else { | ||
| 1113 | impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 1114 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1115 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 1116 | }); | ||
| 1117 | } | ||
| 111 | } | 1118 | } |
| 112 | 1119 | ||
| 113 | void HostCounter::EndQuery() { | 1120 | template <typename SyncValuesType> |
| 114 | cache.GetScheduler().Record([query_ = query](vk::CommandBuffer cmdbuf) { | 1121 | void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer) { |
| 115 | cmdbuf.EndQuery(query_.first, query_.second); | 1122 | if (values.size() == 0) { |
| 1123 | return; | ||
| 1124 | } | ||
| 1125 | impl->redirect_cache.clear(); | ||
| 1126 | impl->little_cache.clear(); | ||
| 1127 | size_t total_size = 0; | ||
| 1128 | for (auto& sync_val : values) { | ||
| 1129 | total_size += sync_val.size; | ||
| 1130 | bool found = false; | ||
| 1131 | VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | ||
| 1132 | VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | ||
| 1133 | for (size_t i = 0; i < impl->little_cache.size(); i++) { | ||
| 1134 | const auto set_found = [&] { | ||
| 1135 | impl->redirect_cache.push_back(i); | ||
| 1136 | found = true; | ||
| 1137 | }; | ||
| 1138 | auto& loc = impl->little_cache[i]; | ||
| 1139 | if (base < loc.second && loc.first < base_end) { | ||
| 1140 | set_found(); | ||
| 1141 | break; | ||
| 1142 | } | ||
| 1143 | if (loc.first == base_end) { | ||
| 1144 | loc.first = base; | ||
| 1145 | set_found(); | ||
| 1146 | break; | ||
| 1147 | } | ||
| 1148 | if (loc.second == base) { | ||
| 1149 | loc.second = base_end; | ||
| 1150 | set_found(); | ||
| 1151 | break; | ||
| 1152 | } | ||
| 1153 | } | ||
| 1154 | if (!found) { | ||
| 1155 | impl->redirect_cache.push_back(impl->little_cache.size()); | ||
| 1156 | impl->little_cache.emplace_back(base, base_end); | ||
| 1157 | } | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | // Vulkan part. | ||
| 1161 | std::scoped_lock lk(impl->buffer_cache.mutex); | ||
| 1162 | impl->buffer_cache.BufferOperations([&] { | ||
| 1163 | impl->buffers_to_upload_to.clear(); | ||
| 1164 | for (auto& pair : impl->little_cache) { | ||
| 1165 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | ||
| 1166 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; | ||
| 1167 | const auto [buffer, offset] = impl->buffer_cache.ObtainCPUBuffer( | ||
| 1168 | pair.first, static_cast<u32>(pair.second - pair.first), sync_info, post_op); | ||
| 1169 | impl->buffers_to_upload_to.emplace_back(buffer->Handle(), offset); | ||
| 1170 | } | ||
| 116 | }); | 1171 | }); |
| 117 | } | ||
| 118 | 1172 | ||
| 119 | u64 HostCounter::BlockingQuery(bool async) const { | 1173 | VkBuffer src_buffer; |
| 120 | if (!async) { | 1174 | [[maybe_unused]] StagingBufferRef ref; |
| 121 | cache.GetScheduler().Wait(tick); | 1175 | impl->copies_setup.clear(); |
| 122 | } | 1176 | impl->copies_setup.resize(impl->little_cache.size()); |
| 123 | u64 data; | 1177 | if constexpr (SyncValuesType::GeneratesBaseBuffer) { |
| 124 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( | 1178 | ref = impl->staging_pool.Request(total_size, MemoryUsage::Upload); |
| 125 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 1179 | size_t current_offset = ref.offset; |
| 126 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | 1180 | size_t accumulated_size = 0; |
| 127 | 1181 | for (size_t i = 0; i < values.size(); i++) { | |
| 128 | switch (query_result) { | 1182 | size_t which_copy = impl->redirect_cache[i]; |
| 129 | case VK_SUCCESS: | 1183 | impl->copies_setup[which_copy].emplace_back(VkBufferCopy{ |
| 130 | return data; | 1184 | .srcOffset = current_offset + accumulated_size, |
| 131 | case VK_ERROR_DEVICE_LOST: | 1185 | .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address - |
| 132 | cache.GetDevice().ReportLoss(); | 1186 | impl->little_cache[which_copy].first, |
| 133 | [[fallthrough]]; | 1187 | .size = values[i].size, |
| 134 | default: | 1188 | }); |
| 135 | throw vk::Exception(query_result); | 1189 | std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value, |
| 1190 | values[i].size); | ||
| 1191 | accumulated_size += values[i].size; | ||
| 1192 | } | ||
| 1193 | src_buffer = ref.buffer; | ||
| 1194 | } else { | ||
| 1195 | for (size_t i = 0; i < values.size(); i++) { | ||
| 1196 | size_t which_copy = impl->redirect_cache[i]; | ||
| 1197 | impl->copies_setup[which_copy].emplace_back(VkBufferCopy{ | ||
| 1198 | .srcOffset = values[i].offset, | ||
| 1199 | .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address - | ||
| 1200 | impl->little_cache[which_copy].first, | ||
| 1201 | .size = values[i].size, | ||
| 1202 | }); | ||
| 1203 | } | ||
| 1204 | src_buffer = base_src_buffer; | ||
| 136 | } | 1205 | } |
| 1206 | |||
| 1207 | impl->scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 1208 | impl->scheduler.Record([src_buffer, dst_buffers = std::move(impl->buffers_to_upload_to), | ||
| 1209 | vk_copies = std::move(impl->copies_setup)](vk::CommandBuffer cmdbuf) { | ||
| 1210 | size_t size = dst_buffers.size(); | ||
| 1211 | for (size_t i = 0; i < size; i++) { | ||
| 1212 | cmdbuf.CopyBuffer(src_buffer, dst_buffers[i].first, vk_copies[i]); | ||
| 1213 | } | ||
| 1214 | }); | ||
| 137 | } | 1215 | } |
| 138 | 1216 | ||
| 139 | } // namespace Vulkan | 1217 | } // namespace Vulkan |
| 1218 | |||
| 1219 | namespace VideoCommon { | ||
| 1220 | |||
| 1221 | template class QueryCacheBase<Vulkan::QueryCacheParams>; | ||
| 1222 | |||
| 1223 | } // namespace VideoCommon | ||
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index c1b9552eb..9ad2929d7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -1,101 +1,74 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <cstddef> | ||
| 7 | #include <memory> | 6 | #include <memory> |
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | 7 | ||
| 11 | #include "common/common_types.h" | 8 | #include "video_core/query_cache/query_cache_base.h" |
| 12 | #include "video_core/query_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 13 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 15 | 10 | ||
| 16 | namespace VideoCore { | 11 | namespace VideoCore { |
| 17 | class RasterizerInterface; | 12 | class RasterizerInterface; |
| 18 | } | 13 | } |
| 19 | 14 | ||
| 15 | namespace VideoCommon { | ||
| 16 | class StreamerInterface; | ||
| 17 | } | ||
| 18 | |||
| 20 | namespace Vulkan { | 19 | namespace Vulkan { |
| 21 | 20 | ||
| 22 | class CachedQuery; | ||
| 23 | class Device; | 21 | class Device; |
| 24 | class HostCounter; | ||
| 25 | class QueryCache; | ||
| 26 | class Scheduler; | 22 | class Scheduler; |
| 23 | class StagingBufferPool; | ||
| 27 | 24 | ||
| 28 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | 25 | struct QueryCacheRuntimeImpl; |
| 29 | 26 | ||
| 30 | class QueryPool final : public ResourcePool { | 27 | class QueryCacheRuntime { |
| 31 | public: | 28 | public: |
| 32 | explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); | 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 33 | ~QueryPool() override; | 30 | Core::Memory::Memory& cpu_memory_, |
| 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||
| 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||
| 33 | StagingBufferPool& staging_pool_, | ||
| 34 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 35 | DescriptorPool& descriptor_pool); | ||
| 36 | ~QueryCacheRuntime(); | ||
| 34 | 37 | ||
| 35 | std::pair<VkQueryPool, u32> Commit(); | 38 | template <typename SyncValuesType> |
| 39 | void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr); | ||
| 36 | 40 | ||
| 37 | void Reserve(std::pair<VkQueryPool, u32> query); | 41 | void Barriers(bool is_prebarrier); |
| 38 | 42 | ||
| 39 | protected: | 43 | void EndHostConditionalRendering(); |
| 40 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 41 | 44 | ||
| 42 | private: | 45 | void PauseHostConditionalRendering(); |
| 43 | static constexpr std::size_t GROW_STEP = 512; | ||
| 44 | 46 | ||
| 45 | const Device& device; | 47 | void ResumeHostConditionalRendering(); |
| 46 | const VideoCore::QueryType type; | ||
| 47 | 48 | ||
| 48 | std::vector<vk::QueryPool> pools; | 49 | bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); |
| 49 | std::vector<bool> usage; | ||
| 50 | }; | ||
| 51 | 50 | ||
| 52 | class QueryCache final | 51 | bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
| 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 52 | VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); |
| 54 | public: | ||
| 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 56 | Core::Memory::Memory& cpu_memory_, const Device& device_, | ||
| 57 | Scheduler& scheduler_); | ||
| 58 | ~QueryCache(); | ||
| 59 | |||
| 60 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); | ||
| 61 | 53 | ||
| 62 | void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); | 54 | VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); |
| 63 | 55 | ||
| 64 | const Device& GetDevice() const noexcept { | 56 | void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d); |
| 65 | return device; | ||
| 66 | } | ||
| 67 | 57 | ||
| 68 | Scheduler& GetScheduler() const noexcept { | 58 | template <typename Func> |
| 69 | return scheduler; | 59 | void View3DRegs(Func&& func); |
| 70 | } | ||
| 71 | 60 | ||
| 72 | private: | 61 | private: |
| 73 | const Device& device; | 62 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |
| 74 | Scheduler& scheduler; | 63 | void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); |
| 75 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | 64 | friend struct QueryCacheRuntimeImpl; |
| 65 | std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||
| 76 | }; | 66 | }; |
| 77 | 67 | ||
| 78 | class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | 68 | struct QueryCacheParams { |
| 79 | public: | 69 | using RuntimeType = Vulkan::QueryCacheRuntime; |
| 80 | explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | ||
| 81 | VideoCore::QueryType type_); | ||
| 82 | ~HostCounter(); | ||
| 83 | |||
| 84 | void EndQuery(); | ||
| 85 | |||
| 86 | private: | ||
| 87 | u64 BlockingQuery(bool async = false) const override; | ||
| 88 | |||
| 89 | QueryCache& cache; | ||
| 90 | const VideoCore::QueryType type; | ||
| 91 | const std::pair<VkQueryPool, u32> query; | ||
| 92 | const u64 tick; | ||
| 93 | }; | 70 | }; |
| 94 | 71 | ||
| 95 | class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { | 72 | using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; |
| 96 | public: | ||
| 97 | explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) | ||
| 98 | : CachedQueryBase{cpu_addr_, host_ptr_} {} | ||
| 99 | }; | ||
| 100 | 73 | ||
| 101 | } // namespace Vulkan | 74 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 01e76a82c..e8862ba04 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 24 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 25 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 25 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 28 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 29 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 30 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 170 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 171 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 171 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | 172 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |
| 172 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 173 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 174 | query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | ||
| 175 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||
| 176 | query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | ||
| 173 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | 177 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, |
| 174 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | 178 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 175 | query_cache{*this, cpu_memory_, device, scheduler}, | ||
| 176 | accelerate_dma(buffer_cache, texture_cache, scheduler), | 179 | accelerate_dma(buffer_cache, texture_cache, scheduler), |
| 177 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 180 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 178 | wfi_event(device.GetLogical().CreateEvent()) { | 181 | wfi_event(device.GetLogical().CreateEvent()) { |
| @@ -189,13 +192,15 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 189 | FlushWork(); | 192 | FlushWork(); |
| 190 | gpu_memory->FlushCaching(); | 193 | gpu_memory->FlushCaching(); |
| 191 | 194 | ||
| 195 | query_cache.NotifySegment(true); | ||
| 196 | |||
| 192 | #if ANDROID | 197 | #if ANDROID |
| 193 | if (Settings::IsGPULevelHigh()) { | 198 | if (Settings::IsGPULevelHigh()) { |
| 194 | // This is problematic on Android, disable on GPU Normal. | 199 | // This is problematic on Android, disable on GPU Normal. |
| 195 | query_cache.UpdateCounters(); | 200 | // query_cache.UpdateCounters(); |
| 196 | } | 201 | } |
| 197 | #else | 202 | #else |
| 198 | query_cache.UpdateCounters(); | 203 | // query_cache.UpdateCounters(); |
| 199 | #endif | 204 | #endif |
| 200 | 205 | ||
| 201 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | 206 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |
| @@ -207,13 +212,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 207 | pipeline->SetEngine(maxwell3d, gpu_memory); | 212 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 208 | pipeline->Configure(is_indexed); | 213 | pipeline->Configure(is_indexed); |
| 209 | 214 | ||
| 210 | BeginTransformFeedback(); | ||
| 211 | |||
| 212 | UpdateDynamicStates(); | 215 | UpdateDynamicStates(); |
| 213 | 216 | ||
| 217 | HandleTransformFeedback(); | ||
| 218 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 219 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 214 | draw_func(); | 220 | draw_func(); |
| 215 | |||
| 216 | EndTransformFeedback(); | ||
| 217 | } | 221 | } |
| 218 | 222 | ||
| 219 | void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | 223 | void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { |
| @@ -241,6 +245,14 @@ void RasterizerVulkan::DrawIndirect() { | |||
| 241 | const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); | 245 | const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); |
| 242 | const auto& buffer = indirect_buffer.first; | 246 | const auto& buffer = indirect_buffer.first; |
| 243 | const auto& offset = indirect_buffer.second; | 247 | const auto& offset = indirect_buffer.second; |
| 248 | if (params.is_byte_count) { | ||
| 249 | scheduler.Record([buffer_obj = buffer->Handle(), offset, | ||
| 250 | stride = params.stride](vk::CommandBuffer cmdbuf) { | ||
| 251 | cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, | ||
| 252 | static_cast<u32>(stride)); | ||
| 253 | }); | ||
| 254 | return; | ||
| 255 | } | ||
| 244 | if (params.include_count) { | 256 | if (params.include_count) { |
| 245 | const auto count = buffer_cache.GetDrawIndirectCount(); | 257 | const auto count = buffer_cache.GetDrawIndirectCount(); |
| 246 | const auto& draw_buffer = count.first; | 258 | const auto& draw_buffer = count.first; |
| @@ -280,13 +292,15 @@ void RasterizerVulkan::DrawTexture() { | |||
| 280 | SCOPE_EXIT({ gpu.TickWork(); }); | 292 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 281 | FlushWork(); | 293 | FlushWork(); |
| 282 | 294 | ||
| 295 | query_cache.NotifySegment(true); | ||
| 296 | |||
| 283 | #if ANDROID | 297 | #if ANDROID |
| 284 | if (Settings::IsGPULevelHigh()) { | 298 | if (Settings::IsGPULevelHigh()) { |
| 285 | // This is problematic on Android, disable on GPU Normal. | 299 | // This is problematic on Android, disable on GPU Normal. |
| 286 | query_cache.UpdateCounters(); | 300 | // query_cache.UpdateCounters(); |
| 287 | } | 301 | } |
| 288 | #else | 302 | #else |
| 289 | query_cache.UpdateCounters(); | 303 | // query_cache.UpdateCounters(); |
| 290 | #endif | 304 | #endif |
| 291 | 305 | ||
| 292 | texture_cache.SynchronizeGraphicsDescriptors(); | 306 | texture_cache.SynchronizeGraphicsDescriptors(); |
| @@ -294,6 +308,8 @@ void RasterizerVulkan::DrawTexture() { | |||
| 294 | 308 | ||
| 295 | UpdateDynamicStates(); | 309 | UpdateDynamicStates(); |
| 296 | 310 | ||
| 311 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 312 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 297 | const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); | 313 | const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); |
| 298 | const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); | 314 | const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); |
| 299 | const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); | 315 | const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); |
| @@ -319,12 +335,16 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 319 | #if ANDROID | 335 | #if ANDROID |
| 320 | if (Settings::IsGPULevelHigh()) { | 336 | if (Settings::IsGPULevelHigh()) { |
| 321 | // This is problematic on Android, disable on GPU Normal. | 337 | // This is problematic on Android, disable on GPU Normal. |
| 322 | query_cache.UpdateCounters(); | 338 | // query_cache.UpdateCounters(); |
| 323 | } | 339 | } |
| 324 | #else | 340 | #else |
| 325 | query_cache.UpdateCounters(); | 341 | // query_cache.UpdateCounters(); |
| 326 | #endif | 342 | #endif |
| 327 | 343 | ||
| 344 | query_cache.NotifySegment(true); | ||
| 345 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 346 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 347 | |||
| 328 | auto& regs = maxwell3d->regs; | 348 | auto& regs = maxwell3d->regs; |
| 329 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | 349 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || |
| 330 | regs.clear_surface.A; | 350 | regs.clear_surface.A; |
| @@ -482,13 +502,13 @@ void RasterizerVulkan::DispatchCompute() { | |||
| 482 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | 502 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); |
| 483 | } | 503 | } |
| 484 | 504 | ||
| 485 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 505 | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { |
| 486 | query_cache.ResetCounter(type); | 506 | query_cache.CounterReset(type); |
| 487 | } | 507 | } |
| 488 | 508 | ||
| 489 | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | 509 | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 490 | std::optional<u64> timestamp) { | 510 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 491 | query_cache.Query(gpu_addr, type, timestamp); | 511 | query_cache.CounterReport(gpu_addr, type, flags, payload, subreport); |
| 492 | } | 512 | } |
| 493 | 513 | ||
| 494 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 514 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| @@ -669,8 +689,8 @@ void RasterizerVulkan::SignalReference() { | |||
| 669 | fence_manager.SignalReference(); | 689 | fence_manager.SignalReference(); |
| 670 | } | 690 | } |
| 671 | 691 | ||
| 672 | void RasterizerVulkan::ReleaseFences() { | 692 | void RasterizerVulkan::ReleaseFences(bool force) { |
| 673 | fence_manager.WaitPendingFences(); | 693 | fence_manager.WaitPendingFences(force); |
| 674 | } | 694 | } |
| 675 | 695 | ||
| 676 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | 696 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, |
| @@ -694,6 +714,8 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 694 | flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; | 714 | flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; |
| 695 | } | 715 | } |
| 696 | 716 | ||
| 717 | query_cache.NotifyWFI(); | ||
| 718 | |||
| 697 | scheduler.RequestOutsideRenderPassOperationContext(); | 719 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 698 | scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { | 720 | scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { |
| 699 | cmdbuf.SetEvent(event, flags); | 721 | cmdbuf.SetEvent(event, flags); |
| @@ -737,19 +759,7 @@ void RasterizerVulkan::TickFrame() { | |||
| 737 | 759 | ||
| 738 | bool RasterizerVulkan::AccelerateConditionalRendering() { | 760 | bool RasterizerVulkan::AccelerateConditionalRendering() { |
| 739 | gpu_memory->FlushCaching(); | 761 | gpu_memory->FlushCaching(); |
| 740 | if (Settings::IsGPULevelHigh()) { | 762 | return query_cache.AccelerateHostConditionalRendering(); |
| 741 | // TODO(Blinkhawk): Reimplement Host conditional rendering. | ||
| 742 | return false; | ||
| 743 | } | ||
| 744 | // Medium / Low Hack: stub any checks on queries written into the buffer cache. | ||
| 745 | const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; | ||
| 746 | Maxwell::ReportSemaphore::Compare cmp; | ||
| 747 | if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), | ||
| 748 | VideoCommon::CacheType::BufferCache | | ||
| 749 | VideoCommon::CacheType::QueryCache)) { | ||
| 750 | return true; | ||
| 751 | } | ||
| 752 | return false; | ||
| 753 | } | 763 | } |
| 754 | 764 | ||
| 755 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 765 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| @@ -795,6 +805,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 795 | if (!image_view) { | 805 | if (!image_view) { |
| 796 | return false; | 806 | return false; |
| 797 | } | 807 | } |
| 808 | query_cache.NotifySegment(false); | ||
| 798 | screen_info.image = image_view->ImageHandle(); | 809 | screen_info.image = image_view->ImageHandle(); |
| 799 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); | 810 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); |
| 800 | screen_info.width = image_view->size.width; | 811 | screen_info.width = image_view->size.width; |
| @@ -933,31 +944,18 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 933 | } | 944 | } |
| 934 | } | 945 | } |
| 935 | 946 | ||
| 936 | void RasterizerVulkan::BeginTransformFeedback() { | 947 | void RasterizerVulkan::HandleTransformFeedback() { |
| 937 | const auto& regs = maxwell3d->regs; | 948 | const auto& regs = maxwell3d->regs; |
| 938 | if (regs.transform_feedback_enabled == 0) { | ||
| 939 | return; | ||
| 940 | } | ||
| 941 | if (!device.IsExtTransformFeedbackSupported()) { | 949 | if (!device.IsExtTransformFeedbackSupported()) { |
| 942 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 950 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |
| 943 | return; | 951 | return; |
| 944 | } | 952 | } |
| 945 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | 953 | query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, |
| 946 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); | 954 | regs.transform_feedback_enabled); |
| 947 | scheduler.Record( | 955 | if (regs.transform_feedback_enabled != 0) { |
| 948 | [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 956 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || |
| 949 | } | 957 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); |
| 950 | |||
| 951 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 952 | const auto& regs = maxwell3d->regs; | ||
| 953 | if (regs.transform_feedback_enabled == 0) { | ||
| 954 | return; | ||
| 955 | } | ||
| 956 | if (!device.IsExtTransformFeedbackSupported()) { | ||
| 957 | return; | ||
| 958 | } | 958 | } |
| 959 | scheduler.Record( | ||
| 960 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||
| 961 | } | 959 | } |
| 962 | 960 | ||
| 963 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 961 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b31982485..ffd44c68d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -84,8 +84,8 @@ public: | |||
| 84 | void DrawTexture() override; | 84 | void DrawTexture() override; |
| 85 | void Clear(u32 layer_count) override; | 85 | void Clear(u32 layer_count) override; |
| 86 | void DispatchCompute() override; | 86 | void DispatchCompute() override; |
| 87 | void ResetCounter(VideoCore::QueryType type) override; | 87 | void ResetCounter(VideoCommon::QueryType type) override; |
| 88 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 88 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; |
| 89 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 89 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 90 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 90 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 91 | void FlushAll() override; | 91 | void FlushAll() override; |
| @@ -106,7 +106,7 @@ public: | |||
| 106 | void SyncOperation(std::function<void()>&& func) override; | 106 | void SyncOperation(std::function<void()>&& func) override; |
| 107 | void SignalSyncPoint(u32 value) override; | 107 | void SignalSyncPoint(u32 value) override; |
| 108 | void SignalReference() override; | 108 | void SignalReference() override; |
| 109 | void ReleaseFences() override; | 109 | void ReleaseFences(bool force = true) override; |
| 110 | void FlushAndInvalidateRegion( | 110 | void FlushAndInvalidateRegion( |
| 111 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 111 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 112 | void WaitForIdle() override; | 112 | void WaitForIdle() override; |
| @@ -146,9 +146,7 @@ private: | |||
| 146 | 146 | ||
| 147 | void UpdateDynamicStates(); | 147 | void UpdateDynamicStates(); |
| 148 | 148 | ||
| 149 | void BeginTransformFeedback(); | 149 | void HandleTransformFeedback(); |
| 150 | |||
| 151 | void EndTransformFeedback(); | ||
| 152 | 150 | ||
| 153 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 151 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 154 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 152 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -195,8 +193,9 @@ private: | |||
| 195 | TextureCache texture_cache; | 193 | TextureCache texture_cache; |
| 196 | BufferCacheRuntime buffer_cache_runtime; | 194 | BufferCacheRuntime buffer_cache_runtime; |
| 197 | BufferCache buffer_cache; | 195 | BufferCache buffer_cache; |
| 198 | PipelineCache pipeline_cache; | 196 | QueryCacheRuntime query_cache_runtime; |
| 199 | QueryCache query_cache; | 197 | QueryCache query_cache; |
| 198 | PipelineCache pipeline_cache; | ||
| 200 | AccelerateDMA accelerate_dma; | 199 | AccelerateDMA accelerate_dma; |
| 201 | FenceManager fence_manager; | 200 | FenceManager fence_manager; |
| 202 | 201 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 89fd31b4f..3be7837f4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() { | |||
| 243 | #if ANDROID | 243 | #if ANDROID |
| 244 | if (Settings::IsGPULevelHigh()) { | 244 | if (Settings::IsGPULevelHigh()) { |
| 245 | // This is problematic on Android, disable on GPU Normal. | 245 | // This is problematic on Android, disable on GPU Normal. |
| 246 | query_cache->UpdateCounters(); | 246 | query_cache->NotifySegment(true); |
| 247 | } | 247 | } |
| 248 | #else | 248 | #else |
| 249 | query_cache->UpdateCounters(); | 249 | query_cache->NotifySegment(true); |
| 250 | #endif | 250 | #endif |
| 251 | } | 251 | } |
| 252 | } | 252 | } |
| @@ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() { | |||
| 261 | #if ANDROID | 261 | #if ANDROID |
| 262 | if (Settings::IsGPULevelHigh()) { | 262 | if (Settings::IsGPULevelHigh()) { |
| 263 | // This is problematic on Android, disable on GPU Normal. | 263 | // This is problematic on Android, disable on GPU Normal. |
| 264 | query_cache->DisableStreams(); | 264 | // query_cache->DisableStreams(); |
| 265 | } | 265 | } |
| 266 | #else | 266 | #else |
| 267 | query_cache->DisableStreams(); | 267 | // query_cache->DisableStreams(); |
| 268 | #endif | 268 | #endif |
| 269 | query_cache->NotifySegment(false); | ||
| 269 | EndRenderPass(); | 270 | EndRenderPass(); |
| 270 | } | 271 | } |
| 271 | 272 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 475c682eb..c87e5fb07 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/polyfill_thread.h" | 16 | #include "common/polyfill_thread.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 18 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -24,7 +25,6 @@ class Device; | |||
| 24 | class Framebuffer; | 25 | class Framebuffer; |
| 25 | class GraphicsPipeline; | 26 | class GraphicsPipeline; |
| 26 | class StateTracker; | 27 | class StateTracker; |
| 27 | class QueryCache; | ||
| 28 | 28 | ||
| 29 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 29 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 30 | /// OpenGL-like operations on Vulkan command buffers. | 30 | /// OpenGL-like operations on Vulkan command buffers. |