summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/settings.cpp10
-rw-r--r--src/common/settings.h2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h19
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h12
-rw-r--r--src/video_core/control/channel_state_cache.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp74
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/engines/maxwell_dma.cpp12
-rw-r--r--src/video_core/engines/puller.cpp11
-rw-r--r--src/video_core/fence_manager.h23
-rw-r--r--src/video_core/gpu.cpp4
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/resolve_conditional_render.comp20
-rw-r--r--src/video_core/macro/macro_hle.cpp24
-rw-r--r--src/video_core/query_cache.h13
-rw-r--r--src/video_core/rasterizer_interface.h12
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp18
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h13
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp1264
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h105
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp98
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h13
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp9
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h6
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp3
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h19
35 files changed, 1555 insertions, 337 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 4ecaf550b..3fde3cae6 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -130,13 +130,17 @@ void LogSettings() {
130 log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); 130 log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir));
131} 131}
132 132
133void UpdateGPUAccuracy() {
134 values.current_gpu_accuracy = values.gpu_accuracy.GetValue();
135}
136
133bool IsGPULevelExtreme() { 137bool IsGPULevelExtreme() {
134 return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme; 138 return values.current_gpu_accuracy == GpuAccuracy::Extreme;
135} 139}
136 140
137bool IsGPULevelHigh() { 141bool IsGPULevelHigh() {
138 return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme || 142 return values.current_gpu_accuracy == GpuAccuracy::Extreme ||
139 values.gpu_accuracy.GetValue() == GpuAccuracy::High; 143 values.current_gpu_accuracy == GpuAccuracy::High;
140} 144}
141 145
142bool IsFastmemEnabled() { 146bool IsFastmemEnabled() {
diff --git a/src/common/settings.h b/src/common/settings.h
index 82ec9077e..ae5e5d2b8 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -307,6 +307,7 @@ struct Values {
307 Specialization::Default, 307 Specialization::Default,
308 true, 308 true,
309 true}; 309 true};
310 GpuAccuracy current_gpu_accuracy{GpuAccuracy::High};
310 SwitchableSetting<AnisotropyMode, true> max_anisotropy{ 311 SwitchableSetting<AnisotropyMode, true> max_anisotropy{
311 linkage, AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, 312 linkage, AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16,
312 "max_anisotropy", Category::RendererAdvanced}; 313 "max_anisotropy", Category::RendererAdvanced};
@@ -522,6 +523,7 @@ struct Values {
522 523
523extern Values values; 524extern Values values;
524 525
526void UpdateGPUAccuracy();
525bool IsGPULevelExtreme(); 527bool IsGPULevelExtreme();
526bool IsGPULevelHigh(); 528bool IsGPULevelHigh();
527 529
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 8be7bd594..f91b7d1e4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -272,13 +272,20 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
272 if (!cpu_addr) { 272 if (!cpu_addr) {
273 return {&slot_buffers[NULL_BUFFER_ID], 0}; 273 return {&slot_buffers[NULL_BUFFER_ID], 0};
274 } 274 }
275 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 275 return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op);
276}
277
278template <class P>
279std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size,
280 ObtainBufferSynchronize sync_info,
281 ObtainBufferOperation post_op) {
282 const BufferId buffer_id = FindBuffer(cpu_addr, size);
276 Buffer& buffer = slot_buffers[buffer_id]; 283 Buffer& buffer = slot_buffers[buffer_id];
277 284
278 // synchronize op 285 // synchronize op
279 switch (sync_info) { 286 switch (sync_info) {
280 case ObtainBufferSynchronize::FullSynchronize: 287 case ObtainBufferSynchronize::FullSynchronize:
281 SynchronizeBuffer(buffer, *cpu_addr, size); 288 SynchronizeBuffer(buffer, cpu_addr, size);
282 break; 289 break;
283 default: 290 default:
284 break; 291 break;
@@ -286,11 +293,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
286 293
287 switch (post_op) { 294 switch (post_op) {
288 case ObtainBufferOperation::MarkAsWritten: 295 case ObtainBufferOperation::MarkAsWritten:
289 MarkWrittenBuffer(buffer_id, *cpu_addr, size); 296 MarkWrittenBuffer(buffer_id, cpu_addr, size);
290 break; 297 break;
291 case ObtainBufferOperation::DiscardWrite: { 298 case ObtainBufferOperation::DiscardWrite: {
292 VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); 299 VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64);
293 VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); 300 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64);
294 IntervalType interval{cpu_addr_start, cpu_addr_end}; 301 IntervalType interval{cpu_addr_start, cpu_addr_end};
295 ClearDownload(interval); 302 ClearDownload(interval);
296 common_ranges.subtract(interval); 303 common_ranges.subtract(interval);
@@ -300,7 +307,7 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
300 break; 307 break;
301 } 308 }
302 309
303 return {&buffer, buffer.Offset(*cpu_addr)}; 310 return {&buffer, buffer.Offset(cpu_addr)};
304} 311}
305 312
306template <class P> 313template <class P>
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 0b7135d49..9507071e5 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -295,6 +295,10 @@ public:
295 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, 295 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
296 ObtainBufferSynchronize sync_info, 296 ObtainBufferSynchronize sync_info,
297 ObtainBufferOperation post_op); 297 ObtainBufferOperation post_op);
298
299 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size,
300 ObtainBufferSynchronize sync_info,
301 ObtainBufferOperation post_op);
298 void FlushCachedWrites(); 302 void FlushCachedWrites();
299 303
300 /// Return true when there are uncommitted buffers to be downloaded 304 /// Return true when there are uncommitted buffers to be downloaded
@@ -335,6 +339,14 @@ public:
335 339
336 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); 340 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
337 341
342 template <typename Func>
343 void BufferOperations(Func&& func) {
344 do {
345 channel_state->has_deleted_buffers = false;
346 func();
347 } while (channel_state->has_deleted_buffers);
348 }
349
338 std::recursive_mutex mutex; 350 std::recursive_mutex mutex;
339 Runtime& runtime; 351 Runtime& runtime;
340 352
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
index 46bc9e322..5574e1fba 100644
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -51,7 +51,7 @@ public:
51 virtual void CreateChannel(Tegra::Control::ChannelState& channel); 51 virtual void CreateChannel(Tegra::Control::ChannelState& channel);
52 52
53 /// Bind a channel for execution. 53 /// Bind a channel for execution.
54 void BindToChannel(s32 id); 54 virtual void BindToChannel(s32 id);
55 55
56 /// Erase channel's state. 56 /// Erase channel's state.
57 void EraseChannel(s32 id); 57 void EraseChannel(s32 id);
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 06e349e43..922c399e6 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,6 @@
20 20
21namespace Tegra::Engines { 21namespace Tegra::Engines {
22 22
23using VideoCore::QueryType;
24
25/// First register id that is actually a Macro call. 23/// First register id that is actually a Macro call.
26constexpr u32 MacroRegistersStart = 0xE00; 24constexpr u32 MacroRegistersStart = 0xE00;
27 25
@@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
500} 498}
501 499
502void Maxwell3D::ProcessQueryGet() { 500void Maxwell3D::ProcessQueryGet() {
501 VideoCommon::QueryPropertiesFlags flags{};
502 if (regs.report_semaphore.query.short_query == 0) {
503 flags |= VideoCommon::QueryPropertiesFlags::HasTimeout;
504 }
505 const GPUVAddr sequence_address{regs.report_semaphore.Address()};
506 const VideoCommon::QueryType query_type =
507 static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value());
508 const u32 payload = regs.report_semaphore.payload;
509 const u32 subreport = regs.report_semaphore.query.sub_report;
503 switch (regs.report_semaphore.query.operation) { 510 switch (regs.report_semaphore.query.operation) {
504 case Regs::ReportSemaphore::Operation::Release: 511 case Regs::ReportSemaphore::Operation::Release:
505 if (regs.report_semaphore.query.short_query != 0) { 512 if (regs.report_semaphore.query.short_query != 0) {
506 const GPUVAddr sequence_address{regs.report_semaphore.Address()}; 513 flags |= VideoCommon::QueryPropertiesFlags::IsAFence;
507 const u32 payload = regs.report_semaphore.payload;
508 std::function<void()> operation([this, sequence_address, payload] {
509 memory_manager.Write<u32>(sequence_address, payload);
510 });
511 rasterizer->SignalFence(std::move(operation));
512 } else {
513 struct LongQueryResult {
514 u64_le value;
515 u64_le timestamp;
516 };
517 const GPUVAddr sequence_address{regs.report_semaphore.Address()};
518 const u32 payload = regs.report_semaphore.payload;
519 [this, sequence_address, payload] {
520 memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks());
521 memory_manager.Write<u64>(sequence_address, payload);
522 }();
523 } 514 }
515 rasterizer->Query(sequence_address, query_type, flags, payload, subreport);
524 break; 516 break;
525 case Regs::ReportSemaphore::Operation::Acquire: 517 case Regs::ReportSemaphore::Operation::Acquire:
526 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that 518 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() {
528 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); 520 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
529 break; 521 break;
530 case Regs::ReportSemaphore::Operation::ReportOnly: 522 case Regs::ReportSemaphore::Operation::ReportOnly:
531 if (const std::optional<u64> result = GetQueryResult()) { 523 rasterizer->Query(sequence_address, query_type, flags, payload, subreport);
532 // If the query returns an empty optional it means it's cached and deferred.
533 // In this case we have a non-empty result, so we stamp it immediately.
534 StampQueryResult(*result, regs.report_semaphore.query.short_query == 0);
535 }
536 break; 524 break;
537 case Regs::ReportSemaphore::Operation::Trap: 525 case Regs::ReportSemaphore::Operation::Trap:
538 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); 526 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
@@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() {
544} 532}
545 533
546void Maxwell3D::ProcessQueryCondition() { 534void Maxwell3D::ProcessQueryCondition() {
535 if (rasterizer->AccelerateConditionalRendering()) {
536 execute_on = true;
537 return;
538 }
547 const GPUVAddr condition_address{regs.render_enable.Address()}; 539 const GPUVAddr condition_address{regs.render_enable.Address()};
548 switch (regs.render_enable_override) { 540 switch (regs.render_enable_override) {
549 case Regs::RenderEnable::Override::AlwaysRender: 541 case Regs::RenderEnable::Override::AlwaysRender:
@@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() {
553 execute_on = false; 545 execute_on = false;
554 break; 546 break;
555 case Regs::RenderEnable::Override::UseRenderEnable: { 547 case Regs::RenderEnable::Override::UseRenderEnable: {
556 if (rasterizer->AccelerateConditionalRendering()) {
557 execute_on = true;
558 return;
559 }
560 switch (regs.render_enable.mode) { 548 switch (regs.render_enable.mode) {
561 case Regs::RenderEnable::Mode::True: { 549 case Regs::RenderEnable::Mode::True: {
562 execute_on = true; 550 execute_on = true;
@@ -606,7 +594,13 @@ void Maxwell3D::ProcessCounterReset() {
606#endif 594#endif
607 switch (regs.clear_report_value) { 595 switch (regs.clear_report_value) {
608 case Regs::ClearReport::ZPassPixelCount: 596 case Regs::ClearReport::ZPassPixelCount:
609 rasterizer->ResetCounter(QueryType::SamplesPassed); 597 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
598 break;
599 case Regs::ClearReport::PrimitivesGenerated:
600 rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
601 break;
602 case Regs::ClearReport::VtgPrimitivesOut:
603 rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
610 break; 604 break;
611 default: 605 default:
612 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); 606 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
@@ -620,28 +614,6 @@ void Maxwell3D::ProcessSyncPoint() {
620 rasterizer->SignalSyncPoint(sync_point); 614 rasterizer->SignalSyncPoint(sync_point);
621} 615}
622 616
623std::optional<u64> Maxwell3D::GetQueryResult() {
624 switch (regs.report_semaphore.query.report) {
625 case Regs::ReportSemaphore::Report::Payload:
626 return regs.report_semaphore.payload;
627 case Regs::ReportSemaphore::Report::ZPassPixelCount64:
628#if ANDROID
629 if (!Settings::IsGPULevelHigh()) {
630 // This is problematic on Android, disable on GPU Normal.
631 return 120;
632 }
633#endif
634 // Deferred.
635 rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed,
636 system.GPU().GetTicks());
637 return std::nullopt;
638 default:
639 LOG_DEBUG(HW_GPU, "Unimplemented query report type {}",
640 regs.report_semaphore.query.report.Value());
641 return 1;
642 }
643}
644
645void Maxwell3D::ProcessCBBind(size_t stage_index) { 617void Maxwell3D::ProcessCBBind(size_t stage_index) {
646 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader 618 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
647 // stage. 619 // stage.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 6c19354e1..17faacc37 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3182,9 +3182,6 @@ private:
3182 /// Handles writes to syncing register. 3182 /// Handles writes to syncing register.
3183 void ProcessSyncPoint(); 3183 void ProcessSyncPoint();
3184 3184
3185 /// Returns a query's value or an empty object if the value will be deferred through a cache.
3186 std::optional<u64> GetQueryResult();
3187
3188 void RefreshParametersImpl(); 3185 void RefreshParametersImpl();
3189 3186
3190 bool IsMethodExecutable(u32 method); 3187 bool IsMethodExecutable(u32 method);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 279f0daa1..422d4d859 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() {
362 const auto type = regs.launch_dma.semaphore_type; 362 const auto type = regs.launch_dma.semaphore_type;
363 const GPUVAddr address = regs.semaphore.address; 363 const GPUVAddr address = regs.semaphore.address;
364 const u32 payload = regs.semaphore.payload; 364 const u32 payload = regs.semaphore.payload;
365 VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence};
365 switch (type) { 366 switch (type) {
366 case LaunchDMA::SemaphoreType::NONE: 367 case LaunchDMA::SemaphoreType::NONE:
367 break; 368 break;
368 case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { 369 case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
369 std::function<void()> operation( 370 rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0);
370 [this, address, payload] { memory_manager.Write<u32>(address, payload); });
371 rasterizer->SignalFence(std::move(operation));
372 break; 371 break;
373 } 372 }
374 case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { 373 case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
375 std::function<void()> operation([this, address, payload] { 374 rasterizer->Query(address, VideoCommon::QueryType::Payload,
376 memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); 375 flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
377 memory_manager.Write<u64>(address, payload);
378 });
379 rasterizer->SignalFence(std::move(operation));
380 break; 376 break;
381 } 377 }
382 default: 378 default:
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 6de2543b7..582738234 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -82,10 +82,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
82 if (op == GpuSemaphoreOperation::WriteLong) { 82 if (op == GpuSemaphoreOperation::WriteLong) {
83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
84 const u32 payload = regs.semaphore_sequence; 84 const u32 payload = regs.semaphore_sequence;
85 [this, sequence_address, payload] { 85 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
86 memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks());
87 memory_manager.Write<u64>(sequence_address, payload);
88 }();
89 } else { 86 } else {
90 do { 87 do {
91 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; 88 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@@ -120,10 +117,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
120void Puller::ProcessSemaphoreRelease() { 117void Puller::ProcessSemaphoreRelease() {
121 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 118 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
122 const u32 payload = regs.semaphore_release; 119 const u32 payload = regs.semaphore_release;
123 std::function<void()> operation([this, sequence_address, payload] { 120 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
124 memory_manager.Write<u32>(sequence_address, payload);
125 });
126 rasterizer->SignalFence(std::move(operation));
127} 121}
128 122
129void Puller::ProcessSemaphoreAcquire() { 123void Puller::ProcessSemaphoreAcquire() {
@@ -132,7 +126,6 @@ void Puller::ProcessSemaphoreAcquire() {
132 while (word != value) { 126 while (word != value) {
133 regs.acquire_active = true; 127 regs.acquire_active = true;
134 regs.acquire_value = value; 128 regs.acquire_value = value;
135 std::this_thread::sleep_for(std::chrono::milliseconds(1));
136 rasterizer->ReleaseFences(); 129 rasterizer->ReleaseFences();
137 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); 130 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
138 // TODO(kemathe73) figure out how to do the acquire_timeout 131 // TODO(kemathe73) figure out how to do the acquire_timeout
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index ab20ff30f..8459a3092 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -104,9 +104,28 @@ public:
104 SignalFence(std::move(func)); 104 SignalFence(std::move(func));
105 } 105 }
106 106
107 void WaitPendingFences() { 107 void WaitPendingFences(bool force) {
108 if constexpr (!can_async_check) { 108 if constexpr (!can_async_check) {
109 TryReleasePendingFences<true>(); 109 if (force) {
110 TryReleasePendingFences<true>();
111 } else {
112 TryReleasePendingFences<false>();
113 }
114 } else {
115 if (!force) {
116 return;
117 }
118 std::mutex wait_mutex;
119 std::condition_variable wait_cv;
120 std::atomic<bool> wait_finished{};
121 std::function<void()> func([&] {
122 std::scoped_lock lk(wait_mutex);
123 wait_finished.store(true, std::memory_order_relaxed);
124 wait_cv.notify_all();
125 });
126 SignalFence(std::move(func));
127 std::unique_lock lk(wait_mutex);
128 wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
110 } 129 }
111 } 130 }
112 131
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c192e33b2..11549d448 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -102,7 +102,8 @@ struct GPU::Impl {
102 102
103 /// Signal the ending of command list. 103 /// Signal the ending of command list.
104 void OnCommandListEnd() { 104 void OnCommandListEnd() {
105 rasterizer->ReleaseFences(); 105 rasterizer->ReleaseFences(false);
106 Settings::UpdateGPUAccuracy();
106 } 107 }
107 108
108 /// Request a host GPU memory flush from the CPU. 109 /// Request a host GPU memory flush from the CPU.
@@ -220,6 +221,7 @@ struct GPU::Impl {
220 /// This can be used to launch any necessary threads and register any necessary 221 /// This can be used to launch any necessary threads and register any necessary
221 /// core timing events. 222 /// core timing events.
222 void Start() { 223 void Start() {
224 Settings::UpdateGPUAccuracy();
223 gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); 225 gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
224 } 226 }
225 227
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index c4d459077..fb24b6532 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -41,6 +41,7 @@ set(SHADER_FILES
41 pitch_unswizzle.comp 41 pitch_unswizzle.comp
42 present_bicubic.frag 42 present_bicubic.frag
43 present_gaussian.frag 43 present_gaussian.frag
44 resolve_conditional_render.comp
44 smaa_edge_detection.vert 45 smaa_edge_detection.vert
45 smaa_edge_detection.frag 46 smaa_edge_detection.frag
46 smaa_blending_weight_calculation.vert 47 smaa_blending_weight_calculation.vert
diff --git a/src/video_core/host_shaders/resolve_conditional_render.comp b/src/video_core/host_shaders/resolve_conditional_render.comp
new file mode 100644
index 000000000..307e77d1a
--- /dev/null
+++ b/src/video_core/host_shaders/resolve_conditional_render.comp
@@ -0,0 +1,20 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#version 450
5
6layout(local_size_x = 1) in;
7
8layout(std430, binding = 0) buffer Query {
9 uvec2 initial;
10 uvec2 unknown;
11 uvec2 current;
12};
13
14layout(std430, binding = 1) buffer Result {
15 uint result;
16};
17
18void main() {
19 result = all(equal(initial, current)) ? 1 : 0;
20}
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 6272a4652..e980af171 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -319,6 +319,25 @@ private:
319 } 319 }
320}; 320};
321 321
322class HLE_DrawIndirectByteCount final : public HLEMacroImpl {
323public:
324 explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
325
326 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
327 maxwell3d.RefreshParameters();
328
329 maxwell3d.regs.draw.begin = parameters[0];
330 maxwell3d.regs.draw_auto_stride = parameters[1];
331 maxwell3d.regs.draw_auto_byte_count = parameters[2];
332
333 if (maxwell3d.ShouldExecute()) {
334 maxwell3d.draw_manager->DrawArray(
335 maxwell3d.regs.draw.topology, 0,
336 maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1);
337 }
338 }
339};
340
322class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { 341class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
323public: 342public:
324 explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} 343 explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
@@ -536,6 +555,11 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
536 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 555 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
537 return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); 556 return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__);
538 })); 557 }));
558 builders.emplace(0xB5F74EDB717278ECULL,
559 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
560 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
561 return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__);
562 }));
539} 563}
540 564
541HLEMacro::~HLEMacro() = default; 565HLEMacro::~HLEMacro() = default;
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 7047e2e63..9fcaeeac7 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -25,6 +25,13 @@
25#include "video_core/rasterizer_interface.h" 25#include "video_core/rasterizer_interface.h"
26#include "video_core/texture_cache/slot_vector.h" 26#include "video_core/texture_cache/slot_vector.h"
27 27
28namespace VideoCore {
29enum class QueryType {
30 SamplesPassed,
31};
32constexpr std::size_t NumQueryTypes = 1;
33} // namespace VideoCore
34
28namespace VideoCommon { 35namespace VideoCommon {
29 36
30using AsyncJobId = SlotId; 37using AsyncJobId = SlotId;
@@ -98,10 +105,10 @@ private:
98}; 105};
99 106
100template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> 107template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
101class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 108class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
102public: 109public:
103 explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, 110 explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_,
104 Core::Memory::Memory& cpu_memory_) 111 Core::Memory::Memory& cpu_memory_)
105 : rasterizer{rasterizer_}, 112 : rasterizer{rasterizer_},
106 // Use reinterpret_cast instead of static_cast as workaround for 113 // Use reinterpret_cast instead of static_cast as workaround for
107 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) 114 // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index cb8029a4f..2ba7cbb0d 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
9#include <utility> 9#include <utility>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/polyfill_thread.h" 11#include "common/polyfill_thread.h"
12#include "video_core/query_cache/types.h"
12#include "video_core/cache_types.h" 13#include "video_core/cache_types.h"
13#include "video_core/engines/fermi_2d.h" 14#include "video_core/engines/fermi_2d.h"
14#include "video_core/gpu.h" 15#include "video_core/gpu.h"
@@ -26,11 +27,6 @@ struct ChannelState;
26 27
27namespace VideoCore { 28namespace VideoCore {
28 29
29enum class QueryType {
30 SamplesPassed,
31};
32constexpr std::size_t NumQueryTypes = 1;
33
34enum class LoadCallbackStage { 30enum class LoadCallbackStage {
35 Prepare, 31 Prepare,
36 Build, 32 Build,
@@ -58,10 +54,10 @@ public:
58 virtual void DispatchCompute() = 0; 54 virtual void DispatchCompute() = 0;
59 55
60 /// Resets the counter of a query 56 /// Resets the counter of a query
61 virtual void ResetCounter(QueryType type) = 0; 57 virtual void ResetCounter(VideoCommon::QueryType type) = 0;
62 58
63 /// Records a GPU query and caches it 59 /// Records a GPU query and caches it
64 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 60 virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
65 61
66 /// Signal an uniform buffer binding 62 /// Signal an uniform buffer binding
67 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 63 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
@@ -83,7 +79,7 @@ public:
83 virtual void SignalReference() = 0; 79 virtual void SignalReference() = 0;
84 80
85 /// Release all pending fences. 81 /// Release all pending fences.
86 virtual void ReleaseFences() = 0; 82 virtual void ReleaseFences(bool force = true) = 0;
87 83
88 /// Notify rasterizer that all caches should be flushed to Switch memory 84 /// Notify rasterizer that all caches should be flushed to Switch memory
89 virtual void FlushAll() = 0; 85 virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 92ecf6682..65cd5aa06 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -26,16 +26,18 @@ void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
26void RasterizerNull::DrawTexture() {} 26void RasterizerNull::DrawTexture() {}
27void RasterizerNull::Clear(u32 layer_count) {} 27void RasterizerNull::Clear(u32 layer_count) {}
28void RasterizerNull::DispatchCompute() {} 28void RasterizerNull::DispatchCompute() {}
29void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} 29void RasterizerNull::ResetCounter(VideoCommon::QueryType type) {}
30void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, 30void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
31 std::optional<u64> timestamp) { 31 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
32 if (!gpu_memory) { 32 if (!gpu_memory) {
33 return; 33 return;
34 } 34 }
35 35 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
36 gpu_memory->Write(gpu_addr, u64{0}); 36 u64 ticks = m_gpu.GetTicks();
37 if (timestamp) { 37 gpu_memory->Write<u64>(gpu_addr + 8, ticks);
38 gpu_memory->Write(gpu_addr + 8, *timestamp); 38 gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload));
39 } else {
40 gpu_memory->Write<u32>(gpu_addr, payload);
39 } 41 }
40} 42}
41void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 43void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
@@ -74,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) {
74 syncpoint_manager.IncrementHost(value); 76 syncpoint_manager.IncrementHost(value);
75} 77}
76void RasterizerNull::SignalReference() {} 78void RasterizerNull::SignalReference() {}
77void RasterizerNull::ReleaseFences() {} 79void RasterizerNull::ReleaseFences(bool) {}
78void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} 80void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
79void RasterizerNull::WaitForIdle() {} 81void RasterizerNull::WaitForIdle() {}
80void RasterizerNull::FragmentBarrier() {} 82void RasterizerNull::FragmentBarrier() {}
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 93b9a6971..57a8c4c85 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -42,8 +42,8 @@ public:
42 void DrawTexture() override; 42 void DrawTexture() override;
43 void Clear(u32 layer_count) override; 43 void Clear(u32 layer_count) override;
44 void DispatchCompute() override; 44 void DispatchCompute() override;
45 void ResetCounter(VideoCore::QueryType type) override; 45 void ResetCounter(VideoCommon::QueryType type) override;
46 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 46 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
47 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 47 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
48 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 48 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
49 void FlushAll() override; 49 void FlushAll() override;
@@ -63,7 +63,7 @@ public:
63 void SyncOperation(std::function<void()>&& func) override; 63 void SyncOperation(std::function<void()>&& func) override;
64 void SignalSyncPoint(u32 value) override; 64 void SignalSyncPoint(u32 value) override;
65 void SignalReference() override; 65 void SignalReference() override;
66 void ReleaseFences() override; 66 void ReleaseFences(bool force) override;
67 void FlushAndInvalidateRegion( 67 void FlushAndInvalidateRegion(
68 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 68 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
69 void WaitForIdle() override; 69 void WaitForIdle() override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 99d7347f5..ec142d48e 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -27,7 +27,7 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
27} // Anonymous namespace 27} // Anonymous namespace
28 28
29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) 29QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
30 : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} 30 : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
31 31
32QueryCache::~QueryCache() = default; 32QueryCache::~QueryCache() = default;
33 33
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 872513f22..0721e0b3d 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,7 +26,7 @@ class RasterizerOpenGL;
26using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; 26using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
27 27
28class QueryCache final 28class QueryCache final
29 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 29 : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> {
30public: 30public:
31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); 31 explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
32 ~QueryCache(); 32 ~QueryCache();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index dd03efecd..a975bbe75 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -396,13 +396,31 @@ void RasterizerOpenGL::DispatchCompute() {
396 has_written_global_memory |= pipeline->WritesGlobalMemory(); 396 has_written_global_memory |= pipeline->WritesGlobalMemory();
397} 397}
398 398
399void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 399void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
400 query_cache.ResetCounter(type); 400 if (type == VideoCommon::QueryType::ZPassPixelCount64) {
401 query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
402 }
401} 403}
402 404
403void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, 405void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
404 std::optional<u64> timestamp) { 406 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
405 query_cache.Query(gpu_addr, type, timestamp); 407 if (type == VideoCommon::QueryType::ZPassPixelCount64) {
408 std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)
409 ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt };
410 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
411 query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
412 } else {
413 query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
414 }
415 return;
416 }
417 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
418 u64 ticks = gpu.GetTicks();
419 gpu_memory->Write<u64>(gpu_addr + 8, ticks);
420 gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload));
421 } else {
422 gpu_memory->Write<u32>(gpu_addr, payload);
423 }
406} 424}
407 425
408void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 426void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
@@ -573,8 +591,8 @@ void RasterizerOpenGL::SignalReference() {
573 fence_manager.SignalOrdering(); 591 fence_manager.SignalOrdering();
574} 592}
575 593
576void RasterizerOpenGL::ReleaseFences() { 594void RasterizerOpenGL::ReleaseFences(bool force) {
577 fence_manager.WaitPendingFences(); 595 fence_manager.WaitPendingFences(force);
578} 596}
579 597
580void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, 598void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 8eda2ddba..05e048e15 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -86,8 +86,8 @@ public:
86 void DrawTexture() override; 86 void DrawTexture() override;
87 void Clear(u32 layer_count) override; 87 void Clear(u32 layer_count) override;
88 void DispatchCompute() override; 88 void DispatchCompute() override;
89 void ResetCounter(VideoCore::QueryType type) override; 89 void ResetCounter(VideoCommon::QueryType type) override;
90 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 90 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
91 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 91 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
92 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 92 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
93 void FlushAll() override; 93 void FlushAll() override;
@@ -107,7 +107,7 @@ public:
107 void SyncOperation(std::function<void()>&& func) override; 107 void SyncOperation(std::function<void()>&& func) override;
108 void SignalSyncPoint(u32 value) override; 108 void SignalSyncPoint(u32 value) override;
109 void SignalReference() override; 109 void SignalReference() override;
110 void ReleaseFences() override; 110 void ReleaseFences(bool force = true) override;
111 void FlushAndInvalidateRegion( 111 void FlushAndInvalidateRegion(
112 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 112 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
113 void WaitForIdle() override; 113 void WaitForIdle() override;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e15865d16..d8148e89a 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
61 if (device.IsExtTransformFeedbackSupported()) { 61 if (device.IsExtTransformFeedbackSupported()) {
62 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; 62 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
63 } 63 }
64 if (device.IsExtConditionalRendering()) {
65 flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT;
66 }
64 const VkBufferCreateInfo buffer_ci = { 67 const VkBufferCreateInfo buffer_ci = {
65 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 68 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
66 .pNext = nullptr, 69 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 54ee030ce..97cd4521d 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -12,6 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/div_ceil.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/astc_decoder_comp_spv.h" 14#include "video_core/host_shaders/astc_decoder_comp_spv.h"
15#include "video_core/host_shaders/resolve_conditional_render_comp_spv.h"
15#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 16#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
16#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 17#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
17#include "video_core/renderer_vulkan/vk_compute_pass.h" 18#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -302,6 +303,52 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
302 return {staging.buffer, staging.offset}; 303 return {staging.buffer, staging.offset};
303} 304}
304 305
306ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_,
307 Scheduler& scheduler_,
308 DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
309 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
310 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
311 RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
312 scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
313
314void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer,
315 u32 src_offset, bool compare_to_zero) {
316 scheduler.RequestOutsideRenderPassOperationContext();
317
318 const size_t compare_size = compare_to_zero ? 8 : 24;
319
320 compute_pass_descriptor_queue.Acquire();
321 compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
322 compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
323 const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
324
325 scheduler.RequestOutsideRenderPassOperationContext();
326 scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
327 static constexpr VkMemoryBarrier read_barrier{
328 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
329 .pNext = nullptr,
330 .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
331 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
332 };
333 static constexpr VkMemoryBarrier write_barrier{
334 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
335 .pNext = nullptr,
336 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
337 .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT,
338 };
339 const VkDescriptorSet set = descriptor_allocator.Commit();
340 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
341
342 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
343 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
344 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
345 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
346 cmdbuf.Dispatch(1, 1, 1);
347 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
348 VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier);
349 });
350}
351
305ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, 352ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
306 DescriptorPool& descriptor_pool_, 353 DescriptorPool& descriptor_pool_,
307 StagingBufferPool& staging_buffer_pool_, 354 StagingBufferPool& staging_buffer_pool_,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index dd3927376..c62f30d30 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -82,6 +82,19 @@ private:
82 ComputePassDescriptorQueue& compute_pass_descriptor_queue; 82 ComputePassDescriptorQueue& compute_pass_descriptor_queue;
83}; 83};
84 84
85class ConditionalRenderingResolvePass final : public ComputePass {
86public:
87 explicit ConditionalRenderingResolvePass(
88 const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
89 ComputePassDescriptorQueue& compute_pass_descriptor_queue_);
90
91 void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero);
92
93private:
94 Scheduler& scheduler;
95 ComputePassDescriptorQueue& compute_pass_descriptor_queue;
96};
97
85class ASTCDecoderPass final : public ComputePass { 98class ASTCDecoderPass final : public ComputePass {
86public: 99public:
87 explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, 100 explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 145359d4e..14fc5ad71 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -8,6 +8,7 @@
8#include "video_core/fence_manager.h" 8#include "video_core/fence_manager.h"
9#include "video_core/renderer_vulkan/vk_buffer_cache.h" 9#include "video_core/renderer_vulkan/vk_buffer_cache.h"
10#include "video_core/renderer_vulkan/vk_texture_cache.h" 10#include "video_core/renderer_vulkan/vk_texture_cache.h"
11#include "video_core/renderer_vulkan/vk_query_cache.h"
11 12
12namespace Core { 13namespace Core {
13class System; 14class System;
@@ -20,7 +21,6 @@ class RasterizerInterface;
20namespace Vulkan { 21namespace Vulkan {
21 22
22class Device; 23class Device;
23class QueryCache;
24class Scheduler; 24class Scheduler;
25 25
26class InnerFence : public VideoCommon::FenceBase { 26class InnerFence : public VideoCommon::FenceBase {
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 29e0b797b..42f571007 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1,139 +1,1223 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include <algorithm>
5#include <cstddef> 4#include <cstddef>
5#include <limits>
6#include <map>
7#include <memory>
8#include <span>
9#include <type_traits>
10#include <unordered_map>
6#include <utility> 11#include <utility>
7#include <vector> 12#include <vector>
8 13
14#include <boost/container/small_vector.hpp>
15#include <boost/icl/interval_set.hpp>
16
17#include "common/common_types.h"
18#include "core/memory.h"
19#include "video_core/query_cache/query_cache.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h"
9#include "video_core/renderer_vulkan/vk_query_cache.h" 22#include "video_core/renderer_vulkan/vk_query_cache.h"
10#include "video_core/renderer_vulkan/vk_resource_pool.h" 23#include "video_core/renderer_vulkan/vk_resource_pool.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 24#include "video_core/renderer_vulkan/vk_scheduler.h"
25#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
26#include "video_core/renderer_vulkan/vk_update_descriptor.h"
12#include "video_core/vulkan_common/vulkan_device.h" 27#include "video_core/vulkan_common/vulkan_device.h"
28#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h" 29#include "video_core/vulkan_common/vulkan_wrapper.h"
14 30
15namespace Vulkan { 31namespace Vulkan {
16 32
17using VideoCore::QueryType; 33using VideoCommon::QueryType;
18 34
19namespace { 35namespace {
36class SamplesQueryBank : public VideoCommon::BankBase {
37public:
38 static constexpr size_t BANK_SIZE = 256;
39 static constexpr size_t QUERY_SIZE = 8;
40 SamplesQueryBank(const Device& device_, size_t index_)
41 : BankBase(BANK_SIZE), device{device_}, index{index_} {
42 const auto& dev = device.GetLogical();
43 query_pool = dev.CreateQueryPool({
44 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
45 .pNext = nullptr,
46 .flags = 0,
47 .queryType = VK_QUERY_TYPE_OCCLUSION,
48 .queryCount = BANK_SIZE,
49 .pipelineStatistics = 0,
50 });
51 Reset();
52 }
20 53
21constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; 54 ~SamplesQueryBank() = default;
22 55
23constexpr VkQueryType GetTarget(QueryType type) { 56 void Reset() override {
24 return QUERY_TARGETS[static_cast<std::size_t>(type)]; 57 ASSERT(references == 0);
25} 58 VideoCommon::BankBase::Reset();
59 const auto& dev = device.GetLogical();
60 dev.ResetQueryPool(*query_pool, 0, BANK_SIZE);
61 host_results.fill(0ULL);
62 next_bank = 0;
63 }
64
65 void Sync(size_t start, size_t size) {
66 const auto& dev = device.GetLogical();
67 const VkResult query_result = dev.GetQueryResults(
68 *query_pool, static_cast<u32>(start), static_cast<u32>(size), sizeof(u64) * size,
69 &host_results[start], sizeof(u64), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
70 switch (query_result) {
71 case VK_SUCCESS:
72 return;
73 case VK_ERROR_DEVICE_LOST:
74 device.ReportLoss();
75 [[fallthrough]];
76 default:
77 throw vk::Exception(query_result);
78 }
79 }
80
81 VkQueryPool GetInnerPool() {
82 return *query_pool;
83 }
84
85 size_t GetIndex() const {
86 return index;
87 }
88
89 const std::array<u64, BANK_SIZE>& GetResults() const {
90 return host_results;
91 }
92
93 size_t next_bank;
94
95private:
96 const Device& device;
97 const size_t index;
98 vk::QueryPool query_pool;
99 std::array<u64, BANK_SIZE> host_results;
100};
101
102using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>;
103
104struct HostSyncValues {
105 VAddr address;
106 size_t size;
107 size_t offset;
108
109 static constexpr bool GeneratesBaseBuffer = false;
110};
111
112template <typename Traits>
113class SamplesStreamer : public BaseStreamer {
114public:
115 SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
116 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
117 : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
118 memory_allocator{memory_allocator_} {
119 BuildResolveBuffer();
120 current_bank = nullptr;
121 current_query = nullptr;
122 }
123
124 void StartCounter() override {
125 if (has_started) {
126 return;
127 }
128 ReserveHostQuery();
129 scheduler.Record([query_pool = current_query_pool,
130 query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
131 const bool use_precise = Settings::IsGPULevelHigh();
132 cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
133 use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
134 });
135 has_started = true;
136 }
137
138 void PauseCounter() override {
139 if (!has_started) {
140 return;
141 }
142 scheduler.Record([query_pool = current_query_pool,
143 query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
144 cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index));
145 });
146 has_started = false;
147 }
148
149 void ResetCounter() override {
150 if (has_started) {
151 PauseCounter();
152 }
153 AbandonCurrentQuery();
154 }
155
156 void CloseCounter() override {
157 PauseCounter();
158 }
159
160 bool HasPendingSync() override {
161 return !pending_sync.empty();
162 }
163
164 void SyncWrites() override {
165 if (sync_values_stash.empty()) {
166 return;
167 }
168
169 for (size_t i = 0; i < sync_values_stash.size(); i++) {
170 runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], *resolve_buffers[i]);
171 }
172
173 sync_values_stash.clear();
174 }
175
176 void PresyncWrites() override {
177 if (pending_sync.empty()) {
178 return;
179 }
180 PauseCounter();
181 sync_values_stash.clear();
182 sync_values_stash.emplace_back();
183 std::vector<HostSyncValues>* sync_values = &sync_values_stash.back();
184 sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
185 std::unordered_map<size_t, std::pair<size_t, size_t>> offsets;
186 size_t this_bank_slot = std::numeric_limits<size_t>::max();
187 size_t resolve_slots_remaining = resolve_slots;
188 size_t resolve_buffer_index = 0;
189 ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start,
190 size_t amount) {
191 size_t bank_id = bank->GetIndex();
192 if (this_bank_slot != bank_id) {
193 this_bank_slot = bank_id;
194 if (resolve_slots_remaining == 0) {
195 resolve_buffer_index++;
196 if (resolve_buffer_index >= resolve_buffers.size()) {
197 BuildResolveBuffer();
198 }
199 resolve_slots_remaining = resolve_slots;
200 sync_values_stash.emplace_back();
201 sync_values = sync_values = &sync_values_stash.back();
202 sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
203 }
204 resolve_slots_remaining--;
205 }
206 auto& resolve_buffer = resolve_buffers[resolve_buffer_index];
207 const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
208 (resolve_slots - resolve_slots_remaining - 1);
209 VkQueryPool query_pool = bank->GetInnerPool();
210 scheduler.Record([start, amount, base_offset, query_pool,
211 buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
212 size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
213 const VkBufferMemoryBarrier copy_query_pool_barrier{
214 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
215 .pNext = nullptr,
216 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
217 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
218 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
219 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
220 .buffer = buffer,
221 .offset = final_offset,
222 .size = amount * SamplesQueryBank::QUERY_SIZE,
223 };
224
225 cmdbuf.CopyQueryPoolResults(
226 query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer,
227 static_cast<u32>(final_offset), SamplesQueryBank::QUERY_SIZE,
228 VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT);
229 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
230 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier);
231 });
232 offsets[bank_id] = {sync_values_stash.size() - 1, base_offset};
233 });
234
235 // Convert queries
236 for (auto q : pending_sync) {
237 auto* query = GetQuery(q);
238 if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) {
239 continue;
240 }
241 if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) {
242 continue;
243 }
244 if (query->size_slots > 1) {
245 // This is problematic.
246 UNIMPLEMENTED();
247 }
248 query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
249 auto loc_data = offsets[query->start_bank_id];
250 sync_values_stash[loc_data.first].emplace_back(HostSyncValues{
251 .address = query->guest_address,
252 .size = SamplesQueryBank::QUERY_SIZE,
253 .offset = loc_data.second + query->start_slot * SamplesQueryBank::QUERY_SIZE,
254 });
255 }
256
257 AbandonCurrentQuery();
258 pending_sync.clear();
259 }
260
261 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
262 [[maybe_unused]] std::optional<u32> subreport) override {
263 auto index = BuildQuery();
264 auto* new_query = GetQuery(index);
265 new_query->guest_address = address;
266 new_query->value = 100;
267 new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
268 if (has_timestamp) {
269 new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
270 }
271 if (!current_query) {
272 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
273 return index;
274 }
275 new_query->start_bank_id = current_query->start_bank_id;
276 new_query->size_banks = current_query->size_banks;
277 new_query->start_slot = current_query->start_slot;
278 new_query->size_slots = current_query->size_slots;
279 ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
280 bank->AddReference(amount);
281 });
282 pending_sync.push_back(index);
283 pending_flush_queries.push_back(index);
284 return index;
285 }
286
287 bool HasUnsyncedQueries() override {
288 return !pending_flush_queries.empty();
289 }
290
291 void PushUnsyncedQueries() override {
292 PauseCounter();
293 {
294 std::scoped_lock lk(flush_guard);
295 pending_flush_sets.emplace_back(std::move(pending_flush_queries));
296 }
297 }
298
299 void PopUnsyncedQueries() override {
300 std::vector<size_t> current_flush_queries;
301 {
302 std::scoped_lock lk(flush_guard);
303 current_flush_queries = std::move(pending_flush_sets.front());
304 pending_flush_sets.pop_front();
305 }
306 ApplyBanksWideOp<false>(
307 current_flush_queries,
308 [](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); });
309 for (auto q : current_flush_queries) {
310 auto* query = GetQuery(q);
311 u64 total = 0;
312 ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) {
313 const auto& results = bank->GetResults();
314 for (size_t i = 0; i < amount; i++) {
315 total += results[start + i];
316 }
317 });
318 query->value = total;
319 query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
320 }
321 }
322
323private:
324 template <typename Func>
325 void ApplyBankOp(VideoCommon::HostQueryBase* query, Func&& func) {
326 size_t size_slots = query->size_slots;
327 if (size_slots == 0) {
328 return;
329 }
330 size_t bank_id = query->start_bank_id;
331 size_t banks_set = query->size_banks;
332 size_t start_slot = query->start_slot;
333 for (size_t i = 0; i < banks_set; i++) {
334 auto& the_bank = bank_pool.GetBank(bank_id);
335 size_t amount = std::min(the_bank.Size() - start_slot, size_slots);
336 func(&the_bank, start_slot, amount);
337 bank_id = the_bank.next_bank - 1;
338 start_slot = 0;
339 size_slots -= amount;
340 }
341 }
342
343 template <bool is_ordered, typename Func>
344 void ApplyBanksWideOp(std::vector<size_t>& queries, Func&& func) {
345 std::conditional_t<is_ordered, std::map<size_t, std::pair<size_t, size_t>>,
346 std::unordered_map<size_t, std::pair<size_t, size_t>>>
347 indexer;
348 for (auto q : queries) {
349 auto* query = GetQuery(q);
350 ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
351 auto id = bank->GetIndex();
352 auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(),
353 std::numeric_limits<size_t>::min());
354 auto& current_pair = pair.first->second;
355 current_pair.first = std::min(current_pair.first, start);
356 current_pair.second = std::max(current_pair.second, amount + start);
357 });
358 }
359 for (auto& cont : indexer) {
360 func(&bank_pool.GetBank(cont.first), cont.second.first,
361 cont.second.second - cont.second.first);
362 }
363 }
364
365 void ReserveBank() {
366 current_bank_id =
367 bank_pool.ReserveBank([this](std::deque<SamplesQueryBank>& queue, size_t index) {
368 queue.emplace_back(device, index);
369 });
370 if (current_bank) {
371 current_bank->next_bank = current_bank_id + 1;
372 }
373 current_bank = &bank_pool.GetBank(current_bank_id);
374 current_query_pool = current_bank->GetInnerPool();
375 }
376
377 size_t ReserveBankSlot() {
378 if (!current_bank || current_bank->IsClosed()) {
379 ReserveBank();
380 }
381 auto [built, index] = current_bank->Reserve();
382 current_bank_slot = index;
383 return index;
384 }
385
386 void ReserveHostQuery() {
387 size_t new_slot = ReserveBankSlot();
388 current_bank->AddReference(1);
389 if (current_query) {
390 size_t bank_id = current_query->start_bank_id;
391 size_t banks_set = current_query->size_banks - 1;
392 bool found = bank_id == current_bank_id;
393 while (!found && banks_set > 0) {
394 SamplesQueryBank& some_bank = bank_pool.GetBank(bank_id);
395 bank_id = some_bank.next_bank - 1;
396 found = bank_id == current_bank_id;
397 banks_set--;
398 }
399 if (!found) {
400 current_query->size_banks++;
401 }
402 current_query->size_slots++;
403 } else {
404 current_query_id = BuildQuery();
405 current_query = GetQuery(current_query_id);
406 current_query->start_bank_id = static_cast<u32>(current_bank_id);
407 current_query->size_banks = 1;
408 current_query->start_slot = new_slot;
409 current_query->size_slots = 1;
410 }
411 }
412
413 void Free(size_t query_id) override {
414 std::scoped_lock lk(guard);
415 auto* query = GetQuery(query_id);
416 ApplyBankOp(query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
417 bank->CloseReference(amount);
418 });
419 ReleaseQuery(query_id);
420 }
421
422 void AbandonCurrentQuery() {
423 if (!current_query) {
424 return;
425 }
426 Free(current_query_id);
427 current_query = nullptr;
428 current_query_id = 0;
429 }
430
431 void BuildResolveBuffer() {
432 const VkBufferCreateInfo buffer_ci = {
433 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
434 .pNext = nullptr,
435 .flags = 0,
436 .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
437 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
438 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
439 .queueFamilyIndexCount = 0,
440 .pQueueFamilyIndices = nullptr,
441 };
442 resolve_buffers.emplace_back(
443 std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)));
444 }
445
    // Number of bank-sized regions each resolve buffer can hold.
    static constexpr size_t resolve_slots = 8;

    QueryCacheRuntime& runtime;
    const Device& device;
    Scheduler& scheduler;
    const MemoryAllocator& memory_allocator;
    VideoCommon::BankPool<SamplesQueryBank> bank_pool;
    // Device-local buffers written by the resolve pass; see BuildResolveBuffer().
    std::deque<vk::Buffer> resolve_buffers;
    // Batches of host writebacks kept alive until their copies are recorded.
    std::deque<std::vector<HostSyncValues>> sync_values_stash;

    // syncing queue
    std::vector<size_t> pending_sync;

    // flush levels
    std::vector<size_t> pending_flush_queries;
    std::deque<std::vector<size_t>> pending_flush_sets;

    // State Machine
    size_t current_bank_slot;
    size_t current_bank_id;
    SamplesQueryBank* current_bank;
    VkQueryPool current_query_pool;
    size_t current_query_id;
    VideoCommon::HostQueryBase* current_query;
    bool has_started{};
    // Guards the pending-flush containers against concurrent push/pop.
    std::mutex flush_guard;
472};
473
474// Transform feedback queries
475class TFBQueryBank : public VideoCommon::BankBase {
476public:
477 static constexpr size_t BANK_SIZE = 1024;
478 static constexpr size_t QUERY_SIZE = 4;
479 TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_)
480 : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
481 const VkBufferCreateInfo buffer_ci = {
482 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
483 .pNext = nullptr,
484 .flags = 0,
485 .size = QUERY_SIZE * BANK_SIZE,
486 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
487 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
488 .queueFamilyIndexCount = 0,
489 .pQueueFamilyIndices = nullptr,
490 };
491 buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
492 }
493
494 ~TFBQueryBank() = default;
495
496 void Reset() override {
497 ASSERT(references == 0);
498 VideoCommon::BankBase::Reset();
499 }
500
501 void Sync(StagingBufferRef& stagging_buffer, size_t extra_offset, size_t start, size_t size) {
502 scheduler.RequestOutsideRenderPassOperationContext();
503 scheduler.Record([this, dst_buffer = stagging_buffer.buffer, extra_offset, start,
504 size](vk::CommandBuffer cmdbuf) {
505 std::array<VkBufferCopy, 1> copy{VkBufferCopy{
506 .srcOffset = start * QUERY_SIZE,
507 .dstOffset = extra_offset,
508 .size = size * QUERY_SIZE,
509 }};
510 cmdbuf.CopyBuffer(*buffer, dst_buffer, copy);
511 });
512 }
513
514 size_t GetIndex() const {
515 return index;
516 }
517
518 VkBuffer GetBuffer() const {
519 return *buffer;
520 }
521
522private:
523 Scheduler& scheduler;
524 const size_t index;
525 vk::Buffer buffer;
526};
527
528template <typename Traits>
529class TFBCounterStreamer : public BaseStreamer {
530public:
531 TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
532 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
533 StagingBufferPool& staging_pool_)
534 : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_},
535 memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
536 buffers_count = 0;
537 current_bank = nullptr;
538 counter_buffers.fill(VK_NULL_HANDLE);
539 offsets.fill(0);
540 const VkBufferCreateInfo buffer_ci = {
541 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
542 .pNext = nullptr,
543 .flags = 0,
544 .size = TFBQueryBank::QUERY_SIZE * NUM_STREAMS,
545 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
546 VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT,
547 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
548 .queueFamilyIndexCount = 0,
549 .pQueueFamilyIndices = nullptr,
550 };
551
552 counters_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
553 for (auto& c : counter_buffers) {
554 c = *counters_buffer;
555 }
556 size_t base_offset = 0;
557 for (auto& o : offsets) {
558 o = base_offset;
559 base_offset += TFBQueryBank::QUERY_SIZE;
560 }
561 }
562
563 void StartCounter() override {
564 FlushBeginTFB();
565 has_started = true;
566 }
567
568 void PauseCounter() override {
569 CloseCounter();
570 }
571
572 void ResetCounter() override {
573 CloseCounter();
574 }
575
576 void CloseCounter() override {
577 if (has_flushed_end_pending) {
578 FlushEndTFB();
579 }
580 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
581 if (regs.transform_feedback_enabled == 0) {
582 streams_mask = 0;
583 has_started = false;
584 }
585 });
586 }
587
588 bool HasPendingSync() override {
589 return !pending_sync.empty();
590 }
591
592 void SyncWrites() override {
593 CloseCounter();
594 std::unordered_map<size_t, std::vector<HostSyncValues>> sync_values_stash;
595 for (auto q : pending_sync) {
596 auto* query = GetQuery(q);
597 if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) {
598 continue;
599 }
600 if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) {
601 continue;
602 }
603 query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
604 sync_values_stash.try_emplace(query->start_bank_id);
605 sync_values_stash[query->start_bank_id].emplace_back(HostSyncValues{
606 .address = query->guest_address,
607 .size = TFBQueryBank::QUERY_SIZE,
608 .offset = query->start_slot * TFBQueryBank::QUERY_SIZE,
609 });
610 }
611 for (auto& p : sync_values_stash) {
612 auto& bank = bank_pool.GetBank(p.first);
613 runtime.template SyncValues<HostSyncValues>(p.second, bank.GetBuffer());
614 }
615 pending_sync.clear();
616 }
617
618 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
619 std::optional<u32> subreport_) override {
620 auto index = BuildQuery();
621 auto* new_query = GetQuery(index);
622 new_query->guest_address = address;
623 new_query->value = 0;
624 new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
625 if (has_timestamp) {
626 new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
627 }
628 if (!subreport_) {
629 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
630 return index;
631 }
632 const size_t subreport = static_cast<size_t>(*subreport_);
633 UpdateBuffers();
634 if ((streams_mask & (1ULL << subreport)) == 0) {
635 new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
636 return index;
637 }
638 CloseCounter();
639 auto [bank_slot, data_slot] = ProduceCounterBuffer(subreport);
640 new_query->start_bank_id = static_cast<u32>(bank_slot);
641 new_query->size_banks = 1;
642 new_query->start_slot = static_cast<u32>(data_slot);
643 new_query->size_slots = 1;
644 pending_sync.push_back(index);
645 pending_flush_queries.push_back(index);
646 return index;
647 }
648
649 bool HasUnsyncedQueries() override {
650 return !pending_flush_queries.empty();
651 }
652
653 void PushUnsyncedQueries() override {
654 CloseCounter();
655 auto staging_ref = staging_pool.Request(
656 pending_flush_queries.size() * TFBQueryBank::QUERY_SIZE, MemoryUsage::Download, true);
657 size_t offset_base = staging_ref.offset;
658 for (auto q : pending_flush_queries) {
659 auto* query = GetQuery(q);
660 auto& bank = bank_pool.GetBank(query->start_bank_id);
661 bank.Sync(staging_ref, offset_base, query->start_slot, 1);
662 offset_base += TFBQueryBank::QUERY_SIZE;
663 bank.CloseReference();
664 }
665 static constexpr VkMemoryBarrier WRITE_BARRIER{
666 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
667 .pNext = nullptr,
668 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
669 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
670 };
671 scheduler.RequestOutsideRenderPassOperationContext();
672 scheduler.Record([](vk::CommandBuffer cmdbuf) {
673 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
674 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
675 });
676
677 std::scoped_lock lk(flush_guard);
678 for (auto& str : free_queue) {
679 staging_pool.FreeDeferred(str);
680 }
681 free_queue.clear();
682 download_buffers.emplace_back(staging_ref);
683 pending_flush_sets.emplace_back(std::move(pending_flush_queries));
684 }
685
686 void PopUnsyncedQueries() override {
687 StagingBufferRef staging_ref;
688 std::vector<size_t> flushed_queries;
689 {
690 std::scoped_lock lk(flush_guard);
691 staging_ref = download_buffers.front();
692 flushed_queries = std::move(pending_flush_sets.front());
693 download_buffers.pop_front();
694 pending_flush_sets.pop_front();
695 }
696
697 size_t offset_base = staging_ref.offset;
698 for (auto q : flushed_queries) {
699 auto* query = GetQuery(q);
700 u32 result = 0;
701 std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32));
702 query->value = static_cast<u64>(result);
703 query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
704 offset_base += TFBQueryBank::QUERY_SIZE;
705 }
706
707 {
708 std::scoped_lock lk(flush_guard);
709 free_queue.emplace_back(staging_ref);
710 }
711 }
712
713private:
714 void FlushBeginTFB() {
715 if (has_flushed_end_pending) [[unlikely]] {
716 return;
717 }
718 has_flushed_end_pending = true;
719 if (!has_started || buffers_count == 0) {
720 scheduler.Record([](vk::CommandBuffer cmdbuf) {
721 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
722 });
723 UpdateBuffers();
724 return;
725 }
726 scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
727 cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
728 });
729 UpdateBuffers();
730 }
731
732 void FlushEndTFB() {
733 if (!has_flushed_end_pending) [[unlikely]] {
734 UNREACHABLE();
735 return;
736 }
737 has_flushed_end_pending = false;
738
739 if (buffers_count == 0) {
740 scheduler.Record([](vk::CommandBuffer cmdbuf) {
741 cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
742 });
743 } else {
744 scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
745 cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
746 });
747 }
748 }
749
750 void UpdateBuffers() {
751 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
752 buffers_count = 0;
753 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
754 i++) {
755 const auto& tf = regs.transform_feedback;
756 if (tf.buffers[i].enable == 0) {
757 continue;
758 }
759 const size_t stream = tf.controls[i].stream;
760 streams_mask |= 1ULL << stream;
761 buffers_count = std::max<size_t>(buffers_count, stream + 1);
762 }
763 });
764 }
765
766 std::pair<size_t, size_t> ProduceCounterBuffer(size_t stream) {
767 if (current_bank == nullptr || current_bank->IsClosed()) {
768 current_bank_id =
769 bank_pool.ReserveBank([this](std::deque<TFBQueryBank>& queue, size_t index) {
770 queue.emplace_back(scheduler, memory_allocator, index);
771 });
772 current_bank = &bank_pool.GetBank(current_bank_id);
773 }
774 auto [dont_care, slot] = current_bank->Reserve();
775 current_bank->AddReference();
776
777 static constexpr VkMemoryBarrier READ_BARRIER{
778 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
779 .pNext = nullptr,
780 .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT,
781 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
782 };
783 static constexpr VkMemoryBarrier WRITE_BARRIER{
784 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
785 .pNext = nullptr,
786 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
787 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
788 };
789 scheduler.RequestOutsideRenderPassOperationContext();
790 scheduler.Record([dst_buffer = current_bank->GetBuffer(),
791 src_buffer = counter_buffers[stream], src_offset = offsets[stream],
792 slot](vk::CommandBuffer cmdbuf) {
793 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
794 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
795 std::array<VkBufferCopy, 1> copy{VkBufferCopy{
796 .srcOffset = src_offset,
797 .dstOffset = slot * TFBQueryBank::QUERY_SIZE,
798 .size = TFBQueryBank::QUERY_SIZE,
799 }};
800 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
801 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
802 0, WRITE_BARRIER);
803 });
804 return {current_bank_id, slot};
805 }
806
807 static constexpr size_t NUM_STREAMS = 4;
808 static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
809
810 QueryCacheRuntime& runtime;
811 const Device& device;
812 Scheduler& scheduler;
813 const MemoryAllocator& memory_allocator;
814 StagingBufferPool& staging_pool;
815 VideoCommon::BankPool<TFBQueryBank> bank_pool;
816 size_t current_bank_id;
817 TFBQueryBank* current_bank;
818 vk::Buffer counters_buffer;
819
820 // syncing queue
821 std::vector<size_t> pending_sync;
822
823 // flush levels
824 std::vector<size_t> pending_flush_queries;
825 std::deque<StagingBufferRef> download_buffers;
826 std::deque<std::vector<size_t>> pending_flush_sets;
827 std::vector<StagingBufferRef> free_queue;
828 std::mutex flush_guard;
829
830 // state machine
831 bool has_started{};
832 bool has_flushed_end_pending{};
833 size_t buffers_count{};
834 std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
835 std::array<VkDeviceSize, NUM_STREAMS> offsets{};
836 u64 streams_mask;
837};
838
839} // namespace
840
841struct QueryCacheRuntimeImpl {
    // Owns the backend state of the Vulkan query cache: the three streamers
    // (guest payload, ZPass pixel samples, transform feedback byte counts) and
    // the host conditional rendering (HCR) resolve machinery.
    QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_,
                          Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_,
                          const Device& device_, const MemoryAllocator& memory_allocator_,
                          Scheduler& scheduler_, StagingBufferPool& staging_pool_,
                          ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                          DescriptorPool& descriptor_pool)
        : rasterizer{rasterizer_}, cpu_memory{cpu_memory_},
          buffer_cache{buffer_cache_}, device{device_},
          memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
          guest_streamer(0, runtime),
          sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device,
                          scheduler, memory_allocator),
          tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
                       scheduler, memory_allocator, staging_pool),
          hcr_setup{}, hcr_is_set{}, is_hcr_running{} {

        // Constant portion of the VK_EXT_conditional_rendering begin info; the
        // buffer/offset fields are filled in per comparison.
        hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
        hcr_setup.pNext = nullptr;
        hcr_setup.flags = 0;

        // Compute pass that collapses a guest comparison region into a single
        // u32 predicate the conditional rendering extension can consume.
        conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>(
            device, scheduler, descriptor_pool, compute_pass_descriptor_queue);

        // One-u32 result buffer: written by the resolve pass (storage/transfer
        // dst) and read directly as the HCR predicate source.
        const VkBufferCreateInfo buffer_ci = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .size = sizeof(u32),
            .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                     VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
            .queueFamilyIndexCount = 0,
            .pQueueFamilyIndices = nullptr,
        };
        hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
    }
26 878
27} // Anonymous namespace 879 VideoCore::RasterizerInterface* rasterizer;
880 Core::Memory::Memory& cpu_memory;
881 Vulkan::BufferCache& buffer_cache;
28 882
29QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_) 883 const Device& device;
30 : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} 884 const MemoryAllocator& memory_allocator;
885 Scheduler& scheduler;
886 StagingBufferPool& staging_pool;
31 887
32QueryPool::~QueryPool() = default; 888 // Streamers
889 VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
890 SamplesStreamer<QueryCacheParams> sample_streamer;
891 TFBCounterStreamer<QueryCacheParams> tfb_streamer;
33 892
34std::pair<VkQueryPool, u32> QueryPool::Commit() { 893 std::vector<std::pair<VAddr, VAddr>> little_cache;
35 std::size_t index; 894 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
36 do { 895 std::vector<size_t> redirect_cache;
37 index = CommitResource(); 896 std::vector<std::vector<VkBufferCopy>> copies_setup;
38 } while (usage[index]);
39 usage[index] = true;
40 897
41 return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; 898 // Host conditional rendering data
899 std::unique_ptr<ConditionalRenderingResolvePass> conditional_resolve_pass;
900 vk::Buffer hcr_resolve_buffer;
901 VkConditionalRenderingBeginInfoEXT hcr_setup;
902 VkBuffer hcr_buffer;
903 size_t hcr_offset;
904 bool hcr_is_set;
905 bool is_hcr_running;
906
907 // maxwell3d
908 Tegra::Engines::Maxwell3D* maxwell3d;
909};
910
911QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
912 Core::Memory::Memory& cpu_memory_,
913 Vulkan::BufferCache& buffer_cache_, const Device& device_,
914 const MemoryAllocator& memory_allocator_,
915 Scheduler& scheduler_, StagingBufferPool& staging_pool_,
916 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
917 DescriptorPool& descriptor_pool) {
918 impl = std::make_unique<QueryCacheRuntimeImpl>(
919 *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
920 staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
921}
922
923void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) {
924 impl->maxwell3d = maxwell3d;
42} 925}
43 926
44void QueryPool::Allocate(std::size_t begin, std::size_t end) { 927template <typename Func>
45 usage.resize(end); 928void QueryCacheRuntime::View3DRegs(Func&& func) {
929 func(impl->maxwell3d->regs);
930}
46 931
47 pools.push_back(device.GetLogical().CreateQueryPool({ 932void QueryCacheRuntime::EndHostConditionalRendering() {
48 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 933 PauseHostConditionalRendering();
49 .pNext = nullptr, 934 impl->hcr_is_set = false;
50 .flags = 0, 935 impl->is_hcr_running = false;
51 .queryType = GetTarget(type), 936 impl->hcr_buffer = nullptr;
52 .queryCount = static_cast<u32>(end - begin), 937 impl->hcr_offset = 0;
53 .pipelineStatistics = 0,
54 }));
55} 938}
56 939
57void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { 940void QueryCacheRuntime::PauseHostConditionalRendering() {
58 const auto it = 941 if (!impl->hcr_is_set) {
59 std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { 942 return;
60 return query_pool == *pool; 943 }
944 if (impl->is_hcr_running) {
945 impl->scheduler.Record(
946 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndConditionalRenderingEXT(); });
947 }
948 impl->is_hcr_running = false;
949}
950
951void QueryCacheRuntime::ResumeHostConditionalRendering() {
952 if (!impl->hcr_is_set) {
953 return;
954 }
955 if (!impl->is_hcr_running) {
956 impl->scheduler.Record([hcr_setup = impl->hcr_setup](vk::CommandBuffer cmdbuf) {
957 cmdbuf.BeginConditionalRenderingEXT(hcr_setup);
61 }); 958 });
959 }
960 impl->is_hcr_running = true;
961}
62 962
63 if (it != std::end(pools)) { 963void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
64 const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); 964 bool is_equal) {
65 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; 965 {
966 std::scoped_lock lk(impl->buffer_cache.mutex);
967 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
968 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
969 const auto [buffer, offset] =
970 impl->buffer_cache.ObtainCPUBuffer(object.address, 8, sync_info, post_op);
971 impl->hcr_buffer = buffer->Handle();
972 impl->hcr_offset = offset;
973 }
974 if (impl->hcr_is_set) {
975 if (impl->hcr_setup.buffer == impl->hcr_buffer &&
976 impl->hcr_setup.offset == impl->hcr_offset) {
977 ResumeHostConditionalRendering();
978 return;
979 }
980 PauseHostConditionalRendering();
66 } 981 }
982 impl->hcr_setup.buffer = impl->hcr_buffer;
983 impl->hcr_setup.offset = impl->hcr_offset;
984 impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
985 impl->hcr_is_set = true;
986 impl->is_hcr_running = false;
987 ResumeHostConditionalRendering();
67} 988}
68 989
69QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, 990void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) {
70 Core::Memory::Memory& cpu_memory_, const Device& device_, 991 VkBuffer to_resolve;
71 Scheduler& scheduler_) 992 u32 to_resolve_offset;
72 : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_}, 993 {
73 query_pools{ 994 std::scoped_lock lk(impl->buffer_cache.mutex);
74 QueryPool{device_, scheduler_, QueryType::SamplesPassed}, 995 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::NoSynchronize;
75 } {} 996 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
76 997 const auto [buffer, offset] =
77QueryCache::~QueryCache() { 998 impl->buffer_cache.ObtainCPUBuffer(address, 24, sync_info, post_op);
78 // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class 999 to_resolve = buffer->Handle();
79 // destructor is called. The query cache should be redesigned to have a proper ownership model 1000 to_resolve_offset = static_cast<u32>(offset);
80 // instead of using shared pointers. 1001 }
81 for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) { 1002 if (impl->is_hcr_running) {
82 auto& stream = Stream(static_cast<QueryType>(query_type)); 1003 PauseHostConditionalRendering();
83 stream.Update(false);
84 stream.Reset();
85 } 1004 }
1005 impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
1006 to_resolve_offset, false);
1007 impl->hcr_setup.buffer = *impl->hcr_resolve_buffer;
1008 impl->hcr_setup.offset = 0;
1009 impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
1010 impl->hcr_is_set = true;
1011 impl->is_hcr_running = false;
1012 ResumeHostConditionalRendering();
86} 1013}
87 1014
88std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) { 1015bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,
89 return query_pools[static_cast<std::size_t>(type)].Commit(); 1016 [[maybe_unused]] bool qc_dirty) {
1017 if (!impl->device.IsExtConditionalRendering()) {
1018 return false;
1019 }
1020 HostConditionalRenderingCompareValueImpl(object_1, false);
1021 return true;
90} 1022}
91 1023
92void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { 1024bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
93 query_pools[static_cast<std::size_t>(type)].Reserve(query); 1025 VideoCommon::LookupData object_2,
1026 bool qc_dirty, bool equal_check) {
1027 if (!impl->device.IsExtConditionalRendering()) {
1028 return false;
1029 }
1030
1031 const auto check_in_bc = [&](VAddr address) {
1032 return impl->buffer_cache.IsRegionGpuModified(address, 8);
1033 };
1034 const auto check_value = [&](VAddr address) {
1035 u8* ptr = impl->cpu_memory.GetPointer(address);
1036 u64 value{};
1037 std::memcpy(&value, ptr, sizeof(value));
1038 return value == 0;
1039 };
1040 std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2};
1041 std::array<bool, 2> is_in_bc{};
1042 std::array<bool, 2> is_in_qc{};
1043 std::array<bool, 2> is_in_ac{};
1044 std::array<bool, 2> is_null{};
1045 {
1046 std::scoped_lock lk(impl->buffer_cache.mutex);
1047 for (size_t i = 0; i < 2; i++) {
1048 is_in_qc[i] = objects[i]->found_query != nullptr;
1049 is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address);
1050 is_in_ac[i] = is_in_qc[i] || is_in_bc[i];
1051 }
1052 }
1053
1054 if (!is_in_ac[0] && !is_in_ac[1]) {
1055 EndHostConditionalRendering();
1056 return false;
1057 }
1058
1059 if (!qc_dirty && !is_in_bc[0] && !is_in_bc[1]) {
1060 EndHostConditionalRendering();
1061 return false;
1062 }
1063
1064 for (size_t i = 0; i < 2; i++) {
1065 is_null[i] = !is_in_ac[i] && check_value(objects[i]->address);
1066 }
1067
1068 for (size_t i = 0; i < 2; i++) {
1069 if (is_null[i]) {
1070 size_t j = (i + 1) % 2;
1071 HostConditionalRenderingCompareValueImpl(*objects[j], equal_check);
1072 return true;
1073 }
1074 }
1075 HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
1076 return true;
94} 1077}
95 1078
96HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, 1079QueryCacheRuntime::~QueryCacheRuntime() = default;
97 QueryType type_) 1080
98 : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, 1081VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryType query_type) {
99 query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { 1082 switch (query_type) {
100 const vk::Device* logical = &cache.GetDevice().GetLogical(); 1083 case QueryType::Payload:
101 cache.GetScheduler().Record([logical, query_ = query](vk::CommandBuffer cmdbuf) { 1084 return &impl->guest_streamer;
102 const bool use_precise = Settings::IsGPULevelHigh(); 1085 case QueryType::ZPassPixelCount64:
103 logical->ResetQueryPool(query_.first, query_.second, 1); 1086 return &impl->sample_streamer;
104 cmdbuf.BeginQuery(query_.first, query_.second, 1087 case QueryType::StreamingByteCount:
105 use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); 1088 return &impl->tfb_streamer;
106 }); 1089 default:
1090 return nullptr;
1091 }
107} 1092}
108 1093
109HostCounter::~HostCounter() { 1094void QueryCacheRuntime::Barriers(bool is_prebarrier) {
110 cache.Reserve(type, query); 1095 static constexpr VkMemoryBarrier READ_BARRIER{
1096 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
1097 .pNext = nullptr,
1098 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
1099 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
1100 };
1101 static constexpr VkMemoryBarrier WRITE_BARRIER{
1102 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
1103 .pNext = nullptr,
1104 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
1105 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
1106 };
1107 if (is_prebarrier) {
1108 impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
1109 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
1110 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
1111 });
1112 } else {
1113 impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
1114 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
1115 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
1116 });
1117 }
111} 1118}
112 1119
113void HostCounter::EndQuery() { 1120template <typename SyncValuesType>
114 cache.GetScheduler().Record([query_ = query](vk::CommandBuffer cmdbuf) { 1121void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer) {
115 cmdbuf.EndQuery(query_.first, query_.second); 1122 if (values.size() == 0) {
1123 return;
1124 }
1125 impl->redirect_cache.clear();
1126 impl->little_cache.clear();
1127 size_t total_size = 0;
1128 for (auto& sync_val : values) {
1129 total_size += sync_val.size;
1130 bool found = false;
1131 VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE);
1132 VAddr base_end = base + Core::Memory::YUZU_PAGESIZE;
1133 for (size_t i = 0; i < impl->little_cache.size(); i++) {
1134 const auto set_found = [&] {
1135 impl->redirect_cache.push_back(i);
1136 found = true;
1137 };
1138 auto& loc = impl->little_cache[i];
1139 if (base < loc.second && loc.first < base_end) {
1140 set_found();
1141 break;
1142 }
1143 if (loc.first == base_end) {
1144 loc.first = base;
1145 set_found();
1146 break;
1147 }
1148 if (loc.second == base) {
1149 loc.second = base_end;
1150 set_found();
1151 break;
1152 }
1153 }
1154 if (!found) {
1155 impl->redirect_cache.push_back(impl->little_cache.size());
1156 impl->little_cache.emplace_back(base, base_end);
1157 }
1158 }
1159
1160 // Vulkan part.
1161 std::scoped_lock lk(impl->buffer_cache.mutex);
1162 impl->buffer_cache.BufferOperations([&] {
1163 impl->buffers_to_upload_to.clear();
1164 for (auto& pair : impl->little_cache) {
1165 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
1166 const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
1167 const auto [buffer, offset] = impl->buffer_cache.ObtainCPUBuffer(
1168 pair.first, static_cast<u32>(pair.second - pair.first), sync_info, post_op);
1169 impl->buffers_to_upload_to.emplace_back(buffer->Handle(), offset);
1170 }
116 }); 1171 });
117}
118 1172
119u64 HostCounter::BlockingQuery(bool async) const { 1173 VkBuffer src_buffer;
120 if (!async) { 1174 [[maybe_unused]] StagingBufferRef ref;
121 cache.GetScheduler().Wait(tick); 1175 impl->copies_setup.clear();
122 } 1176 impl->copies_setup.resize(impl->little_cache.size());
123 u64 data; 1177 if constexpr (SyncValuesType::GeneratesBaseBuffer) {
124 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( 1178 ref = impl->staging_pool.Request(total_size, MemoryUsage::Upload);
125 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 1179 size_t current_offset = ref.offset;
126 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); 1180 size_t accumulated_size = 0;
127 1181 for (size_t i = 0; i < values.size(); i++) {
128 switch (query_result) { 1182 size_t which_copy = impl->redirect_cache[i];
129 case VK_SUCCESS: 1183 impl->copies_setup[which_copy].emplace_back(VkBufferCopy{
130 return data; 1184 .srcOffset = current_offset + accumulated_size,
131 case VK_ERROR_DEVICE_LOST: 1185 .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address -
132 cache.GetDevice().ReportLoss(); 1186 impl->little_cache[which_copy].first,
133 [[fallthrough]]; 1187 .size = values[i].size,
134 default: 1188 });
135 throw vk::Exception(query_result); 1189 std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value,
1190 values[i].size);
1191 accumulated_size += values[i].size;
1192 }
1193 src_buffer = ref.buffer;
1194 } else {
1195 for (size_t i = 0; i < values.size(); i++) {
1196 size_t which_copy = impl->redirect_cache[i];
1197 impl->copies_setup[which_copy].emplace_back(VkBufferCopy{
1198 .srcOffset = values[i].offset,
1199 .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address -
1200 impl->little_cache[which_copy].first,
1201 .size = values[i].size,
1202 });
1203 }
1204 src_buffer = base_src_buffer;
136 } 1205 }
1206
1207 impl->scheduler.RequestOutsideRenderPassOperationContext();
1208 impl->scheduler.Record([src_buffer, dst_buffers = std::move(impl->buffers_to_upload_to),
1209 vk_copies = std::move(impl->copies_setup)](vk::CommandBuffer cmdbuf) {
1210 size_t size = dst_buffers.size();
1211 for (size_t i = 0; i < size; i++) {
1212 cmdbuf.CopyBuffer(src_buffer, dst_buffers[i].first, vk_copies[i]);
1213 }
1214 });
137} 1215}
138 1216
139} // namespace Vulkan 1217} // namespace Vulkan
1218
1219namespace VideoCommon {
1220
1221template class QueryCacheBase<Vulkan::QueryCacheParams>;
1222
1223} // namespace VideoCommon
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index c1b9552eb..9ad2929d7 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -1,101 +1,74 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <cstddef>
7#include <memory> 6#include <memory>
8#include <utility>
9#include <vector>
10 7
11#include "common/common_types.h" 8#include "video_core/query_cache/query_cache_base.h"
12#include "video_core/query_cache.h" 9#include "video_core/renderer_vulkan/vk_buffer_cache.h"
13#include "video_core/renderer_vulkan/vk_resource_pool.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 10
16namespace VideoCore { 11namespace VideoCore {
17class RasterizerInterface; 12class RasterizerInterface;
18} 13}
19 14
15namespace VideoCommon {
16class StreamerInterface;
17}
18
20namespace Vulkan { 19namespace Vulkan {
21 20
22class CachedQuery;
23class Device; 21class Device;
24class HostCounter;
25class QueryCache;
26class Scheduler; 22class Scheduler;
23class StagingBufferPool;
27 24
28using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; 25struct QueryCacheRuntimeImpl;
29 26
30class QueryPool final : public ResourcePool { 27class QueryCacheRuntime {
31public: 28public:
32 explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); 29 explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
33 ~QueryPool() override; 30 Core::Memory::Memory& cpu_memory_,
31 Vulkan::BufferCache& buffer_cache_, const Device& device_,
32 const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
33 StagingBufferPool& staging_pool_,
34 ComputePassDescriptorQueue& compute_pass_descriptor_queue,
35 DescriptorPool& descriptor_pool);
36 ~QueryCacheRuntime();
34 37
35 std::pair<VkQueryPool, u32> Commit(); 38 template <typename SyncValuesType>
39 void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr);
36 40
37 void Reserve(std::pair<VkQueryPool, u32> query); 41 void Barriers(bool is_prebarrier);
38 42
39protected: 43 void EndHostConditionalRendering();
40 void Allocate(std::size_t begin, std::size_t end) override;
41 44
42private: 45 void PauseHostConditionalRendering();
43 static constexpr std::size_t GROW_STEP = 512;
44 46
45 const Device& device; 47 void ResumeHostConditionalRendering();
46 const VideoCore::QueryType type;
47 48
48 std::vector<vk::QueryPool> pools; 49 bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
49 std::vector<bool> usage;
50};
51 50
52class QueryCache final 51 bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
53 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 52 VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check);
54public:
55 explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, const Device& device_,
57 Scheduler& scheduler_);
58 ~QueryCache();
59
60 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
61 53
62 void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); 54 VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
63 55
64 const Device& GetDevice() const noexcept { 56 void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d);
65 return device;
66 }
67 57
68 Scheduler& GetScheduler() const noexcept { 58 template <typename Func>
69 return scheduler; 59 void View3DRegs(Func&& func);
70 }
71 60
72private: 61private:
73 const Device& device; 62 void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal);
74 Scheduler& scheduler; 63 void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal);
75 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; 64 friend struct QueryCacheRuntimeImpl;
65 std::unique_ptr<QueryCacheRuntimeImpl> impl;
76}; 66};
77 67
78class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { 68struct QueryCacheParams {
79public: 69 using RuntimeType = Vulkan::QueryCacheRuntime;
80 explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
81 VideoCore::QueryType type_);
82 ~HostCounter();
83
84 void EndQuery();
85
86private:
87 u64 BlockingQuery(bool async = false) const override;
88
89 QueryCache& cache;
90 const VideoCore::QueryType type;
91 const std::pair<VkQueryPool, u32> query;
92 const u64 tick;
93}; 70};
94 71
95class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { 72using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
96public:
97 explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_)
98 : CachedQueryBase{cpu_addr_, host_ptr_} {}
99};
100 73
101} // namespace Vulkan 74} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 01e76a82c..e8862ba04 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,6 +24,7 @@
24#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 24#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
25#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 25#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
26#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
27#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_rasterizer.h" 28#include "video_core/renderer_vulkan/vk_rasterizer.h"
28#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
29#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 30#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
170 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 171 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
171 guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), 172 guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
172 buffer_cache(*this, cpu_memory_, buffer_cache_runtime), 173 buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
174 query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler,
175 staging_pool, compute_pass_descriptor_queue, descriptor_pool),
176 query_cache(gpu, *this, cpu_memory_, query_cache_runtime),
173 pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, 177 pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
174 render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), 178 render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
175 query_cache{*this, cpu_memory_, device, scheduler},
176 accelerate_dma(buffer_cache, texture_cache, scheduler), 179 accelerate_dma(buffer_cache, texture_cache, scheduler),
177 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 180 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
178 wfi_event(device.GetLogical().CreateEvent()) { 181 wfi_event(device.GetLogical().CreateEvent()) {
@@ -189,13 +192,15 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
189 FlushWork(); 192 FlushWork();
190 gpu_memory->FlushCaching(); 193 gpu_memory->FlushCaching();
191 194
195 query_cache.NotifySegment(true);
196
192#if ANDROID 197#if ANDROID
193 if (Settings::IsGPULevelHigh()) { 198 if (Settings::IsGPULevelHigh()) {
194 // This is problematic on Android, disable on GPU Normal. 199 // This is problematic on Android, disable on GPU Normal.
195 query_cache.UpdateCounters(); 200 // query_cache.UpdateCounters();
196 } 201 }
197#else 202#else
198 query_cache.UpdateCounters(); 203 // query_cache.UpdateCounters();
199#endif 204#endif
200 205
201 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; 206 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
@@ -207,13 +212,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
207 pipeline->SetEngine(maxwell3d, gpu_memory); 212 pipeline->SetEngine(maxwell3d, gpu_memory);
208 pipeline->Configure(is_indexed); 213 pipeline->Configure(is_indexed);
209 214
210 BeginTransformFeedback();
211
212 UpdateDynamicStates(); 215 UpdateDynamicStates();
213 216
217 HandleTransformFeedback();
218 query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
219 maxwell3d->regs.zpass_pixel_count_enable);
214 draw_func(); 220 draw_func();
215
216 EndTransformFeedback();
217} 221}
218 222
219void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { 223void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@@ -241,6 +245,14 @@ void RasterizerVulkan::DrawIndirect() {
241 const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); 245 const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
242 const auto& buffer = indirect_buffer.first; 246 const auto& buffer = indirect_buffer.first;
243 const auto& offset = indirect_buffer.second; 247 const auto& offset = indirect_buffer.second;
248 if (params.is_byte_count) {
249 scheduler.Record([buffer_obj = buffer->Handle(), offset,
250 stride = params.stride](vk::CommandBuffer cmdbuf) {
251 cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0,
252 static_cast<u32>(stride));
253 });
254 return;
255 }
244 if (params.include_count) { 256 if (params.include_count) {
245 const auto count = buffer_cache.GetDrawIndirectCount(); 257 const auto count = buffer_cache.GetDrawIndirectCount();
246 const auto& draw_buffer = count.first; 258 const auto& draw_buffer = count.first;
@@ -280,13 +292,15 @@ void RasterizerVulkan::DrawTexture() {
280 SCOPE_EXIT({ gpu.TickWork(); }); 292 SCOPE_EXIT({ gpu.TickWork(); });
281 FlushWork(); 293 FlushWork();
282 294
295 query_cache.NotifySegment(true);
296
283#if ANDROID 297#if ANDROID
284 if (Settings::IsGPULevelHigh()) { 298 if (Settings::IsGPULevelHigh()) {
285 // This is problematic on Android, disable on GPU Normal. 299 // This is problematic on Android, disable on GPU Normal.
286 query_cache.UpdateCounters(); 300 // query_cache.UpdateCounters();
287 } 301 }
288#else 302#else
289 query_cache.UpdateCounters(); 303 // query_cache.UpdateCounters();
290#endif 304#endif
291 305
292 texture_cache.SynchronizeGraphicsDescriptors(); 306 texture_cache.SynchronizeGraphicsDescriptors();
@@ -294,6 +308,8 @@ void RasterizerVulkan::DrawTexture() {
294 308
295 UpdateDynamicStates(); 309 UpdateDynamicStates();
296 310
311 query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
312 maxwell3d->regs.zpass_pixel_count_enable);
297 const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); 313 const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
298 const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); 314 const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
299 const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); 315 const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
@@ -319,12 +335,16 @@ void RasterizerVulkan::Clear(u32 layer_count) {
319#if ANDROID 335#if ANDROID
320 if (Settings::IsGPULevelHigh()) { 336 if (Settings::IsGPULevelHigh()) {
321 // This is problematic on Android, disable on GPU Normal. 337 // This is problematic on Android, disable on GPU Normal.
322 query_cache.UpdateCounters(); 338 // query_cache.UpdateCounters();
323 } 339 }
324#else 340#else
325 query_cache.UpdateCounters(); 341 // query_cache.UpdateCounters();
326#endif 342#endif
327 343
344 query_cache.NotifySegment(true);
345 query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
346 maxwell3d->regs.zpass_pixel_count_enable);
347
328 auto& regs = maxwell3d->regs; 348 auto& regs = maxwell3d->regs;
329 const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || 349 const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
330 regs.clear_surface.A; 350 regs.clear_surface.A;
@@ -482,13 +502,13 @@ void RasterizerVulkan::DispatchCompute() {
482 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); 502 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
483} 503}
484 504
485void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 505void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
486 query_cache.ResetCounter(type); 506 query_cache.CounterReset(type);
487} 507}
488 508
489void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, 509void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
490 std::optional<u64> timestamp) { 510 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
491 query_cache.Query(gpu_addr, type, timestamp); 511 query_cache.CounterReport(gpu_addr, type, flags, payload, subreport);
492} 512}
493 513
494void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 514void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
@@ -669,8 +689,8 @@ void RasterizerVulkan::SignalReference() {
669 fence_manager.SignalReference(); 689 fence_manager.SignalReference();
670} 690}
671 691
672void RasterizerVulkan::ReleaseFences() { 692void RasterizerVulkan::ReleaseFences(bool force) {
673 fence_manager.WaitPendingFences(); 693 fence_manager.WaitPendingFences(force);
674} 694}
675 695
676void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, 696void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
@@ -694,6 +714,8 @@ void RasterizerVulkan::WaitForIdle() {
694 flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; 714 flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
695 } 715 }
696 716
717 query_cache.NotifyWFI();
718
697 scheduler.RequestOutsideRenderPassOperationContext(); 719 scheduler.RequestOutsideRenderPassOperationContext();
698 scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { 720 scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
699 cmdbuf.SetEvent(event, flags); 721 cmdbuf.SetEvent(event, flags);
@@ -737,19 +759,7 @@ void RasterizerVulkan::TickFrame() {
737 759
738bool RasterizerVulkan::AccelerateConditionalRendering() { 760bool RasterizerVulkan::AccelerateConditionalRendering() {
739 gpu_memory->FlushCaching(); 761 gpu_memory->FlushCaching();
740 if (Settings::IsGPULevelHigh()) { 762 return query_cache.AccelerateHostConditionalRendering();
741 // TODO(Blinkhawk): Reimplement Host conditional rendering.
742 return false;
743 }
744 // Medium / Low Hack: stub any checks on queries written into the buffer cache.
745 const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
746 Maxwell::ReportSemaphore::Compare cmp;
747 if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
748 VideoCommon::CacheType::BufferCache |
749 VideoCommon::CacheType::QueryCache)) {
750 return true;
751 }
752 return false;
753} 763}
754 764
755bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, 765bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
@@ -795,6 +805,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
795 if (!image_view) { 805 if (!image_view) {
796 return false; 806 return false;
797 } 807 }
808 query_cache.NotifySegment(false);
798 screen_info.image = image_view->ImageHandle(); 809 screen_info.image = image_view->ImageHandle();
799 screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); 810 screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
800 screen_info.width = image_view->size.width; 811 screen_info.width = image_view->size.width;
@@ -933,31 +944,18 @@ void RasterizerVulkan::UpdateDynamicStates() {
933 } 944 }
934} 945}
935 946
936void RasterizerVulkan::BeginTransformFeedback() { 947void RasterizerVulkan::HandleTransformFeedback() {
937 const auto& regs = maxwell3d->regs; 948 const auto& regs = maxwell3d->regs;
938 if (regs.transform_feedback_enabled == 0) {
939 return;
940 }
941 if (!device.IsExtTransformFeedbackSupported()) { 949 if (!device.IsExtTransformFeedbackSupported()) {
942 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); 950 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
943 return; 951 return;
944 } 952 }
945 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || 953 query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
946 regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); 954 regs.transform_feedback_enabled);
947 scheduler.Record( 955 if (regs.transform_feedback_enabled != 0) {
948 [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 956 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
949} 957 regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
950
951void RasterizerVulkan::EndTransformFeedback() {
952 const auto& regs = maxwell3d->regs;
953 if (regs.transform_feedback_enabled == 0) {
954 return;
955 }
956 if (!device.IsExtTransformFeedbackSupported()) {
957 return;
958 } 958 }
959 scheduler.Record(
960 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
961} 959}
962 960
963void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 961void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b31982485..ffd44c68d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -84,8 +84,8 @@ public:
84 void DrawTexture() override; 84 void DrawTexture() override;
85 void Clear(u32 layer_count) override; 85 void Clear(u32 layer_count) override;
86 void DispatchCompute() override; 86 void DispatchCompute() override;
87 void ResetCounter(VideoCore::QueryType type) override; 87 void ResetCounter(VideoCommon::QueryType type) override;
88 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 88 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
89 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 89 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
90 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 90 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
91 void FlushAll() override; 91 void FlushAll() override;
@@ -106,7 +106,7 @@ public:
106 void SyncOperation(std::function<void()>&& func) override; 106 void SyncOperation(std::function<void()>&& func) override;
107 void SignalSyncPoint(u32 value) override; 107 void SignalSyncPoint(u32 value) override;
108 void SignalReference() override; 108 void SignalReference() override;
109 void ReleaseFences() override; 109 void ReleaseFences(bool force = true) override;
110 void FlushAndInvalidateRegion( 110 void FlushAndInvalidateRegion(
111 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 111 VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
112 void WaitForIdle() override; 112 void WaitForIdle() override;
@@ -146,9 +146,7 @@ private:
146 146
147 void UpdateDynamicStates(); 147 void UpdateDynamicStates();
148 148
149 void BeginTransformFeedback(); 149 void HandleTransformFeedback();
150
151 void EndTransformFeedback();
152 150
153 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 151 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
154 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 152 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -195,8 +193,9 @@ private:
195 TextureCache texture_cache; 193 TextureCache texture_cache;
196 BufferCacheRuntime buffer_cache_runtime; 194 BufferCacheRuntime buffer_cache_runtime;
197 BufferCache buffer_cache; 195 BufferCache buffer_cache;
198 PipelineCache pipeline_cache; 196 QueryCacheRuntime query_cache_runtime;
199 QueryCache query_cache; 197 QueryCache query_cache;
198 PipelineCache pipeline_cache;
200 AccelerateDMA accelerate_dma; 199 AccelerateDMA accelerate_dma;
201 FenceManager fence_manager; 200 FenceManager fence_manager;
202 201
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 89fd31b4f..3be7837f4 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() {
243#if ANDROID 243#if ANDROID
244 if (Settings::IsGPULevelHigh()) { 244 if (Settings::IsGPULevelHigh()) {
245 // This is problematic on Android, disable on GPU Normal. 245 // This is problematic on Android, disable on GPU Normal.
246 query_cache->UpdateCounters(); 246 query_cache->NotifySegment(true);
247 } 247 }
248#else 248#else
249 query_cache->UpdateCounters(); 249 query_cache->NotifySegment(true);
250#endif 250#endif
251 } 251 }
252} 252}
@@ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() {
261#if ANDROID 261#if ANDROID
262 if (Settings::IsGPULevelHigh()) { 262 if (Settings::IsGPULevelHigh()) {
263 // This is problematic on Android, disable on GPU Normal. 263 // This is problematic on Android, disable on GPU Normal.
264 query_cache->DisableStreams(); 264 // query_cache->DisableStreams();
265 } 265 }
266#else 266#else
267 query_cache->DisableStreams(); 267 // query_cache->DisableStreams();
268#endif 268#endif
269 query_cache->NotifySegment(false);
269 EndRenderPass(); 270 EndRenderPass();
270} 271}
271 272
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 475c682eb..c87e5fb07 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/polyfill_thread.h" 16#include "common/polyfill_thread.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h" 17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
18#include "video_core/renderer_vulkan/vk_query_cache.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -24,7 +25,6 @@ class Device;
24class Framebuffer; 25class Framebuffer;
25class GraphicsPipeline; 26class GraphicsPipeline;
26class StateTracker; 27class StateTracker;
27class QueryCache;
28 28
29/// The scheduler abstracts command buffer and fence management with an interface that's able to do 29/// The scheduler abstracts command buffer and fence management with an interface that's able to do
30/// OpenGL-like operations on Vulkan command buffers. 30/// OpenGL-like operations on Vulkan command buffers.
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 6c7fa34e5..16f0425be 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -61,6 +61,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
61 61
62// Define miscellaneous extensions which may be used by the implementation here. 62// Define miscellaneous extensions which may be used by the implementation here.
63#define FOR_EACH_VK_EXTENSION(EXTENSION) \ 63#define FOR_EACH_VK_EXTENSION(EXTENSION) \
64 EXTENSION(EXT, CONDITIONAL_RENDERING, conditional_rendering) \
64 EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ 65 EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \
65 EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ 66 EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \
66 EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ 67 EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \
@@ -93,6 +94,7 @@ VK_DEFINE_HANDLE(VmaAllocator)
93 94
94// Define extensions where the absence of the extension may result in a degraded experience. 95// Define extensions where the absence of the extension may result in a degraded experience.
95#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ 96#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \
97 EXTENSION_NAME(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME) \
96 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ 98 EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \
97 EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ 99 EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \
98 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ 100 EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \
@@ -536,6 +538,10 @@ public:
536 return extensions.shader_atomic_int64; 538 return extensions.shader_atomic_int64;
537 } 539 }
538 540
541 bool IsExtConditionalRendering() const {
542 return extensions.conditional_rendering;
543 }
544
539 bool HasTimelineSemaphore() const; 545 bool HasTimelineSemaphore() const;
540 546
541 /// Returns the minimum supported version of SPIR-V. 547 /// Returns the minimum supported version of SPIR-V.
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index c3f388d89..5a08a92e1 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -75,6 +75,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
75 X(vkBeginCommandBuffer); 75 X(vkBeginCommandBuffer);
76 X(vkBindBufferMemory); 76 X(vkBindBufferMemory);
77 X(vkBindImageMemory); 77 X(vkBindImageMemory);
78 X(vkCmdBeginConditionalRenderingEXT);
78 X(vkCmdBeginQuery); 79 X(vkCmdBeginQuery);
79 X(vkCmdBeginRenderPass); 80 X(vkCmdBeginRenderPass);
80 X(vkCmdBeginTransformFeedbackEXT); 81 X(vkCmdBeginTransformFeedbackEXT);
@@ -91,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
91 X(vkCmdCopyBufferToImage); 92 X(vkCmdCopyBufferToImage);
92 X(vkCmdCopyImage); 93 X(vkCmdCopyImage);
93 X(vkCmdCopyImageToBuffer); 94 X(vkCmdCopyImageToBuffer);
95 X(vkCmdCopyQueryPoolResults);
94 X(vkCmdDispatch); 96 X(vkCmdDispatch);
95 X(vkCmdDispatchIndirect); 97 X(vkCmdDispatchIndirect);
96 X(vkCmdDraw); 98 X(vkCmdDraw);
@@ -99,6 +101,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
99 X(vkCmdDrawIndexedIndirect); 101 X(vkCmdDrawIndexedIndirect);
100 X(vkCmdDrawIndirectCount); 102 X(vkCmdDrawIndirectCount);
101 X(vkCmdDrawIndexedIndirectCount); 103 X(vkCmdDrawIndexedIndirectCount);
104 X(vkCmdEndConditionalRenderingEXT);
102 X(vkCmdEndQuery); 105 X(vkCmdEndQuery);
103 X(vkCmdEndRenderPass); 106 X(vkCmdEndRenderPass);
104 X(vkCmdEndTransformFeedbackEXT); 107 X(vkCmdEndTransformFeedbackEXT);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 049fa8038..27d94a7d5 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -185,6 +185,7 @@ struct DeviceDispatch : InstanceDispatch {
185 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; 185 PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
186 PFN_vkBindBufferMemory vkBindBufferMemory{}; 186 PFN_vkBindBufferMemory vkBindBufferMemory{};
187 PFN_vkBindImageMemory vkBindImageMemory{}; 187 PFN_vkBindImageMemory vkBindImageMemory{};
188 PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT{};
188 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; 189 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
189 PFN_vkCmdBeginQuery vkCmdBeginQuery{}; 190 PFN_vkCmdBeginQuery vkCmdBeginQuery{};
190 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; 191 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
@@ -202,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch {
202 PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; 203 PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{};
203 PFN_vkCmdCopyImage vkCmdCopyImage{}; 204 PFN_vkCmdCopyImage vkCmdCopyImage{};
204 PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; 205 PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{};
206 PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults{};
205 PFN_vkCmdDispatch vkCmdDispatch{}; 207 PFN_vkCmdDispatch vkCmdDispatch{};
206 PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; 208 PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{};
207 PFN_vkCmdDraw vkCmdDraw{}; 209 PFN_vkCmdDraw vkCmdDraw{};
@@ -210,6 +212,7 @@ struct DeviceDispatch : InstanceDispatch {
210 PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; 212 PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{};
211 PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; 213 PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{};
212 PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; 214 PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{};
215 PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
213 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; 216 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
214 PFN_vkCmdEndQuery vkCmdEndQuery{}; 217 PFN_vkCmdEndQuery vkCmdEndQuery{};
215 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; 218 PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
@@ -1270,6 +1273,13 @@ public:
1270 regions.data()); 1273 regions.data());
1271 } 1274 }
1272 1275
1276 void CopyQueryPoolResults(VkQueryPool query_pool, u32 first_query, u32 query_count,
1277 VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize stride,
1278 VkQueryResultFlags flags) const noexcept {
1279 dld->vkCmdCopyQueryPoolResults(handle, query_pool, first_query, query_count, dst_buffer,
1280 dst_offset, stride, flags);
1281 }
1282
1273 void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, 1283 void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size,
1274 u32 data) const noexcept { 1284 u32 data) const noexcept {
1275 dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); 1285 dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data);
@@ -1448,6 +1458,15 @@ public:
1448 counter_buffers, counter_buffer_offsets); 1458 counter_buffers, counter_buffer_offsets);
1449 } 1459 }
1450 1460
1461 void BeginConditionalRenderingEXT(
1462 const VkConditionalRenderingBeginInfoEXT& info) const noexcept {
1463 dld->vkCmdBeginConditionalRenderingEXT(handle, &info);
1464 }
1465
1466 void EndConditionalRenderingEXT() const noexcept {
1467 dld->vkCmdEndConditionalRenderingEXT(handle);
1468 }
1469
1451 void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { 1470 void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept {
1452 const VkDebugUtilsLabelEXT label_info{ 1471 const VkDebugUtilsLabelEXT label_info{
1453 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, 1472 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,