diff options
Diffstat (limited to 'src')
35 files changed, 1555 insertions, 337 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 4ecaf550b..3fde3cae6 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -130,13 +130,17 @@ void LogSettings() { | |||
| 130 | log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); | 130 | log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | void UpdateGPUAccuracy() { | ||
| 134 | values.current_gpu_accuracy = values.gpu_accuracy.GetValue(); | ||
| 135 | } | ||
| 136 | |||
| 133 | bool IsGPULevelExtreme() { | 137 | bool IsGPULevelExtreme() { |
| 134 | return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme; | 138 | return values.current_gpu_accuracy == GpuAccuracy::Extreme; |
| 135 | } | 139 | } |
| 136 | 140 | ||
| 137 | bool IsGPULevelHigh() { | 141 | bool IsGPULevelHigh() { |
| 138 | return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme || | 142 | return values.current_gpu_accuracy == GpuAccuracy::Extreme || |
| 139 | values.gpu_accuracy.GetValue() == GpuAccuracy::High; | 143 | values.current_gpu_accuracy == GpuAccuracy::High; |
| 140 | } | 144 | } |
| 141 | 145 | ||
| 142 | bool IsFastmemEnabled() { | 146 | bool IsFastmemEnabled() { |
diff --git a/src/common/settings.h b/src/common/settings.h index 82ec9077e..ae5e5d2b8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -307,6 +307,7 @@ struct Values { | |||
| 307 | Specialization::Default, | 307 | Specialization::Default, |
| 308 | true, | 308 | true, |
| 309 | true}; | 309 | true}; |
| 310 | GpuAccuracy current_gpu_accuracy{GpuAccuracy::High}; | ||
| 310 | SwitchableSetting<AnisotropyMode, true> max_anisotropy{ | 311 | SwitchableSetting<AnisotropyMode, true> max_anisotropy{ |
| 311 | linkage, AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, | 312 | linkage, AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, |
| 312 | "max_anisotropy", Category::RendererAdvanced}; | 313 | "max_anisotropy", Category::RendererAdvanced}; |
| @@ -522,6 +523,7 @@ struct Values { | |||
| 522 | 523 | ||
| 523 | extern Values values; | 524 | extern Values values; |
| 524 | 525 | ||
| 526 | void UpdateGPUAccuracy(); | ||
| 525 | bool IsGPULevelExtreme(); | 527 | bool IsGPULevelExtreme(); |
| 526 | bool IsGPULevelHigh(); | 528 | bool IsGPULevelHigh(); |
| 527 | 529 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 8be7bd594..f91b7d1e4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -272,13 +272,20 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 272 | if (!cpu_addr) { | 272 | if (!cpu_addr) { |
| 273 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | 273 | return {&slot_buffers[NULL_BUFFER_ID], 0}; |
| 274 | } | 274 | } |
| 275 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | 275 | return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); |
| 276 | } | ||
| 277 | |||
| 278 | template <class P> | ||
| 279 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, | ||
| 280 | ObtainBufferSynchronize sync_info, | ||
| 281 | ObtainBufferOperation post_op) { | ||
| 282 | const BufferId buffer_id = FindBuffer(cpu_addr, size); | ||
| 276 | Buffer& buffer = slot_buffers[buffer_id]; | 283 | Buffer& buffer = slot_buffers[buffer_id]; |
| 277 | 284 | ||
| 278 | // synchronize op | 285 | // synchronize op |
| 279 | switch (sync_info) { | 286 | switch (sync_info) { |
| 280 | case ObtainBufferSynchronize::FullSynchronize: | 287 | case ObtainBufferSynchronize::FullSynchronize: |
| 281 | SynchronizeBuffer(buffer, *cpu_addr, size); | 288 | SynchronizeBuffer(buffer, cpu_addr, size); |
| 282 | break; | 289 | break; |
| 283 | default: | 290 | default: |
| 284 | break; | 291 | break; |
| @@ -286,11 +293,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 286 | 293 | ||
| 287 | switch (post_op) { | 294 | switch (post_op) { |
| 288 | case ObtainBufferOperation::MarkAsWritten: | 295 | case ObtainBufferOperation::MarkAsWritten: |
| 289 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | 296 | MarkWrittenBuffer(buffer_id, cpu_addr, size); |
| 290 | break; | 297 | break; |
| 291 | case ObtainBufferOperation::DiscardWrite: { | 298 | case ObtainBufferOperation::DiscardWrite: { |
| 292 | VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); | 299 | VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); |
| 293 | VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); | 300 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); |
| 294 | IntervalType interval{cpu_addr_start, cpu_addr_end}; | 301 | IntervalType interval{cpu_addr_start, cpu_addr_end}; |
| 295 | ClearDownload(interval); | 302 | ClearDownload(interval); |
| 296 | common_ranges.subtract(interval); | 303 | common_ranges.subtract(interval); |
| @@ -300,7 +307,7 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 300 | break; | 307 | break; |
| 301 | } | 308 | } |
| 302 | 309 | ||
| 303 | return {&buffer, buffer.Offset(*cpu_addr)}; | 310 | return {&buffer, buffer.Offset(cpu_addr)}; |
| 304 | } | 311 | } |
| 305 | 312 | ||
| 306 | template <class P> | 313 | template <class P> |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 0b7135d49..9507071e5 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -295,6 +295,10 @@ public: | |||
| 295 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | 295 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, |
| 296 | ObtainBufferSynchronize sync_info, | 296 | ObtainBufferSynchronize sync_info, |
| 297 | ObtainBufferOperation post_op); | 297 | ObtainBufferOperation post_op); |
| 298 | |||
| 299 | [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | ||
| 300 | ObtainBufferSynchronize sync_info, | ||
| 301 | ObtainBufferOperation post_op); | ||
| 298 | void FlushCachedWrites(); | 302 | void FlushCachedWrites(); |
| 299 | 303 | ||
| 300 | /// Return true when there are uncommitted buffers to be downloaded | 304 | /// Return true when there are uncommitted buffers to be downloaded |
| @@ -335,6 +339,14 @@ public: | |||
| 335 | 339 | ||
| 336 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | 340 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); |
| 337 | 341 | ||
| 342 | template <typename Func> | ||
| 343 | void BufferOperations(Func&& func) { | ||
| 344 | do { | ||
| 345 | channel_state->has_deleted_buffers = false; | ||
| 346 | func(); | ||
| 347 | } while (channel_state->has_deleted_buffers); | ||
| 348 | } | ||
| 349 | |||
| 338 | std::recursive_mutex mutex; | 350 | std::recursive_mutex mutex; |
| 339 | Runtime& runtime; | 351 | Runtime& runtime; |
| 340 | 352 | ||
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 46bc9e322..5574e1fba 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h | |||
| @@ -51,7 +51,7 @@ public: | |||
| 51 | virtual void CreateChannel(Tegra::Control::ChannelState& channel); | 51 | virtual void CreateChannel(Tegra::Control::ChannelState& channel); |
| 52 | 52 | ||
| 53 | /// Bind a channel for execution. | 53 | /// Bind a channel for execution. |
| 54 | void BindToChannel(s32 id); | 54 | virtual void BindToChannel(s32 id); |
| 55 | 55 | ||
| 56 | /// Erase channel's state. | 56 | /// Erase channel's state. |
| 57 | void EraseChannel(s32 id); | 57 | void EraseChannel(s32 id); |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 06e349e43..922c399e6 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -20,8 +20,6 @@ | |||
| 20 | 20 | ||
| 21 | namespace Tegra::Engines { | 21 | namespace Tegra::Engines { |
| 22 | 22 | ||
| 23 | using VideoCore::QueryType; | ||
| 24 | |||
| 25 | /// First register id that is actually a Macro call. | 23 | /// First register id that is actually a Macro call. |
| 26 | constexpr u32 MacroRegistersStart = 0xE00; | 24 | constexpr u32 MacroRegistersStart = 0xE00; |
| 27 | 25 | ||
| @@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 500 | } | 498 | } |
| 501 | 499 | ||
| 502 | void Maxwell3D::ProcessQueryGet() { | 500 | void Maxwell3D::ProcessQueryGet() { |
| 501 | VideoCommon::QueryPropertiesFlags flags{}; | ||
| 502 | if (regs.report_semaphore.query.short_query == 0) { | ||
| 503 | flags |= VideoCommon::QueryPropertiesFlags::HasTimeout; | ||
| 504 | } | ||
| 505 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||
| 506 | const VideoCommon::QueryType query_type = | ||
| 507 | static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value()); | ||
| 508 | const u32 payload = regs.report_semaphore.payload; | ||
| 509 | const u32 subreport = regs.report_semaphore.query.sub_report; | ||
| 503 | switch (regs.report_semaphore.query.operation) { | 510 | switch (regs.report_semaphore.query.operation) { |
| 504 | case Regs::ReportSemaphore::Operation::Release: | 511 | case Regs::ReportSemaphore::Operation::Release: |
| 505 | if (regs.report_semaphore.query.short_query != 0) { | 512 | if (regs.report_semaphore.query.short_query != 0) { |
| 506 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | 513 | flags |= VideoCommon::QueryPropertiesFlags::IsAFence; |
| 507 | const u32 payload = regs.report_semaphore.payload; | ||
| 508 | std::function<void()> operation([this, sequence_address, payload] { | ||
| 509 | memory_manager.Write<u32>(sequence_address, payload); | ||
| 510 | }); | ||
| 511 | rasterizer->SignalFence(std::move(operation)); | ||
| 512 | } else { | ||
| 513 | struct LongQueryResult { | ||
| 514 | u64_le value; | ||
| 515 | u64_le timestamp; | ||
| 516 | }; | ||
| 517 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||
| 518 | const u32 payload = regs.report_semaphore.payload; | ||
| 519 | [this, sequence_address, payload] { | ||
| 520 | memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); | ||
| 521 | memory_manager.Write<u64>(sequence_address, payload); | ||
| 522 | }(); | ||
| 523 | } | 514 | } |
| 515 | rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||
| 524 | break; | 516 | break; |
| 525 | case Regs::ReportSemaphore::Operation::Acquire: | 517 | case Regs::ReportSemaphore::Operation::Acquire: |
| 526 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that | 518 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |
| @@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 528 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); | 520 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |
| 529 | break; | 521 | break; |
| 530 | case Regs::ReportSemaphore::Operation::ReportOnly: | 522 | case Regs::ReportSemaphore::Operation::ReportOnly: |
| 531 | if (const std::optional<u64> result = GetQueryResult()) { | 523 | rasterizer->Query(sequence_address, query_type, flags, payload, subreport); |
| 532 | // If the query returns an empty optional it means it's cached and deferred. | ||
| 533 | // In this case we have a non-empty result, so we stamp it immediately. | ||
| 534 | StampQueryResult(*result, regs.report_semaphore.query.short_query == 0); | ||
| 535 | } | ||
| 536 | break; | 524 | break; |
| 537 | case Regs::ReportSemaphore::Operation::Trap: | 525 | case Regs::ReportSemaphore::Operation::Trap: |
| 538 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | 526 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); |
| @@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 544 | } | 532 | } |
| 545 | 533 | ||
| 546 | void Maxwell3D::ProcessQueryCondition() { | 534 | void Maxwell3D::ProcessQueryCondition() { |
| 535 | if (rasterizer->AccelerateConditionalRendering()) { | ||
| 536 | execute_on = true; | ||
| 537 | return; | ||
| 538 | } | ||
| 547 | const GPUVAddr condition_address{regs.render_enable.Address()}; | 539 | const GPUVAddr condition_address{regs.render_enable.Address()}; |
| 548 | switch (regs.render_enable_override) { | 540 | switch (regs.render_enable_override) { |
| 549 | case Regs::RenderEnable::Override::AlwaysRender: | 541 | case Regs::RenderEnable::Override::AlwaysRender: |
| @@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 553 | execute_on = false; | 545 | execute_on = false; |
| 554 | break; | 546 | break; |
| 555 | case Regs::RenderEnable::Override::UseRenderEnable: { | 547 | case Regs::RenderEnable::Override::UseRenderEnable: { |
| 556 | if (rasterizer->AccelerateConditionalRendering()) { | ||
| 557 | execute_on = true; | ||
| 558 | return; | ||
| 559 | } | ||
| 560 | switch (regs.render_enable.mode) { | 548 | switch (regs.render_enable.mode) { |
| 561 | case Regs::RenderEnable::Mode::True: { | 549 | case Regs::RenderEnable::Mode::True: { |
| 562 | execute_on = true; | 550 | execute_on = true; |
| @@ -606,7 +594,13 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 606 | #endif | 594 | #endif |
| 607 | switch (regs.clear_report_value) { | 595 | switch (regs.clear_report_value) { |
| 608 | case Regs::ClearReport::ZPassPixelCount: | 596 | case Regs::ClearReport::ZPassPixelCount: |
| 609 | rasterizer->ResetCounter(QueryType::SamplesPassed); | 597 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); |
| 598 | break; | ||
| 599 | case Regs::ClearReport::PrimitivesGenerated: | ||
| 600 | rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||
| 601 | break; | ||
| 602 | case Regs::ClearReport::VtgPrimitivesOut: | ||
| 603 | rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); | ||
| 610 | break; | 604 | break; |
| 611 | default: | 605 | default: |
| 612 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | 606 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); |
| @@ -620,28 +614,6 @@ void Maxwell3D::ProcessSyncPoint() { | |||
| 620 | rasterizer->SignalSyncPoint(sync_point); | 614 | rasterizer->SignalSyncPoint(sync_point); |
| 621 | } | 615 | } |
| 622 | 616 | ||
| 623 | std::optional<u64> Maxwell3D::GetQueryResult() { | ||
| 624 | switch (regs.report_semaphore.query.report) { | ||
| 625 | case Regs::ReportSemaphore::Report::Payload: | ||
| 626 | return regs.report_semaphore.payload; | ||
| 627 | case Regs::ReportSemaphore::Report::ZPassPixelCount64: | ||
| 628 | #if ANDROID | ||
| 629 | if (!Settings::IsGPULevelHigh()) { | ||
| 630 | // This is problematic on Android, disable on GPU Normal. | ||
| 631 | return 120; | ||
| 632 | } | ||
| 633 | #endif | ||
| 634 | // Deferred. | ||
| 635 | rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, | ||
| 636 | system.GPU().GetTicks()); | ||
| 637 | return std::nullopt; | ||
| 638 | default: | ||
| 639 | LOG_DEBUG(HW_GPU, "Unimplemented query report type {}", | ||
| 640 | regs.report_semaphore.query.report.Value()); | ||
| 641 | return 1; | ||
| 642 | } | ||
| 643 | } | ||
| 644 | |||
| 645 | void Maxwell3D::ProcessCBBind(size_t stage_index) { | 617 | void Maxwell3D::ProcessCBBind(size_t stage_index) { |
| 646 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader | 618 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader |
| 647 | // stage. | 619 | // stage. |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 6c19354e1..17faacc37 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -3182,9 +3182,6 @@ private: | |||
| 3182 | /// Handles writes to syncing register. | 3182 | /// Handles writes to syncing register. |
| 3183 | void ProcessSyncPoint(); | 3183 | void ProcessSyncPoint(); |
| 3184 | 3184 | ||
| 3185 | /// Returns a query's value or an empty object if the value will be deferred through a cache. | ||
| 3186 | std::optional<u64> GetQueryResult(); | ||
| 3187 | |||
| 3188 | void RefreshParametersImpl(); | 3185 | void RefreshParametersImpl(); |
| 3189 | 3186 | ||
| 3190 | bool IsMethodExecutable(u32 method); | 3187 | bool IsMethodExecutable(u32 method); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 279f0daa1..422d4d859 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() { | |||
| 362 | const auto type = regs.launch_dma.semaphore_type; | 362 | const auto type = regs.launch_dma.semaphore_type; |
| 363 | const GPUVAddr address = regs.semaphore.address; | 363 | const GPUVAddr address = regs.semaphore.address; |
| 364 | const u32 payload = regs.semaphore.payload; | 364 | const u32 payload = regs.semaphore.payload; |
| 365 | VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence}; | ||
| 365 | switch (type) { | 366 | switch (type) { |
| 366 | case LaunchDMA::SemaphoreType::NONE: | 367 | case LaunchDMA::SemaphoreType::NONE: |
| 367 | break; | 368 | break; |
| 368 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { | 369 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { |
| 369 | std::function<void()> operation( | 370 | rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0); |
| 370 | [this, address, payload] { memory_manager.Write<u32>(address, payload); }); | ||
| 371 | rasterizer->SignalFence(std::move(operation)); | ||
| 372 | break; | 371 | break; |
| 373 | } | 372 | } |
| 374 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { | 373 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { |
| 375 | std::function<void()> operation([this, address, payload] { | 374 | rasterizer->Query(address, VideoCommon::QueryType::Payload, |
| 376 | memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); | 375 | flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); |
| 377 | memory_manager.Write<u64>(address, payload); | ||
| 378 | }); | ||
| 379 | rasterizer->SignalFence(std::move(operation)); | ||
| 380 | break; | 376 | break; |
| 381 | } | 377 | } |
| 382 | default: | 378 | default: |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 6de2543b7..582738234 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -82,10 +82,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 82 | if (op == GpuSemaphoreOperation::WriteLong) { | 82 | if (op == GpuSemaphoreOperation::WriteLong) { |
| 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 84 | const u32 payload = regs.semaphore_sequence; | 84 | const u32 payload = regs.semaphore_sequence; |
| 85 | [this, sequence_address, payload] { | 85 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); |
| 86 | memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); | ||
| 87 | memory_manager.Write<u64>(sequence_address, payload); | ||
| 88 | }(); | ||
| 89 | } else { | 86 | } else { |
| 90 | do { | 87 | do { |
| 91 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | 88 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |
| @@ -120,10 +117,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 120 | void Puller::ProcessSemaphoreRelease() { | 117 | void Puller::ProcessSemaphoreRelease() { |
| 121 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 118 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 122 | const u32 payload = regs.semaphore_release; | 119 | const u32 payload = regs.semaphore_release; |
| 123 | std::function<void()> operation([this, sequence_address, payload] { | 120 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); |
| 124 | memory_manager.Write<u32>(sequence_address, payload); | ||
| 125 | }); | ||
| 126 | rasterizer->SignalFence(std::move(operation)); | ||
| 127 | } | 121 | } |
| 128 | 122 | ||
| 129 | void Puller::ProcessSemaphoreAcquire() { | 123 | void Puller::ProcessSemaphoreAcquire() { |
| @@ -132,7 +126,6 @@ void Puller::ProcessSemaphoreAcquire() { | |||
| 132 | while (word != value) { | 126 | while (word != value) { |
| 133 | regs.acquire_active = true; | 127 | regs.acquire_active = true; |
| 134 | regs.acquire_value = value; | 128 | regs.acquire_value = value; |
| 135 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 136 | rasterizer->ReleaseFences(); | 129 | rasterizer->ReleaseFences(); |
| 137 | word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 130 | word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); |
| 138 | // TODO(kemathe73) figure out how to do the acquire_timeout | 131 | // TODO(kemathe73) figure out how to do the acquire_timeout |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index ab20ff30f..8459a3092 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -104,9 +104,28 @@ public: | |||
| 104 | SignalFence(std::move(func)); | 104 | SignalFence(std::move(func)); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | void WaitPendingFences() { | 107 | void WaitPendingFences(bool force) { |
| 108 | if constexpr (!can_async_check) { | 108 | if constexpr (!can_async_check) { |
| 109 | TryReleasePendingFences<true>(); | 109 | if (force) { |
| 110 | TryReleasePendingFences<true>(); | ||
| 111 | } else { | ||
| 112 | TryReleasePendingFences<false>(); | ||
| 113 | } | ||
| 114 | } else { | ||
| 115 | if (!force) { | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | std::mutex wait_mutex; | ||
| 119 | std::condition_variable wait_cv; | ||
| 120 | std::atomic<bool> wait_finished{}; | ||
| 121 | std::function<void()> func([&] { | ||
| 122 | std::scoped_lock lk(wait_mutex); | ||
| 123 | wait_finished.store(true, std::memory_order_relaxed); | ||
| 124 | wait_cv.notify_all(); | ||
| 125 | }); | ||
| 126 | SignalFence(std::move(func)); | ||
| 127 | std::unique_lock lk(wait_mutex); | ||
| 128 | wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); | ||
| 110 | } | 129 | } |
| 111 | } | 130 | } |
| 112 | 131 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index c192e33b2..11549d448 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -102,7 +102,8 @@ struct GPU::Impl { | |||
| 102 | 102 | ||
| 103 | /// Signal the ending of command list. | 103 | /// Signal the ending of command list. |
| 104 | void OnCommandListEnd() { | 104 | void OnCommandListEnd() { |
| 105 | rasterizer->ReleaseFences(); | 105 | rasterizer->ReleaseFences(false); |
| 106 | Settings::UpdateGPUAccuracy(); | ||
| 106 | } | 107 | } |
| 107 | 108 | ||
| 108 | /// Request a host GPU memory flush from the CPU. | 109 | /// Request a host GPU memory flush from the CPU. |
| @@ -220,6 +221,7 @@ struct GPU::Impl { | |||
| 220 | /// This can be used to launch any necessary threads and register any necessary | 221 | /// This can be used to launch any necessary threads and register any necessary |
| 221 | /// core timing events. | 222 | /// core timing events. |
| 222 | void Start() { | 223 | void Start() { |
| 224 | Settings::UpdateGPUAccuracy(); | ||
| 223 | gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); | 225 | gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); |
| 224 | } | 226 | } |
| 225 | 227 | ||
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c4d459077..fb24b6532 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -41,6 +41,7 @@ set(SHADER_FILES | |||
| 41 | pitch_unswizzle.comp | 41 | pitch_unswizzle.comp |
| 42 | present_bicubic.frag | 42 | present_bicubic.frag |
| 43 | present_gaussian.frag | 43 | present_gaussian.frag |
| 44 | resolve_conditional_render.comp | ||
| 44 | smaa_edge_detection.vert | 45 | smaa_edge_detection.vert |
| 45 | smaa_edge_detection.frag | 46 | smaa_edge_detection.frag |
| 46 | smaa_blending_weight_calculation.vert | 47 | smaa_blending_weight_calculation.vert |
diff --git a/src/video_core/host_shaders/resolve_conditional_render.comp b/src/video_core/host_shaders/resolve_conditional_render.comp new file mode 100644 index 000000000..307e77d1a --- /dev/null +++ b/src/video_core/host_shaders/resolve_conditional_render.comp | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #version 450 | ||
| 5 | |||
| 6 | layout(local_size_x = 1) in; | ||
| 7 | |||
| 8 | layout(std430, binding = 0) buffer Query { | ||
| 9 | uvec2 initial; | ||
| 10 | uvec2 unknown; | ||
| 11 | uvec2 current; | ||
| 12 | }; | ||
| 13 | |||
| 14 | layout(std430, binding = 1) buffer Result { | ||
| 15 | uint result; | ||
| 16 | }; | ||
| 17 | |||
| 18 | void main() { | ||
| 19 | result = all(equal(initial, current)) ? 1 : 0; | ||
| 20 | } | ||
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 6272a4652..e980af171 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -319,6 +319,25 @@ private: | |||
| 319 | } | 319 | } |
| 320 | }; | 320 | }; |
| 321 | 321 | ||
| 322 | class HLE_DrawIndirectByteCount final : public HLEMacroImpl { | ||
| 323 | public: | ||
| 324 | explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 325 | |||
| 326 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 327 | maxwell3d.RefreshParameters(); | ||
| 328 | |||
| 329 | maxwell3d.regs.draw.begin = parameters[0]; | ||
| 330 | maxwell3d.regs.draw_auto_stride = parameters[1]; | ||
| 331 | maxwell3d.regs.draw_auto_byte_count = parameters[2]; | ||
| 332 | |||
| 333 | if (maxwell3d.ShouldExecute()) { | ||
| 334 | maxwell3d.draw_manager->DrawArray( | ||
| 335 | maxwell3d.regs.draw.topology, 0, | ||
| 336 | maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); | ||
| 337 | } | ||
| 338 | } | ||
| 339 | }; | ||
| 340 | |||
| 322 | class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | 341 | class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { |
| 323 | public: | 342 | public: |
| 324 | explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | 343 | explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |
| @@ -536,6 +555,11 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | |||
| 536 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | 555 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
| 537 | return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); | 556 | return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); |
| 538 | })); | 557 | })); |
| 558 | builders.emplace(0xB5F74EDB717278ECULL, | ||
| 559 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | ||
| 560 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||
| 561 | return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__); | ||
| 562 | })); | ||
| 539 | } | 563 | } |
| 540 | 564 | ||
| 541 | HLEMacro::~HLEMacro() = default; | 565 | HLEMacro::~HLEMacro() = default; |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 7047e2e63..9fcaeeac7 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -25,6 +25,13 @@ | |||
| 25 | #include "video_core/rasterizer_interface.h" | 25 | #include "video_core/rasterizer_interface.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | 26 | #include "video_core/texture_cache/slot_vector.h" |
| 27 | 27 | ||
| 28 | namespace VideoCore { | ||
| 29 | enum class QueryType { | ||
| 30 | SamplesPassed, | ||
| 31 | }; | ||
| 32 | constexpr std::size_t NumQueryTypes = 1; | ||
| 33 | } // namespace VideoCore | ||
| 34 | |||
| 28 | namespace VideoCommon { | 35 | namespace VideoCommon { |
| 29 | 36 | ||
| 30 | using AsyncJobId = SlotId; | 37 | using AsyncJobId = SlotId; |
| @@ -98,10 +105,10 @@ private: | |||
| 98 | }; | 105 | }; |
| 99 | 106 | ||
| 100 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | 107 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> |
| 101 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 108 | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 102 | public: | 109 | public: |
| 103 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, | 110 | explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, |
| 104 | Core::Memory::Memory& cpu_memory_) | 111 | Core::Memory::Memory& cpu_memory_) |
| 105 | : rasterizer{rasterizer_}, | 112 | : rasterizer{rasterizer_}, |
| 106 | // Use reinterpret_cast instead of static_cast as workaround for | 113 | // Use reinterpret_cast instead of static_cast as workaround for |
| 107 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | 114 | // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cb8029a4f..2ba7cbb0d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/polyfill_thread.h" | 11 | #include "common/polyfill_thread.h" |
| 12 | #include "video_core/query_cache/types.h" | ||
| 12 | #include "video_core/cache_types.h" | 13 | #include "video_core/cache_types.h" |
| 13 | #include "video_core/engines/fermi_2d.h" | 14 | #include "video_core/engines/fermi_2d.h" |
| 14 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| @@ -26,11 +27,6 @@ struct ChannelState; | |||
| 26 | 27 | ||
| 27 | namespace VideoCore { | 28 | namespace VideoCore { |
| 28 | 29 | ||
| 29 | enum class QueryType { | ||
| 30 | SamplesPassed, | ||
| 31 | }; | ||
| 32 | constexpr std::size_t NumQueryTypes = 1; | ||
| 33 | |||
| 34 | enum class LoadCallbackStage { | 30 | enum class LoadCallbackStage { |
| 35 | Prepare, | 31 | Prepare, |
| 36 | Build, | 32 | Build, |
| @@ -58,10 +54,10 @@ public: | |||
| 58 | virtual void DispatchCompute() = 0; | 54 | virtual void DispatchCompute() = 0; |
| 59 | 55 | ||
| 60 | /// Resets the counter of a query | 56 | /// Resets the counter of a query |
| 61 | virtual void ResetCounter(QueryType type) = 0; | 57 | virtual void ResetCounter(VideoCommon::QueryType type) = 0; |
| 62 | 58 | ||
| 63 | /// Records a GPU query and caches it | 59 | /// Records a GPU query and caches it |
| 64 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 60 | virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; |
| 65 | 61 | ||
| 66 | /// Signal an uniform buffer binding | 62 | /// Signal an uniform buffer binding |
| 67 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 63 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| @@ -83,7 +79,7 @@ public: | |||
| 83 | virtual void SignalReference() = 0; | 79 | virtual void SignalReference() = 0; |
| 84 | 80 | ||
| 85 | /// Release all pending fences. | 81 | /// Release all pending fences. |
| 86 | virtual void ReleaseFences() = 0; | 82 | virtual void ReleaseFences(bool force = true) = 0; |
| 87 | 83 | ||
| 88 | /// Notify rasterizer that all caches should be flushed to Switch memory | 84 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 89 | virtual void FlushAll() = 0; | 85 | virtual void FlushAll() = 0; |
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 92ecf6682..65cd5aa06 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp | |||
| @@ -26,16 +26,18 @@ void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | |||
| 26 | void RasterizerNull::DrawTexture() {} | 26 | void RasterizerNull::DrawTexture() {} |
| 27 | void RasterizerNull::Clear(u32 layer_count) {} | 27 | void RasterizerNull::Clear(u32 layer_count) {} |
| 28 | void RasterizerNull::DispatchCompute() {} | 28 | void RasterizerNull::DispatchCompute() {} |
| 29 | void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} | 29 | void RasterizerNull::ResetCounter(VideoCommon::QueryType type) {} |
| 30 | void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | 30 | void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 31 | std::optional<u64> timestamp) { | 31 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 32 | if (!gpu_memory) { | 32 | if (!gpu_memory) { |
| 33 | return; | 33 | return; |
| 34 | } | 34 | } |
| 35 | 35 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | |
| 36 | gpu_memory->Write(gpu_addr, u64{0}); | 36 | u64 ticks = m_gpu.GetTicks(); |
| 37 | if (timestamp) { | 37 | gpu_memory->Write<u64>(gpu_addr + 8, ticks); |
| 38 | gpu_memory->Write(gpu_addr + 8, *timestamp); | 38 | gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); |
| 39 | } else { | ||
| 40 | gpu_memory->Write<u32>(gpu_addr, payload); | ||
| 39 | } | 41 | } |
| 40 | } | 42 | } |
| 41 | void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 43 | void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| @@ -74,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||
| 74 | syncpoint_manager.IncrementHost(value); | 76 | syncpoint_manager.IncrementHost(value); |
| 75 | } | 77 | } |
| 76 | void RasterizerNull::SignalReference() {} | 78 | void RasterizerNull::SignalReference() {} |
| 77 | void RasterizerNull::ReleaseFences() {} | 79 | void RasterizerNull::ReleaseFences(bool) {} |
| 78 | void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | 80 | void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} |
| 79 | void RasterizerNull::WaitForIdle() {} | 81 | void RasterizerNull::WaitForIdle() {} |
| 80 | void RasterizerNull::FragmentBarrier() {} | 82 | void RasterizerNull::FragmentBarrier() {} |
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 93b9a6971..57a8c4c85 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h | |||
| @@ -42,8 +42,8 @@ public: | |||
| 42 | void DrawTexture() override; | 42 | void DrawTexture() override; |
| 43 | void Clear(u32 layer_count) override; | 43 | void Clear(u32 layer_count) override; |
| 44 | void DispatchCompute() override; | 44 | void DispatchCompute() override; |
| 45 | void ResetCounter(VideoCore::QueryType type) override; | 45 | void ResetCounter(VideoCommon::QueryType type) override; |
| 46 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 46 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; |
| 47 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 47 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 48 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 48 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 49 | void FlushAll() override; | 49 | void FlushAll() override; |
| @@ -63,7 +63,7 @@ public: | |||
| 63 | void SyncOperation(std::function<void()>&& func) override; | 63 | void SyncOperation(std::function<void()>&& func) override; |
| 64 | void SignalSyncPoint(u32 value) override; | 64 | void SignalSyncPoint(u32 value) override; |
| 65 | void SignalReference() override; | 65 | void SignalReference() override; |
| 66 | void ReleaseFences() override; | 66 | void ReleaseFences(bool force) override; |
| 67 | void FlushAndInvalidateRegion( | 67 | void FlushAndInvalidateRegion( |
| 68 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 68 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 69 | void WaitForIdle() override; | 69 | void WaitForIdle() override; |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 99d7347f5..ec142d48e 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -27,7 +27,7 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 27 | } // Anonymous namespace | 27 | } // Anonymous namespace |
| 28 | 28 | ||
| 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | 29 | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) |
| 30 | : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | 30 | : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} |
| 31 | 31 | ||
| 32 | QueryCache::~QueryCache() = default; | 32 | QueryCache::~QueryCache() = default; |
| 33 | 33 | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 872513f22..0721e0b3d 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -26,7 +26,7 @@ class RasterizerOpenGL; | |||
| 26 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | 26 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; |
| 27 | 27 | ||
| 28 | class QueryCache final | 28 | class QueryCache final |
| 29 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 29 | : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { |
| 30 | public: | 30 | public: |
| 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | 31 | explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); |
| 32 | ~QueryCache(); | 32 | ~QueryCache(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd03efecd..a975bbe75 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -396,13 +396,31 @@ void RasterizerOpenGL::DispatchCompute() { | |||
| 396 | has_written_global_memory |= pipeline->WritesGlobalMemory(); | 396 | has_written_global_memory |= pipeline->WritesGlobalMemory(); |
| 397 | } | 397 | } |
| 398 | 398 | ||
| 399 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 399 | void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { |
| 400 | query_cache.ResetCounter(type); | 400 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { |
| 401 | query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); | ||
| 402 | } | ||
| 401 | } | 403 | } |
| 402 | 404 | ||
| 403 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | 405 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 404 | std::optional<u64> timestamp) { | 406 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 405 | query_cache.Query(gpu_addr, type, timestamp); | 407 | if (type == VideoCommon::QueryType::ZPassPixelCount64) { |
| 408 | std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout) | ||
| 409 | ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt }; | ||
| 410 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||
| 411 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | ||
| 412 | } else { | ||
| 413 | query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); | ||
| 414 | } | ||
| 415 | return; | ||
| 416 | } | ||
| 417 | if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||
| 418 | u64 ticks = gpu.GetTicks(); | ||
| 419 | gpu_memory->Write<u64>(gpu_addr + 8, ticks); | ||
| 420 | gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||
| 421 | } else { | ||
| 422 | gpu_memory->Write<u32>(gpu_addr, payload); | ||
| 423 | } | ||
| 406 | } | 424 | } |
| 407 | 425 | ||
| 408 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 426 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| @@ -573,8 +591,8 @@ void RasterizerOpenGL::SignalReference() { | |||
| 573 | fence_manager.SignalOrdering(); | 591 | fence_manager.SignalOrdering(); |
| 574 | } | 592 | } |
| 575 | 593 | ||
| 576 | void RasterizerOpenGL::ReleaseFences() { | 594 | void RasterizerOpenGL::ReleaseFences(bool force) { |
| 577 | fence_manager.WaitPendingFences(); | 595 | fence_manager.WaitPendingFences(force); |
| 578 | } | 596 | } |
| 579 | 597 | ||
| 580 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | 598 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8eda2ddba..05e048e15 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -86,8 +86,8 @@ public: | |||
| 86 | void DrawTexture() override; | 86 | void DrawTexture() override; |
| 87 | void Clear(u32 layer_count) override; | 87 | void Clear(u32 layer_count) override; |
| 88 | void DispatchCompute() override; | 88 | void DispatchCompute() override; |
| 89 | void ResetCounter(VideoCore::QueryType type) override; | 89 | void ResetCounter(VideoCommon::QueryType type) override; |
| 90 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 90 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; |
| 91 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 91 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 92 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 92 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 93 | void FlushAll() override; | 93 | void FlushAll() override; |
| @@ -107,7 +107,7 @@ public: | |||
| 107 | void SyncOperation(std::function<void()>&& func) override; | 107 | void SyncOperation(std::function<void()>&& func) override; |
| 108 | void SignalSyncPoint(u32 value) override; | 108 | void SignalSyncPoint(u32 value) override; |
| 109 | void SignalReference() override; | 109 | void SignalReference() override; |
| 110 | void ReleaseFences() override; | 110 | void ReleaseFences(bool force = true) override; |
| 111 | void FlushAndInvalidateRegion( | 111 | void FlushAndInvalidateRegion( |
| 112 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 112 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 113 | void WaitForIdle() override; | 113 | void WaitForIdle() override; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e15865d16..d8148e89a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| 61 | if (device.IsExtTransformFeedbackSupported()) { | 61 | if (device.IsExtTransformFeedbackSupported()) { |
| 62 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 62 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 63 | } | 63 | } |
| 64 | if (device.IsExtConditionalRendering()) { | ||
| 65 | flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; | ||
| 66 | } | ||
| 64 | const VkBufferCreateInfo buffer_ci = { | 67 | const VkBufferCreateInfo buffer_ci = { |
| 65 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 68 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 66 | .pNext = nullptr, | 69 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 54ee030ce..97cd4521d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" |
| 15 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | ||
| 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 16 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 17 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 18 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -302,6 +303,52 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 302 | return {staging.buffer, staging.offset}; | 303 | return {staging.buffer, staging.offset}; |
| 303 | } | 304 | } |
| 304 | 305 | ||
| 306 | ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, | ||
| 307 | Scheduler& scheduler_, | ||
| 308 | DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||
| 309 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||
| 310 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, | ||
| 311 | RESOLVE_CONDITIONAL_RENDER_COMP_SPV), | ||
| 312 | scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||
| 313 | |||
| 314 | void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, | ||
| 315 | u32 src_offset, bool compare_to_zero) { | ||
| 316 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 317 | |||
| 318 | const size_t compare_size = compare_to_zero ? 8 : 24; | ||
| 319 | |||
| 320 | compute_pass_descriptor_queue.Acquire(); | ||
| 321 | compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); | ||
| 322 | compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); | ||
| 323 | const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||
| 324 | |||
| 325 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 326 | scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||
| 327 | static constexpr VkMemoryBarrier read_barrier{ | ||
| 328 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 329 | .pNext = nullptr, | ||
| 330 | .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 331 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 332 | }; | ||
| 333 | static constexpr VkMemoryBarrier write_barrier{ | ||
| 334 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 335 | .pNext = nullptr, | ||
| 336 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 337 | .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||
| 338 | }; | ||
| 339 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 340 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||
| 341 | |||
| 342 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 343 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||
| 344 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 345 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 346 | cmdbuf.Dispatch(1, 1, 1); | ||
| 347 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 348 | VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier); | ||
| 349 | }); | ||
| 350 | } | ||
| 351 | |||
| 305 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 352 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
| 306 | DescriptorPool& descriptor_pool_, | 353 | DescriptorPool& descriptor_pool_, |
| 307 | StagingBufferPool& staging_buffer_pool_, | 354 | StagingBufferPool& staging_buffer_pool_, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index dd3927376..c62f30d30 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -82,6 +82,19 @@ private: | |||
| 82 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | 82 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; |
| 83 | }; | 83 | }; |
| 84 | 84 | ||
| 85 | class ConditionalRenderingResolvePass final : public ComputePass { | ||
| 86 | public: | ||
| 87 | explicit ConditionalRenderingResolvePass( | ||
| 88 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||
| 89 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||
| 90 | |||
| 91 | void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero); | ||
| 92 | |||
| 93 | private: | ||
| 94 | Scheduler& scheduler; | ||
| 95 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||
| 96 | }; | ||
| 97 | |||
| 85 | class ASTCDecoderPass final : public ComputePass { | 98 | class ASTCDecoderPass final : public ComputePass { |
| 86 | public: | 99 | public: |
| 87 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 100 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 145359d4e..14fc5ad71 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "video_core/fence_manager.h" | 8 | #include "video_core/fence_manager.h" |
| 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
| 13 | class System; | 14 | class System; |
| @@ -20,7 +21,6 @@ class RasterizerInterface; | |||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| 21 | 22 | ||
| 22 | class Device; | 23 | class Device; |
| 23 | class QueryCache; | ||
| 24 | class Scheduler; | 24 | class Scheduler; |
| 25 | 25 | ||
| 26 | class InnerFence : public VideoCommon::FenceBase { | 26 | class InnerFence : public VideoCommon::FenceBase { |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 29e0b797b..42f571007 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -1,139 +1,1223 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | ||
| 5 | #include <cstddef> | 4 | #include <cstddef> |
| 5 | #include <limits> | ||
| 6 | #include <map> | ||
| 7 | #include <memory> | ||
| 8 | #include <span> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 6 | #include <utility> | 11 | #include <utility> |
| 7 | #include <vector> | 12 | #include <vector> |
| 8 | 13 | ||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "video_core/query_cache/query_cache.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 23 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 25 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_device.h" | 27 | #include "video_core/vulkan_common/vulkan_device.h" |
| 28 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 29 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 30 | ||
| 15 | namespace Vulkan { | 31 | namespace Vulkan { |
| 16 | 32 | ||
| 17 | using VideoCore::QueryType; | 33 | using VideoCommon::QueryType; |
| 18 | 34 | ||
| 19 | namespace { | 35 | namespace { |
| 36 | class SamplesQueryBank : public VideoCommon::BankBase { | ||
| 37 | public: | ||
| 38 | static constexpr size_t BANK_SIZE = 256; | ||
| 39 | static constexpr size_t QUERY_SIZE = 8; | ||
| 40 | SamplesQueryBank(const Device& device_, size_t index_) | ||
| 41 | : BankBase(BANK_SIZE), device{device_}, index{index_} { | ||
| 42 | const auto& dev = device.GetLogical(); | ||
| 43 | query_pool = dev.CreateQueryPool({ | ||
| 44 | .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, | ||
| 45 | .pNext = nullptr, | ||
| 46 | .flags = 0, | ||
| 47 | .queryType = VK_QUERY_TYPE_OCCLUSION, | ||
| 48 | .queryCount = BANK_SIZE, | ||
| 49 | .pipelineStatistics = 0, | ||
| 50 | }); | ||
| 51 | Reset(); | ||
| 52 | } | ||
| 20 | 53 | ||
| 21 | constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; | 54 | ~SamplesQueryBank() = default; |
| 22 | 55 | ||
| 23 | constexpr VkQueryType GetTarget(QueryType type) { | 56 | void Reset() override { |
| 24 | return QUERY_TARGETS[static_cast<std::size_t>(type)]; | 57 | ASSERT(references == 0); |
| 25 | } | 58 | VideoCommon::BankBase::Reset(); |
| 59 | const auto& dev = device.GetLogical(); | ||
| 60 | dev.ResetQueryPool(*query_pool, 0, BANK_SIZE); | ||
| 61 | host_results.fill(0ULL); | ||
| 62 | next_bank = 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | void Sync(size_t start, size_t size) { | ||
| 66 | const auto& dev = device.GetLogical(); | ||
| 67 | const VkResult query_result = dev.GetQueryResults( | ||
| 68 | *query_pool, static_cast<u32>(start), static_cast<u32>(size), sizeof(u64) * size, | ||
| 69 | &host_results[start], sizeof(u64), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | ||
| 70 | switch (query_result) { | ||
| 71 | case VK_SUCCESS: | ||
| 72 | return; | ||
| 73 | case VK_ERROR_DEVICE_LOST: | ||
| 74 | device.ReportLoss(); | ||
| 75 | [[fallthrough]]; | ||
| 76 | default: | ||
| 77 | throw vk::Exception(query_result); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | VkQueryPool GetInnerPool() { | ||
| 82 | return *query_pool; | ||
| 83 | } | ||
| 84 | |||
| 85 | size_t GetIndex() const { | ||
| 86 | return index; | ||
| 87 | } | ||
| 88 | |||
| 89 | const std::array<u64, BANK_SIZE>& GetResults() const { | ||
| 90 | return host_results; | ||
| 91 | } | ||
| 92 | |||
| 93 | size_t next_bank; | ||
| 94 | |||
| 95 | private: | ||
| 96 | const Device& device; | ||
| 97 | const size_t index; | ||
| 98 | vk::QueryPool query_pool; | ||
| 99 | std::array<u64, BANK_SIZE> host_results; | ||
| 100 | }; | ||
| 101 | |||
| 102 | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | ||
| 103 | |||
| 104 | struct HostSyncValues { | ||
| 105 | VAddr address; | ||
| 106 | size_t size; | ||
| 107 | size_t offset; | ||
| 108 | |||
| 109 | static constexpr bool GeneratesBaseBuffer = false; | ||
| 110 | }; | ||
| 111 | |||
| 112 | template <typename Traits> | ||
| 113 | class SamplesStreamer : public BaseStreamer { | ||
| 114 | public: | ||
| 115 | SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | ||
| 116 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) | ||
| 117 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | ||
| 118 | memory_allocator{memory_allocator_} { | ||
| 119 | BuildResolveBuffer(); | ||
| 120 | current_bank = nullptr; | ||
| 121 | current_query = nullptr; | ||
| 122 | } | ||
| 123 | |||
| 124 | void StartCounter() override { | ||
| 125 | if (has_started) { | ||
| 126 | return; | ||
| 127 | } | ||
| 128 | ReserveHostQuery(); | ||
| 129 | scheduler.Record([query_pool = current_query_pool, | ||
| 130 | query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { | ||
| 131 | const bool use_precise = Settings::IsGPULevelHigh(); | ||
| 132 | cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index), | ||
| 133 | use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); | ||
| 134 | }); | ||
| 135 | has_started = true; | ||
| 136 | } | ||
| 137 | |||
| 138 | void PauseCounter() override { | ||
| 139 | if (!has_started) { | ||
| 140 | return; | ||
| 141 | } | ||
| 142 | scheduler.Record([query_pool = current_query_pool, | ||
| 143 | query_index = current_bank_slot](vk::CommandBuffer cmdbuf) { | ||
| 144 | cmdbuf.EndQuery(query_pool, static_cast<u32>(query_index)); | ||
| 145 | }); | ||
| 146 | has_started = false; | ||
| 147 | } | ||
| 148 | |||
| 149 | void ResetCounter() override { | ||
| 150 | if (has_started) { | ||
| 151 | PauseCounter(); | ||
| 152 | } | ||
| 153 | AbandonCurrentQuery(); | ||
| 154 | } | ||
| 155 | |||
| 156 | void CloseCounter() override { | ||
| 157 | PauseCounter(); | ||
| 158 | } | ||
| 159 | |||
| 160 | bool HasPendingSync() override { | ||
| 161 | return !pending_sync.empty(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void SyncWrites() override { | ||
| 165 | if (sync_values_stash.empty()) { | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | |||
| 169 | for (size_t i = 0; i < sync_values_stash.size(); i++) { | ||
| 170 | runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], *resolve_buffers[i]); | ||
| 171 | } | ||
| 172 | |||
| 173 | sync_values_stash.clear(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void PresyncWrites() override { | ||
| 177 | if (pending_sync.empty()) { | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | PauseCounter(); | ||
| 181 | sync_values_stash.clear(); | ||
| 182 | sync_values_stash.emplace_back(); | ||
| 183 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); | ||
| 184 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | ||
| 185 | std::unordered_map<size_t, std::pair<size_t, size_t>> offsets; | ||
| 186 | size_t this_bank_slot = std::numeric_limits<size_t>::max(); | ||
| 187 | size_t resolve_slots_remaining = resolve_slots; | ||
| 188 | size_t resolve_buffer_index = 0; | ||
| 189 | ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start, | ||
| 190 | size_t amount) { | ||
| 191 | size_t bank_id = bank->GetIndex(); | ||
| 192 | if (this_bank_slot != bank_id) { | ||
| 193 | this_bank_slot = bank_id; | ||
| 194 | if (resolve_slots_remaining == 0) { | ||
| 195 | resolve_buffer_index++; | ||
| 196 | if (resolve_buffer_index >= resolve_buffers.size()) { | ||
| 197 | BuildResolveBuffer(); | ||
| 198 | } | ||
| 199 | resolve_slots_remaining = resolve_slots; | ||
| 200 | sync_values_stash.emplace_back(); | ||
| 201 | sync_values = sync_values = &sync_values_stash.back(); | ||
| 202 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | ||
| 203 | } | ||
| 204 | resolve_slots_remaining--; | ||
| 205 | } | ||
| 206 | auto& resolve_buffer = resolve_buffers[resolve_buffer_index]; | ||
| 207 | const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * | ||
| 208 | (resolve_slots - resolve_slots_remaining - 1); | ||
| 209 | VkQueryPool query_pool = bank->GetInnerPool(); | ||
| 210 | scheduler.Record([start, amount, base_offset, query_pool, | ||
| 211 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { | ||
| 212 | size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; | ||
| 213 | const VkBufferMemoryBarrier copy_query_pool_barrier{ | ||
| 214 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 215 | .pNext = nullptr, | ||
| 216 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 217 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 218 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 219 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 220 | .buffer = buffer, | ||
| 221 | .offset = final_offset, | ||
| 222 | .size = amount * SamplesQueryBank::QUERY_SIZE, | ||
| 223 | }; | ||
| 224 | |||
| 225 | cmdbuf.CopyQueryPoolResults( | ||
| 226 | query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer, | ||
| 227 | static_cast<u32>(final_offset), SamplesQueryBank::QUERY_SIZE, | ||
| 228 | VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); | ||
| 229 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 230 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier); | ||
| 231 | }); | ||
| 232 | offsets[bank_id] = {sync_values_stash.size() - 1, base_offset}; | ||
| 233 | }); | ||
| 234 | |||
| 235 | // Convert queries | ||
| 236 | for (auto q : pending_sync) { | ||
| 237 | auto* query = GetQuery(q); | ||
| 238 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { | ||
| 239 | continue; | ||
| 240 | } | ||
| 241 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { | ||
| 242 | continue; | ||
| 243 | } | ||
| 244 | if (query->size_slots > 1) { | ||
| 245 | // This is problematic. | ||
| 246 | UNIMPLEMENTED(); | ||
| 247 | } | ||
| 248 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | ||
| 249 | auto loc_data = offsets[query->start_bank_id]; | ||
| 250 | sync_values_stash[loc_data.first].emplace_back(HostSyncValues{ | ||
| 251 | .address = query->guest_address, | ||
| 252 | .size = SamplesQueryBank::QUERY_SIZE, | ||
| 253 | .offset = loc_data.second + query->start_slot * SamplesQueryBank::QUERY_SIZE, | ||
| 254 | }); | ||
| 255 | } | ||
| 256 | |||
| 257 | AbandonCurrentQuery(); | ||
| 258 | pending_sync.clear(); | ||
| 259 | } | ||
| 260 | |||
| 261 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 262 | [[maybe_unused]] std::optional<u32> subreport) override { | ||
| 263 | auto index = BuildQuery(); | ||
| 264 | auto* new_query = GetQuery(index); | ||
| 265 | new_query->guest_address = address; | ||
| 266 | new_query->value = 100; | ||
| 267 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | ||
| 268 | if (has_timestamp) { | ||
| 269 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 270 | } | ||
| 271 | if (!current_query) { | ||
| 272 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 273 | return index; | ||
| 274 | } | ||
| 275 | new_query->start_bank_id = current_query->start_bank_id; | ||
| 276 | new_query->size_banks = current_query->size_banks; | ||
| 277 | new_query->start_slot = current_query->start_slot; | ||
| 278 | new_query->size_slots = current_query->size_slots; | ||
| 279 | ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 280 | bank->AddReference(amount); | ||
| 281 | }); | ||
| 282 | pending_sync.push_back(index); | ||
| 283 | pending_flush_queries.push_back(index); | ||
| 284 | return index; | ||
| 285 | } | ||
| 286 | |||
    /// Returns true while there are queries queued for flush that have not yet
    /// been read back from the device (see PushUnsyncedQueries/PopUnsyncedQueries).
    bool HasUnsyncedQueries() override {
        return !pending_flush_queries.empty();
    }
| 290 | |||
| 291 | void PushUnsyncedQueries() override { | ||
| 292 | PauseCounter(); | ||
| 293 | { | ||
| 294 | std::scoped_lock lk(flush_guard); | ||
| 295 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | ||
| 296 | } | ||
| 297 | } | ||
| 298 | |||
| 299 | void PopUnsyncedQueries() override { | ||
| 300 | std::vector<size_t> current_flush_queries; | ||
| 301 | { | ||
| 302 | std::scoped_lock lk(flush_guard); | ||
| 303 | current_flush_queries = std::move(pending_flush_sets.front()); | ||
| 304 | pending_flush_sets.pop_front(); | ||
| 305 | } | ||
| 306 | ApplyBanksWideOp<false>( | ||
| 307 | current_flush_queries, | ||
| 308 | [](SamplesQueryBank* bank, size_t start, size_t amount) { bank->Sync(start, amount); }); | ||
| 309 | for (auto q : current_flush_queries) { | ||
| 310 | auto* query = GetQuery(q); | ||
| 311 | u64 total = 0; | ||
| 312 | ApplyBankOp(query, [&total](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 313 | const auto& results = bank->GetResults(); | ||
| 314 | for (size_t i = 0; i < amount; i++) { | ||
| 315 | total += results[start + i]; | ||
| 316 | } | ||
| 317 | }); | ||
| 318 | query->value = total; | ||
| 319 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | private: | ||
    /// Walks every (bank, slot-range) segment occupied by `query`, invoking
    /// `func(bank, start_slot, amount)` once per segment. A query may span
    /// multiple banks; segments after the first always start at slot 0.
    template <typename Func>
    void ApplyBankOp(VideoCommon::HostQueryBase* query, Func&& func) {
        size_t size_slots = query->size_slots;
        if (size_slots == 0) {
            // Query occupies no slots; nothing to visit.
            return;
        }
        size_t bank_id = query->start_bank_id;
        size_t banks_set = query->size_banks;
        size_t start_slot = query->start_slot;
        for (size_t i = 0; i < banks_set; i++) {
            auto& the_bank = bank_pool.GetBank(bank_id);
            // Clamp to what remains both in this bank and in the query.
            size_t amount = std::min(the_bank.Size() - start_slot, size_slots);
            func(&the_bank, start_slot, amount);
            // next_bank stores the successor's id + 1 (0 meaning "none"),
            // so subtract one to recover the actual bank index.
            bank_id = the_bank.next_bank - 1;
            start_slot = 0;
            size_slots -= amount;
        }
    }
| 342 | |||
| 343 | template <bool is_ordered, typename Func> | ||
| 344 | void ApplyBanksWideOp(std::vector<size_t>& queries, Func&& func) { | ||
| 345 | std::conditional_t<is_ordered, std::map<size_t, std::pair<size_t, size_t>>, | ||
| 346 | std::unordered_map<size_t, std::pair<size_t, size_t>>> | ||
| 347 | indexer; | ||
| 348 | for (auto q : queries) { | ||
| 349 | auto* query = GetQuery(q); | ||
| 350 | ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 351 | auto id = bank->GetIndex(); | ||
| 352 | auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), | ||
| 353 | std::numeric_limits<size_t>::min()); | ||
| 354 | auto& current_pair = pair.first->second; | ||
| 355 | current_pair.first = std::min(current_pair.first, start); | ||
| 356 | current_pair.second = std::max(current_pair.second, amount + start); | ||
| 357 | }); | ||
| 358 | } | ||
| 359 | for (auto& cont : indexer) { | ||
| 360 | func(&bank_pool.GetBank(cont.first), cont.second.first, | ||
| 361 | cont.second.second - cont.second.first); | ||
| 362 | } | ||
| 363 | } | ||
| 364 | |||
| 365 | void ReserveBank() { | ||
| 366 | current_bank_id = | ||
| 367 | bank_pool.ReserveBank([this](std::deque<SamplesQueryBank>& queue, size_t index) { | ||
| 368 | queue.emplace_back(device, index); | ||
| 369 | }); | ||
| 370 | if (current_bank) { | ||
| 371 | current_bank->next_bank = current_bank_id + 1; | ||
| 372 | } | ||
| 373 | current_bank = &bank_pool.GetBank(current_bank_id); | ||
| 374 | current_query_pool = current_bank->GetInnerPool(); | ||
| 375 | } | ||
| 376 | |||
| 377 | size_t ReserveBankSlot() { | ||
| 378 | if (!current_bank || current_bank->IsClosed()) { | ||
| 379 | ReserveBank(); | ||
| 380 | } | ||
| 381 | auto [built, index] = current_bank->Reserve(); | ||
| 382 | current_bank_slot = index; | ||
| 383 | return index; | ||
| 384 | } | ||
| 385 | |||
    /// Reserves one more slot for the in-flight host query, extending it across
    /// a new bank when the active bank is not yet part of its chain, or starts
    /// a brand-new host query when none is active.
    void ReserveHostQuery() {
        size_t new_slot = ReserveBankSlot();
        current_bank->AddReference(1);
        if (current_query) {
            // Walk the chain of banks the query already owns to check whether
            // the active bank is among them.
            size_t bank_id = current_query->start_bank_id;
            size_t banks_set = current_query->size_banks - 1;
            bool found = bank_id == current_bank_id;
            while (!found && banks_set > 0) {
                SamplesQueryBank& some_bank = bank_pool.GetBank(bank_id);
                // next_bank stores the successor's id + 1 (0 meaning "none").
                bank_id = some_bank.next_bank - 1;
                found = bank_id == current_bank_id;
                banks_set--;
            }
            if (!found) {
                // Active bank is new to this query: it now spans one more bank.
                current_query->size_banks++;
            }
            current_query->size_slots++;
        } else {
            // No query in flight: build one anchored at the freshly reserved slot.
            current_query_id = BuildQuery();
            current_query = GetQuery(current_query_id);
            current_query->start_bank_id = static_cast<u32>(current_bank_id);
            current_query->size_banks = 1;
            current_query->start_slot = new_slot;
            current_query->size_slots = 1;
        }
    }
| 412 | |||
| 413 | void Free(size_t query_id) override { | ||
| 414 | std::scoped_lock lk(guard); | ||
| 415 | auto* query = GetQuery(query_id); | ||
| 416 | ApplyBankOp(query, [](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 417 | bank->CloseReference(amount); | ||
| 418 | }); | ||
| 419 | ReleaseQuery(query_id); | ||
| 420 | } | ||
| 421 | |||
| 422 | void AbandonCurrentQuery() { | ||
| 423 | if (!current_query) { | ||
| 424 | return; | ||
| 425 | } | ||
| 426 | Free(current_query_id); | ||
| 427 | current_query = nullptr; | ||
| 428 | current_query_id = 0; | ||
| 429 | } | ||
| 430 | |||
| 431 | void BuildResolveBuffer() { | ||
| 432 | const VkBufferCreateInfo buffer_ci = { | ||
| 433 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 434 | .pNext = nullptr, | ||
| 435 | .flags = 0, | ||
| 436 | .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, | ||
| 437 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||
| 438 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 439 | .queueFamilyIndexCount = 0, | ||
| 440 | .pQueueFamilyIndices = nullptr, | ||
| 441 | }; | ||
| 442 | resolve_buffers.emplace_back( | ||
| 443 | std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); | ||
| 444 | } | ||
| 445 | |||
    // Number of full banks each resolve buffer can hold before a new one is built.
    static constexpr size_t resolve_slots = 8;

    QueryCacheRuntime& runtime;
    const Device& device;
    Scheduler& scheduler;
    const MemoryAllocator& memory_allocator;
    VideoCommon::BankPool<SamplesQueryBank> bank_pool;
    // Destinations for vkCmdCopyQueryPoolResults; see BuildResolveBuffer().
    std::deque<vk::Buffer> resolve_buffers;
    // One vector of host sync writes per resolve buffer in flight.
    std::deque<std::vector<HostSyncValues>> sync_values_stash;

    // syncing queue: query indices awaiting SyncWrites().
    std::vector<size_t> pending_sync;

    // flush levels: queries queued for flush and the sets pushed per flush.
    std::vector<size_t> pending_flush_queries;
    std::deque<std::vector<size_t>> pending_flush_sets;

    // State Machine: active bank/slot/query being accumulated into.
    size_t current_bank_slot;
    size_t current_bank_id;
    SamplesQueryBank* current_bank;
    VkQueryPool current_query_pool;
    size_t current_query_id;
    VideoCommon::HostQueryBase* current_query;
    bool has_started{};
    // Protects pending_flush_sets across Push/PopUnsyncedQueries.
    std::mutex flush_guard;
};
| 473 | |||
| 474 | // Transform feedback queries | ||
| 475 | class TFBQueryBank : public VideoCommon::BankBase { | ||
| 476 | public: | ||
| 477 | static constexpr size_t BANK_SIZE = 1024; | ||
| 478 | static constexpr size_t QUERY_SIZE = 4; | ||
| 479 | TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) | ||
| 480 | : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { | ||
| 481 | const VkBufferCreateInfo buffer_ci = { | ||
| 482 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 483 | .pNext = nullptr, | ||
| 484 | .flags = 0, | ||
| 485 | .size = QUERY_SIZE * BANK_SIZE, | ||
| 486 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 487 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 488 | .queueFamilyIndexCount = 0, | ||
| 489 | .pQueueFamilyIndices = nullptr, | ||
| 490 | }; | ||
| 491 | buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 492 | } | ||
| 493 | |||
| 494 | ~TFBQueryBank() = default; | ||
| 495 | |||
| 496 | void Reset() override { | ||
| 497 | ASSERT(references == 0); | ||
| 498 | VideoCommon::BankBase::Reset(); | ||
| 499 | } | ||
| 500 | |||
| 501 | void Sync(StagingBufferRef& stagging_buffer, size_t extra_offset, size_t start, size_t size) { | ||
| 502 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 503 | scheduler.Record([this, dst_buffer = stagging_buffer.buffer, extra_offset, start, | ||
| 504 | size](vk::CommandBuffer cmdbuf) { | ||
| 505 | std::array<VkBufferCopy, 1> copy{VkBufferCopy{ | ||
| 506 | .srcOffset = start * QUERY_SIZE, | ||
| 507 | .dstOffset = extra_offset, | ||
| 508 | .size = size * QUERY_SIZE, | ||
| 509 | }}; | ||
| 510 | cmdbuf.CopyBuffer(*buffer, dst_buffer, copy); | ||
| 511 | }); | ||
| 512 | } | ||
| 513 | |||
| 514 | size_t GetIndex() const { | ||
| 515 | return index; | ||
| 516 | } | ||
| 517 | |||
| 518 | VkBuffer GetBuffer() const { | ||
| 519 | return *buffer; | ||
| 520 | } | ||
| 521 | |||
| 522 | private: | ||
| 523 | Scheduler& scheduler; | ||
| 524 | const size_t index; | ||
| 525 | vk::Buffer buffer; | ||
| 526 | }; | ||
| 527 | |||
| 528 | template <typename Traits> | ||
| 529 | class TFBCounterStreamer : public BaseStreamer { | ||
| 530 | public: | ||
| 531 | TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, | ||
| 532 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, | ||
| 533 | StagingBufferPool& staging_pool_) | ||
| 534 | : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | ||
| 535 | memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { | ||
| 536 | buffers_count = 0; | ||
| 537 | current_bank = nullptr; | ||
| 538 | counter_buffers.fill(VK_NULL_HANDLE); | ||
| 539 | offsets.fill(0); | ||
| 540 | const VkBufferCreateInfo buffer_ci = { | ||
| 541 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 542 | .pNext = nullptr, | ||
| 543 | .flags = 0, | ||
| 544 | .size = TFBQueryBank::QUERY_SIZE * NUM_STREAMS, | ||
| 545 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 546 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT, | ||
| 547 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 548 | .queueFamilyIndexCount = 0, | ||
| 549 | .pQueueFamilyIndices = nullptr, | ||
| 550 | }; | ||
| 551 | |||
| 552 | counters_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 553 | for (auto& c : counter_buffers) { | ||
| 554 | c = *counters_buffer; | ||
| 555 | } | ||
| 556 | size_t base_offset = 0; | ||
| 557 | for (auto& o : offsets) { | ||
| 558 | o = base_offset; | ||
| 559 | base_offset += TFBQueryBank::QUERY_SIZE; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | void StartCounter() override { | ||
| 564 | FlushBeginTFB(); | ||
| 565 | has_started = true; | ||
| 566 | } | ||
| 567 | |||
| 568 | void PauseCounter() override { | ||
| 569 | CloseCounter(); | ||
| 570 | } | ||
| 571 | |||
| 572 | void ResetCounter() override { | ||
| 573 | CloseCounter(); | ||
| 574 | } | ||
| 575 | |||
| 576 | void CloseCounter() override { | ||
| 577 | if (has_flushed_end_pending) { | ||
| 578 | FlushEndTFB(); | ||
| 579 | } | ||
| 580 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 581 | if (regs.transform_feedback_enabled == 0) { | ||
| 582 | streams_mask = 0; | ||
| 583 | has_started = false; | ||
| 584 | } | ||
| 585 | }); | ||
| 586 | } | ||
| 587 | |||
| 588 | bool HasPendingSync() override { | ||
| 589 | return !pending_sync.empty(); | ||
| 590 | } | ||
| 591 | |||
| 592 | void SyncWrites() override { | ||
| 593 | CloseCounter(); | ||
| 594 | std::unordered_map<size_t, std::vector<HostSyncValues>> sync_values_stash; | ||
| 595 | for (auto q : pending_sync) { | ||
| 596 | auto* query = GetQuery(q); | ||
| 597 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { | ||
| 598 | continue; | ||
| 599 | } | ||
| 600 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { | ||
| 601 | continue; | ||
| 602 | } | ||
| 603 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | ||
| 604 | sync_values_stash.try_emplace(query->start_bank_id); | ||
| 605 | sync_values_stash[query->start_bank_id].emplace_back(HostSyncValues{ | ||
| 606 | .address = query->guest_address, | ||
| 607 | .size = TFBQueryBank::QUERY_SIZE, | ||
| 608 | .offset = query->start_slot * TFBQueryBank::QUERY_SIZE, | ||
| 609 | }); | ||
| 610 | } | ||
| 611 | for (auto& p : sync_values_stash) { | ||
| 612 | auto& bank = bank_pool.GetBank(p.first); | ||
| 613 | runtime.template SyncValues<HostSyncValues>(p.second, bank.GetBuffer()); | ||
| 614 | } | ||
| 615 | pending_sync.clear(); | ||
| 616 | } | ||
| 617 | |||
| 618 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 619 | std::optional<u32> subreport_) override { | ||
| 620 | auto index = BuildQuery(); | ||
| 621 | auto* new_query = GetQuery(index); | ||
| 622 | new_query->guest_address = address; | ||
| 623 | new_query->value = 0; | ||
| 624 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | ||
| 625 | if (has_timestamp) { | ||
| 626 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||
| 627 | } | ||
| 628 | if (!subreport_) { | ||
| 629 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 630 | return index; | ||
| 631 | } | ||
| 632 | const size_t subreport = static_cast<size_t>(*subreport_); | ||
| 633 | UpdateBuffers(); | ||
| 634 | if ((streams_mask & (1ULL << subreport)) == 0) { | ||
| 635 | new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 636 | return index; | ||
| 637 | } | ||
| 638 | CloseCounter(); | ||
| 639 | auto [bank_slot, data_slot] = ProduceCounterBuffer(subreport); | ||
| 640 | new_query->start_bank_id = static_cast<u32>(bank_slot); | ||
| 641 | new_query->size_banks = 1; | ||
| 642 | new_query->start_slot = static_cast<u32>(data_slot); | ||
| 643 | new_query->size_slots = 1; | ||
| 644 | pending_sync.push_back(index); | ||
| 645 | pending_flush_queries.push_back(index); | ||
| 646 | return index; | ||
| 647 | } | ||
| 648 | |||
| 649 | bool HasUnsyncedQueries() override { | ||
| 650 | return !pending_flush_queries.empty(); | ||
| 651 | } | ||
| 652 | |||
| 653 | void PushUnsyncedQueries() override { | ||
| 654 | CloseCounter(); | ||
| 655 | auto staging_ref = staging_pool.Request( | ||
| 656 | pending_flush_queries.size() * TFBQueryBank::QUERY_SIZE, MemoryUsage::Download, true); | ||
| 657 | size_t offset_base = staging_ref.offset; | ||
| 658 | for (auto q : pending_flush_queries) { | ||
| 659 | auto* query = GetQuery(q); | ||
| 660 | auto& bank = bank_pool.GetBank(query->start_bank_id); | ||
| 661 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); | ||
| 662 | offset_base += TFBQueryBank::QUERY_SIZE; | ||
| 663 | bank.CloseReference(); | ||
| 664 | } | ||
| 665 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 666 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 667 | .pNext = nullptr, | ||
| 668 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 669 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 670 | }; | ||
| 671 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 672 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 673 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 674 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 675 | }); | ||
| 676 | |||
| 677 | std::scoped_lock lk(flush_guard); | ||
| 678 | for (auto& str : free_queue) { | ||
| 679 | staging_pool.FreeDeferred(str); | ||
| 680 | } | ||
| 681 | free_queue.clear(); | ||
| 682 | download_buffers.emplace_back(staging_ref); | ||
| 683 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | ||
| 684 | } | ||
| 685 | |||
| 686 | void PopUnsyncedQueries() override { | ||
| 687 | StagingBufferRef staging_ref; | ||
| 688 | std::vector<size_t> flushed_queries; | ||
| 689 | { | ||
| 690 | std::scoped_lock lk(flush_guard); | ||
| 691 | staging_ref = download_buffers.front(); | ||
| 692 | flushed_queries = std::move(pending_flush_sets.front()); | ||
| 693 | download_buffers.pop_front(); | ||
| 694 | pending_flush_sets.pop_front(); | ||
| 695 | } | ||
| 696 | |||
| 697 | size_t offset_base = staging_ref.offset; | ||
| 698 | for (auto q : flushed_queries) { | ||
| 699 | auto* query = GetQuery(q); | ||
| 700 | u32 result = 0; | ||
| 701 | std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32)); | ||
| 702 | query->value = static_cast<u64>(result); | ||
| 703 | query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; | ||
| 704 | offset_base += TFBQueryBank::QUERY_SIZE; | ||
| 705 | } | ||
| 706 | |||
| 707 | { | ||
| 708 | std::scoped_lock lk(flush_guard); | ||
| 709 | free_queue.emplace_back(staging_ref); | ||
| 710 | } | ||
| 711 | } | ||
| 712 | |||
| 713 | private: | ||
| 714 | void FlushBeginTFB() { | ||
| 715 | if (has_flushed_end_pending) [[unlikely]] { | ||
| 716 | return; | ||
| 717 | } | ||
| 718 | has_flushed_end_pending = true; | ||
| 719 | if (!has_started || buffers_count == 0) { | ||
| 720 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 721 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 722 | }); | ||
| 723 | UpdateBuffers(); | ||
| 724 | return; | ||
| 725 | } | ||
| 726 | scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | ||
| 727 | cmdbuf.BeginTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); | ||
| 728 | }); | ||
| 729 | UpdateBuffers(); | ||
| 730 | } | ||
| 731 | |||
| 732 | void FlushEndTFB() { | ||
| 733 | if (!has_flushed_end_pending) [[unlikely]] { | ||
| 734 | UNREACHABLE(); | ||
| 735 | return; | ||
| 736 | } | ||
| 737 | has_flushed_end_pending = false; | ||
| 738 | |||
| 739 | if (buffers_count == 0) { | ||
| 740 | scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 741 | cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 742 | }); | ||
| 743 | } else { | ||
| 744 | scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) { | ||
| 745 | cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data()); | ||
| 746 | }); | ||
| 747 | } | ||
| 748 | } | ||
| 749 | |||
| 750 | void UpdateBuffers() { | ||
| 751 | runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 752 | buffers_count = 0; | ||
| 753 | for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; | ||
| 754 | i++) { | ||
| 755 | const auto& tf = regs.transform_feedback; | ||
| 756 | if (tf.buffers[i].enable == 0) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | const size_t stream = tf.controls[i].stream; | ||
| 760 | streams_mask |= 1ULL << stream; | ||
| 761 | buffers_count = std::max<size_t>(buffers_count, stream + 1); | ||
| 762 | } | ||
| 763 | }); | ||
| 764 | } | ||
| 765 | |||
| 766 | std::pair<size_t, size_t> ProduceCounterBuffer(size_t stream) { | ||
| 767 | if (current_bank == nullptr || current_bank->IsClosed()) { | ||
| 768 | current_bank_id = | ||
| 769 | bank_pool.ReserveBank([this](std::deque<TFBQueryBank>& queue, size_t index) { | ||
| 770 | queue.emplace_back(scheduler, memory_allocator, index); | ||
| 771 | }); | ||
| 772 | current_bank = &bank_pool.GetBank(current_bank_id); | ||
| 773 | } | ||
| 774 | auto [dont_care, slot] = current_bank->Reserve(); | ||
| 775 | current_bank->AddReference(); | ||
| 776 | |||
| 777 | static constexpr VkMemoryBarrier READ_BARRIER{ | ||
| 778 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 779 | .pNext = nullptr, | ||
| 780 | .srcAccessMask = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, | ||
| 781 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 782 | }; | ||
| 783 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 784 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 785 | .pNext = nullptr, | ||
| 786 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 787 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, | ||
| 788 | }; | ||
| 789 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 790 | scheduler.Record([dst_buffer = current_bank->GetBuffer(), | ||
| 791 | src_buffer = counter_buffers[stream], src_offset = offsets[stream], | ||
| 792 | slot](vk::CommandBuffer cmdbuf) { | ||
| 793 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, | ||
| 794 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); | ||
| 795 | std::array<VkBufferCopy, 1> copy{VkBufferCopy{ | ||
| 796 | .srcOffset = src_offset, | ||
| 797 | .dstOffset = slot * TFBQueryBank::QUERY_SIZE, | ||
| 798 | .size = TFBQueryBank::QUERY_SIZE, | ||
| 799 | }}; | ||
| 800 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||
| 801 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 802 | 0, WRITE_BARRIER); | ||
| 803 | }); | ||
| 804 | return {current_bank_id, slot}; | ||
| 805 | } | ||
| 806 | |||
| 807 | static constexpr size_t NUM_STREAMS = 4; | ||
| 808 | static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; | ||
| 809 | |||
| 810 | QueryCacheRuntime& runtime; | ||
| 811 | const Device& device; | ||
| 812 | Scheduler& scheduler; | ||
| 813 | const MemoryAllocator& memory_allocator; | ||
| 814 | StagingBufferPool& staging_pool; | ||
| 815 | VideoCommon::BankPool<TFBQueryBank> bank_pool; | ||
| 816 | size_t current_bank_id; | ||
| 817 | TFBQueryBank* current_bank; | ||
| 818 | vk::Buffer counters_buffer; | ||
| 819 | |||
| 820 | // syncing queue | ||
| 821 | std::vector<size_t> pending_sync; | ||
| 822 | |||
| 823 | // flush levels | ||
| 824 | std::vector<size_t> pending_flush_queries; | ||
| 825 | std::deque<StagingBufferRef> download_buffers; | ||
| 826 | std::deque<std::vector<size_t>> pending_flush_sets; | ||
| 827 | std::vector<StagingBufferRef> free_queue; | ||
| 828 | std::mutex flush_guard; | ||
| 829 | |||
| 830 | // state machine | ||
| 831 | bool has_started{}; | ||
| 832 | bool has_flushed_end_pending{}; | ||
| 833 | size_t buffers_count{}; | ||
| 834 | std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | ||
| 835 | std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | ||
| 836 | u64 streams_mask; | ||
| 837 | }; | ||
| 838 | |||
| 839 | } // namespace | ||
| 840 | |||
| 841 | struct QueryCacheRuntimeImpl { | ||
| 842 | QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | ||
| 843 | Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, | ||
| 844 | const Device& device_, const MemoryAllocator& memory_allocator_, | ||
| 845 | Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 846 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 847 | DescriptorPool& descriptor_pool) | ||
| 848 | : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, | ||
| 849 | buffer_cache{buffer_cache_}, device{device_}, | ||
| 850 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | ||
| 851 | guest_streamer(0, runtime), | ||
| 852 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device, | ||
| 853 | scheduler, memory_allocator), | ||
| 854 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | ||
| 855 | scheduler, memory_allocator, staging_pool), | ||
| 856 | hcr_setup{}, hcr_is_set{}, is_hcr_running{} { | ||
| 857 | |||
| 858 | hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; | ||
| 859 | hcr_setup.pNext = nullptr; | ||
| 860 | hcr_setup.flags = 0; | ||
| 861 | |||
| 862 | conditional_resolve_pass = std::make_unique<ConditionalRenderingResolvePass>( | ||
| 863 | device, scheduler, descriptor_pool, compute_pass_descriptor_queue); | ||
| 864 | |||
| 865 | const VkBufferCreateInfo buffer_ci = { | ||
| 866 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 867 | .pNext = nullptr, | ||
| 868 | .flags = 0, | ||
| 869 | .size = sizeof(u32), | ||
| 870 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | ||
| 871 | VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT, | ||
| 872 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 873 | .queueFamilyIndexCount = 0, | ||
| 874 | .pQueueFamilyIndices = nullptr, | ||
| 875 | }; | ||
| 876 | hcr_resolve_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 877 | } | ||
| 26 | 878 | ||
| 27 | } // Anonymous namespace | 879 | VideoCore::RasterizerInterface* rasterizer; |
| 880 | Core::Memory::Memory& cpu_memory; | ||
| 881 | Vulkan::BufferCache& buffer_cache; | ||
| 28 | 882 | ||
| 29 | QueryPool::QueryPool(const Device& device_, Scheduler& scheduler, QueryType type_) | 883 | const Device& device; |
| 30 | : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} | 884 | const MemoryAllocator& memory_allocator; |
| 885 | Scheduler& scheduler; | ||
| 886 | StagingBufferPool& staging_pool; | ||
| 31 | 887 | ||
| 32 | QueryPool::~QueryPool() = default; | 888 | // Streamers |
| 889 | VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; | ||
| 890 | SamplesStreamer<QueryCacheParams> sample_streamer; | ||
| 891 | TFBCounterStreamer<QueryCacheParams> tfb_streamer; | ||
| 33 | 892 | ||
| 34 | std::pair<VkQueryPool, u32> QueryPool::Commit() { | 893 | std::vector<std::pair<VAddr, VAddr>> little_cache; |
| 35 | std::size_t index; | 894 | std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |
| 36 | do { | 895 | std::vector<size_t> redirect_cache; |
| 37 | index = CommitResource(); | 896 | std::vector<std::vector<VkBufferCopy>> copies_setup; |
| 38 | } while (usage[index]); | ||
| 39 | usage[index] = true; | ||
| 40 | 897 | ||
| 41 | return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; | 898 | // Host conditional rendering data |
| 899 | std::unique_ptr<ConditionalRenderingResolvePass> conditional_resolve_pass; | ||
| 900 | vk::Buffer hcr_resolve_buffer; | ||
| 901 | VkConditionalRenderingBeginInfoEXT hcr_setup; | ||
| 902 | VkBuffer hcr_buffer; | ||
| 903 | size_t hcr_offset; | ||
| 904 | bool hcr_is_set; | ||
| 905 | bool is_hcr_running; | ||
| 906 | |||
| 907 | // maxwell3d | ||
| 908 | Tegra::Engines::Maxwell3D* maxwell3d; | ||
| 909 | }; | ||
| 910 | |||
// Constructs the runtime by forwarding every collaborator into the pimpl;
// `*this` is passed so the streamers can call back into the runtime.
QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
                                     Core::Memory::Memory& cpu_memory_,
                                     Vulkan::BufferCache& buffer_cache_, const Device& device_,
                                     const MemoryAllocator& memory_allocator_,
                                     Scheduler& scheduler_, StagingBufferPool& staging_pool_,
                                     ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                                     DescriptorPool& descriptor_pool) {
    impl = std::make_unique<QueryCacheRuntimeImpl>(
        *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
        staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
}
| 922 | |||
// Binds the Maxwell3D engine whose registers View3DRegs() will expose.
void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) {
    impl->maxwell3d = maxwell3d;
}
| 43 | 926 | ||
// Invokes `func` with the bound Maxwell3D register file. Defined in this
// translation unit, so only instantiations within this file can link to it;
// requires Bind3DEngine to have been called first.
template <typename Func>
void QueryCacheRuntime::View3DRegs(Func&& func) {
    func(impl->maxwell3d->regs);
}
| 46 | 931 | ||
| 47 | pools.push_back(device.GetLogical().CreateQueryPool({ | 932 | void QueryCacheRuntime::EndHostConditionalRendering() { |
| 48 | .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, | 933 | PauseHostConditionalRendering(); |
| 49 | .pNext = nullptr, | 934 | impl->hcr_is_set = false; |
| 50 | .flags = 0, | 935 | impl->is_hcr_running = false; |
| 51 | .queryType = GetTarget(type), | 936 | impl->hcr_buffer = nullptr; |
| 52 | .queryCount = static_cast<u32>(end - begin), | 937 | impl->hcr_offset = 0; |
| 53 | .pipelineStatistics = 0, | ||
| 54 | })); | ||
| 55 | } | 938 | } |
| 56 | 939 | ||
| 57 | void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { | 940 | void QueryCacheRuntime::PauseHostConditionalRendering() { |
| 58 | const auto it = | 941 | if (!impl->hcr_is_set) { |
| 59 | std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { | 942 | return; |
| 60 | return query_pool == *pool; | 943 | } |
| 944 | if (impl->is_hcr_running) { | ||
| 945 | impl->scheduler.Record( | ||
| 946 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndConditionalRenderingEXT(); }); | ||
| 947 | } | ||
| 948 | impl->is_hcr_running = false; | ||
| 949 | } | ||
| 950 | |||
| 951 | void QueryCacheRuntime::ResumeHostConditionalRendering() { | ||
| 952 | if (!impl->hcr_is_set) { | ||
| 953 | return; | ||
| 954 | } | ||
| 955 | if (!impl->is_hcr_running) { | ||
| 956 | impl->scheduler.Record([hcr_setup = impl->hcr_setup](vk::CommandBuffer cmdbuf) { | ||
| 957 | cmdbuf.BeginConditionalRenderingEXT(hcr_setup); | ||
| 61 | }); | 958 | }); |
| 959 | } | ||
| 960 | impl->is_hcr_running = true; | ||
| 961 | } | ||
| 62 | 962 | ||
// Points host conditional rendering at the guest value behind `object`:
// obtains the backing GPU buffer for the address, and either resumes the
// already-configured section (same buffer/offset) or re-arms it with the new
// location and polarity.
void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object,
                                                                 bool is_equal) {
    {
        // Buffer cache must be locked while obtaining the CPU buffer.
        std::scoped_lock lk(impl->buffer_cache.mutex);
        static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
        const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
        const auto [buffer, offset] =
            impl->buffer_cache.ObtainCPUBuffer(object.address, 8, sync_info, post_op);
        impl->hcr_buffer = buffer->Handle();
        impl->hcr_offset = offset;
    }
    if (impl->hcr_is_set) {
        if (impl->hcr_setup.buffer == impl->hcr_buffer &&
            impl->hcr_setup.offset == impl->hcr_offset) {
            // Same predicate location: just make sure the section is running.
            ResumeHostConditionalRendering();
            return;
        }
        // Predicate moved: close the old section before re-arming.
        PauseHostConditionalRendering();
    }
    impl->hcr_setup.buffer = impl->hcr_buffer;
    impl->hcr_setup.offset = impl->hcr_offset;
    // INVERTED flips the predicate so "equal" renders when the value is zero.
    impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
    impl->hcr_is_set = true;
    impl->is_hcr_running = false;
    ResumeHostConditionalRendering();
}
| 68 | 989 | ||
| 69 | QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, | 990 | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { |
| 70 | Core::Memory::Memory& cpu_memory_, const Device& device_, | 991 | VkBuffer to_resolve; |
| 71 | Scheduler& scheduler_) | 992 | u32 to_resolve_offset; |
| 72 | : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_}, | 993 | { |
| 73 | query_pools{ | 994 | std::scoped_lock lk(impl->buffer_cache.mutex); |
| 74 | QueryPool{device_, scheduler_, QueryType::SamplesPassed}, | 995 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::NoSynchronize; |
| 75 | } {} | 996 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 76 | 997 | const auto [buffer, offset] = | |
| 77 | QueryCache::~QueryCache() { | 998 | impl->buffer_cache.ObtainCPUBuffer(address, 24, sync_info, post_op); |
| 78 | // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class | 999 | to_resolve = buffer->Handle(); |
| 79 | // destructor is called. The query cache should be redesigned to have a proper ownership model | 1000 | to_resolve_offset = static_cast<u32>(offset); |
| 80 | // instead of using shared pointers. | 1001 | } |
| 81 | for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) { | 1002 | if (impl->is_hcr_running) { |
| 82 | auto& stream = Stream(static_cast<QueryType>(query_type)); | 1003 | PauseHostConditionalRendering(); |
| 83 | stream.Update(false); | ||
| 84 | stream.Reset(); | ||
| 85 | } | 1004 | } |
| 1005 | impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve, | ||
| 1006 | to_resolve_offset, false); | ||
| 1007 | impl->hcr_setup.buffer = *impl->hcr_resolve_buffer; | ||
| 1008 | impl->hcr_setup.offset = 0; | ||
| 1009 | impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; | ||
| 1010 | impl->hcr_is_set = true; | ||
| 1011 | impl->is_hcr_running = false; | ||
| 1012 | ResumeHostConditionalRendering(); | ||
| 86 | } | 1013 | } |
| 87 | 1014 | ||
| 88 | std::pair<VkQueryPool, u32> QueryCache::AllocateQuery(QueryType type) { | 1015 | bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, |
| 89 | return query_pools[static_cast<std::size_t>(type)].Commit(); | 1016 | [[maybe_unused]] bool qc_dirty) { |
| 1017 | if (!impl->device.IsExtConditionalRendering()) { | ||
| 1018 | return false; | ||
| 1019 | } | ||
| 1020 | HostConditionalRenderingCompareValueImpl(object_1, false); | ||
| 1021 | return true; | ||
| 90 | } | 1022 | } |
| 91 | 1023 | ||
| 92 | void QueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { | 1024 | bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
| 93 | query_pools[static_cast<std::size_t>(type)].Reserve(query); | 1025 | VideoCommon::LookupData object_2, |
| 1026 | bool qc_dirty, bool equal_check) { | ||
| 1027 | if (!impl->device.IsExtConditionalRendering()) { | ||
| 1028 | return false; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | const auto check_in_bc = [&](VAddr address) { | ||
| 1032 | return impl->buffer_cache.IsRegionGpuModified(address, 8); | ||
| 1033 | }; | ||
| 1034 | const auto check_value = [&](VAddr address) { | ||
| 1035 | u8* ptr = impl->cpu_memory.GetPointer(address); | ||
| 1036 | u64 value{}; | ||
| 1037 | std::memcpy(&value, ptr, sizeof(value)); | ||
| 1038 | return value == 0; | ||
| 1039 | }; | ||
| 1040 | std::array<VideoCommon::LookupData*, 2> objects{&object_1, &object_2}; | ||
| 1041 | std::array<bool, 2> is_in_bc{}; | ||
| 1042 | std::array<bool, 2> is_in_qc{}; | ||
| 1043 | std::array<bool, 2> is_in_ac{}; | ||
| 1044 | std::array<bool, 2> is_null{}; | ||
| 1045 | { | ||
| 1046 | std::scoped_lock lk(impl->buffer_cache.mutex); | ||
| 1047 | for (size_t i = 0; i < 2; i++) { | ||
| 1048 | is_in_qc[i] = objects[i]->found_query != nullptr; | ||
| 1049 | is_in_bc[i] = !is_in_qc[i] && check_in_bc(objects[i]->address); | ||
| 1050 | is_in_ac[i] = is_in_qc[i] || is_in_bc[i]; | ||
| 1051 | } | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | if (!is_in_ac[0] && !is_in_ac[1]) { | ||
| 1055 | EndHostConditionalRendering(); | ||
| 1056 | return false; | ||
| 1057 | } | ||
| 1058 | |||
| 1059 | if (!qc_dirty && !is_in_bc[0] && !is_in_bc[1]) { | ||
| 1060 | EndHostConditionalRendering(); | ||
| 1061 | return false; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | for (size_t i = 0; i < 2; i++) { | ||
| 1065 | is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | for (size_t i = 0; i < 2; i++) { | ||
| 1069 | if (is_null[i]) { | ||
| 1070 | size_t j = (i + 1) % 2; | ||
| 1071 | HostConditionalRenderingCompareValueImpl(*objects[j], equal_check); | ||
| 1072 | return true; | ||
| 1073 | } | ||
| 1074 | } | ||
| 1075 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); | ||
| 1076 | return true; | ||
| 94 | } | 1077 | } |
| 95 | 1078 | ||
| 96 | HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | 1079 | QueryCacheRuntime::~QueryCacheRuntime() = default; |
| 97 | QueryType type_) | 1080 | |
| 98 | : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, | 1081 | VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryType query_type) { |
| 99 | query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { | 1082 | switch (query_type) { |
| 100 | const vk::Device* logical = &cache.GetDevice().GetLogical(); | 1083 | case QueryType::Payload: |
| 101 | cache.GetScheduler().Record([logical, query_ = query](vk::CommandBuffer cmdbuf) { | 1084 | return &impl->guest_streamer; |
| 102 | const bool use_precise = Settings::IsGPULevelHigh(); | 1085 | case QueryType::ZPassPixelCount64: |
| 103 | logical->ResetQueryPool(query_.first, query_.second, 1); | 1086 | return &impl->sample_streamer; |
| 104 | cmdbuf.BeginQuery(query_.first, query_.second, | 1087 | case QueryType::StreamingByteCount: |
| 105 | use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0); | 1088 | return &impl->tfb_streamer; |
| 106 | }); | 1089 | default: |
| 1090 | return nullptr; | ||
| 1091 | } | ||
| 107 | } | 1092 | } |
| 108 | 1093 | ||
| 109 | HostCounter::~HostCounter() { | 1094 | void QueryCacheRuntime::Barriers(bool is_prebarrier) { |
| 110 | cache.Reserve(type, query); | 1095 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 1096 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1097 | .pNext = nullptr, | ||
| 1098 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1099 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 1100 | }; | ||
| 1101 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 1102 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1103 | .pNext = nullptr, | ||
| 1104 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 1105 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1106 | }; | ||
| 1107 | if (is_prebarrier) { | ||
| 1108 | impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 1109 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 1110 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER); | ||
| 1111 | }); | ||
| 1112 | } else { | ||
| 1113 | impl->scheduler.Record([](vk::CommandBuffer cmdbuf) { | ||
| 1114 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1115 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER); | ||
| 1116 | }); | ||
| 1117 | } | ||
| 111 | } | 1118 | } |
| 112 | 1119 | ||
| 113 | void HostCounter::EndQuery() { | 1120 | template <typename SyncValuesType> |
| 114 | cache.GetScheduler().Record([query_ = query](vk::CommandBuffer cmdbuf) { | 1121 | void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer) { |
| 115 | cmdbuf.EndQuery(query_.first, query_.second); | 1122 | if (values.size() == 0) { |
| 1123 | return; | ||
| 1124 | } | ||
| 1125 | impl->redirect_cache.clear(); | ||
| 1126 | impl->little_cache.clear(); | ||
| 1127 | size_t total_size = 0; | ||
| 1128 | for (auto& sync_val : values) { | ||
| 1129 | total_size += sync_val.size; | ||
| 1130 | bool found = false; | ||
| 1131 | VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | ||
| 1132 | VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | ||
| 1133 | for (size_t i = 0; i < impl->little_cache.size(); i++) { | ||
| 1134 | const auto set_found = [&] { | ||
| 1135 | impl->redirect_cache.push_back(i); | ||
| 1136 | found = true; | ||
| 1137 | }; | ||
| 1138 | auto& loc = impl->little_cache[i]; | ||
| 1139 | if (base < loc.second && loc.first < base_end) { | ||
| 1140 | set_found(); | ||
| 1141 | break; | ||
| 1142 | } | ||
| 1143 | if (loc.first == base_end) { | ||
| 1144 | loc.first = base; | ||
| 1145 | set_found(); | ||
| 1146 | break; | ||
| 1147 | } | ||
| 1148 | if (loc.second == base) { | ||
| 1149 | loc.second = base_end; | ||
| 1150 | set_found(); | ||
| 1151 | break; | ||
| 1152 | } | ||
| 1153 | } | ||
| 1154 | if (!found) { | ||
| 1155 | impl->redirect_cache.push_back(impl->little_cache.size()); | ||
| 1156 | impl->little_cache.emplace_back(base, base_end); | ||
| 1157 | } | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | // Vulkan part. | ||
| 1161 | std::scoped_lock lk(impl->buffer_cache.mutex); | ||
| 1162 | impl->buffer_cache.BufferOperations([&] { | ||
| 1163 | impl->buffers_to_upload_to.clear(); | ||
| 1164 | for (auto& pair : impl->little_cache) { | ||
| 1165 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | ||
| 1166 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; | ||
| 1167 | const auto [buffer, offset] = impl->buffer_cache.ObtainCPUBuffer( | ||
| 1168 | pair.first, static_cast<u32>(pair.second - pair.first), sync_info, post_op); | ||
| 1169 | impl->buffers_to_upload_to.emplace_back(buffer->Handle(), offset); | ||
| 1170 | } | ||
| 116 | }); | 1171 | }); |
| 117 | } | ||
| 118 | 1172 | ||
| 119 | u64 HostCounter::BlockingQuery(bool async) const { | 1173 | VkBuffer src_buffer; |
| 120 | if (!async) { | 1174 | [[maybe_unused]] StagingBufferRef ref; |
| 121 | cache.GetScheduler().Wait(tick); | 1175 | impl->copies_setup.clear(); |
| 122 | } | 1176 | impl->copies_setup.resize(impl->little_cache.size()); |
| 123 | u64 data; | 1177 | if constexpr (SyncValuesType::GeneratesBaseBuffer) { |
| 124 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( | 1178 | ref = impl->staging_pool.Request(total_size, MemoryUsage::Upload); |
| 125 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 1179 | size_t current_offset = ref.offset; |
| 126 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | 1180 | size_t accumulated_size = 0; |
| 127 | 1181 | for (size_t i = 0; i < values.size(); i++) { | |
| 128 | switch (query_result) { | 1182 | size_t which_copy = impl->redirect_cache[i]; |
| 129 | case VK_SUCCESS: | 1183 | impl->copies_setup[which_copy].emplace_back(VkBufferCopy{ |
| 130 | return data; | 1184 | .srcOffset = current_offset + accumulated_size, |
| 131 | case VK_ERROR_DEVICE_LOST: | 1185 | .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address - |
| 132 | cache.GetDevice().ReportLoss(); | 1186 | impl->little_cache[which_copy].first, |
| 133 | [[fallthrough]]; | 1187 | .size = values[i].size, |
| 134 | default: | 1188 | }); |
| 135 | throw vk::Exception(query_result); | 1189 | std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value, |
| 1190 | values[i].size); | ||
| 1191 | accumulated_size += values[i].size; | ||
| 1192 | } | ||
| 1193 | src_buffer = ref.buffer; | ||
| 1194 | } else { | ||
| 1195 | for (size_t i = 0; i < values.size(); i++) { | ||
| 1196 | size_t which_copy = impl->redirect_cache[i]; | ||
| 1197 | impl->copies_setup[which_copy].emplace_back(VkBufferCopy{ | ||
| 1198 | .srcOffset = values[i].offset, | ||
| 1199 | .dstOffset = impl->buffers_to_upload_to[which_copy].second + values[i].address - | ||
| 1200 | impl->little_cache[which_copy].first, | ||
| 1201 | .size = values[i].size, | ||
| 1202 | }); | ||
| 1203 | } | ||
| 1204 | src_buffer = base_src_buffer; | ||
| 136 | } | 1205 | } |
| 1206 | |||
| 1207 | impl->scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 1208 | impl->scheduler.Record([src_buffer, dst_buffers = std::move(impl->buffers_to_upload_to), | ||
| 1209 | vk_copies = std::move(impl->copies_setup)](vk::CommandBuffer cmdbuf) { | ||
| 1210 | size_t size = dst_buffers.size(); | ||
| 1211 | for (size_t i = 0; i < size; i++) { | ||
| 1212 | cmdbuf.CopyBuffer(src_buffer, dst_buffers[i].first, vk_copies[i]); | ||
| 1213 | } | ||
| 1214 | }); | ||
| 137 | } | 1215 | } |
| 138 | 1216 | ||
| 139 | } // namespace Vulkan | 1217 | } // namespace Vulkan |
| 1218 | |||
| 1219 | namespace VideoCommon { | ||
| 1220 | |||
| 1221 | template class QueryCacheBase<Vulkan::QueryCacheParams>; | ||
| 1222 | |||
| 1223 | } // namespace VideoCommon | ||
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index c1b9552eb..9ad2929d7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -1,101 +1,74 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <cstddef> | ||
| 7 | #include <memory> | 6 | #include <memory> |
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | 7 | ||
| 11 | #include "common/common_types.h" | 8 | #include "video_core/query_cache/query_cache_base.h" |
| 12 | #include "video_core/query_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 13 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 15 | 10 | ||
| 16 | namespace VideoCore { | 11 | namespace VideoCore { |
| 17 | class RasterizerInterface; | 12 | class RasterizerInterface; |
| 18 | } | 13 | } |
| 19 | 14 | ||
| 15 | namespace VideoCommon { | ||
| 16 | class StreamerInterface; | ||
| 17 | } | ||
| 18 | |||
| 20 | namespace Vulkan { | 19 | namespace Vulkan { |
| 21 | 20 | ||
| 22 | class CachedQuery; | ||
| 23 | class Device; | 21 | class Device; |
| 24 | class HostCounter; | ||
| 25 | class QueryCache; | ||
| 26 | class Scheduler; | 22 | class Scheduler; |
| 23 | class StagingBufferPool; | ||
| 27 | 24 | ||
| 28 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | 25 | struct QueryCacheRuntimeImpl; |
| 29 | 26 | ||
| 30 | class QueryPool final : public ResourcePool { | 27 | class QueryCacheRuntime { |
| 31 | public: | 28 | public: |
| 32 | explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); | 29 | explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |
| 33 | ~QueryPool() override; | 30 | Core::Memory::Memory& cpu_memory_, |
| 31 | Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||
| 32 | const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||
| 33 | StagingBufferPool& staging_pool_, | ||
| 34 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 35 | DescriptorPool& descriptor_pool); | ||
| 36 | ~QueryCacheRuntime(); | ||
| 34 | 37 | ||
| 35 | std::pair<VkQueryPool, u32> Commit(); | 38 | template <typename SyncValuesType> |
| 39 | void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr); | ||
| 36 | 40 | ||
| 37 | void Reserve(std::pair<VkQueryPool, u32> query); | 41 | void Barriers(bool is_prebarrier); |
| 38 | 42 | ||
| 39 | protected: | 43 | void EndHostConditionalRendering(); |
| 40 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 41 | 44 | ||
| 42 | private: | 45 | void PauseHostConditionalRendering(); |
| 43 | static constexpr std::size_t GROW_STEP = 512; | ||
| 44 | 46 | ||
| 45 | const Device& device; | 47 | void ResumeHostConditionalRendering(); |
| 46 | const VideoCore::QueryType type; | ||
| 47 | 48 | ||
| 48 | std::vector<vk::QueryPool> pools; | 49 | bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); |
| 49 | std::vector<bool> usage; | ||
| 50 | }; | ||
| 51 | 50 | ||
| 52 | class QueryCache final | 51 | bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, |
| 53 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | 52 | VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); |
| 54 | public: | ||
| 55 | explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 56 | Core::Memory::Memory& cpu_memory_, const Device& device_, | ||
| 57 | Scheduler& scheduler_); | ||
| 58 | ~QueryCache(); | ||
| 59 | |||
| 60 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); | ||
| 61 | 53 | ||
| 62 | void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); | 54 | VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); |
| 63 | 55 | ||
| 64 | const Device& GetDevice() const noexcept { | 56 | void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d); |
| 65 | return device; | ||
| 66 | } | ||
| 67 | 57 | ||
| 68 | Scheduler& GetScheduler() const noexcept { | 58 | template <typename Func> |
| 69 | return scheduler; | 59 | void View3DRegs(Func&& func); |
| 70 | } | ||
| 71 | 60 | ||
| 72 | private: | 61 | private: |
| 73 | const Device& device; | 62 | void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |
| 74 | Scheduler& scheduler; | 63 | void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); |
| 75 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | 64 | friend struct QueryCacheRuntimeImpl; |
| 65 | std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||
| 76 | }; | 66 | }; |
| 77 | 67 | ||
| 78 | class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | 68 | struct QueryCacheParams { |
| 79 | public: | 69 | using RuntimeType = Vulkan::QueryCacheRuntime; |
| 80 | explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | ||
| 81 | VideoCore::QueryType type_); | ||
| 82 | ~HostCounter(); | ||
| 83 | |||
| 84 | void EndQuery(); | ||
| 85 | |||
| 86 | private: | ||
| 87 | u64 BlockingQuery(bool async = false) const override; | ||
| 88 | |||
| 89 | QueryCache& cache; | ||
| 90 | const VideoCore::QueryType type; | ||
| 91 | const std::pair<VkQueryPool, u32> query; | ||
| 92 | const u64 tick; | ||
| 93 | }; | 70 | }; |
| 94 | 71 | ||
| 95 | class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { | 72 | using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; |
| 96 | public: | ||
| 97 | explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) | ||
| 98 | : CachedQueryBase{cpu_addr_, host_ptr_} {} | ||
| 99 | }; | ||
| 100 | 73 | ||
| 101 | } // namespace Vulkan | 74 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 01e76a82c..e8862ba04 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 24 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 25 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 25 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 28 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 29 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 30 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 170 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 171 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 171 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | 172 | guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |
| 172 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | 173 | buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |
| 174 | query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | ||
| 175 | staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||
| 176 | query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | ||
| 173 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | 177 | pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, |
| 174 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | 178 | render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |
| 175 | query_cache{*this, cpu_memory_, device, scheduler}, | ||
| 176 | accelerate_dma(buffer_cache, texture_cache, scheduler), | 179 | accelerate_dma(buffer_cache, texture_cache, scheduler), |
| 177 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 180 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 178 | wfi_event(device.GetLogical().CreateEvent()) { | 181 | wfi_event(device.GetLogical().CreateEvent()) { |
| @@ -189,13 +192,15 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 189 | FlushWork(); | 192 | FlushWork(); |
| 190 | gpu_memory->FlushCaching(); | 193 | gpu_memory->FlushCaching(); |
| 191 | 194 | ||
| 195 | query_cache.NotifySegment(true); | ||
| 196 | |||
| 192 | #if ANDROID | 197 | #if ANDROID |
| 193 | if (Settings::IsGPULevelHigh()) { | 198 | if (Settings::IsGPULevelHigh()) { |
| 194 | // This is problematic on Android, disable on GPU Normal. | 199 | // This is problematic on Android, disable on GPU Normal. |
| 195 | query_cache.UpdateCounters(); | 200 | // query_cache.UpdateCounters(); |
| 196 | } | 201 | } |
| 197 | #else | 202 | #else |
| 198 | query_cache.UpdateCounters(); | 203 | // query_cache.UpdateCounters(); |
| 199 | #endif | 204 | #endif |
| 200 | 205 | ||
| 201 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | 206 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |
| @@ -207,13 +212,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 207 | pipeline->SetEngine(maxwell3d, gpu_memory); | 212 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 208 | pipeline->Configure(is_indexed); | 213 | pipeline->Configure(is_indexed); |
| 209 | 214 | ||
| 210 | BeginTransformFeedback(); | ||
| 211 | |||
| 212 | UpdateDynamicStates(); | 215 | UpdateDynamicStates(); |
| 213 | 216 | ||
| 217 | HandleTransformFeedback(); | ||
| 218 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 219 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 214 | draw_func(); | 220 | draw_func(); |
| 215 | |||
| 216 | EndTransformFeedback(); | ||
| 217 | } | 221 | } |
| 218 | 222 | ||
| 219 | void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | 223 | void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { |
| @@ -241,6 +245,14 @@ void RasterizerVulkan::DrawIndirect() { | |||
| 241 | const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); | 245 | const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); |
| 242 | const auto& buffer = indirect_buffer.first; | 246 | const auto& buffer = indirect_buffer.first; |
| 243 | const auto& offset = indirect_buffer.second; | 247 | const auto& offset = indirect_buffer.second; |
| 248 | if (params.is_byte_count) { | ||
| 249 | scheduler.Record([buffer_obj = buffer->Handle(), offset, | ||
| 250 | stride = params.stride](vk::CommandBuffer cmdbuf) { | ||
| 251 | cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, | ||
| 252 | static_cast<u32>(stride)); | ||
| 253 | }); | ||
| 254 | return; | ||
| 255 | } | ||
| 244 | if (params.include_count) { | 256 | if (params.include_count) { |
| 245 | const auto count = buffer_cache.GetDrawIndirectCount(); | 257 | const auto count = buffer_cache.GetDrawIndirectCount(); |
| 246 | const auto& draw_buffer = count.first; | 258 | const auto& draw_buffer = count.first; |
| @@ -280,13 +292,15 @@ void RasterizerVulkan::DrawTexture() { | |||
| 280 | SCOPE_EXIT({ gpu.TickWork(); }); | 292 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 281 | FlushWork(); | 293 | FlushWork(); |
| 282 | 294 | ||
| 295 | query_cache.NotifySegment(true); | ||
| 296 | |||
| 283 | #if ANDROID | 297 | #if ANDROID |
| 284 | if (Settings::IsGPULevelHigh()) { | 298 | if (Settings::IsGPULevelHigh()) { |
| 285 | // This is problematic on Android, disable on GPU Normal. | 299 | // This is problematic on Android, disable on GPU Normal. |
| 286 | query_cache.UpdateCounters(); | 300 | // query_cache.UpdateCounters(); |
| 287 | } | 301 | } |
| 288 | #else | 302 | #else |
| 289 | query_cache.UpdateCounters(); | 303 | // query_cache.UpdateCounters(); |
| 290 | #endif | 304 | #endif |
| 291 | 305 | ||
| 292 | texture_cache.SynchronizeGraphicsDescriptors(); | 306 | texture_cache.SynchronizeGraphicsDescriptors(); |
| @@ -294,6 +308,8 @@ void RasterizerVulkan::DrawTexture() { | |||
| 294 | 308 | ||
| 295 | UpdateDynamicStates(); | 309 | UpdateDynamicStates(); |
| 296 | 310 | ||
| 311 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 312 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 297 | const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); | 313 | const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); |
| 298 | const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); | 314 | const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); |
| 299 | const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); | 315 | const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); |
| @@ -319,12 +335,16 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 319 | #if ANDROID | 335 | #if ANDROID |
| 320 | if (Settings::IsGPULevelHigh()) { | 336 | if (Settings::IsGPULevelHigh()) { |
| 321 | // This is problematic on Android, disable on GPU Normal. | 337 | // This is problematic on Android, disable on GPU Normal. |
| 322 | query_cache.UpdateCounters(); | 338 | // query_cache.UpdateCounters(); |
| 323 | } | 339 | } |
| 324 | #else | 340 | #else |
| 325 | query_cache.UpdateCounters(); | 341 | // query_cache.UpdateCounters(); |
| 326 | #endif | 342 | #endif |
| 327 | 343 | ||
| 344 | query_cache.NotifySegment(true); | ||
| 345 | query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||
| 346 | maxwell3d->regs.zpass_pixel_count_enable); | ||
| 347 | |||
| 328 | auto& regs = maxwell3d->regs; | 348 | auto& regs = maxwell3d->regs; |
| 329 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | 349 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || |
| 330 | regs.clear_surface.A; | 350 | regs.clear_surface.A; |
| @@ -482,13 +502,13 @@ void RasterizerVulkan::DispatchCompute() { | |||
| 482 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | 502 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); |
| 483 | } | 503 | } |
| 484 | 504 | ||
| 485 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 505 | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { |
| 486 | query_cache.ResetCounter(type); | 506 | query_cache.CounterReset(type); |
| 487 | } | 507 | } |
| 488 | 508 | ||
| 489 | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | 509 | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, |
| 490 | std::optional<u64> timestamp) { | 510 | VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |
| 491 | query_cache.Query(gpu_addr, type, timestamp); | 511 | query_cache.CounterReport(gpu_addr, type, flags, payload, subreport); |
| 492 | } | 512 | } |
| 493 | 513 | ||
| 494 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | 514 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| @@ -669,8 +689,8 @@ void RasterizerVulkan::SignalReference() { | |||
| 669 | fence_manager.SignalReference(); | 689 | fence_manager.SignalReference(); |
| 670 | } | 690 | } |
| 671 | 691 | ||
| 672 | void RasterizerVulkan::ReleaseFences() { | 692 | void RasterizerVulkan::ReleaseFences(bool force) { |
| 673 | fence_manager.WaitPendingFences(); | 693 | fence_manager.WaitPendingFences(force); |
| 674 | } | 694 | } |
| 675 | 695 | ||
| 676 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | 696 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, |
| @@ -694,6 +714,8 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 694 | flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; | 714 | flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; |
| 695 | } | 715 | } |
| 696 | 716 | ||
| 717 | query_cache.NotifyWFI(); | ||
| 718 | |||
| 697 | scheduler.RequestOutsideRenderPassOperationContext(); | 719 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 698 | scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { | 720 | scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { |
| 699 | cmdbuf.SetEvent(event, flags); | 721 | cmdbuf.SetEvent(event, flags); |
| @@ -737,19 +759,7 @@ void RasterizerVulkan::TickFrame() { | |||
| 737 | 759 | ||
| 738 | bool RasterizerVulkan::AccelerateConditionalRendering() { | 760 | bool RasterizerVulkan::AccelerateConditionalRendering() { |
| 739 | gpu_memory->FlushCaching(); | 761 | gpu_memory->FlushCaching(); |
| 740 | if (Settings::IsGPULevelHigh()) { | 762 | return query_cache.AccelerateHostConditionalRendering(); |
| 741 | // TODO(Blinkhawk): Reimplement Host conditional rendering. | ||
| 742 | return false; | ||
| 743 | } | ||
| 744 | // Medium / Low Hack: stub any checks on queries written into the buffer cache. | ||
| 745 | const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; | ||
| 746 | Maxwell::ReportSemaphore::Compare cmp; | ||
| 747 | if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), | ||
| 748 | VideoCommon::CacheType::BufferCache | | ||
| 749 | VideoCommon::CacheType::QueryCache)) { | ||
| 750 | return true; | ||
| 751 | } | ||
| 752 | return false; | ||
| 753 | } | 763 | } |
| 754 | 764 | ||
| 755 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 765 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| @@ -795,6 +805,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 795 | if (!image_view) { | 805 | if (!image_view) { |
| 796 | return false; | 806 | return false; |
| 797 | } | 807 | } |
| 808 | query_cache.NotifySegment(false); | ||
| 798 | screen_info.image = image_view->ImageHandle(); | 809 | screen_info.image = image_view->ImageHandle(); |
| 799 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); | 810 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); |
| 800 | screen_info.width = image_view->size.width; | 811 | screen_info.width = image_view->size.width; |
| @@ -933,31 +944,18 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 933 | } | 944 | } |
| 934 | } | 945 | } |
| 935 | 946 | ||
| 936 | void RasterizerVulkan::BeginTransformFeedback() { | 947 | void RasterizerVulkan::HandleTransformFeedback() { |
| 937 | const auto& regs = maxwell3d->regs; | 948 | const auto& regs = maxwell3d->regs; |
| 938 | if (regs.transform_feedback_enabled == 0) { | ||
| 939 | return; | ||
| 940 | } | ||
| 941 | if (!device.IsExtTransformFeedbackSupported()) { | 949 | if (!device.IsExtTransformFeedbackSupported()) { |
| 942 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 950 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |
| 943 | return; | 951 | return; |
| 944 | } | 952 | } |
| 945 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | 953 | query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, |
| 946 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); | 954 | regs.transform_feedback_enabled); |
| 947 | scheduler.Record( | 955 | if (regs.transform_feedback_enabled != 0) { |
| 948 | [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 956 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || |
| 949 | } | 957 | regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); |
| 950 | |||
| 951 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 952 | const auto& regs = maxwell3d->regs; | ||
| 953 | if (regs.transform_feedback_enabled == 0) { | ||
| 954 | return; | ||
| 955 | } | ||
| 956 | if (!device.IsExtTransformFeedbackSupported()) { | ||
| 957 | return; | ||
| 958 | } | 958 | } |
| 959 | scheduler.Record( | ||
| 960 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||
| 961 | } | 959 | } |
| 962 | 960 | ||
| 963 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 961 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b31982485..ffd44c68d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -84,8 +84,8 @@ public: | |||
| 84 | void DrawTexture() override; | 84 | void DrawTexture() override; |
| 85 | void Clear(u32 layer_count) override; | 85 | void Clear(u32 layer_count) override; |
| 86 | void DispatchCompute() override; | 86 | void DispatchCompute() override; |
| 87 | void ResetCounter(VideoCore::QueryType type) override; | 87 | void ResetCounter(VideoCommon::QueryType type) override; |
| 88 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 88 | void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; |
| 89 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 89 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| 90 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | 90 | void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |
| 91 | void FlushAll() override; | 91 | void FlushAll() override; |
| @@ -106,7 +106,7 @@ public: | |||
| 106 | void SyncOperation(std::function<void()>&& func) override; | 106 | void SyncOperation(std::function<void()>&& func) override; |
| 107 | void SignalSyncPoint(u32 value) override; | 107 | void SignalSyncPoint(u32 value) override; |
| 108 | void SignalReference() override; | 108 | void SignalReference() override; |
| 109 | void ReleaseFences() override; | 109 | void ReleaseFences(bool force = true) override; |
| 110 | void FlushAndInvalidateRegion( | 110 | void FlushAndInvalidateRegion( |
| 111 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 111 | VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 112 | void WaitForIdle() override; | 112 | void WaitForIdle() override; |
| @@ -146,9 +146,7 @@ private: | |||
| 146 | 146 | ||
| 147 | void UpdateDynamicStates(); | 147 | void UpdateDynamicStates(); |
| 148 | 148 | ||
| 149 | void BeginTransformFeedback(); | 149 | void HandleTransformFeedback(); |
| 150 | |||
| 151 | void EndTransformFeedback(); | ||
| 152 | 150 | ||
| 153 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 151 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 154 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 152 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -195,8 +193,9 @@ private: | |||
| 195 | TextureCache texture_cache; | 193 | TextureCache texture_cache; |
| 196 | BufferCacheRuntime buffer_cache_runtime; | 194 | BufferCacheRuntime buffer_cache_runtime; |
| 197 | BufferCache buffer_cache; | 195 | BufferCache buffer_cache; |
| 198 | PipelineCache pipeline_cache; | 196 | QueryCacheRuntime query_cache_runtime; |
| 199 | QueryCache query_cache; | 197 | QueryCache query_cache; |
| 198 | PipelineCache pipeline_cache; | ||
| 200 | AccelerateDMA accelerate_dma; | 199 | AccelerateDMA accelerate_dma; |
| 201 | FenceManager fence_manager; | 200 | FenceManager fence_manager; |
| 202 | 201 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 89fd31b4f..3be7837f4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() { | |||
| 243 | #if ANDROID | 243 | #if ANDROID |
| 244 | if (Settings::IsGPULevelHigh()) { | 244 | if (Settings::IsGPULevelHigh()) { |
| 245 | // This is problematic on Android, disable on GPU Normal. | 245 | // This is problematic on Android, disable on GPU Normal. |
| 246 | query_cache->UpdateCounters(); | 246 | query_cache->NotifySegment(true); |
| 247 | } | 247 | } |
| 248 | #else | 248 | #else |
| 249 | query_cache->UpdateCounters(); | 249 | query_cache->NotifySegment(true); |
| 250 | #endif | 250 | #endif |
| 251 | } | 251 | } |
| 252 | } | 252 | } |
| @@ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() { | |||
| 261 | #if ANDROID | 261 | #if ANDROID |
| 262 | if (Settings::IsGPULevelHigh()) { | 262 | if (Settings::IsGPULevelHigh()) { |
| 263 | // This is problematic on Android, disable on GPU Normal. | 263 | // This is problematic on Android, disable on GPU Normal. |
| 264 | query_cache->DisableStreams(); | 264 | // query_cache->DisableStreams(); |
| 265 | } | 265 | } |
| 266 | #else | 266 | #else |
| 267 | query_cache->DisableStreams(); | 267 | // query_cache->DisableStreams(); |
| 268 | #endif | 268 | #endif |
| 269 | query_cache->NotifySegment(false); | ||
| 269 | EndRenderPass(); | 270 | EndRenderPass(); |
| 270 | } | 271 | } |
| 271 | 272 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 475c682eb..c87e5fb07 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/polyfill_thread.h" | 16 | #include "common/polyfill_thread.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 18 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -24,7 +25,6 @@ class Device; | |||
| 24 | class Framebuffer; | 25 | class Framebuffer; |
| 25 | class GraphicsPipeline; | 26 | class GraphicsPipeline; |
| 26 | class StateTracker; | 27 | class StateTracker; |
| 27 | class QueryCache; | ||
| 28 | 28 | ||
| 29 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do | 29 | /// The scheduler abstracts command buffer and fence management with an interface that's able to do |
| 30 | /// OpenGL-like operations on Vulkan command buffers. | 30 | /// OpenGL-like operations on Vulkan command buffers. |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 6c7fa34e5..16f0425be 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -61,6 +61,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||
| 61 | 61 | ||
| 62 | // Define miscellaneous extensions which may be used by the implementation here. | 62 | // Define miscellaneous extensions which may be used by the implementation here. |
| 63 | #define FOR_EACH_VK_EXTENSION(EXTENSION) \ | 63 | #define FOR_EACH_VK_EXTENSION(EXTENSION) \ |
| 64 | EXTENSION(EXT, CONDITIONAL_RENDERING, conditional_rendering) \ | ||
| 64 | EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ | 65 | EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization) \ |
| 65 | EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ | 66 | EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted) \ |
| 66 | EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ | 67 | EXTENSION(EXT, MEMORY_BUDGET, memory_budget) \ |
| @@ -93,6 +94,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||
| 93 | 94 | ||
| 94 | // Define extensions where the absence of the extension may result in a degraded experience. | 95 | // Define extensions where the absence of the extension may result in a degraded experience. |
| 95 | #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ | 96 | #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ |
| 97 | EXTENSION_NAME(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME) \ | ||
| 96 | EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ | 98 | EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ |
| 97 | EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ | 99 | EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME) \ |
| 98 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ | 100 | EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ |
| @@ -536,6 +538,10 @@ public: | |||
| 536 | return extensions.shader_atomic_int64; | 538 | return extensions.shader_atomic_int64; |
| 537 | } | 539 | } |
| 538 | 540 | ||
| 541 | bool IsExtConditionalRendering() const { | ||
| 542 | return extensions.conditional_rendering; | ||
| 543 | } | ||
| 544 | |||
| 539 | bool HasTimelineSemaphore() const; | 545 | bool HasTimelineSemaphore() const; |
| 540 | 546 | ||
| 541 | /// Returns the minimum supported version of SPIR-V. | 547 | /// Returns the minimum supported version of SPIR-V. |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index c3f388d89..5a08a92e1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -75,6 +75,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 75 | X(vkBeginCommandBuffer); | 75 | X(vkBeginCommandBuffer); |
| 76 | X(vkBindBufferMemory); | 76 | X(vkBindBufferMemory); |
| 77 | X(vkBindImageMemory); | 77 | X(vkBindImageMemory); |
| 78 | X(vkCmdBeginConditionalRenderingEXT); | ||
| 78 | X(vkCmdBeginQuery); | 79 | X(vkCmdBeginQuery); |
| 79 | X(vkCmdBeginRenderPass); | 80 | X(vkCmdBeginRenderPass); |
| 80 | X(vkCmdBeginTransformFeedbackEXT); | 81 | X(vkCmdBeginTransformFeedbackEXT); |
| @@ -91,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 91 | X(vkCmdCopyBufferToImage); | 92 | X(vkCmdCopyBufferToImage); |
| 92 | X(vkCmdCopyImage); | 93 | X(vkCmdCopyImage); |
| 93 | X(vkCmdCopyImageToBuffer); | 94 | X(vkCmdCopyImageToBuffer); |
| 95 | X(vkCmdCopyQueryPoolResults); | ||
| 94 | X(vkCmdDispatch); | 96 | X(vkCmdDispatch); |
| 95 | X(vkCmdDispatchIndirect); | 97 | X(vkCmdDispatchIndirect); |
| 96 | X(vkCmdDraw); | 98 | X(vkCmdDraw); |
| @@ -99,6 +101,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 99 | X(vkCmdDrawIndexedIndirect); | 101 | X(vkCmdDrawIndexedIndirect); |
| 100 | X(vkCmdDrawIndirectCount); | 102 | X(vkCmdDrawIndirectCount); |
| 101 | X(vkCmdDrawIndexedIndirectCount); | 103 | X(vkCmdDrawIndexedIndirectCount); |
| 104 | X(vkCmdEndConditionalRenderingEXT); | ||
| 102 | X(vkCmdEndQuery); | 105 | X(vkCmdEndQuery); |
| 103 | X(vkCmdEndRenderPass); | 106 | X(vkCmdEndRenderPass); |
| 104 | X(vkCmdEndTransformFeedbackEXT); | 107 | X(vkCmdEndTransformFeedbackEXT); |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 049fa8038..27d94a7d5 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -185,6 +185,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 185 | PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; | 185 | PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; |
| 186 | PFN_vkBindBufferMemory vkBindBufferMemory{}; | 186 | PFN_vkBindBufferMemory vkBindBufferMemory{}; |
| 187 | PFN_vkBindImageMemory vkBindImageMemory{}; | 187 | PFN_vkBindImageMemory vkBindImageMemory{}; |
| 188 | PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT{}; | ||
| 188 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | 189 | PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; |
| 189 | PFN_vkCmdBeginQuery vkCmdBeginQuery{}; | 190 | PFN_vkCmdBeginQuery vkCmdBeginQuery{}; |
| 190 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; | 191 | PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; |
| @@ -202,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 202 | PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; | 203 | PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; |
| 203 | PFN_vkCmdCopyImage vkCmdCopyImage{}; | 204 | PFN_vkCmdCopyImage vkCmdCopyImage{}; |
| 204 | PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; | 205 | PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; |
| 206 | PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults{}; | ||
| 205 | PFN_vkCmdDispatch vkCmdDispatch{}; | 207 | PFN_vkCmdDispatch vkCmdDispatch{}; |
| 206 | PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; | 208 | PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; |
| 207 | PFN_vkCmdDraw vkCmdDraw{}; | 209 | PFN_vkCmdDraw vkCmdDraw{}; |
| @@ -210,6 +212,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 210 | PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; | 212 | PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; |
| 211 | PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; | 213 | PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; |
| 212 | PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; | 214 | PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; |
| 215 | PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{}; | ||
| 213 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | 216 | PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; |
| 214 | PFN_vkCmdEndQuery vkCmdEndQuery{}; | 217 | PFN_vkCmdEndQuery vkCmdEndQuery{}; |
| 215 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | 218 | PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; |
| @@ -1270,6 +1273,13 @@ public: | |||
| 1270 | regions.data()); | 1273 | regions.data()); |
| 1271 | } | 1274 | } |
| 1272 | 1275 | ||
| 1276 | void CopyQueryPoolResults(VkQueryPool query_pool, u32 first_query, u32 query_count, | ||
| 1277 | VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize stride, | ||
| 1278 | VkQueryResultFlags flags) const noexcept { | ||
| 1279 | dld->vkCmdCopyQueryPoolResults(handle, query_pool, first_query, query_count, dst_buffer, | ||
| 1280 | dst_offset, stride, flags); | ||
| 1281 | } | ||
| 1282 | |||
| 1273 | void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, | 1283 | void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, |
| 1274 | u32 data) const noexcept { | 1284 | u32 data) const noexcept { |
| 1275 | dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); | 1285 | dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); |
| @@ -1448,6 +1458,15 @@ public: | |||
| 1448 | counter_buffers, counter_buffer_offsets); | 1458 | counter_buffers, counter_buffer_offsets); |
| 1449 | } | 1459 | } |
| 1450 | 1460 | ||
| 1461 | void BeginConditionalRenderingEXT( | ||
| 1462 | const VkConditionalRenderingBeginInfoEXT& info) const noexcept { | ||
| 1463 | dld->vkCmdBeginConditionalRenderingEXT(handle, &info); | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | void EndConditionalRenderingEXT() const noexcept { | ||
| 1467 | dld->vkCmdEndConditionalRenderingEXT(handle); | ||
| 1468 | } | ||
| 1469 | |||
| 1451 | void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { | 1470 | void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { |
| 1452 | const VkDebugUtilsLabelEXT label_info{ | 1471 | const VkDebugUtilsLabelEXT label_info{ |
| 1453 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, | 1472 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, |